12 files changed, 181 insertions, 374 deletions
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 6b91bdce9c..aa1cd5e300 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -54,6 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "drivers/common/driverfuncs.h"
 
 #include "radeon_context.h"
+#include "common_cmdbuf.h"
 #include "radeon_ioctl.h"
 #include "radeon_state.h"
 #include "radeon_span.h"
@@ -243,6 +244,7 @@ static void r100_init_vtbl(radeonContextPtr radeon)
    radeon->vtbl.update_draw_buffer = radeonUpdateDrawBuffer;
    radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;
    radeon->vtbl.emit_state = r100_vtbl_emit_state;
+   radeon->vtbl.swtcl_flush = r100_swtcl_flush;
 }
 
 /* Create the device specific context.
@@ -332,7 +334,7 @@ radeonCreateContext( const __GLcontextModes *glVisual,
       rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ?
 	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
 
-   rmesa->swtcl.RenderIndex = ~0;
+   rmesa->radeon.swtcl.RenderIndex = ~0;
    rmesa->hw.all_dirty = GL_TRUE;
 
    /* Set the maximum texture size small enough that we can guarentee that
@@ -512,7 +514,7 @@ void radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
       radeonReleaseArrays( rmesa->radeon.glCtx, ~0 );
       if (rmesa->radeon.dma.current) {
 	 radeonReleaseDmaRegion( &rmesa->radeon );
-	 radeonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
+	 rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
       }
 
       _mesa_vector4f_free( &rmesa->tcl.ObjClean );
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
index ba5c57f121..6edbaf6910 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -340,7 +340,7 @@ struct r100_state {
 
 
 #define RADEON_CMD_BUF_SZ  (8*1024)
-
+#define R200_ELT_BUF_SZ  (8*1024)
 /* radeon_tcl.c
  */
 struct radeon_tcl_info {
@@ -364,18 +364,17 @@ struct radeon_tcl_info {
 	struct radeon_dma_region fog;
 	struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
 	struct radeon_dma_region norm;
+
+        struct radeon_bo *elt_dma_bo;
+        int elt_dma_offset; /** Offset into this buffer object, in bytes */
+        int elt_used;
 };
 
 /* radeon_swtcl.c
  */
-struct radeon_swtcl_info {
-	GLuint RenderIndex;
-	GLuint vertex_size;
+struct r100_swtcl_info {
 	GLuint vertex_format;
 
-	struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
-	GLuint vertex_attr_count;
-
 	GLubyte *verts;
 
 	/* Fallback rasterization functions
@@ -384,10 +383,6 @@ struct radeon_swtcl_info {
 	radeon_line_func draw_line;
 	radeon_tri_func draw_tri;
 
-	GLuint hw_primitive;
-	GLenum render_primitive;
-	GLuint numverts;
-
    /**
     * Offset of the 4UB color data within a hardware (swtcl) vertex.
     */
@@ -448,7 +443,7 @@ struct r100_context {
 
 	/* radeon_swtcl.c
 	 */
-	struct radeon_swtcl_info swtcl;
+	struct r100_swtcl_info swtcl;
 
 	GLboolean using_hyperz;
 	GLboolean texmicrotile;
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
index fe4f90e0b1..1c885c1612 100644
--- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
@@ -313,7 +313,7 @@ static int cs_emit(struct radeon_cs *cs)
         cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects;
     }
 
-    //    dump_cmdbuf(cs);
+     //dump_cmdbuf(cs);
 
     r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
     if (r) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
index b101668f77..a823c79d6c 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -43,6 +43,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "swrast/swrast.h"
 
 #include "radeon_context.h"
+#include "common_cmdbuf.h"
+#include "radeon_cs.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
 #include "radeon_tcl.h"
@@ -186,7 +188,7 @@ void radeonEmitState( r100ContextPtr rmesa )
     * for enough space for the case of emitting all state, and inline the
     * radeonAllocCmdBuf code here without all the checks.
     */
-   radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
+   rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->hw.max_state_size, __FUNCTION__);
    dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
 
    /* We always always emit zbs, this is due to a bug found by keithw in
@@ -243,8 +245,7 @@ extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
 				GLuint primitive,
 				GLuint vertex_nr )
 {
-   drm_radeon_cmd_header_t *cmd;
-
+   BATCH_LOCALS(&rmesa->radeon);
 
    assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
    
@@ -254,43 +255,16 @@ extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
       fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
 	      rmesa->store.cmd_used/4);
    
-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
-						       __FUNCTION__ );
-#if RADEON_OLD_PACKETS
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
-   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
-   cmd[2].i = rmesa->ioctl.vertex_offset;
-   cmd[3].i = vertex_nr;
-   cmd[4].i = vertex_format;
-   cmd[5].i = (primitive | 
-	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
-	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-
-   if (RADEON_DEBUG & DEBUG_PRIMS)
-      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
-	      __FUNCTION__,
-	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
-#else
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
-   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
-   cmd[2].i = vertex_format;
-   cmd[3].i = (primitive | 
-	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
-	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-
-
-   if (RADEON_DEBUG & DEBUG_PRIMS)
-      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
-	      __FUNCTION__,
-	      cmd[1].i, cmd[2].i, cmd[3].i);
-#endif
+   BEGIN_BATCH(3);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 0);
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(primitive |
+	     RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+   END_BATCH();
 }
 
 
@@ -346,35 +320,14 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
    
    radeonEmitState( rmesa );
    
-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
-						       ELTS_BUFSZ(min_nr),
-						       __FUNCTION__ );
-#if RADEON_OLD_PACKETS
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
-   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
-   cmd[2].i = rmesa->ioctl.vertex_offset;
-   cmd[3].i = 0xffff;
-   cmd[4].i = vertex_format;
-   cmd[5].i = (primitive | 
-	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-
-   retval = (GLushort *)(cmd+6);
-#else   
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
-   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
-   cmd[2].i = vertex_format;
-   cmd[3].i = (primitive | 
-	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-
-   retval = (GLushort *)(cmd+4);
-#endif
+   rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
+					  0, R200_ELT_BUF_SZ, 4,
+					  RADEON_GEM_DOMAIN_GTT, 0);
+   rmesa->tcl.elt_dma_offset = 0;
+   rmesa->tcl.elt_used = min_nr * 2;
+
+   radeon_bo_map(rmesa->tcl.elt_dma_bo, 1);
+   retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset;
 
    if (RADEON_DEBUG & DEBUG_PRIMS)
       fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
@@ -400,21 +353,19 @@ void radeonEmitVertexAOS( r100ContextPtr rmesa,
    rmesa->ioctl.vertex_size = vertex_size;
    rmesa->ioctl.vertex_offset = offset;
 #else
-   drm_radeon_cmd_header_t *cmd;
+   BATCH_LOCALS(&rmesa->radeon);
 
    if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
       fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
 	      __FUNCTION__, vertex_size, offset);
 
-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
-						  __FUNCTION__ );
-
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
-   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
-   cmd[2].i = 1;
-   cmd[3].i = vertex_size | (vertex_size << 8);
-   cmd[4].i = offset;
+   BEGIN_BATCH(5);
+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
+   OUT_BATCH(1);
+   OUT_BATCH(vertex_size | (vertex_size << 8));
+   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+   END_BATCH();
+}
 #endif
 }
 		       
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
index b4bc9b1144..87d64fb728 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
@@ -153,36 +153,37 @@ do {							\
 #define VBUF_BUFSZ	(4 * sizeof(int))
 #endif
 
-/* Ensure that a minimum amount of space is available in the command buffer.
- * This is used to ensure atomicity of state updates with the rendering requests
- * that rely on them.
- *
- * An alternative would be to implement a "soft lock" such that when the buffer
- * wraps at an inopportune time, we grab the lock, flush the current buffer,
- * and hang on to the lock until the critical section is finished and we flush
- * the buffer again and unlock.
- */
-static INLINE void radeonEnsureCmdBufSpace( r100ContextPtr rmesa,
-					      int bytes )
-{
-   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-   assert( bytes <= RADEON_CMD_BUF_SZ );
-}
 
-/* Alloc space in the command buffer
- */
-static INLINE char *radeonAllocCmdBuf( r100ContextPtr rmesa,
-					 int bytes, const char *where )
+static inline uint32_t cmdpacket3(int cmd_type)
 {
-   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+  drm_radeon_cmd_header_t cmd;
+
+  cmd.i = 0;
+  cmd.header.cmd_type = cmd_type;
+
+  return (uint32_t)cmd.i;
 
-   {
-      char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
-      rmesa->store.cmd_used += bytes;
-      return head;
-   }
 }
 
+#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));				      \
+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+    } else {						      \
+      OUT_BATCH(CP_PACKET2);				      \
+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+    }							      \
+  } while(0)
+
+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+    } else {						      \
+      OUT_BATCH(CP_PACKET2);				      \
+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+    }							      \
+  } while(0)
+
+
 #endif /* __RADEON_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
index 2abf644ef2..5a0bbaacc9 100644
--- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
@@ -380,8 +380,8 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
    if (rmesa->tcl.indexed_verts.buf)
       radeonReleaseArrays( ctx, ~0 );
 
-   radeonAllocDmaRegion( rmesa,
-			 &rmesa->tcl.indexed_verts, 
+   radeonAllocDmaRegion( &rmesa->radeon,
+			 0,			 &rmesa->tcl.indexed_verts, 
 			 VB->Count * setup_tab[i].vertex_size * 4, 
 			 4);
 
@@ -425,7 +425,7 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
 		      rmesa->tcl.indexed_verts.start );
 
    rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
-   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
+   //   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
    rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
    rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
 
@@ -444,6 +444,6 @@ void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
       _tnl_print_vert_flags( __FUNCTION__, newinputs );
 #endif
 
-   if (newinputs) 
-     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
+   ///   if (newinputs) 
+      ///    radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index a850baea29..332a0ca69b 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -59,21 +59,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * information.
  */
 #define LOCAL_VARS						\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
+   struct radeon_renderbuffer *rrb = (void *) rb;		\
+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;		\
    const GLuint bottom = dPriv->h - 1;				\
-   GLubyte *buf = (GLubyte *) drb->flippedData			\
-      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
    GLuint p;							\
-   (void) p;
+   (void) p;				
 
 #define LOCAL_DEPTH_VARS				\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
-   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
+   struct radeon_renderbuffer *rrb = (void *) rb;		\
+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;	\
    const GLuint bottom = dPriv->h - 1;			\
    GLuint xo = dPriv->x;				\
-   GLuint yo = dPriv->y;				\
-   GLubyte *buf = (GLubyte *) drb->Base.Data;
+   GLuint yo = dPriv->y;
 
 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
 
@@ -94,7 +91,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define TAG(x)    radeon##x##_RGB565
 #define TAG2(x,y) radeon##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
 #include "spantmp2.h"
 
 /* 32 bit, ARGB8888 color spanline and pixel functions
@@ -104,82 +101,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define TAG(x)    radeon##x##_ARGB8888
 #define TAG2(x,y) radeon##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
 #include "spantmp2.h"
 
-/* ================================================================
- * Depth buffer
- */
-
-/* The Radeon family has depth tiling on all the time, so we have to convert
- * the x,y coordinates into the memory bus address (mba) in the same
- * manner as the engine.  In each case, the linear block address (ba)
- * is calculated, and then wired with x and y to produce the final
- * memory address.
- * The chip will do address translation on its own if the surface registers
- * are set up correctly. It is not quite enough to get it working with hyperz
- * too...
- */
-
-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
-{
-	GLuint pitch = drb->pitch;
-	if (drb->depthHasSurface) {
-		return 4 * (x + y * pitch);
-	} else {
-		GLuint ba, address = 0;	/* a[0..1] = 0           */
-
-#ifdef COMPILE_R300
-		ba = (y / 8) * (pitch / 8) + (x / 8);
-#else
-		ba = (y / 16) * (pitch / 16) + (x / 16);
-#endif
-
-		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
-		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
-		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
-		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
-
-		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
-		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
-		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
-
-		return address;
-	}
-}
-
-static INLINE GLuint
-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
-{
-	GLuint pitch = drb->pitch;
-	if (drb->depthHasSurface) {
-		return 2 * (x + y * pitch);
-	} else {
-		GLuint ba, address = 0;	/* a[0]    = 0           */
-
-		ba = (y / 16) * (pitch / 32) + (x / 32);
-
-		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
-		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
-		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
-		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
-		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
-		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
-		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
-
-		return address;
-	}
-}
-
 /* 16-bit depth buffer functions
  */
 #define VALUE_TYPE GLushort
 
 #define WRITE_DEPTH( _x, _y, d )					\
-   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
+   *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo) = d
 
 #define READ_DEPTH( d, _x, _y )						\
-   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
+   d = *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo)
 
 #define TAG(x) radeon##x##_z16
 #include "depthtmp.h"
@@ -203,24 +136,25 @@ do {									\
 #else
 #define WRITE_DEPTH( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);		\
+   GLuint tmp = *_ptr;				\
    tmp &= 0xff000000;							\
    tmp |= ((d) & 0x00ffffff);						\
-   *(GLuint *)(buf + offset) = tmp;					\
+   *_ptr = tmp;					\
 } while (0)
 #endif
 
 #ifdef COMPILE_R300
 #define READ_DEPTH( d, _x, _y )						\
-  do { \
+  do {									\
     d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
 					 _y + yo )) & 0xffffff00) >> 8; \
   }while(0)
 #else
 #define READ_DEPTH( d, _x, _y )						\
-   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
-					 _y + yo )) & 0x00ffffff;
+   do {									\
+    d = (*(GLuint*)(radeon_ptr32(rrb, _x + xo, _y + yo)) & 0x00ffffff); \
+   } while (0)
 #endif
 
 #define TAG(x) radeon##x##_z24_s8
@@ -244,11 +178,11 @@ do {									\
 #else
 #define WRITE_STENCIL( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);		\
+   GLuint tmp = *_ptr;				\
    tmp &= 0x00ffffff;							\
    tmp |= (((d) & 0xff) << 24);						\
-   *(GLuint *)(buf + offset) = tmp;					\
+   *_ptr = tmp;					\
 } while (0)
 #endif
 
@@ -262,8 +196,8 @@ do {									\
 #else
 #define READ_STENCIL( d, _x, _y )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);		\
+   GLuint tmp = *_ptr;							\
    d = (tmp & 0xff000000) >> 24;					\
 } while (0)
 #endif
@@ -271,32 +205,6 @@ do {									\
 #define TAG(x) radeon##x##_z24_s8
 #include "stenciltmp.h"
 
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above).  WaitForIdle() is the main
- * culprit.
- */
-
-static void radeonSpanRenderStart(GLcontext * ctx)
-{
-	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-#ifdef COMPILE_R300
-	r300ContextPtr r300 = (r300ContextPtr) rmesa;
-	R300_FIREVERTICES(r300);
-#else
-	r100ContextPtr r100 = (r100ContextPtr) rmesa;
-	RADEON_FIREVERTICES(r100);
-#endif
-	LOCK_HARDWARE(rmesa);
-	radeonWaitForIdleLocked(rmesa);
-}
-
-static void radeonSpanRenderFinish(GLcontext * ctx)
-{
-	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-	_swrast_flush(ctx);
-	UNLOCK_HARDWARE(rmesa);
-}
-
 void radeonInitSpanFuncs(GLcontext * ctx)
 {
 	struct swrast_device_driver *swdd =
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index ef89d73bdd..4b49c141a0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -52,8 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_tcl.h"
 
 
-static void flush_last_swtcl_prim(GLcontext *ctx);
-
 /* R100: xyzw, c0, c1/fog, stq[0..2]  = 4+1+1+3*3 = 15  right? */
 /* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
 #define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat))	/* for mesa _tnl stage */
@@ -64,18 +62,18 @@ static void flush_last_swtcl_prim(GLcontext *ctx);
 
 #define EMIT_ATTR( ATTR, STYLE, F0 )					\
 do {									\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
-   rmesa->swtcl.vertex_attr_count++;					\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
+   rmesa->radeon.swtcl.vertex_attr_count++;					\
    fmt_0 |= F0;								\
 } while (0)
 
 #define EMIT_PAD( N )							\
 do {									\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
-   rmesa->swtcl.vertex_attr_count++;					\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
+   rmesa->radeon.swtcl.vertex_attr_count++;					\
 } while (0)
 
 static GLuint radeon_cp_vc_frmts[3][2] =
@@ -106,7 +104,7 @@ static void radeonSetVertexFormat( GLcontext *ctx )
    }
 
    assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
-   rmesa->swtcl.vertex_attr_count = 0;
+   rmesa->radeon.swtcl.vertex_attr_count = 0;
 
    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
     * build up a hardware vertex.
@@ -208,16 +206,16 @@ static void radeonSetVertexFormat( GLcontext *ctx )
 	fmt_0 != rmesa->swtcl.vertex_format) {
       RADEON_NEWPRIM(rmesa);
       rmesa->swtcl.vertex_format = fmt_0;
-      rmesa->swtcl.vertex_size =
+      rmesa->radeon.swtcl.vertex_size =
 	  _tnl_install_attrs( ctx,
-			      rmesa->swtcl.vertex_attrs, 
-			      rmesa->swtcl.vertex_attr_count,
+			      rmesa->radeon.swtcl.vertex_attrs, 
+			      rmesa->radeon.swtcl.vertex_attr_count,
 			      NULL, 0 );
-      rmesa->swtcl.vertex_size /= 4;
+      rmesa->radeon.swtcl.vertex_size /= 4;
       RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
       if (RADEON_DEBUG & DEBUG_VERTS)
 	 fprintf( stderr, "%s: vertex_size= %d floats\n",
-		  __FUNCTION__, rmesa->swtcl.vertex_size);
+		  __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
    }
 }
 
@@ -228,9 +226,9 @@ static void radeonRenderStart( GLcontext *ctx )
 
    radeonSetVertexFormat( ctx );
    
-   if (rmesa->dma.flush != 0 && 
-       rmesa->dma.flush != flush_last_swtcl_prim)
-      rmesa->dma.flush( ctx );
+   if (rmesa->radeon.dma.flush != 0 && 
+       rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
+      rmesa->radeon.dma.flush( ctx );
 }
 
 
@@ -281,81 +279,29 @@ void radeonChooseVertexState( GLcontext *ctx )
    }
 }
 
-
-/* Flush vertices in the current dma region.
- */
-static void flush_last_swtcl_prim(GLcontext *ctx)
+void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
 {
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   rmesa->dma.flush = NULL;
-
-   if (rmesa->dma.current.buf) {
-      struct radeon_dma_region *current = &rmesa->dma.current;
-      GLuint current_offset = (rmesa->radeon.radeonScreen->gart_buffer_offset +
-			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
-			       current->start);
-
-      assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-
-      assert (current->start + 
-	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-	      current->ptr);
-
-      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
-	 radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
-			          rmesa->hw.max_state_size + VBUF_BUFSZ );
-
-	 radeonEmitVertexAOS( rmesa,
-			      rmesa->swtcl.vertex_size,
-			      current_offset);
-
-	 radeonEmitVbufPrim( rmesa,
-			     rmesa->swtcl.vertex_format,
-			     rmesa->swtcl.hw_primitive,
-			     rmesa->swtcl.numverts);
-      }
-
-      rmesa->swtcl.numverts = 0;
-      current->start = current->ptr;
-   }
-}
-
-
-/* Alloc space in the current dma region.
- */
-static INLINE void *
-radeonAllocDmaLowVerts( r100ContextPtr rmesa, int nverts, int vsize )
-{
-   GLuint bytes = vsize * nverts;
 
-   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
-      radeonRefillCurrentDmaRegion( rmesa );
+   rcommonEnsureCmdBufSpace(&rmesa->radeon,
+			    rmesa->hw.max_state_size + (12*sizeof(int)),
+			    __FUNCTION__);
 
-   if (!rmesa->dma.flush) {
-      rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-      rmesa->dma.flush = flush_last_swtcl_prim;
-   }
-
-   assert( vsize == rmesa->swtcl.vertex_size * 4 );
-   assert( rmesa->dma.flush == flush_last_swtcl_prim );
-   assert (rmesa->dma.current.start + 
-	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-	   rmesa->dma.current.ptr);
 
+   radeonEmitState(rmesa);
+   radeonEmitVertexAOS( rmesa,
+			rmesa->radeon.swtcl.vertex_size,
+			//			rmesa->radeon.dma.current,
+			current_offset);
 
-   {
-      GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
-      rmesa->dma.current.ptr += bytes;
-      rmesa->swtcl.numverts += nverts;
-      return head;
-   }
+		      
+   radeonEmitVbufPrim( rmesa,
+		       rmesa->swtcl.vertex_format,
+		       rmesa->radeon.swtcl.hw_primitive,
+		       rmesa->radeon.swtcl.numverts);
 
 }
 
-
 /*
  * Render unclipped vertex buffers by emitting vertices directly to
  * dma buffers.  Use strip/fan hardware primitives where possible.
@@ -391,19 +337,19 @@ static INLINE void
 radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
 {
    RADEON_NEWPRIM( rmesa );
-   rmesa->swtcl.hw_primitive = hw_prim[prim];
-   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
+   rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];
+   //   assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
 }
 
 #define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
 #define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
 #define FLUSH()  RADEON_NEWPRIM( rmesa )
-#define GET_CURRENT_VB_MAX_VERTS() \
-  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
+#define GET_CURRENT_VB_MAX_VERTS()					10\
+//  (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4))
 #define GET_SUBSEQUENT_VB_MAX_VERTS() \
-  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
+  ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
 #define ALLOC_VERTS( nr ) \
-  radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
+  rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 )
 #define EMIT_VERTS( ctx, j, nr, buf ) \
   _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
 
@@ -428,7 +374,7 @@ static GLboolean radeon_run_render( GLcontext *ctx,
    if (rmesa->swtcl.indexed_verts.buf) 
       RELEASE_ELT_VERTS();
    	
-   if (rmesa->swtcl.RenderIndex != 0 ||   
+   if (rmesa->radeon.swtcl.RenderIndex != 0 ||   
        !radeon_dma_validate_render( ctx, VB ))
       return GL_TRUE;		
 
@@ -498,12 +444,12 @@ static void radeonResetLineStipple( GLcontext *ctx );
 #undef LOCAL_VARS
 #undef ALLOC_VERTS
 #define CTX_ARG r100ContextPtr rmesa
-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
-#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 )
 #undef LOCAL_VARS
 #define LOCAL_VARS						\
    r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
-   const char *radeonverts = (char *)rmesa->swtcl.verts;
+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
 #define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
 #define VERTEX radeonVertex 
 #undef TAG
@@ -561,7 +507,7 @@ static struct {
 #define VERT_Y(_v) _v->v.y
 #define VERT_Z(_v) _v->v.z
 #define AREA_IS_CCW( a ) (a < 0)
-#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
 
 #define VERT_SET_RGBA( v, c )  					\
 do {								\
@@ -618,7 +564,7 @@ do {							\
  ***********************************************************************/
 
 #define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
 #undef TAG
 #define TAG(x) x
 #include "tnl_dd/t_dd_unfilled.h"
@@ -675,8 +621,8 @@ static void init_rast_tab( void )
 #undef LOCAL_VARS
 #define LOCAL_VARS						\
    r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
-   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
-   const char *radeonverts = (char *)rmesa->swtcl.verts;		\
+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;		\
    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
    const GLboolean stipple = ctx->Line.StippleFlag;		\
    (void) elt; (void) stipple;
@@ -711,7 +657,7 @@ void radeonChooseRenderState( GLcontext *ctx )
    if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
    if (flags & DD_TRI_UNFILLED)      index |= RADEON_UNFILLED_BIT;
 
-   if (index != rmesa->swtcl.RenderIndex) {
+   if (index != rmesa->radeon.swtcl.RenderIndex) {
       tnl->Driver.Render.Points = rast_tab[index].points;
       tnl->Driver.Render.Line = rast_tab[index].line;
       tnl->Driver.Render.ClippedLine = rast_tab[index].line;
@@ -728,7 +674,7 @@ void radeonChooseRenderState( GLcontext *ctx )
 	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
       }
 
-      rmesa->swtcl.RenderIndex = index;
+      rmesa->radeon.swtcl.RenderIndex = index;
    }
 }
 
@@ -742,16 +688,16 @@ static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
 {
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
-   if (rmesa->swtcl.hw_primitive != hwprim) {
+   if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
       RADEON_NEWPRIM( rmesa );
-      rmesa->swtcl.hw_primitive = hwprim;
+      rmesa->radeon.swtcl.hw_primitive = hwprim;
    }
 }
 
 static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
 {
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
-   rmesa->swtcl.render_primitive = prim;
+   rmesa->radeon.swtcl.render_primitive = prim;
    if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
       radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
 }
@@ -806,7 +752,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 	 RADEON_FIREVERTICES( rmesa );
 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
 	 _swsetup_Wakeup( ctx );
-	 rmesa->swtcl.RenderIndex = ~0;
+	 rmesa->radeon.swtcl.RenderIndex = ~0;
          if (RADEON_DEBUG & DEBUG_FALLBACKS) {
             fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
                     bit, getFallbackString(bit));
@@ -873,10 +819,10 @@ void radeonInitSwtcl( GLcontext *ctx )
    _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
 		       RADEON_MAX_TNL_VERTEX_SIZE);
    
-   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
-   rmesa->swtcl.RenderIndex = ~0;
-   rmesa->swtcl.render_primitive = GL_TRIANGLES;
-   rmesa->swtcl.hw_primitive = 0;
+   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+   rmesa->radeon.swtcl.RenderIndex = ~0;
+   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+   rmesa->radeon.swtcl.hw_primitive = 0;
 }
 
 
@@ -884,7 +830,7 @@ void radeonDestroySwtcl( GLcontext *ctx )
 {
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
-   if (rmesa->swtcl.indexed_verts.buf) 
-      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
-			      __FUNCTION__ );
+   // if (rmesa->swtcl.indexed_verts.buf) 
+   //    radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+   //			      __FUNCTION__ );
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
index e485052ad7..3ada9890b3 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.h
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
@@ -63,5 +63,5 @@ extern void radeon_translate_vertex( GLcontext *ctx,
 
 extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
 
-
+extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
 #endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index 81c0c84257..a54e471065 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -145,15 +145,15 @@ static GLboolean discrete_prim[0x10] = {
 
 static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) 
 {
-   if (rmesa->dma.flush)
-      rmesa->dma.flush( rmesa->radeon.glCtx );
+   if (rmesa->radeon.dma.flush)
+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
 
-   radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+   rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
 			   rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
 
    radeonEmitAOS( rmesa,
-		rmesa->tcl.aos_components,
-		rmesa->tcl.nr_aos_components, 0 );
+		  rmesa->tcl.aos_components,
+		  rmesa->tcl.nr_aos_components, 0 );
 
    return radeonAllocEltsOpenEnded( rmesa,
 				    rmesa->tcl.vertex_format, 
@@ -182,12 +182,14 @@ static void radeonEmitPrim( GLcontext *ctx,
 			     rmesa->hw.max_state_size + VBUF_BUFSZ );
 
    radeonEmitAOS( rmesa,
+		  rmesa->tcl.aos_components,
 		  rmesa->tcl.nr_aos_components,
 		  start );
    
    /* Why couldn't this packet have taken an offset param?
     */
    radeonEmitVbufPrim( rmesa,
+		       0,
 		       rmesa->tcl.hw_primitive,
 		       count - start );
 }
@@ -508,15 +510,15 @@ static void transition_to_hwtnl( GLcontext *ctx )
 
    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
 
-   if ( rmesa->dma.flush )			
-      rmesa->dma.flush( rmesa->radeon.glCtx );	
+   if ( rmesa->radeon.dma.flush )			
+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
 
-   rmesa->dma.flush = NULL;
+   rmesa->radeon.dma.flush = NULL;
    rmesa->swtcl.vertex_format = 0;
    
-   if (rmesa->swtcl.indexed_verts.buf) 
-      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
-			      __FUNCTION__ );
+   //   if (rmesa->swtcl.indexed_verts.buf) 
+   //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+   //			      __FUNCTION__ );
 
    if (RADEON_DEBUG & DEBUG_FALLBACKS) 
       fprintf(stderr, "Radeon end tcl fallback\n");
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index c6a911abef..9ef08fbe68 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/texobj.h"
 
 #include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
 #include "radeon_swtcl.h"
@@ -453,7 +454,7 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
    functions->CompressedTexImage2D	= radeonCompressedTexImage2D;
    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
 
-   functions->GenerateMipmap = radeon_generate_mipmap;
+   functions->GenerateMipmap = radeonGenerateMipmap;
 
    functions->NewTextureImage = radeonNewTextureImage;
    functions->FreeTexImageData = radeonFreeTexImageData;
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 9ede92b831..3c28d70e37 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/enums.h"
 
 #include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
 #include "radeon_swtcl.h"
@@ -75,10 +76,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
 			     && (tx_table[f].format != 0xffffffff) )
 
-static const struct {
+struct tx_table {
    GLuint format, filter;
-}
-tx_table[] =
+};
+
+static const struct tx_table tx_table[] =
 {
    _ALPHA(RGBA8888),
    _ALPHA_REV(RGBA8888),
@@ -901,13 +903,13 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
 			      RADEON_TXFORMAT_CUBIC_MAP_ENABLE |	\
                               RADEON_TXFORMAT_NON_POWER2)
 
-
+#if 0
 static void import_tex_obj_state( r100ContextPtr rmesa,
 				  int unit,
 				  radeonTexObjPtr texobj )
 {
 /* do not use RADEON_DB_STATE to avoid stale texture caches */
-   int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+   uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
 
    RADEON_STATECHANGE( rmesa, tex[unit] );
@@ -955,7 +957,7 @@ static void import_tex_obj_state( r100ContextPtr rmesa,
 
    texobj->dirty_state &= ~(1<<unit);
 }
-
+#endif
 
 
 
@@ -1354,8 +1356,7 @@ static void setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t)
 
    if (!t->image_override) {
       if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
-	 const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
-	    tx_table_be;
+	const struct tx_table *table = tx_table;
 
 	 t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
 			     RADEON_TXFORMAT_ALPHA_IN_MAP);
@@ -1399,8 +1400,8 @@ static void setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t)
                            (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
    }
 
-   t->pp_txsize = (((firstImage->Width - 1) << RADEON_PP_TX_WIDTHMASK_SHIFT)
-		   | ((firstImage->Height - 1) << RADEON_PP_TX_HEIGHTMASK_SHIFT));
+   t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
+		   | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
 
    if ( !t->image_override ) {
       if (firstImage->IsCompressed)