40 files changed, 1456 insertions, 902 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index b96ee23a99..e038a5229e 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -17,11 +17,13 @@ C_SOURCES = \
 	lp_bld_depth.c \
 	lp_bld_flow.c \
 	lp_bld_format_aos.c \
+	lp_bld_format_query.c \
 	lp_bld_format_soa.c \
 	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_logic.c \
 	lp_bld_pack.c \
+	lp_bld_sample.c \
 	lp_bld_sample_soa.c \
 	lp_bld_swizzle.c \
 	lp_bld_struct.c \
@@ -33,7 +35,6 @@ C_SOURCES = \
 	lp_draw_arrays.c \
 	lp_flush.c \
 	lp_jit.c \
-	lp_prim_setup.c \
 	lp_prim_vbuf.c \
 	lp_setup.c \
 	lp_query.c \
@@ -55,6 +56,9 @@ C_SOURCES = \
 	lp_tile_cache.c \
 	lp_tile_soa.c
 
+CPP_SOURCES = \
+	lp_bld_misc.cpp
+
 include ../../Makefile.template
 
 lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 89d08834a3..0c3f00fd58 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -51,21 +51,22 @@ Requirements
 
  - Linux
  
- - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes
-   opcodes not yet supported by upstream.
+ - A x86 or amd64 processor.  64bit mode is preferred.
  
-     git clone git://people.freedesktop.org/~jrfonseca/udis86
-     cd udis86
-     ./configure --with-pic
-     make
-     sudo make install
+   Support for sse2 is strongly encouraged.  Support for ssse3, and sse4.1 will
+   yield the most efficient code.  The less features the CPU has the more
+   likely is that you ran into underperforming, buggy, or incomplete code.  
+   
+   See /proc/cpuinfo to know what your CPU supports.
+ 
+ - LLVM 2.5 or greater. LLVM 2.6 is preferred.
  
- - LLVM 2.5. On Debian based distributions do:
+   On Debian based distributions do:
  
      aptitude install llvm-dev
 
-   There is a typo in one of the llvm-dev 2.5 headers, that causes compilation
-   errors in the debug build:
+   There is a typo in one of the llvm 2.5 headers, that may cause compilation
+   errors. To fix it apply the change:
 
      --- /usr/include/llvm-c/Core.h.orig	2009-08-10 15:38:54.000000000 +0100
      +++ /usr/include/llvm-c/Core.h	2009-08-10 15:38:25.000000000 +0100
@@ -79,12 +80,17 @@ Requirements
           #endif
           return reinterpret_cast<T**>(Vals);
  
- - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD
-   instructions. This is necessary because we emit several SSE intrinsics for
-   convenience. See /proc/cpuinfo to know what your CPU supports.
- 
- - scons
+ - scons (optional)
 
+ - udis86, http://udis86.sourceforge.net/ (optional):
+ 
+     git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86
+     cd udis86
+     ./autogen.sh
+     ./configure --with-pic
+     make
+     sudo make install
+ 
 
 Building
 ========
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 403e4daa43..3bd2e70013 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -30,10 +30,13 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_depth.c',
 		'lp_bld_flow.c',
 		'lp_bld_format_aos.c',
+        'lp_bld_format_query.c',
 		'lp_bld_format_soa.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
+		'lp_bld_misc.cpp',
         'lp_bld_pack.c',
+        'lp_bld_sample.c',
 		'lp_bld_sample_soa.c',
 		'lp_bld_struct.c',
 		'lp_bld_logic.c',
@@ -46,7 +49,6 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_draw_arrays.c',
 		'lp_flush.c',
 		'lp_jit.c',
-		'lp_prim_setup.c',
 		'lp_prim_vbuf.c',
 		'lp_setup.c',
 		'lp_query.c',
@@ -76,7 +78,7 @@ env.Prepend(LIBS = [llvmpipe] + auxiliaries)
 
 env.Program(
     target = 'lp_test_format',
-    source = ['lp_test_format.c'],
+    source = ['lp_test_format.c', 'lp_test_main.c'],
 )
 
 env.Program(
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 83ca06acf8..9c59677a74 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -47,6 +47,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_debug.h"
+#include "util/u_math.h"
 #include "util/u_string.h"
 #include "util/u_cpu_detect.h"
 
@@ -361,6 +362,8 @@ lp_build_mul(struct lp_build_context *bld,
              LLVMValueRef b)
 {
    const struct lp_type type = bld->type;
+   LLVMValueRef shift;
+   LLVMValueRef res;
 
    if(a == bld->zero)
       return bld->zero;
@@ -394,10 +397,84 @@ lp_build_mul(struct lp_build_context *bld,
       assert(0);
    }
 
-   if(LLVMIsConstant(a) && LLVMIsConstant(b))
-      return LLVMConstMul(a, b);
+   if(type.fixed)
+      shift = lp_build_int_const_scalar(type, type.width/2);
+   else
+      shift = NULL;
+
+   if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+      res =  LLVMConstMul(a, b);
+      if(shift) {
+         if(type.sign)
+            res = LLVMConstAShr(res, shift);
+         else
+            res = LLVMConstLShr(res, shift);
+      }
+   }
+   else {
+      res = LLVMBuildMul(bld->builder, a, b, "");
+      if(shift) {
+         if(type.sign)
+            res = LLVMBuildAShr(bld->builder, res, shift, "");
+         else
+            res = LLVMBuildLShr(bld->builder, res, shift, "");
+      }
+   }
 
-   return LLVMBuildMul(bld->builder, a, b, "");
+   return res;
+}
+
+
+/**
+ * Small vector x scale multiplication optimization.
+ */
+LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 int b)
+{
+   LLVMValueRef factor;
+
+   if(b == 0)
+      return bld->zero;
+
+   if(b == 1)
+      return a;
+
+   if(b == -1)
+      return LLVMBuildNeg(bld->builder, a, "");
+
+   if(b == 2 && bld->type.floating)
+      return lp_build_add(bld, a, a);
+
+   if(util_is_pot(b)) {
+      unsigned shift = ffs(b) - 1;
+
+      if(bld->type.floating) {
+#if 0
+         /*
+          * Power of two multiplication by directly manipulating the mantissa.
+          *
+          * XXX: This might not be always faster, it will introduce a small error
+          * for multiplication by zero, and it will produce wrong results
+          * for Inf and NaN.
+          */
+         unsigned mantissa = lp_mantissa(bld->type);
+         factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
+         a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
+         a = LLVMBuildAdd(bld->builder, a, factor, "");
+         a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
+         return a;
+#endif
+      }
+      else {
+         factor = lp_build_const_scalar(bld->type, shift);
+         return LLVMBuildShl(bld->builder, a, factor, "");
+      }
+   }
+
+   factor = lp_build_const_scalar(bld->type, (double)b);
+   return lp_build_mul(bld, a, factor);
 }
 
 
@@ -432,13 +509,36 @@ lp_build_div(struct lp_build_context *bld,
 }
 
 
+/**
+ * Linear interpolation.
+ *
+ * This also works for integer values with a few caveats.
+ *
+ * @sa http://www.stereopsis.com/doubleblend.html
+ */
 LLVMValueRef
 lp_build_lerp(struct lp_build_context *bld,
               LLVMValueRef x,
               LLVMValueRef v0,
               LLVMValueRef v1)
 {
-   return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
+   LLVMValueRef delta;
+   LLVMValueRef res;
+
+   delta = lp_build_sub(bld, v1, v0);
+
+   res = lp_build_mul(bld, x, delta);
+
+   res = lp_build_add(bld, v0, res);
+
+   if(bld->type.fixed)
+      /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
+       * but it will be wrong for other uses. Basically we need a more
+       * powerful lp_type, capable of further distinguishing the values
+       * interpretation from the value storage. */
+      res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), "");
+
+   return res;
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 4e568c055e..62be4b9aee 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -67,6 +67,11 @@ lp_build_mul(struct lp_build_context *bld,
              LLVMValueRef b);
 
 LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 int b);
+
+LLVMValueRef
 lp_build_div(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 98ec1cb1b9..d438c0e63d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc,
 
    if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
       type.floating = TRUE;
-      assert(swizzle = 0);
+      assert(swizzle == 0);
       assert(format_desc->channel[swizzle].size == format_desc->block.bits);
    }
    else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index c087fc986e..970bee379f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -42,62 +42,34 @@ struct util_format_description;
 struct lp_type;
 
 
-/**
- * Unpack a pixel into its RGBA components.
- *
- * @param packed integer.
- *
- * @return RGBA in a 4 floats vector.
- */
-LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
-                         enum pipe_format format,
-                         LLVMValueRef packed);
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc);
 
 
-/**
- * Pack a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-LLVMValueRef
-lp_build_pack_rgba_aos(LLVMBuilderRef builder,
-                       enum pipe_format format,
-                       LLVMValueRef rgba);
+void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+                            struct lp_type type,
+                            const LLVMValueRef *unswizzled,
+                            LLVMValueRef *swizzled);
 
 
-/**
- * Load a pixel into its RGBA components.
- *
- * @param ptr value with the pointer to the packed pixel. Pointer type is
- * irrelevant.
- *
- * @return RGBA in a 4 floats vector.
- */
 LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
-                       enum pipe_format format,
-                       LLVMValueRef ptr);
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+                         const struct util_format_description *desc,
+                         LLVMValueRef packed);
 
 
-/**
- * Store a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-void 
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
-                        enum pipe_format format,
-                        LLVMValueRef ptr,
-                        LLVMValueRef rgba);
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+                          const struct util_format_description *desc,
+                          struct lp_type type,
+                          LLVMValueRef packed);
+
 
 LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
-                unsigned length,
-                unsigned src_width,
-                unsigned dst_width,
-                LLVMValueRef base_ptr,
-                LLVMValueRef offsets);
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+                       const struct util_format_description *desc,
+                       LLVMValueRef rgba);
 
 
 void
@@ -108,12 +80,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                          LLVMValueRef *rgba);
 
 
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
-                       const struct util_format_description *format_desc,
-                       struct lp_type type,
-                       LLVMValueRef base_ptr,
-                       LLVMValueRef offsets,
-                       LLVMValueRef *rgba);
-
 #endif /* !LP_BLD_FORMAT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index b9b5d84bed..5836e0173f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -25,18 +25,39 @@
  *
  **************************************************************************/
 
+/**
+ * @file
+ * AoS pixel format manipulation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
 
+#include "util/u_cpu_detect.h"
 #include "util/u_format.h"
 
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
 #include "lp_bld_format.h"
 
 
+/**
+ * Unpack a single pixel into its RGBA components.
+ *
+ * @param packed integer.
+ *
+ * @return RGBA in a 4 floats vector.
+ *
+ * XXX: This is mostly for reference and testing -- operating a single pixel at
+ * a time is rarely if ever needed.
+ */
 LLVMValueRef
 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
-                         enum pipe_format format,
+                         const struct util_format_description *desc,
                          LLVMValueRef packed)
 {
-   const struct util_format_description *desc;
    LLVMTypeRef type;
    LLVMValueRef shifted, casted, scaled, masked;
    LLVMValueRef shifts[4];
@@ -49,8 +70,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
    unsigned shift;
    unsigned i;
 
-   desc = util_format_description(format);
-
    /* FIXME: Support more formats */
    assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
    assert(desc->block.width == 1);
@@ -151,12 +170,130 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
 }
 
 
+/**
+ * Take a vector with packed pixels and unpack into a rgba8 vector.
+ *
+ * Formats with bit depth smaller than 32bits are accepted, but they must be
+ * padded to 32bits.
+ */
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+                          const struct util_format_description *desc,
+                          struct lp_type type,
+                          LLVMValueRef packed)
+{
+   struct lp_build_context bld;
+   bool rgba8;
+   LLVMValueRef res;
+   unsigned i;
+
+   lp_build_context_init(&bld, builder, type);
+
+   /* FIXME: Support more formats */
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+   assert(desc->block.bits <= 32);
+
+   assert(!type.floating);
+   assert(!type.fixed);
+   assert(type.norm);
+   assert(type.width == 8);
+   assert(type.length % 4 == 0);
+
+   rgba8 = TRUE;
+   for(i = 0; i < 4; ++i) {
+      assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+             desc->channel[i].type == UTIL_FORMAT_TYPE_VOID);
+      if(desc->channel[0].size != 8)
+         rgba8 = FALSE;
+   }
+
+   if(rgba8) {
+      /*
+       * The pixel is already in a rgba8 format variant. All it is necessary
+       * is to swizzle the channels.
+       */
+
+      unsigned char swizzles[4];
+      boolean zeros[4]; /* bitwise AND mask */
+      boolean ones[4]; /* bitwise OR mask */
+      boolean swizzles_needed = FALSE;
+      boolean zeros_needed = FALSE;
+      boolean ones_needed = FALSE;
+
+      for(i = 0; i < 4; ++i) {
+         enum util_format_swizzle swizzle = desc->swizzle[i];
+
+         /* Initialize with the no-op case */
+         swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i;
+         zeros[i] = TRUE;
+         ones[i] = FALSE;
+
+         switch (swizzle) {
+         case UTIL_FORMAT_SWIZZLE_X:
+         case UTIL_FORMAT_SWIZZLE_Y:
+         case UTIL_FORMAT_SWIZZLE_Z:
+         case UTIL_FORMAT_SWIZZLE_W:
+            if(swizzle != swizzles[i]) {
+               swizzles[i] = swizzle;
+               swizzles_needed = TRUE;
+            }
+            break;
+         case UTIL_FORMAT_SWIZZLE_0:
+            zeros[i] = FALSE;
+            zeros_needed = TRUE;
+            break;
+         case UTIL_FORMAT_SWIZZLE_1:
+            ones[i] = TRUE;
+            ones_needed = TRUE;
+            break;
+         case UTIL_FORMAT_SWIZZLE_NONE:
+            assert(0);
+            break;
+         }
+      }
+
+      res = packed;
+
+      if(swizzles_needed)
+         res = lp_build_swizzle1_aos(&bld, res, swizzles);
+
+      if(zeros_needed) {
+         /* Mask out zero channels */
+         LLVMValueRef mask = lp_build_const_mask_aos(type, zeros);
+         res = LLVMBuildAnd(builder, res, mask, "");
+      }
+
+      if(ones_needed) {
+         /* Or one channels */
+         LLVMValueRef mask = lp_build_const_mask_aos(type, ones);
+         res = LLVMBuildOr(builder, res, mask, "");
+      }
+   }
+   else {
+      /* FIXME */
+      assert(0);
+      res = lp_build_undef(type);
+   }
+
+   return res;
+}
+
+
+/**
+ * Pack a single pixel.
+ *
+ * @param rgba 4 float vector with the unpacked components.
+ *
+ * XXX: This is mostly for reference and testing -- operating a single pixel at
+ * a time is rarely if ever needed.
+ */
 LLVMValueRef
 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
-                       enum pipe_format format,
+                       const struct util_format_description *desc,
                        LLVMValueRef rgba)
 {
-   const struct util_format_description *desc;
    LLVMTypeRef type;
    LLVMValueRef packed = NULL;
    LLVMValueRef swizzles[4];
@@ -167,8 +304,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
    unsigned shift;
    unsigned i, j;
 
-   desc = util_format_description(format);
-
    assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
    assert(desc->block.width == 1);
    assert(desc->block.height == 1);
@@ -247,57 +382,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 
    return packed;
 }
-
-
-LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
-                       enum pipe_format format,
-                       LLVMValueRef ptr)
-{
-   const struct util_format_description *desc;
-   LLVMTypeRef type;
-   LLVMValueRef packed;
-
-   desc = util_format_description(format);
-
-   /* FIXME: Support more formats */
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
-   assert(desc->block.width == 1);
-   assert(desc->block.height == 1);
-   assert(desc->block.bits <= 32);
-
-   type = LLVMIntType(desc->block.bits);
-
-   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
-   packed = LLVMBuildLoad(builder, ptr, "");
-
-   return lp_build_unpack_rgba_aos(builder, format, packed);
-}
-
-
-void
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
-                        enum pipe_format format,
-                        LLVMValueRef ptr,
-                        LLVMValueRef rgba)
-{
-   const struct util_format_description *desc;
-   LLVMTypeRef type;
-   LLVMValueRef packed;
-
-   desc = util_format_description(format);
-
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
-   assert(desc->block.width == 1);
-   assert(desc->block.height == 1);
-
-   type = LLVMIntType(desc->block.bits);
-
-   packed = lp_build_pack_rgba_aos(builder, format, rgba);
-
-   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
-   LLVMBuildStore(builder, packed, ptr);
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
new file mode 100644
index 0000000000..f3832d07ff
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
@@ -0,0 +1,72 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Utility functions to make assertions about formats.
+ *
+ * This module centralizes most of logic used when determining what algorithm
+ * is most suitable (i.e., most efficient yet correct) for a given format.
+ *
+ * It might be possible to move some of these functions to u_format module,
+ * but since tiny differences in the format my render it more/less
+ * appropriate to a given algorithm it is impossible to make any long term
+ * guarantee about the semantics of these functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_format.h"
+
+
+/**
+ * Whether this format is a 4 rgba8 variant
+ */
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc)
+{
+   unsigned chan;
+
+   if(desc->block.width != 1 ||
+      desc->block.height != 1 ||
+      desc->block.bits != 32)
+      return FALSE;
+
+   for(chan = 0; chan < 4; ++chan) {
+      if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED &&
+         desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED &&
+         desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
+         return FALSE;
+      if(desc->channel[chan].size != 8)
+         return FALSE;
+   }
+
+   return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
index 66bebdcdec..64151d169d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -34,66 +34,17 @@
 #include "lp_bld_format.h"
 
 
-/**
- * Gather elements from scatter positions in memory into a single vector.
- *
- * @param src_width src element width
- * @param dst_width result element width (source will be expanded to fit)
- * @param length length of the offsets,
- * @param base_ptr base pointer, should be a i8 pointer type.
- * @param offsets vector with offsets
- */
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
-                unsigned length,
-                unsigned src_width,
-                unsigned dst_width,
-                LLVMValueRef base_ptr,
-                LLVMValueRef offsets)
-{
-   LLVMTypeRef src_type = LLVMIntType(src_width);
-   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
-   LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
-   LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
-   LLVMValueRef res;
-   unsigned i;
-
-   res = LLVMGetUndef(dst_vec_type);
-   for(i = 0; i < length; ++i) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef elem_offset;
-      LLVMValueRef elem_ptr;
-      LLVMValueRef elem;
-
-      elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
-      elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
-      elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
-      elem = LLVMBuildLoad(builder, elem_ptr, "");
-
-      assert(src_width <= dst_width);
-      if(src_width > dst_width)
-         elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
-      if(src_width < dst_width)
-         elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
-
-      res = LLVMBuildInsertElement(builder, res, elem, index, "");
-   }
-
-   return res;
-}
-
-
 static LLVMValueRef
-lp_build_format_swizzle(struct lp_type type,
-                        const LLVMValueRef *inputs,
-                        enum util_format_swizzle swizzle)
+lp_build_format_swizzle_chan_soa(struct lp_type type,
+                                 const LLVMValueRef *unswizzled,
+                                 enum util_format_swizzle swizzle)
 {
    switch (swizzle) {
    case UTIL_FORMAT_SWIZZLE_X:
    case UTIL_FORMAT_SWIZZLE_Y:
    case UTIL_FORMAT_SWIZZLE_Z:
    case UTIL_FORMAT_SWIZZLE_W:
-      return inputs[swizzle];
+      return unswizzled[swizzle];
    case UTIL_FORMAT_SWIZZLE_0:
       return lp_build_zero(type);
    case UTIL_FORMAT_SWIZZLE_1:
@@ -108,6 +59,28 @@ lp_build_format_swizzle(struct lp_type type,
 
 
 void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+                            struct lp_type type,
+                            const LLVMValueRef *unswizzled,
+                            LLVMValueRef *swizzled)
+{
+   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+      enum util_format_swizzle swizzle = format_desc->swizzle[0];
+      LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+      swizzled[2] = swizzled[1] = swizzled[0] = depth;
+      swizzled[3] = lp_build_one(type);
+   }
+   else {
+      unsigned chan;
+      for (chan = 0; chan < 4; ++chan) {
+         enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+         swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+      }
+   }
+}
+
+
+void
 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                          const struct util_format_description *format_desc,
                          struct lp_type type,
@@ -172,38 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
       start = stop;
    }
 
-   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
-      enum util_format_swizzle swizzle = format_desc->swizzle[0];
-      LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
-      rgba[2] = rgba[1] = rgba[0] = depth;
-      rgba[3] = lp_build_one(type);
-   }
-   else {
-      for (chan = 0; chan < 4; ++chan) {
-         enum util_format_swizzle swizzle = format_desc->swizzle[chan];
-         rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
-      }
-   }
-}
-
-
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
-                       const struct util_format_description *format_desc,
-                       struct lp_type type,
-                       LLVMValueRef base_ptr,
-                       LLVMValueRef offsets,
-                       LLVMValueRef *rgba)
-{
-   LLVMValueRef packed;
-
-   assert(format_desc->block.width == 1);
-   assert(format_desc->block.height == 1);
-   assert(format_desc->block.bits <= type.width);
-
-   packed = lp_build_gather(builder,
-                            type.length, format_desc->block.bits, type.width,
-                            base_ptr, offsets);
-
-   lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+   lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 338dbca6d1..49dab8ab61 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -109,32 +109,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
 
 
 /**
- * Small vector x scale multiplication optimization.
- *
- * TODO: Should be elsewhere.
- */
-static LLVMValueRef
-coeff_multiply(struct lp_build_interp_soa_context *bld,
-               LLVMValueRef coeff,
-               int step)
-{
-   LLVMValueRef factor;
-
-   switch(step) {
-   case 0:
-      return bld->base.zero;
-   case 1:
-      return coeff;
-   case 2:
-      return lp_build_add(&bld->base, coeff, coeff);
-   default:
-      factor = lp_build_const_scalar(bld->base.type, (double)step);
-      return lp_build_mul(&bld->base, coeff, factor);
-   }
-}
-
-
-/**
  * Multiply the dadx and dady with the xstep and ystep respectively.
  */
 static void
@@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld)
       if (mode != TGSI_INTERPOLATE_CONSTANT) {
          for(chan = 0; chan < NUM_CHANNELS; ++chan) {
             if(mask & (1 << chan)) {
-               bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
-               bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
+               bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep);
+               bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep);
             }
          }
       }
@@ -329,8 +303,8 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
             unsigned first, last, mask;
             unsigned attrib;
 
-            first = decl->DeclarationRange.First;
-            last = decl->DeclarationRange.Last;
+            first = decl->Range.First;
+            last = decl->Range.Last;
             mask = decl->Declaration.UsageMask;
 
             for( attrib = first; attrib <= last; ++attrib ) {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
new file mode 100644
index 0000000000..d3f78c06d9
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_config.h"
+
+#include "lp_bld_misc.h"
+
+
+#ifndef LLVM_NATIVE_ARCH
+
+namespace llvm {
+   extern void LinkInJIT();
+}
+
+
+void
+LLVMLinkInJIT(void)
+{
+   llvm::LinkInJIT();
+}
+
+
+extern "C" int X86TargetMachineModule;
+
+
+int
+LLVMInitializeNativeTarget(void)
+{
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   X86TargetMachineModule = 1;
+#endif
+   return 0;
+}
+
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
new file mode 100644
index 0000000000..0e787e0b9c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
@@ -0,0 +1,56 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_MISC_H
+#define LP_BLD_MISC_H
+
+
+#include "llvm/Config/config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#ifndef LLVM_NATIVE_ARCH
+
+void
+LLVMLinkInJIT(void);
+
+int
+LLVMInitializeNativeTarget(void);
+
+#endif /* !LLVM_NATIVE_ARCH */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* !LP_BLD_MISC_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
index fe82fda039..bc360ad77a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
@@ -159,11 +159,10 @@ lp_build_unpack2(LLVMBuilderRef builder,
 
    assert(!src_type.floating);
    assert(!dst_type.floating);
-   assert(dst_type.sign == src_type.sign);
    assert(dst_type.width == src_type.width * 2);
    assert(dst_type.length * 2 == src_type.length);
 
-   if(src_type.sign) {
+   if(dst_type.sign && src_type.sign) {
       /* Replicate the sign bit in the most significant bits */
       msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), "");
    }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
new file mode 100644
index 0000000000..af70ddc6ab
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -0,0 +1,190 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling -- common code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_type.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+                        const struct pipe_texture *texture,
+                        const struct pipe_sampler_state *sampler)
+{
+   memset(state, 0, sizeof *state);
+
+   if(!texture)
+      return;
+
+   if(!sampler)
+      return;
+
+   state->format            = texture->format;
+   state->target            = texture->target;
+   state->pot_width         = util_is_pot(texture->width0);
+   state->pot_height        = util_is_pot(texture->height0);
+   state->pot_depth         = util_is_pot(texture->depth0);
+
+   state->wrap_s            = sampler->wrap_s;
+   state->wrap_t            = sampler->wrap_t;
+   state->wrap_r            = sampler->wrap_r;
+   state->min_img_filter    = sampler->min_img_filter;
+   state->min_mip_filter    = sampler->min_mip_filter;
+   state->mag_img_filter    = sampler->mag_img_filter;
+   if(sampler->compare_mode) {
+      state->compare_mode      = sampler->compare_mode;
+      state->compare_func      = sampler->compare_func;
+   }
+   state->normalized_coords = sampler->normalized_coords;
+   state->prefilter         = sampler->prefilter;
+}
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ *
+ * @param src_width src element width
+ * @param dst_width result element width (source will be expanded to fit)
+ * @param length length of the offsets,
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+                unsigned length,
+                unsigned src_width,
+                unsigned dst_width,
+                LLVMValueRef base_ptr,
+                LLVMValueRef offsets)
+{
+   LLVMTypeRef src_type = LLVMIntType(src_width);
+   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+   LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+   LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+   LLVMValueRef res;
+   unsigned i;
+
+   res = LLVMGetUndef(dst_vec_type);
+   for(i = 0; i < length; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef elem_offset;
+      LLVMValueRef elem_ptr;
+      LLVMValueRef elem;
+
+      elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+      elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+      elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
+      elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+      assert(src_width <= dst_width);
+      if(src_width > dst_width)
+         elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+      if(src_width < dst_width)
+         elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+      res = LLVMBuildInsertElement(builder, res, elem, index, "");
+   }
+
+   return res;
+}
+
+
+/**
+ * Compute the offset of a pixel.
+ *
+ * x, y, y_stride are vectors
+ */
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+                       const struct util_format_description *format_desc,
+                       LLVMValueRef x,
+                       LLVMValueRef y,
+                       LLVMValueRef y_stride,
+                       LLVMValueRef data_ptr)
+{
+   LLVMValueRef x_stride;
+   LLVMValueRef offset;
+
+   x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8);
+
+   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+      LLVMValueRef x_lo, x_hi;
+      LLVMValueRef y_lo, y_hi;
+      LLVMValueRef x_stride_lo, x_stride_hi;
+      LLVMValueRef y_stride_lo, y_stride_hi;
+      LLVMValueRef x_offset_lo, x_offset_hi;
+      LLVMValueRef y_offset_lo, y_offset_hi;
+      LLVMValueRef offset_lo, offset_hi;
+
+      x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
+      y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
+
+      x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
+      y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");
+
+      x_stride_lo = x_stride;
+      y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);
+
+      x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
+      y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");
+
+      x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
+      y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
+      offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);
+
+      x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
+      y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
+      offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);
+
+      offset = lp_build_add(bld, offset_hi, offset_lo);
+   }
+   else {
+      LLVMValueRef x_offset;
+      LLVMValueRef y_offset;
+
+      x_offset = lp_build_mul(bld, x, x_stride);
+      y_offset = lp_build_mul(bld, y, y_stride);
+
+      offset = lp_build_add(bld, x_offset, y_offset);
+   }
+
+   return offset;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
index 403d0e4836..8cb8210ca7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -40,7 +40,9 @@
 
 struct pipe_texture;
 struct pipe_sampler_state;
+struct util_format_description;
 struct lp_type;
+struct lp_build_context;
 
 
 /**
@@ -119,6 +121,24 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
                         const struct pipe_sampler_state *sampler);
 
 
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+                unsigned length,
+                unsigned src_width,
+                unsigned dst_width,
+                LLVMValueRef base_ptr,
+                LLVMValueRef offsets);
+
+
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+                       const struct util_format_description *format_desc,
+                       LLVMValueRef x,
+                       LLVMValueRef y,
+                       LLVMValueRef y_stride,
+                       LLVMValueRef data_ptr);
+
+
 void
 lp_build_sample_soa(LLVMBuilderRef builder,
                     const struct lp_sampler_static_state *static_state,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 1a47ca32d2..47b68b71e2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -27,7 +27,7 @@
 
 /**
  * @file
- * Texture sampling.
+ * Texture sampling -- SoA.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
@@ -35,54 +35,23 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 #include "util/u_debug.h"
+#include "util/u_debug_dump.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_format.h"
+#include "util/u_cpu_detect.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
+#include "lp_bld_conv.h"
 #include "lp_bld_arit.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_swizzle.h"
+#include "lp_bld_pack.h"
 #include "lp_bld_format.h"
 #include "lp_bld_sample.h"
 
 
-void
-lp_sampler_static_state(struct lp_sampler_static_state *state,
-                        const struct pipe_texture *texture,
-                        const struct pipe_sampler_state *sampler)
-{
-   memset(state, 0, sizeof *state);
-
-   if(!texture)
-      return;
-
-   if(!sampler)
-      return;
-
-   state->format            = texture->format;
-   state->target            = texture->target;
-   state->pot_width         = util_is_pot(texture->width[0]);
-   state->pot_height        = util_is_pot(texture->height[0]);
-   state->pot_depth         = util_is_pot(texture->depth[0]);
-
-   state->wrap_s            = sampler->wrap_s;
-   state->wrap_t            = sampler->wrap_t;
-   state->wrap_r            = sampler->wrap_r;
-   state->min_img_filter    = sampler->min_img_filter;
-   state->min_mip_filter    = sampler->min_mip_filter;
-   state->mag_img_filter    = sampler->mag_img_filter;
-   if(sampler->compare_mode) {
-      state->compare_mode      = sampler->compare_mode;
-      state->compare_func      = sampler->compare_func;
-   }
-   state->normalized_coords = sampler->normalized_coords;
-   state->prefilter         = sampler->prefilter;
-}
-
-
-
 /**
  * Keep all information for sampling code generation in a single place.
  */
@@ -111,66 +80,61 @@ struct lp_build_sample_context
 
 
 static void
-lp_build_sample_texel(struct lp_build_sample_context *bld,
-                      LLVMValueRef x,
-                      LLVMValueRef y,
-                      LLVMValueRef y_stride,
-                      LLVMValueRef data_ptr,
-                      LLVMValueRef *texel)
+lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+                          LLVMValueRef x,
+                          LLVMValueRef y,
+                          LLVMValueRef y_stride,
+                          LLVMValueRef data_ptr,
+                          LLVMValueRef *texel)
 {
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef x_stride;
    LLVMValueRef offset;
+   LLVMValueRef packed;
+
+   offset = lp_build_sample_offset(&bld->int_coord_bld,
+                                   bld->format_desc,
+                                   x, y, y_stride,
+                                   data_ptr);
+
+   assert(bld->format_desc->block.width == 1);
+   assert(bld->format_desc->block.height == 1);
+   assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+   packed = lp_build_gather(bld->builder,
+                            bld->texel_type.length,
+                            bld->format_desc->block.bits,
+                            bld->texel_type.width,
+                            data_ptr, offset);
+
+   lp_build_unpack_rgba_soa(bld->builder,
+                            bld->format_desc,
+                            bld->texel_type,
+                            packed, texel);
+}
 
-   x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
-
-   if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
-      LLVMValueRef x_lo, x_hi;
-      LLVMValueRef y_lo, y_hi;
-      LLVMValueRef x_stride_lo, x_stride_hi;
-      LLVMValueRef y_stride_lo, y_stride_hi;
-      LLVMValueRef x_offset_lo, x_offset_hi;
-      LLVMValueRef y_offset_lo, y_offset_hi;
-      LLVMValueRef offset_lo, offset_hi;
-
-      x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
-      y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
-
-      x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
-      y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
-
-      x_stride_lo = x_stride;
-      y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
-
-      x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
-      y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
-
-      x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
-      y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
-      offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
-
-      x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
-      y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
-      offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
 
-      offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
-   }
-   else {
-      LLVMValueRef x_offset;
-      LLVMValueRef y_offset;
+static LLVMValueRef
+lp_build_sample_packed(struct lp_build_sample_context *bld,
+                       LLVMValueRef x,
+                       LLVMValueRef y,
+                       LLVMValueRef y_stride,
+                       LLVMValueRef data_ptr)
+{
+   LLVMValueRef offset;
 
-      x_offset = lp_build_mul(int_coord_bld, x, x_stride);
-      y_offset = lp_build_mul(int_coord_bld, y, y_stride);
+   offset = lp_build_sample_offset(&bld->int_coord_bld,
+                                   bld->format_desc,
+                                   x, y, y_stride,
+                                   data_ptr);
 
-      offset = lp_build_add(int_coord_bld, x_offset, y_offset);
-   }
+   assert(bld->format_desc->block.width == 1);
+   assert(bld->format_desc->block.height == 1);
+   assert(bld->format_desc->block.bits <= bld->texel_type.width);
 
-   lp_build_load_rgba_soa(bld->builder,
-                          bld->format_desc,
-                          bld->texel_type,
-                          data_ptr,
-                          offset,
-                          texel);
+   return lp_build_gather(bld->builder,
+                          bld->texel_type.length,
+                          bld->format_desc->block.bits,
+                          bld->texel_type.width,
+                          data_ptr, offset);
 }
 
 
@@ -208,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
       /* FIXME */
-      _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
+      _debug_printf("warning: failed to translate texture wrap mode %s\n",
+                    debug_dump_tex_wrap(wrap_mode, TRUE));
       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
       break;
@@ -240,7 +205,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
    x = lp_build_sample_wrap(bld, x, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
    y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
 
-   lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+   lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel);
 }
 
 
@@ -286,10 +251,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
    y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
 
-   lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
-   lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
-   lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
-   lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+   lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+   lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+   lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+   lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
 
    /* TODO: Don't interpolate missing channels */
    for(chan = 0; chan < 4; ++chan) {
@@ -304,6 +269,217 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
 
 
 static void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+                          struct lp_type dst_type,
+                          LLVMValueRef packed,
+                          LLVMValueRef *rgba)
+{
+   LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
+   unsigned chan;
+
+   /* Decode the input vector components */
+   for (chan = 0; chan < 4; ++chan) {
+      unsigned start = chan*8;
+      unsigned stop = start + 8;
+      LLVMValueRef input;
+
+      input = packed;
+
+      if(start)
+         input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
+
+      if(stop < 32)
+         input = LLVMBuildAnd(builder, input, mask, "");
+
+      input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
+
+      rgba[chan] = input;
+   }
+}
+
+
+static void
+lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
+                              LLVMValueRef s,
+                              LLVMValueRef t,
+                              LLVMValueRef width,
+                              LLVMValueRef height,
+                              LLVMValueRef stride,
+                              LLVMValueRef data_ptr,
+                              LLVMValueRef *texel)
+{
+   LLVMBuilderRef builder = bld->builder;
+   struct lp_build_context i32, h16, u8n;
+   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+   LLVMValueRef i32_c8, i32_c128, i32_c255;
+   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
+   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
+   LLVMValueRef x0, x1;
+   LLVMValueRef y0, y1;
+   LLVMValueRef neighbors[2][2];
+   LLVMValueRef neighbors_lo[2][2];
+   LLVMValueRef neighbors_hi[2][2];
+   LLVMValueRef packed, packed_lo, packed_hi;
+   LLVMValueRef unswizzled[4];
+
+   lp_build_context_init(&i32, builder, lp_type_int(32));
+   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+   lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+   i32_vec_type = lp_build_vec_type(i32.type);
+   h16_vec_type = lp_build_vec_type(h16.type);
+   u8n_vec_type = lp_build_vec_type(u8n.type);
+
+   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+   t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+
+   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+
+   i32_c128 = lp_build_int_const_scalar(i32.type, -128);
+   s = LLVMBuildAdd(builder, s, i32_c128, "");
+   t = LLVMBuildAdd(builder, t, i32_c128, "");
+
+   i32_c8 = lp_build_int_const_scalar(i32.type, 8);
+   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+
+   i32_c255 = lp_build_int_const_scalar(i32.type, 255);
+   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
+   t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
+
+   x0 = s_ipart;
+   y0 = t_ipart;
+
+   x0 = lp_build_sample_wrap(bld, x0, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+   x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   /*
+    * Transform 4 x i32 in
+    *
+    *   s_fpart = {s0, s1, s2, s3}
+    *
+    * into 8 x i16
+    *
+    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
+    *
+    * into two 8 x i16
+    *
+    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
+    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
+    *
+    * and likewise for t_fpart. There is no risk of loosing precision here
+    * since the fractional parts only use the lower 8bits.
+    */
+
+   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
+   t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
+
+   {
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
+      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
+      LLVMValueRef shuffle_lo;
+      LLVMValueRef shuffle_hi;
+      unsigned i, j;
+
+      for(j = 0; j < h16.type.length; j += 4) {
+         unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+         LLVMValueRef index;
+
+         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
+         for(i = 0; i < 4; ++i)
+            shuffles_lo[j + i] = index;
+
+         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
+         for(i = 0; i < 4; ++i)
+            shuffles_hi[j + i] = index;
+      }
+
+      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
+      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
+
+      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
+      t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
+      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
+      t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
+   }
+
+   /*
+    * Fetch the pixels as 4 x 32bit (rgba order might differ):
+    *
+    *   rgba0 rgba1 rgba2 rgba3
+    *
+    * bit cast them into 16 x u8
+    *
+    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+    *
+    * unpack them into two 8 x i16:
+    *
+    *   r0 g0 b0 a0 r1 g1 b1 a1
+    *   r2 g2 b2 a2 r3 g3 b3 a3
+    *
+    * The higher 8 bits of the resulting elements will be zero.
+    */
+
+   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
+   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
+   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
+   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
+
+   neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
+   neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
+   neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
+   neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+
+   /*
+    * Linear interpolate with 8.8 fixed point.
+    */
+
+   packed_lo = lp_build_lerp_2d(&h16,
+                                s_fpart_lo, t_fpart_lo,
+                                neighbors_lo[0][0],
+                                neighbors_lo[0][1],
+                                neighbors_lo[1][0],
+                                neighbors_lo[1][1]);
+
+   packed_hi = lp_build_lerp_2d(&h16,
+                                s_fpart_hi, t_fpart_hi,
+                                neighbors_hi[0][0],
+                                neighbors_hi[0][1],
+                                neighbors_hi[1][0],
+                                neighbors_hi[1][1]);
+
+   packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
+
+   /*
+    * Convert to SoA and swizzle.
+    */
+
+   packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
+
+   lp_build_rgba8_to_f32_soa(bld->builder,
+                             bld->texel_type,
+                             packed, unswizzled);
+
+   lp_build_format_swizzle_soa(bld->format_desc,
+                               bld->texel_type, unswizzled,
+                               texel);
+}
+
+
+static void
 lp_build_sample_compare(struct lp_build_sample_context *bld,
                         LLVMValueRef p,
                         LLVMValueRef *texel)
@@ -402,7 +578,10 @@ lp_build_sample_soa(LLVMBuilderRef builder,
       break;
    case PIPE_TEX_FILTER_LINEAR:
    case PIPE_TEX_FILTER_ANISO:
-      lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+      if(lp_format_is_rgba8(bld.format_desc))
+         lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
+      else
+         lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
       break;
    default:
       assert(0);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 64027de6aa..fe2db04d8f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -64,7 +64,7 @@
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
 
 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
 
 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
    if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
@@ -157,7 +157,7 @@ emit_fetch(
    unsigned index,
    const unsigned chan_index )
 {
-   const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
+   const struct tgsi_full_src_register *reg = &inst->Src[index];
    unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
    LLVMValueRef res;
 
@@ -167,9 +167,9 @@ emit_fetch(
    case TGSI_SWIZZLE_Z:
    case TGSI_SWIZZLE_W:
 
-      switch (reg->SrcRegister.File) {
+      switch (reg->Register.File) {
       case TGSI_FILE_CONSTANT: {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
          LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
          LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
          res = lp_build_broadcast_scalar(&bld->base, scalar);
@@ -177,17 +177,17 @@ emit_fetch(
       }
 
       case TGSI_FILE_IMMEDIATE:
-         res = bld->immediates[reg->SrcRegister.Index][swizzle];
+         res = bld->immediates[reg->Register.Index][swizzle];
          assert(res);
          break;
 
       case TGSI_FILE_INPUT:
-         res = bld->inputs[reg->SrcRegister.Index][swizzle];
+         res = bld->inputs[reg->Register.Index][swizzle];
          assert(res);
          break;
 
       case TGSI_FILE_TEMPORARY:
-         res = bld->temps[reg->SrcRegister.Index][swizzle];
+         res = bld->temps[reg->Register.Index][swizzle];
          if(!res)
             return bld->base.undef;
          break;
@@ -267,7 +267,7 @@ emit_store(
    unsigned chan_index,
    LLVMValueRef value)
 {
-   const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
+   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
 
    switch( inst->Instruction.Saturate ) {
    case TGSI_SAT_NONE:
@@ -287,13 +287,13 @@ emit_store(
       assert(0);
    }
 
-   switch( reg->DstRegister.File ) {
+   switch( reg->Register.File ) {
    case TGSI_FILE_OUTPUT:
-      bld->outputs[reg->DstRegister.Index][chan_index] = value;
+      bld->outputs[reg->Register.Index][chan_index] = value;
       break;
 
    case TGSI_FILE_TEMPORARY:
-      bld->temps[reg->DstRegister.Index][chan_index] = value;
+      bld->temps[reg->Register.Index][chan_index] = value;
       break;
 
    case TGSI_FILE_ADDRESS:
@@ -319,14 +319,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
           boolean projected,
           LLVMValueRef *texel)
 {
-   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].Register.Index;
    LLVMValueRef lodbias;
    LLVMValueRef oow;
    LLVMValueRef coords[3];
    unsigned num_coords;
    unsigned i;
 
-   switch (inst->InstructionExtTexture.Texture) {
+   switch (inst->Texture.Texture) {
    case TGSI_TEXTURE_1D:
       num_coords = 1;
       break;
@@ -375,7 +375,7 @@ emit_kil(
    struct lp_build_tgsi_soa_context *bld,
    const struct tgsi_full_instruction *inst )
 {
-   const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
+   const struct tgsi_full_src_register *reg = &inst->Src[0];
    LLVMValueRef terms[NUM_CHANNELS];
    LLVMValueRef mask;
    unsigned chan_index;
@@ -423,15 +423,15 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
 {
    uint i;
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
-      if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
-          reg->SrcRegister.Indirect)
+      const struct tgsi_full_src_register *reg = &inst->Src[i];
+      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+          reg->Register.Indirect)
          return TRUE;
    }
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-      const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
-      if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
-          reg->DstRegister.Indirect)
+      const struct tgsi_full_dst_register *reg = &inst->Dst[i];
+      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+          reg->Register.Indirect)
          return TRUE;
    }
    return FALSE;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index 46c298fa20..2fb233d335 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -43,13 +43,18 @@
 
 
 /**
+ * Native SIMD register width.
+ *
+ * 128 for all architectures we care about.
+ */
+#define LP_NATIVE_VECTOR_WIDTH 128
+
+/**
  * Several functions can only cope with vectors of length up to this value.
  * You may need to increase that value if you want to represent bigger vectors.
  */
 #define LP_MAX_VECTOR_LENGTH 16
 
-#define LP_MAX_TYPE_WIDTH 64
-
 
 /**
  * The LLVM type system can't conveniently express all the things we care about
@@ -134,6 +139,91 @@ struct lp_build_context
 };
 
 
+static INLINE struct lp_type
+lp_type_float(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.floating = TRUE;
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_int(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_uint(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_unorm(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.norm = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_fixed(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.sign = TRUE;
+   res_type.fixed = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_ufixed(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.fixed = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
 LLVMTypeRef
 lp_build_elem_type(struct lp_type type);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 202cb8ef43..c081f6de03 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -31,13 +31,13 @@
  */
 
 #include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "lp_clear.h"
 #include "lp_context.h"
 #include "lp_flush.h"
-#include "lp_prim_setup.h"
 #include "lp_prim_vbuf.h"
 #include "lp_state.h"
 #include "lp_surface.h"
@@ -180,7 +180,7 @@ llvmpipe_create( struct pipe_screen *screen )
    llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state;
 
    llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state;
-   llvmpipe->pipe.bind_sampler_states  = llvmpipe_bind_sampler_states;
+   llvmpipe->pipe.bind_fragment_sampler_states  = llvmpipe_bind_sampler_states;
    llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state;
 
    llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state;
@@ -205,7 +205,7 @@ llvmpipe_create( struct pipe_screen *screen )
    llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
    llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple;
    llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state;
-   llvmpipe->pipe.set_sampler_textures = llvmpipe_set_sampler_textures;
+   llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures;
    llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
 
    llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
@@ -264,21 +264,21 @@ llvmpipe_create( struct pipe_screen *screen )
                          (struct tgsi_sampler **)
                             llvmpipe->tgsi.vert_samplers_list);
 
-   llvmpipe->setup = lp_draw_render_stage(llvmpipe);
-   if (!llvmpipe->setup)
-      goto fail;
-
    if (debug_get_bool_option( "LP_NO_RAST", FALSE ))
       llvmpipe->no_rast = TRUE;
 
-   if (debug_get_bool_option( "LP_NO_VBUF", FALSE )) {
-      /* Deprecated path -- vbuf is the intended interface to the draw module:
-       */
-      draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->setup);
-   }
-   else {
-      lp_init_vbuf(llvmpipe);
-   }
+   llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe);
+   if (!llvmpipe->vbuf_backend)
+      goto fail;
+
+   llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend);
+   if (!llvmpipe->vbuf)
+      goto fail;
+
+   draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf);
+   draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend);
+
+
 
    /* plug in AA line/point stages */
    draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe);
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 7df340554e..3ad95d0bfc 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -121,9 +121,10 @@ struct llvmpipe_context {
 
    /** The primitive drawing context */
    struct draw_context *draw;
-   struct draw_stage *setup;
+
+   /** Draw module backend */
+   struct vbuf_render *vbuf_backend;
    struct draw_stage *vbuf;
-   struct llvmpipe_vbuf_render *vbuf_render;
 
    boolean dirty_render_cache;
    
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 1126bf90b9..bce3baec16 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -39,6 +39,7 @@
 #include "util/u_cpu_detect.h"
 #include "lp_screen.h"
 #include "lp_bld_intr.h"
+#include "lp_bld_misc.h"
 #include "lp_jit.h"
 
 
@@ -153,13 +154,12 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
 #if 0
    /* For simulating less capable machines */
    util_cpu_caps.has_sse3 = 0;
+   util_cpu_caps.has_ssse3 = 0;
    util_cpu_caps.has_sse4_1 = 0;
 #endif
 
-#ifdef LLVM_NATIVE_ARCH
    LLVMLinkInJIT();
    LLVMInitializeNativeTarget();
-#endif
 
    screen->module = LLVMModuleCreateWithName("llvmpipe");
 
@@ -168,7 +168,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
    if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
       _debug_printf("%s\n", error);
       LLVMDisposeMessage(error);
-      abort();
+      assert(0);
    }
 
    screen->target = LLVMGetExecutionEngineTargetData(screen->engine);
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.c b/src/gallium/drivers/llvmpipe/lp_prim_setup.c
deleted file mode 100644
index b14f8fb99d..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_prim_setup.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * \brief A draw stage that drives our triangle setup routines from
- * within the draw pipeline.  One of two ways to drive setup, the
- * other being in lp_prim_vbuf.c.
- *
- * \author  Keith Whitwell <keith@tungstengraphics.com>
- * \author  Brian Paul
- */
-
-
-#include "lp_context.h"
-#include "lp_setup.h"
-#include "lp_state.h"
-#include "lp_prim_setup.h"
-#include "draw/draw_pipe.h"
-#include "draw/draw_vertex.h"
-#include "util/u_memory.h"
-
-/**
- * Triangle setup info (derived from draw_stage).
- * Also used for line drawing (taking some liberties).
- */
-struct setup_stage {
-   struct draw_stage stage; /**< This must be first (base class) */
-
-   struct setup_context *setup;
-};
-
-
-
-/**
- * Basically a cast wrapper.
- */
-static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
-{
-   return (struct setup_stage *)stage;
-}
-
-
-typedef const float (*cptrf4)[4];
-
-static void
-do_tri(struct draw_stage *stage, struct prim_header *prim)
-{
-   struct setup_stage *setup = setup_stage( stage );
-   
-   llvmpipe_setup_tri( setup->setup,
-              (cptrf4)prim->v[0]->data,
-              (cptrf4)prim->v[1]->data,
-              (cptrf4)prim->v[2]->data );
-}
-
-static void
-do_line(struct draw_stage *stage, struct prim_header *prim)
-{
-   struct setup_stage *setup = setup_stage( stage );
-
-   llvmpipe_setup_line( setup->setup,
-               (cptrf4)prim->v[0]->data,
-               (cptrf4)prim->v[1]->data );
-}
-
-static void
-do_point(struct draw_stage *stage, struct prim_header *prim)
-{
-   struct setup_stage *setup = setup_stage( stage );
-
-   llvmpipe_setup_point( setup->setup,
-                (cptrf4)prim->v[0]->data );
-}
-
-
-
-
-static void setup_begin( struct draw_stage *stage )
-{
-   struct setup_stage *setup = setup_stage(stage);
-
-   llvmpipe_setup_prepare( setup->setup );
-
-   stage->point = do_point;
-   stage->line = do_line;
-   stage->tri = do_tri;
-}
-
-
-static void setup_first_point( struct draw_stage *stage,
-			       struct prim_header *header )
-{
-   setup_begin(stage);
-   stage->point( stage, header );
-}
-
-static void setup_first_line( struct draw_stage *stage,
-			       struct prim_header *header )
-{
-   setup_begin(stage);
-   stage->line( stage, header );
-}
-
-
-static void setup_first_tri( struct draw_stage *stage,
-			       struct prim_header *header )
-{
-   setup_begin(stage);
-   stage->tri( stage, header );
-}
-
-
-
-static void setup_flush( struct draw_stage *stage,
-			 unsigned flags )
-{
-   stage->point = setup_first_point;
-   stage->line = setup_first_line;
-   stage->tri = setup_first_tri;
-}
-
-
-static void reset_stipple_counter( struct draw_stage *stage )
-{
-}
-
-
-static void render_destroy( struct draw_stage *stage )
-{
-   struct setup_stage *ssetup = setup_stage(stage);
-   llvmpipe_setup_destroy_context(ssetup->setup);
-   FREE( stage );
-}
-
-
-/**
- * Create a new primitive setup/render stage.
- */
-struct draw_stage *lp_draw_render_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct setup_stage *sstage = CALLOC_STRUCT(setup_stage);
-
-   sstage->setup = llvmpipe_setup_create_context(llvmpipe);
-   sstage->stage.draw = llvmpipe->draw;
-   sstage->stage.point = setup_first_point;
-   sstage->stage.line = setup_first_line;
-   sstage->stage.tri = setup_first_tri;
-   sstage->stage.flush = setup_flush;
-   sstage->stage.reset_stipple_counter = reset_stipple_counter;
-   sstage->stage.destroy = render_destroy;
-
-   return (struct draw_stage *)sstage;
-}
-
-struct setup_context *
-lp_draw_setup_context( struct draw_stage *stage )
-{
-   struct setup_stage *ssetup = setup_stage(stage);
-   return ssetup->setup;
-}
-
-void
-lp_draw_flush( struct draw_stage *stage )
-{
-   stage->flush( stage, 0 );
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.h b/src/gallium/drivers/llvmpipe/lp_prim_setup.h
deleted file mode 100644
index da6cae6375..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_prim_setup.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef LP_PRIM_SETUP_H
-#define LP_PRIM_SETUP_H
-
-
-/**
- * vbuf is a special stage to gather the stream of triangles, lines, points
- * together and reconstruct vertex buffers for hardware upload.
- *
- * First attempt, work in progress.
- * 
- * TODO:
- *    - separate out vertex buffer building and primitive emit, ie >1 draw per vb.
- *    - tell vbuf stage how to build hw vertices directly
- *    - pass vbuf stage a buffer pointer for direct emit to agp/vram.
- *
- *
- *
- * Vertices are just an array of floats, with all the attributes
- * packed.  We currently assume a layout like:
- *
- * attr[0][0..3] - window position
- * attr[1..n][0..3] - remaining attributes.
- *
- * Attributes are assumed to be 4 floats wide but are packed so that
- * all the enabled attributes run contiguously.
- */
-
-
-struct draw_stage;
-struct llvmpipe_context;
-
-
-typedef void (*vbuf_draw_func)( struct pipe_context *pipe,
-                                unsigned prim,
-                                const ushort *elements,
-                                unsigned nr_elements,
-                                const void *vertex_buffer,
-                                unsigned nr_vertices );
-
-
-extern struct draw_stage *
-lp_draw_render_stage( struct llvmpipe_context *llvmpipe );
-
-extern struct setup_context *
-lp_draw_setup_context( struct draw_stage * );
-
-extern void
-lp_draw_flush( struct draw_stage * );
-
-
-extern struct draw_stage *
-lp_draw_vbuf_stage( struct draw_context *draw_context,
-                    struct pipe_context *pipe,
-                    vbuf_draw_func draw );
-
-
-#endif /* LP_PRIM_SETUP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
index c394dcb61d..4abff4eccc 100644
--- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
@@ -37,10 +37,9 @@
 
 
 #include "lp_context.h"
+#include "lp_setup.h"
 #include "lp_state.h"
 #include "lp_prim_vbuf.h"
-#include "lp_prim_setup.h"
-#include "lp_setup.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
 #include "util/u_memory.h"
@@ -59,6 +58,8 @@ struct llvmpipe_vbuf_render
 {
    struct vbuf_render base;
    struct llvmpipe_context *llvmpipe;
+   struct setup_context *setup;
+
    uint prim;
    uint vertex_size;
    uint nr_vertices;
@@ -75,6 +76,11 @@ llvmpipe_vbuf_render(struct vbuf_render *vbr)
 }
 
 
+
+
+
+
+
 static const struct vertex_info *
 lp_vbuf_get_vertex_info(struct vbuf_render *vbr)
 {
@@ -105,36 +111,6 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr,
 static void
 lp_vbuf_release_vertices(struct vbuf_render *vbr)
 {
-#if 0
-   {
-      struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
-      const struct vertex_info *info = 
-         llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe);
-      const float *vtx = (const float *) cvbr->vertex_buffer;
-      uint i, j;
-      debug_printf("%s (vtx_size = %u,  vtx_used = %u)\n",
-             __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices);
-      for (i = 0; i < cvbr->nr_vertices; i++) {
-         for (j = 0; j < info->num_attribs; j++) {
-            uint k;
-            switch (info->attrib[j].emit) {
-            case EMIT_4F:  k = 4;   break;
-            case EMIT_3F:  k = 3;   break;
-            case EMIT_2F:  k = 2;   break;
-            case EMIT_1F:  k = 1;   break;
-            default: assert(0);
-            }
-            debug_printf("Vert %u attr %u: ", i, j);
-            while (k-- > 0) {
-               debug_printf("%g ", vtx[0]);
-               vtx++;
-            }
-            debug_printf("\n");
-         }
-      }
-   }
-#endif
-
    /* keep the old allocation for next time */
 }
 
@@ -160,11 +136,7 @@ static boolean
 lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
 {
    struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
-
-   /* XXX: break this dependency - make setup_context live under
-    * llvmpipe, rename the old "setup" draw stage to something else.
-    */
-   struct setup_context *setup_ctx = lp_draw_setup_context(cvbr->llvmpipe->setup);
+   struct setup_context *setup_ctx = cvbr->setup;
    
    llvmpipe_setup_prepare( setup_ctx );
 
@@ -193,14 +165,9 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
    struct llvmpipe_context *llvmpipe = cvbr->llvmpipe;
    const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float);
    const void *vertex_buffer = cvbr->vertex_buffer;
+   struct setup_context *setup_ctx = cvbr->setup;
    unsigned i;
 
-   /* XXX: break this dependency - make setup_context live under
-    * llvmpipe, rename the old "setup" draw stage to something else.
-    */
-   struct draw_stage *setup = llvmpipe->setup;
-   struct setup_context *setup_ctx = lp_draw_setup_context(setup);
-
    switch (cvbr->prim) {
    case PIPE_PRIM_POINTS:
       for (i = 0; i < nr; i++) {
@@ -367,11 +334,6 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
    default:
       assert(0);
    }
-
-   /* XXX: why are we calling this???  If we had to call something, it
-    * would be a function in lp_setup.c:
-    */
-   lp_draw_flush( setup );
 }
 
 
@@ -384,17 +346,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
 {
    struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
    struct llvmpipe_context *llvmpipe = cvbr->llvmpipe;
+   struct setup_context *setup_ctx = cvbr->setup;
    const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float);
    const void *vertex_buffer =
       (void *) get_vert(cvbr->vertex_buffer, start, stride);
    unsigned i;
 
-   /* XXX: break this dependency - make setup_context live under
-    * llvmpipe, rename the old "setup" draw stage to something else.
-    */
-   struct draw_stage *setup = llvmpipe->setup;
-   struct setup_context *setup_ctx = lp_draw_setup_context(setup);
-
    switch (cvbr->prim) {
    case PIPE_PRIM_POINTS:
       for (i = 0; i < nr; i++) {
@@ -568,40 +525,38 @@ static void
 lp_vbuf_destroy(struct vbuf_render *vbr)
 {
    struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
-   cvbr->llvmpipe->vbuf_render = NULL;
+   llvmpipe_setup_destroy_context(cvbr->setup);
    FREE(cvbr);
 }
 
 
 /**
- * Initialize the post-transform vertex buffer information for the given
- * context.
+ * Create the post-transform vertex handler for the given context.
  */
-void
-lp_init_vbuf(struct llvmpipe_context *lp)
+struct vbuf_render *
+lp_create_vbuf_backend(struct llvmpipe_context *lp)
 {
-   assert(lp->draw);
+   struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render);
 
-   lp->vbuf_render = CALLOC_STRUCT(llvmpipe_vbuf_render);
+   assert(lp->draw);
 
-   lp->vbuf_render->base.max_indices = LP_MAX_VBUF_INDEXES;
-   lp->vbuf_render->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE;
 
-   lp->vbuf_render->base.get_vertex_info = lp_vbuf_get_vertex_info;
-   lp->vbuf_render->base.allocate_vertices = lp_vbuf_allocate_vertices;
-   lp->vbuf_render->base.map_vertices = lp_vbuf_map_vertices;
-   lp->vbuf_render->base.unmap_vertices = lp_vbuf_unmap_vertices;
-   lp->vbuf_render->base.set_primitive = lp_vbuf_set_primitive;
-   lp->vbuf_render->base.draw = lp_vbuf_draw;
-   lp->vbuf_render->base.draw_arrays = lp_vbuf_draw_arrays;
-   lp->vbuf_render->base.release_vertices = lp_vbuf_release_vertices;
-   lp->vbuf_render->base.destroy = lp_vbuf_destroy;
+   cvbr->base.max_indices = LP_MAX_VBUF_INDEXES;
+   cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE;
 
-   lp->vbuf_render->llvmpipe = lp;
+   cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info;
+   cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices;
+   cvbr->base.map_vertices = lp_vbuf_map_vertices;
+   cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices;
+   cvbr->base.set_primitive = lp_vbuf_set_primitive;
+   cvbr->base.draw = lp_vbuf_draw;
+   cvbr->base.draw_arrays = lp_vbuf_draw_arrays;
+   cvbr->base.release_vertices = lp_vbuf_release_vertices;
+   cvbr->base.destroy = lp_vbuf_destroy;
 
-   lp->vbuf = draw_vbuf_stage(lp->draw, &lp->vbuf_render->base);
+   cvbr->llvmpipe = lp;
 
-   draw_set_rasterize_stage(lp->draw, lp->vbuf);
+   cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe);
 
-   draw_set_render(lp->draw, &lp->vbuf_render->base);
+   return &cvbr->base;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h
index 6c4e6063e6..0676e2f42a 100644
--- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h
+++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h
@@ -31,8 +31,8 @@
 
 struct llvmpipe_context;
 
-extern void
-lp_init_vbuf(struct llvmpipe_context *llvmpipe);
+extern struct vbuf_render *
+lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe);
 
 
 #endif /* LP_VBUF_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 0518927458..0fb133486a 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -27,6 +27,7 @@
 
 
 #include "util/u_memory.h"
+#include "util/u_format.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 
@@ -131,17 +132,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
 {
    struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
    struct llvmpipe_winsys *winsys = screen->winsys;
+   const struct util_format_description *format_desc;
+
+   format_desc = util_format_description(format);
+   if(!format_desc)
+      return FALSE;
 
    assert(target == PIPE_TEXTURE_1D ||
           target == PIPE_TEXTURE_2D ||
           target == PIPE_TEXTURE_3D ||
           target == PIPE_TEXTURE_CUBE);
 
-   if(format == PIPE_FORMAT_Z16_UNORM)
-      return FALSE;
-   if(format == PIPE_FORMAT_S8_UNORM)
-      return FALSE;
-
    switch(format) {
    case PIPE_FORMAT_DXT1_RGB:
    case PIPE_FORMAT_DXT1_RGBA:
@@ -152,8 +153,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
       break;
    }
 
-   if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
-      return winsys->is_displaytarget_format_supported(winsys, format);
+   if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+      if(format_desc->block.width != 1 ||
+         format_desc->block.height != 1)
+         return FALSE;
+
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+         return FALSE;
+
+      if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+         format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB)
+         return FALSE;
+   }
+
+   if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) {
+      if(!winsys->is_displaytarget_format_supported(winsys, format))
+         return FALSE;
+   }
+
+   if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
+      if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+
+      /* FIXME: Temporary restriction. See lp_state_fs.c. */
+      if(format_desc->block.bits != 32)
+         return FALSE;
+   }
+
+   /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */
+   if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) {
+      if(format_desc->block.width != 1 ||
+         format_desc->block.height != 1)
+         return FALSE;
+
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+         return FALSE;
+
+      if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+         format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB &&
+         format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+   }
 
    return TRUE;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index c43b3da450..ffcbc9a379 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -33,7 +33,6 @@
  */
 
 #include "lp_context.h"
-#include "lp_prim_setup.h"
 #include "lp_quad.h"
 #include "lp_setup.h"
 #include "lp_state.h"
@@ -90,6 +89,8 @@ struct setup_context {
    float oneoverarea;
    int facing;
 
+   float pixel_offset;
+
    struct quad_header quad[MAX_QUADS];
    struct quad_header *quad_ptrs[MAX_QUADS];
    unsigned count;
@@ -483,6 +484,16 @@ static boolean setup_sort_vertices( struct setup_context *setup,
       ((det > 0.0) ^ 
        (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW));
 
+   /* Prepare pixel offset for rasterisation:
+    *  - pixel center (0.5, 0.5) for GL, or
+    *  - assume (0.0, 0.0) for other APIs.
+    */
+   if (setup->llvmpipe->rasterizer->gl_rasterization_rules) {
+      setup->pixel_offset = 0.5f;
+   } else {
+      setup->pixel_offset = 0.0f;
+   }
+
    return TRUE;
 }
 
@@ -508,7 +519,7 @@ static void tri_pos_coeff( struct setup_context *setup,
 
    /* calculate a0 as the value which would be sampled for the
     * fragment at (0,0), taking into account that we want to sample at
-    * pixel centers, in other words (0.5, 0.5).
+    * pixel centers, in other words (pixel_offset, pixel_offset).
     *
     * this is neat but unfortunately not a good way to do things for
     * triangles with very large values of dadx or dady as it will
@@ -519,8 +530,8 @@ static void tri_pos_coeff( struct setup_context *setup,
     * instead - i'll switch to this later.
     */
    setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
-                           (dadx * (setup->vmin[0][0] - 0.5f) +
-                            dady * (setup->vmin[0][1] - 0.5f)));
+                           (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                            dady * (setup->vmin[0][1] - setup->pixel_offset)));
 
    /*
    debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -609,8 +620,8 @@ static void tri_linear_coeff( struct setup_context *setup,
        * instead - i'll switch to this later.
        */
       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
-                     (dadx * (setup->vmin[0][0] - 0.5f) +
-                      dady * (setup->vmin[0][1] - 0.5f)));
+                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
 
       /*
       debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -661,8 +672,8 @@ static void tri_persp_coeff( struct setup_context *setup,
       setup->coef.dadx[1 + attrib][i] = dadx;
       setup->coef.dady[1 + attrib][i] = dady;
       setup->coef.a0[1 + attrib][i] = (mina -
-                     (dadx * (setup->vmin[0][0] - 0.5f) +
-                      dady * (setup->vmin[0][1] - 0.5f)));
+                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
    }
 }
 
@@ -746,12 +757,12 @@ static void setup_tri_coefficients( struct setup_context *setup )
 
 static void setup_tri_edges( struct setup_context *setup )
 {
-   float vmin_x = setup->vmin[0][0] + 0.5f;
-   float vmid_x = setup->vmid[0][0] + 0.5f;
+   float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
+   float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
 
-   float vmin_y = setup->vmin[0][1] - 0.5f;
-   float vmid_y = setup->vmid[0][1] - 0.5f;
-   float vmax_y = setup->vmax[0][1] - 0.5f;
+   float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
+   float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
+   float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
 
    setup->emaj.sy = ceilf(vmin_y);
    setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
@@ -950,8 +961,8 @@ linear_pos_coeff(struct setup_context *setup,
    setup->coef.dadx[0][i] = dadx;
    setup->coef.dady[0][i] = dady;
    setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
-                           (dadx * (setup->vmin[0][0] - 0.5f) +
-                            dady * (setup->vmin[0][1] - 0.5f)));
+                           (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                            dady * (setup->vmin[0][1] - setup->pixel_offset)));
 }
 
 
@@ -972,8 +983,8 @@ line_linear_coeff(struct setup_context *setup,
       setup->coef.dadx[1 + attrib][i] = dadx;
       setup->coef.dady[1 + attrib][i] = dady;
       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
-                     (dadx * (setup->vmin[0][0] - 0.5f) +
-                      dady * (setup->vmin[0][1] - 0.5f)));
+                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
    }
 }
 
@@ -998,8 +1009,8 @@ line_persp_coeff(struct setup_context *setup,
       setup->coef.dadx[1 + attrib][i] = dadx;
       setup->coef.dady[1 + attrib][i] = dady;
       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
-                     (dadx * (setup->vmin[0][0] - 0.5f) +
-                      dady * (setup->vmin[0][1] - 0.5f)));
+                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
    }
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c
index 3f03bd0057..b2e75d3b14 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -76,7 +76,7 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe,
    for (i = 0; i < 4; ++i) {
       uint8_t c = float_to_ubyte(blend_color->color[i]);
       for (j = 0; j < 16; ++j)
-         llvmpipe->jit_context.blend_color[i*4 + j] = c;
+         llvmpipe->jit_context.blend_color[i*16 + j] = c;
    }
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 30fb41ea65..c753b183c0 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -65,26 +65,19 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
    if (vinfo->num_attribs == 0) {
       /* compute vertex layout now */
       const struct lp_fragment_shader *lpfs = llvmpipe->fs;
-      const enum interp_mode colorInterp
-         = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+      struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
+      const uint num = draw_num_vs_outputs(llvmpipe->draw);
       uint i;
 
-      if (llvmpipe->vbuf) {
-         /* if using the post-transform vertex buffer, tell draw_vbuf to
-          * simply emit the whole post-xform vertex as-is:
-          */
-         struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
-         const uint num = draw_num_vs_outputs(llvmpipe->draw);
-         uint i;
-
-         /* No longer any need to try and emit draw vertex_header info.
-          */
-         vinfo_vbuf->num_attribs = 0;
-         for (i = 0; i < num; i++) {
-            draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
-         }
-         draw_compute_vertex_size(vinfo_vbuf);
+      /* Tell draw_vbuf to simply emit the whole post-xform vertex
+       * as-is.  No longer any need to try and emit draw vertex_header
+       * info.
+       */
+      vinfo_vbuf->num_attribs = 0;
+      for (i = 0; i < num; i++) {
+	 draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
       }
+      draw_compute_vertex_size(vinfo_vbuf);
 
       /*
        * Loop over fragment shader inputs, searching for the matching output
@@ -112,33 +105,21 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
 
          switch (lpfs->info.input_semantic_name[i]) {
          case TGSI_SEMANTIC_POSITION:
-            src = draw_find_vs_output(llvmpipe->draw,
-                                      TGSI_SEMANTIC_POSITION, 0);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
+            interp = INTERP_POS;
             break;
 
          case TGSI_SEMANTIC_COLOR:
-            src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_COLOR, 
-                                 lpfs->info.input_semantic_index[i]);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
-            break;
-
-         case TGSI_SEMANTIC_FOG:
-            src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
-            break;
-
-         case TGSI_SEMANTIC_GENERIC:
-         case TGSI_SEMANTIC_FACE:
-            /* this includes texcoords and varying vars */
-            src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC,
-                                      lpfs->info.input_semantic_index[i]);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
+            if (llvmpipe->rasterizer->flatshade) {
+               interp = INTERP_CONSTANT;
+            }
             break;
-
-         default:
-            assert(0);
          }
+
+         /* this includes texcoords and varying vars */
+         src = draw_find_vs_output(llvmpipe->draw,
+                                   lpfs->info.input_semantic_name[i],
+                                   lpfs->info.input_semantic_index[i]);
+         draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
       }
 
       llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 7728ba6076..ee0f69b2af 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -148,6 +148,20 @@ generate_depth(LLVMBuilderRef builder,
    format_desc = util_format_description(key->zsbuf_format);
    assert(format_desc);
 
+   /*
+    * Depths are expected to be between 0 and 1, even if they are stored in
+    * floats. Setting these bits here will ensure that the lp_build_conv() call
+    * below won't try to unnecessarily clamp the incoming values.
+    */
+   if(src_type.floating) {
+      src_type.sign = FALSE;
+      src_type.norm = TRUE;
+   }
+   else {
+      assert(!src_type.sign);
+      assert(src_type.norm);
+   }
+
    /* Pick the depth type. */
    dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
 
@@ -155,14 +169,11 @@ generate_depth(LLVMBuilderRef builder,
    assert(dst_type.width == src_type.width);
    assert(dst_type.length == src_type.length);
 
-#if 1
-   src = lp_build_clamped_float_to_unsigned_norm(builder,
-                                                 src_type,
-                                                 dst_type.width,
-                                                 src);
-#else
    lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);
-#endif
+
+   dst_ptr = LLVMBuildBitCast(builder,
+                              dst_ptr,
+                              LLVMPointerType(lp_build_vec_type(dst_type), 0), "");
 
    lp_build_depth_test(builder,
                        &key->depth,
@@ -400,6 +411,7 @@ generate_fragment(struct llvmpipe_context *lp,
 #ifdef DEBUG
    tgsi_dump(shader->base.tokens, 0);
    if(key->depth.enabled) {
+      debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
       debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
       debug_printf("depth.writemask = %u\n", key->depth.writemask);
    }
@@ -419,6 +431,34 @@ generate_fragment(struct llvmpipe_context *lp,
       debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
    }
    debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+   for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+      if(key->sampler[i].format) {
+         debug_printf("sampler[%u] = \n", i);
+         debug_printf("  .format = %s\n",
+                      pf_name(key->sampler[i].format));
+         debug_printf("  .target = %s\n",
+                      debug_dump_tex_target(key->sampler[i].target, TRUE));
+         debug_printf("  .pot = %u %u %u\n",
+                      key->sampler[i].pot_width,
+                      key->sampler[i].pot_height,
+                      key->sampler[i].pot_depth);
+         debug_printf("  .wrap = %s %s %s\n",
+                      debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+                      debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+                      debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+         debug_printf("  .min_img_filter = %s\n",
+                      debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+         debug_printf("  .min_mip_filter = %s\n",
+                      debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+         debug_printf("  .mag_img_filter = %s\n",
+                      debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+         if(key->sampler[i].compare_mode)
+            debug_printf("  .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+         debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+         debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
+      }
+   }
+
 #endif
 
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -582,10 +622,12 @@ generate_fragment(struct llvmpipe_context *lp,
     * Translate the LLVM IR into machine code.
     */
 
+#ifdef DEBUG
    if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
       LLVMDumpValue(variant->function);
-      abort();
+      assert(0);
    }
+#endif
 
    LLVMRunFunctionPassManager(screen->pass, variant->function);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index c69d90c723..8333805a3f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -102,8 +102,8 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
       if(tex) {
          struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
          struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i];
-         jit_tex->width = tex->width[0];
-         jit_tex->height = tex->height[0];
+         jit_tex->width = tex->width0;
+         jit_tex->height = tex->height0;
          jit_tex->stride = lp_tex->stride[0];
          if(!lp_tex->dt)
             jit_tex->data = lp_tex->data;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
index 15c3029614..8a761648e7 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -92,5 +92,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
       (struct lp_vertex_shader *)vs;
 
    draw_delete_vertex_shader(llvmpipe->draw, state->draw_data);
+   FREE( (void *)state->shader.tokens );
    FREE( state );
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index e3af81cffb..29fff91981 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -462,6 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend,
 }
 
 
+ALIGN_STACK
 static boolean
 test_one(unsigned verbose,
          FILE *fp,
@@ -530,11 +531,11 @@ test_one(unsigned verbose,
    success = TRUE;
    for(i = 0; i < n && success; ++i) {
       if(mode == AoS) {
-         uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
          int64_t start_counter = 0;
          int64_t end_counter = 0;
 
@@ -595,11 +596,11 @@ test_one(unsigned verbose,
 
       if(mode == SoA) {
          const unsigned stride = type.length*type.width/8;
-         uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
          int64_t start_counter = 0;
          int64_t end_counter = 0;
          boolean mismatch;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index ac2a6d05e3..968c7a2d4a 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -142,6 +142,7 @@ add_conv_test(LLVMModuleRef module,
 }
 
 
+ALIGN_STACK
 static boolean
 test_one(unsigned verbose,
          FILE *fp,
@@ -229,8 +230,8 @@ test_one(unsigned verbose,
    for(i = 0; i < n && success; ++i) {
       unsigned src_stride = src_type.length*src_type.width/8;
       unsigned dst_stride = dst_type.length*dst_type.width/8;
-      uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
-      uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
       double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
       uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
       int64_t start_counter = 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 5dc8297fe9..23ea9ebbe7 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -39,6 +39,7 @@
 #include "util/u_format.h"
 
 #include "lp_bld_format.h"
+#include "lp_test.h"
 
 
 struct pixel_test_case
@@ -89,34 +90,62 @@ struct pixel_test_case test_cases[] =
 };
 
 
-typedef void (*load_ptr_t)(const void *, float *);
+void
+write_tsv_header(FILE *fp)
+{
+   fprintf(fp,
+           "result\t"
+           "format\n");
+
+   fflush(fp);
+}
+
+
+static void
+write_tsv_row(FILE *fp,
+              const struct util_format_description *desc,
+              boolean success)
+{
+   fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+   fprintf(fp, "%s\n", desc->name);
+
+   fflush(fp);
+}
+
+
+typedef void (*load_ptr_t)(const uint32_t packed, float *);
 
 
 static LLVMValueRef
 add_load_rgba_test(LLVMModuleRef module,
-                   enum pipe_format format)
+                   const struct util_format_description *desc)
 {
    LLVMTypeRef args[2];
    LLVMValueRef func;
-   LLVMValueRef ptr;
+   LLVMValueRef packed;
    LLVMValueRef rgba_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    LLVMValueRef rgba;
 
-   args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+   args[0] = LLVMInt32Type();
    args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
 
    func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
    LLVMSetFunctionCallConv(func, LLVMCCallConv);
-   ptr = LLVMGetParam(func, 0);
+   packed = LLVMGetParam(func, 0);
    rgba_ptr = LLVMGetParam(func, 1);
 
    block = LLVMAppendBasicBlock(func, "entry");
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   rgba = lp_build_load_rgba_aos(builder, format, ptr);
+   if(desc->block.bits < 32)
+      packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), "");
+
+   rgba = lp_build_unpack_rgba_aos(builder, desc, packed);
+
    LLVMBuildStore(builder, rgba, rgba_ptr);
 
    LLVMBuildRetVoid(builder);
@@ -126,27 +155,28 @@ add_load_rgba_test(LLVMModuleRef module,
 }
 
 
-typedef void (*store_ptr_t)(void *, const float *);
+typedef void (*store_ptr_t)(uint32_t *, const float *);
 
 
 static LLVMValueRef
 add_store_rgba_test(LLVMModuleRef module,
-                    enum pipe_format format)
+                    const struct util_format_description *desc)
 {
    LLVMTypeRef args[2];
    LLVMValueRef func;
-   LLVMValueRef ptr;
+   LLVMValueRef packed_ptr;
    LLVMValueRef rgba_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    LLVMValueRef rgba;
+   LLVMValueRef packed;
 
-   args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+   args[0] = LLVMPointerType(LLVMInt32Type(), 0);
    args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
 
    func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
    LLVMSetFunctionCallConv(func, LLVMCCallConv);
-   ptr = LLVMGetParam(func, 0);
+   packed_ptr = LLVMGetParam(func, 0);
    rgba_ptr = LLVMGetParam(func, 1);
 
    block = LLVMAppendBasicBlock(func, "entry");
@@ -155,7 +185,12 @@ add_store_rgba_test(LLVMModuleRef module,
 
    rgba = LLVMBuildLoad(builder, rgba_ptr, "");
 
-   lp_build_store_rgba_aos(builder, format, ptr, rgba);
+   packed = lp_build_pack_rgba_aos(builder, desc, rgba);
+
+   if(desc->block.bits < 32)
+      packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
+
+   LLVMBuildStore(builder, packed, packed_ptr);
 
    LLVMBuildRetVoid(builder);
 
@@ -164,8 +199,9 @@ add_store_rgba_test(LLVMModuleRef module,
 }
 
 
+ALIGN_STACK
 static boolean
-test_format(const struct pixel_test_case *test)
+test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test)
 {
    LLVMModuleRef module = NULL;
    LLVMValueRef load = NULL;
@@ -187,8 +223,8 @@ test_format(const struct pixel_test_case *test)
 
    module = LLVMModuleCreateWithName("test");
 
-   load = add_load_rgba_test(module, test->format);
-   store = add_store_rgba_test(module, test->format);
+   load = add_load_rgba_test(module, desc);
+   store = add_store_rgba_test(module, desc);
 
    if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
       LLVMDumpModule(module);
@@ -224,7 +260,7 @@ test_format(const struct pixel_test_case *test)
    memset(unpacked, 0, sizeof unpacked);
    packed = 0;
 
-   load_ptr(&test->packed, unpacked);
+   load_ptr(test->packed, unpacked);
    store_ptr(&packed, unpacked);
 
    success = TRUE;
@@ -250,25 +286,29 @@ test_format(const struct pixel_test_case *test)
    if(pass)
       LLVMDisposePassManager(pass);
 
+   if(fp)
+      write_tsv_row(fp, desc, success);
+
    return success;
 }
 
 
-int main(int argc, char **argv)
+boolean
+test_all(unsigned verbose, FILE *fp)
 {
    unsigned i;
-   int ret;
+   bool success = TRUE;
 
-#ifdef LLVM_NATIVE_ARCH
-   LLVMLinkInJIT();
-   LLVMInitializeNativeTarget();
-#endif
+   for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
+      if(!test_format(verbose, fp, &test_cases[i]))
+        success = FALSE;
 
-   util_cpu_detect();
+   return success;
+}
 
-   for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
-      if(!test_format(&test_cases[i]))
-        ret = 1;
 
-   return ret;
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+   return test_all(verbose, fp);
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index d4767ff52b..314544aa9a 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -37,9 +37,22 @@
 #include "util/u_cpu_detect.h"
 
 #include "lp_bld_const.h"
+#include "lp_bld_misc.h"
 #include "lp_test.h"
 
 
+#ifdef PIPE_CC_MSVC
+static INLINE double
+round(double x)
+{
+   if (x >= 0.0)
+      return floor(x + 0.5);
+   else
+      return ceil(x - 0.5);
+}
+#endif
+
+
 void
 dump_type(FILE *fp,
           struct lp_type type)
@@ -367,10 +380,8 @@ int main(int argc, char **argv)
          n = atoi(argv[i]);
    }
 
-#ifdef LLVM_NATIVE_ARCH
    LLVMLinkInJIT();
    LLVMInitializeNativeTarget();
-#endif
 
    util_cpu_detect();
 
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index 773e848242..c7c4143bc6 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -36,6 +36,7 @@
 #include "util/u_memory.h"
 #include "util/u_tile.h"
 #include "util/u_format.h"
+#include "util/u_math.h"
 #include "lp_context.h"
 #include "lp_surface.h"
 #include "lp_texture.h"
@@ -270,8 +271,8 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
                                      addr.bits.level, 
                                      addr.bits.z, 
                                      PIPE_TRANSFER_READ, 0, 0,
-                                     tc->texture->width[addr.bits.level],
-                                     tc->texture->height[addr.bits.level]);
+                                     u_minify(tc->texture->width0, addr.bits.level),
+                                     u_minify(tc->texture->height0, addr.bits.level));
 
          tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index a1365a045f..0d01c07fb5 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -544,7 +544,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
       float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
       dsdx = fabsf(dsdx);
       dsdy = fabsf(dsdy);
-      rho = MAX2(dsdx, dsdy) * texture->width[0];
+      rho = MAX2(dsdx, dsdy) * texture->width0;
    }
    if (t) {
       float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
@@ -552,7 +552,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
       float max;
       dtdx = fabsf(dtdx);
       dtdy = fabsf(dtdy);
-      max = MAX2(dtdx, dtdy) * texture->height[0];
+      max = MAX2(dtdx, dtdy) * texture->height0;
       rho = MAX2(rho, max);
    }
    if (p) {
@@ -561,7 +561,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
       float max;
       dpdx = fabsf(dpdx);
       dpdy = fabsf(dpdy);
-      max = MAX2(dpdx, dpdy) * texture->depth[0];
+      max = MAX2(dpdx, dpdy) * texture->depth0;
       rho = MAX2(rho, max);
    }
 
@@ -726,9 +726,9 @@ get_texel(const struct tgsi_sampler *tgsi_sampler,
    const struct pipe_texture *texture = samp->texture;
    const struct pipe_sampler_state *sampler = samp->sampler;
 
-   if (x < 0 || x >= (int) texture->width[level] ||
-       y < 0 || y >= (int) texture->height[level] ||
-       z < 0 || z >= (int) texture->depth[level]) {
+   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+       y < 0 || y >= (int) u_minify(texture->height0, level) ||
+       z < 0 || z >= (int) u_minify(texture->depth0, level)) {
       rgba[0][j] = sampler->border_color[0];
       rgba[1][j] = sampler->border_color[1];
       rgba[2][j] = sampler->border_color[2];
@@ -1093,8 +1093,8 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
 
    assert(sampler->normalized_coords);
 
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
 
@@ -1250,9 +1250,9 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
 
    assert(sampler->normalized_coords);
 
-   width = texture->width[level0];
-   height = texture->height[level0];
-   depth = texture->depth[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
+   depth = u_minify(texture->depth0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1394,8 +1394,8 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
    /* texture RECTS cannot be mipmapped */
    assert(level0 == level1);
 
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
 
@@ -1513,8 +1513,8 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
 
    /* Do this elsewhere: 
     */
-   samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
-   samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
+   samp->xpot = util_unsigned_logbase2( samp->texture->width0 );
+   samp->ypot = util_unsigned_logbase2( samp->texture->height0 );
 
    /* Try to hook in a faster sampler.  Ultimately we'll have to
     * code-generate these.  Luckily most of this looks like it is
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index a00f2495df..65d62fd072 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -57,9 +57,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
 {
    struct pipe_texture *pt = &lpt->base;
    unsigned level;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
 
    unsigned buffer_size = 0;
 
@@ -68,9 +68,6 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
    for (level = 0; level <= pt->last_level; level++) {
       unsigned nblocksx, nblocksy;
 
-      pt->width[level] = width;
-      pt->height[level] = height;
-      pt->depth[level] = depth;
       pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);  
       pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
 
@@ -87,9 +84,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
                       ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
                       lpt->stride[level]);
 
-      width  = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
    }
 
    lpt->data = align_malloc(buffer_size, 16);
@@ -104,13 +101,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
    struct llvmpipe_winsys *winsys = screen->winsys;
 
    pf_get_block(lpt->base.format, &lpt->base.block);
-   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);  
-   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);  
+   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0);  
+   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0);  
 
    lpt->dt = winsys->displaytarget_create(winsys,
                                           lpt->base.format,
-                                          lpt->base.width[0],
-                                          lpt->base.height[0],
+                                          lpt->base.width0,
+                                          lpt->base.height0,
                                           16,
                                           &lpt->stride[0] );
 
@@ -172,7 +169,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
@@ -183,8 +180,8 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
    lpt->base = *base;
    pipe_reference_init(&lpt->base.reference, 1);
    lpt->base.screen = screen;
-   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);  
-   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);  
+   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0);  
+   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0);  
    lpt->stride[0] = stride[0];
 
    pipe_buffer_reference(&lpt->buffer, buffer);
@@ -229,8 +226,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
       pipe_reference_init(&ps->reference, 1);
       pipe_texture_reference(&ps->texture, pt);
       ps->format = pt->format;
-      ps->width = pt->width[level];
-      ps->height = pt->height[level];
+      ps->width = u_minify(pt->width0, level);
+      ps->height = u_minify(pt->height0, level);
       ps->offset = lpt->level_offset[level];
       ps->usage = usage;