Merge branch 'master' into gallium-sampler-view

Conflicts: src/gallium/auxiliary/Makefile src/gallium/auxiliary/SConscript src/gallium/auxiliary/tgsi/tgsi_exec.c src/gallium/auxiliary/util/u_blitter.c src/gallium/drivers/i915/i915_context.h src/gallium/drivers/i965/brw_context.h src/gallium/drivers/llvmpipe/lp_context.h src/gallium/drivers/nv50/nv50_context.h src/gallium/drivers/nv50/nv50_state_validate.c src/gallium/drivers/nv50/nv50_tex.c src/gallium/drivers/r300/r300_blit.c src/gallium/drivers/r300/r300_context.h src/gallium/drivers/r300/r300_emit.c src/gallium/drivers/r300/r300_state.c src/gallium/drivers/softpipe/sp_context.h src/gallium/drivers/svga/svga_context.h src/gallium/drivers/svga/svga_pipe_sampler.c
author: Michal Krol <michal@vmware.com> 2010-03-10 15:49:30 +0100
committer: Michal Krol <michal@vmware.com> 2010-03-10 15:49:30 +0100
commit: 3ce4375912c8ea488460e593e07c5bb15b92dca9 (patch)
tree: 1011fa439bd829fd46a44fd99478135848800e73 /src/gallium/auxiliary
parent: f59f28093ea827bd234d8e1a36bdd56a9fce5f09 (diff)
parent: 9b348d0ed125a22be3f318ac60cef6f201edfdab (diff)
70 files changed, 4117 insertions, 1156 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 75dfd965af..e2796d5bd3 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -54,6 +54,7 @@ C_SOURCES = \
 	os/os_stream_str.c \
 	os/os_stream_null.c \
 	os/os_time.c \
+	pipebuffer/pb_buffer_fenced.c \
 	pipebuffer/pb_buffer_malloc.c \
 	pipebuffer/pb_bufmgr_alt.c \
 	pipebuffer/pb_bufmgr_cache.c \
@@ -104,9 +105,9 @@ C_SOURCES = \
 	util/u_cpu_detect.c \
 	util/u_dl.c \
 	util/u_draw_quad.c \
-	util/u_format.c \
 	util/u_format_access.c \
 	util/u_format_table.c \
+	util/u_format_tests.c \
 	util/u_gen_mipmap.c \
 	util/u_handle_table.c \
 	util/u_hash_table.c \
@@ -132,7 +133,8 @@ C_SOURCES = \
 	#vl/vl_mpeg12_mc_renderer.c \
 	#vl/vl_compositor.c \
 	#vl/vl_csc.c \
-	#vl/vl_shader_build.c
+	#vl/vl_shader_build.c \
+	target-helpers/wrap_screen.c
 
 GALLIVM_SOURCES = \
         gallivm/lp_bld_alpha.c \
@@ -160,7 +162,14 @@ GALLIVM_SOURCES = \
         gallivm/lp_bld_type.c
 
 GALLIVM_CPP_SOURCES = \
-        gallivm/lp_bld_misc.cpp
+        gallivm/lp_bld_init.cpp
+
+GENERATED_SOURCES = \
+	indices/u_indices_gen.c \
+	indices/u_unfilled_gen.c \
+	util/u_format_access.c \
+	util/u_format_pack.h \
+	util/u_format_table.c
 
 
 ifeq ($(MESA_LLVM),1)
@@ -186,5 +195,9 @@ indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
 util/u_format_table.c: util/u_format_table.py util/u_format_parse.py util/u_format.csv
 	python util/u_format_table.py util/u_format.csv > $@
 
+util/u_format_pack.h: util/u_format_pack.py util/u_format_parse.py util/u_format.csv
+	python util/u_format_pack.py util/u_format.csv > $@
+
 util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv
 	python util/u_format_access.py util/u_format.csv > $@
+
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index acb03e6282..65e1dc8a58 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -7,6 +7,8 @@ env.Append(CPPPATH = [
     'util',
 ])
 
+env.Tool('udis86')
+
 env.CodeGenerate(
     target = 'indices/u_indices_gen.c', 
     script = 'indices/u_indices_gen.py', 
@@ -29,6 +31,13 @@ env.CodeGenerate(
 )
 
 env.CodeGenerate(
+    target = File('util/u_format_pack.h').srcnode(),
+    script = 'util/u_format_pack.py',
+    source = ['util/u_format.csv'],
+    command = 'python $SCRIPT $SOURCE > $TARGET'
+)
+
+env.CodeGenerate(
     target = 'util/u_format_access.c',
     script = 'util/u_format_access.py',
     source = ['util/u_format.csv'],
@@ -140,9 +149,9 @@ source = [
     'util/u_dump_state.c',
     'util/u_dl.c',
     'util/u_draw_quad.c',
-    'util/u_format.c',
     'util/u_format_access.c',
     'util/u_format_table.c',
+    'util/u_format_tests.c',
     'util/u_gen_mipmap.c',
     'util/u_handle_table.c',
     'util/u_hash.c',
@@ -168,6 +177,7 @@ source = [
     #'vl/vl_compositor.c',
     #'vl/vl_csc.c',
     #'vl/vl_shader_build.c',
+    'target-helpers/wrap_screen.c',
 ]
 
 if drawllvm:
@@ -188,7 +198,7 @@ if drawllvm:
     'gallivm/lp_bld_interp.c',
     'gallivm/lp_bld_intr.c',
     'gallivm/lp_bld_logic.c',
-    'gallivm/lp_bld_misc.cpp',
+    'gallivm/lp_bld_init.cpp',
     'gallivm/lp_bld_pack.c',
     'gallivm/lp_bld_sample.c',
     'gallivm/lp_bld_sample_soa.c',
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index a6a07e72c2..900c64df4b 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -43,6 +43,7 @@ struct cso_cache {
    struct cso_hash *vs_hash;
    struct cso_hash *rasterizer_hash;
    struct cso_hash *sampler_hash;
+   struct cso_hash *velements_hash;
    int    max_size;
 
    cso_sanitize_callback sanitize_cb;
@@ -108,6 +109,9 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_
    case CSO_VERTEX_SHADER:
       hash = sc->vs_hash;
       break;
+   case CSO_VELEMENTS:
+      hash = sc->velements_hash;
+      break;
    }
 
    return hash;
@@ -161,6 +165,13 @@ static void delete_vs_state(void *state, void *data)
    FREE(state);
 }
 
+static void delete_velements(void *state, void *data)
+{
+   struct cso_velements *cso = (struct cso_velements *)state;
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+}
 
 static INLINE void delete_cso(void *state, enum cso_cache_type type)
 {
@@ -183,6 +194,9 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type)
    case CSO_VERTEX_SHADER:
       delete_vs_state(state, 0);
       break;
+   case CSO_VELEMENTS:
+      delete_velements(state, 0);
+      break;
    default:
       assert(0);
       FREE(state);
@@ -294,6 +308,7 @@ struct cso_cache *cso_cache_create(void)
    sc->rasterizer_hash    = cso_hash_create();
    sc->fs_hash            = cso_hash_create();
    sc->vs_hash            = cso_hash_create();
+   sc->velements_hash     = cso_hash_create();
    sc->sanitize_cb        = sanitize_cb;
    sc->sanitize_data      = 0;
 
@@ -325,6 +340,9 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
    case CSO_VERTEX_SHADER:
       hash = sc->vs_hash;
       break;
+   case CSO_VELEMENTS:
+      hash = sc->velements_hash;
+      break;
    }
 
    iter = cso_hash_first_node(hash);
@@ -351,6 +369,7 @@ void cso_cache_delete(struct cso_cache *sc)
    cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0);
    cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0);
    cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0);
+   cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0);
 
    cso_hash_delete(sc->blend_hash);
    cso_hash_delete(sc->sampler_hash);
@@ -358,6 +377,7 @@ void cso_cache_delete(struct cso_cache *sc)
    cso_hash_delete(sc->rasterizer_hash);
    cso_hash_delete(sc->fs_hash);
    cso_hash_delete(sc->vs_hash);
+   cso_hash_delete(sc->velements_hash);
    FREE(sc);
 }
 
@@ -372,6 +392,7 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
    sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size);
    sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size);
    sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size);
+   sanitize_hash(sc, sc->velements_hash, CSO_VELEMENTS, sc->max_size);
 }
 
 int cso_maximum_cache_size(const struct cso_cache *sc)
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h
index eea60b940b..fb09b83c62 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.h
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.h
@@ -53,6 +53,7 @@
   * - rasterizer (old setup)
   * - sampler
   * - vertex shader
+  * - vertex elements
   *
   * Things that are not constant state objects include:
   * - blend_color
@@ -90,7 +91,8 @@ enum cso_cache_type {
    CSO_DEPTH_STENCIL_ALPHA,
    CSO_RASTERIZER,
    CSO_FRAGMENT_SHADER,
-   CSO_VERTEX_SHADER
+   CSO_VERTEX_SHADER,
+   CSO_VELEMENTS
 };
 
 typedef void (*cso_state_callback)(void *ctx, void *obj);
@@ -144,6 +146,18 @@ struct cso_sampler {
    struct pipe_context *context;
 };
 
+struct cso_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
+};
+
+struct cso_velements {
+   struct cso_velems_state state;
+   void *data;
+   cso_state_callback delete_state;
+   struct pipe_context *context;
+};
+
 unsigned cso_construct_key(void *item, int item_size);
 
 struct cso_cache *cso_cache_create(void);
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 6ae765c899..8568a0052d 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -94,6 +94,10 @@ struct cso_context {
    void *rasterizer, *rasterizer_saved;
    void *fragment_shader, *fragment_shader_saved, *geometry_shader;
    void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved;
+   void *velements, *velements_saved;
+
+   struct pipe_clip_state clip;
+   struct pipe_clip_state clip_saved;
 
    struct pipe_framebuffer_state fb, fb_saved;
    struct pipe_viewport_state vp, vp_saved;
@@ -176,6 +180,20 @@ static boolean delete_vs_state(struct cso_context *ctx, void *state)
    return FALSE;
 }
 
+static boolean delete_vertex_elements(struct cso_context *ctx,
+                                      void *state)
+{
+   struct cso_velements *cso = (struct cso_velements *)state;
+
+   if (ctx->velements == cso->data)
+      return FALSE;
+
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+   return TRUE;
+}
+
 
 static INLINE boolean delete_cso(struct cso_context *ctx,
                                  void *state, enum cso_cache_type type)
@@ -199,6 +217,9 @@ static INLINE boolean delete_cso(struct cso_context *ctx,
    case CSO_VERTEX_SHADER:
       return delete_vs_state(ctx, state);
       break;
+   case CSO_VELEMENTS:
+      return delete_vertex_elements(ctx, state);
+      break;
    default:
       assert(0);
       FREE(state);
@@ -273,6 +294,7 @@ void cso_release_all( struct cso_context *ctx )
       ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
       ctx->pipe->bind_fs_state( ctx->pipe, NULL );
       ctx->pipe->bind_vs_state( ctx->pipe, NULL );
+      ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
    }
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
@@ -1184,3 +1206,116 @@ void cso_restore_geometry_shader(struct cso_context *ctx)
    }
    ctx->geometry_shader_saved = NULL;
 }
+
+/* clip state */
+
+static INLINE void
+clip_state_cpy(struct pipe_clip_state *dst,
+               const struct pipe_clip_state *src)
+{
+   dst->nr = src->nr;
+   if (src->nr) {
+      memcpy(dst->ucp, src->ucp, src->nr * sizeof(src->ucp[0]));
+   }
+}
+
+static INLINE int
+clip_state_cmp(const struct pipe_clip_state *a,
+               const struct pipe_clip_state *b)
+{
+   if (a->nr != b->nr) {
+      return 1;
+   }
+   if (a->nr) {
+      return memcmp(a->ucp, b->ucp, a->nr * sizeof(a->ucp[0]));
+   }
+   return 0;
+}
+
+void
+cso_set_clip(struct cso_context *ctx,
+             const struct pipe_clip_state *clip)
+{
+   if (clip_state_cmp(&ctx->clip, clip)) {
+      clip_state_cpy(&ctx->clip, clip);
+      ctx->pipe->set_clip_state(ctx->pipe, clip);
+   }
+}
+
+void
+cso_save_clip(struct cso_context *ctx)
+{
+   clip_state_cpy(&ctx->clip_saved, &ctx->clip);
+}
+
+void
+cso_restore_clip(struct cso_context *ctx)
+{
+   if (clip_state_cmp(&ctx->clip, &ctx->clip_saved)) {
+      clip_state_cpy(&ctx->clip, &ctx->clip_saved);
+      ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
+   }
+}
+
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+                                        unsigned count,
+                                        const struct pipe_vertex_element *states)
+{
+   unsigned key_size, hash_key;
+   struct cso_hash_iter iter;
+   void *handle;
+   struct cso_velems_state velems_state;
+
+   /* need to include the count into the stored state data too.
+      Otherwise first few count pipe_vertex_elements could be identical even if count
+      is different, and there's no guarantee the hash would be different in that
+      case neither */
+   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
+   velems_state.count = count;
+   memcpy(velems_state.velems, states, sizeof(struct pipe_vertex_element) * count);
+   hash_key = cso_construct_key((void*)&velems_state, key_size);
+   iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)&velems_state, key_size);
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_velements *cso = MALLOC(sizeof(struct cso_velements));
+      if (!cso)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      memcpy(&cso->state, &velems_state, key_size);
+      cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state.velems[0]);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_vertex_elements_state;
+      cso->context = ctx->pipe;
+
+      iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso);
+      if (cso_hash_iter_is_null(iter)) {
+         FREE(cso);
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
+   }
+
+   if (ctx->velements != handle) {
+      ctx->velements = handle;
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle);
+   }
+   return PIPE_OK;
+}
+
+void cso_save_vertex_elements(struct cso_context *ctx)
+{
+   assert(!ctx->velements_saved);
+   ctx->velements_saved = ctx->velements;
+}
+
+void cso_restore_vertex_elements(struct cso_context *ctx)
+{
+   if (ctx->velements != ctx->velements_saved) {
+      ctx->velements = ctx->velements_saved;
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved);
+   }
+   ctx->velements_saved = NULL;
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index 707b3c2cee..9c16abd28d 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -122,6 +122,12 @@ void
 cso_restore_vertex_sampler_textures(struct cso_context *cso);
 
 
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+                                        unsigned count,
+                                        const struct pipe_vertex_element *states);
+void cso_save_vertex_elements(struct cso_context *ctx);
+void cso_restore_vertex_elements(struct cso_context *ctx);
+
 
 /* These aren't really sensible -- most of the time the api provides
  * object semantics for shaders anyway, and the cases where it doesn't
@@ -157,7 +163,6 @@ void cso_save_geometry_shader(struct cso_context *cso);
 void cso_restore_geometry_shader(struct cso_context *cso);
 
 
-
 enum pipe_error cso_set_framebuffer(struct cso_context *cso,
                                     const struct pipe_framebuffer_state *fb);
 void cso_save_framebuffer(struct cso_context *cso);
@@ -180,6 +185,19 @@ void cso_save_stencil_ref(struct cso_context *cso);
 void cso_restore_stencil_ref(struct cso_context *cso);
 
 
+/* clip state */
+
+void
+cso_set_clip(struct cso_context *cso,
+             const struct pipe_clip_state *clip);
+
+void
+cso_save_clip(struct cso_context *cso);
+
+void
+cso_restore_clip(struct cso_context *cso);
+
+
 #ifdef	__cplusplus
 }
 #endif
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index d5ddc4a6a9..bb0988543f 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -128,9 +128,7 @@ void draw_set_rasterizer_state( struct draw_context *draw,
    draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
 
    draw->rasterizer = raster;
-   draw->bypass_clipping =
-      ((draw->rasterizer && draw->rasterizer->bypass_vs_clip_and_viewport) ||
-       draw->driver.bypass_clipping);
+   draw->bypass_clipping = draw->driver.bypass_clipping;
 }
 
 
@@ -140,9 +138,7 @@ void draw_set_driver_clipping( struct draw_context *draw,
    draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
 
    draw->driver.bypass_clipping = bypass_clipping;
-   draw->bypass_clipping =
-      ((draw->rasterizer && draw->rasterizer->bypass_vs_clip_and_viewport) ||
-       draw->driver.bypass_clipping);
+   draw->bypass_clipping = draw->driver.bypass_clipping;
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 97f3480879..9f9fb4312c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -864,7 +864,7 @@ draw_install_aapoint_stage(struct draw_context *draw,
     */
    aapoint = draw_aapoint_stage( draw );
    if (aapoint == NULL)
-      goto fail;
+      return FALSE;
 
    aapoint->pipe = pipe;
 
@@ -881,10 +881,4 @@ draw_install_aapoint_stage(struct draw_context *draw,
    draw->pipeline.aapoint = &aapoint->stage;
 
    return TRUE;
-
- fail:
-   if (aapoint)
-      aapoint->stage.destroy( &aapoint->stage );
-
-   return FALSE;
 }
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index e829492423..8e321946ce 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -161,7 +161,7 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw )
 {
    struct offset_stage *offset = CALLOC_STRUCT(offset_stage);
    if (offset == NULL)
-      goto fail;
+      return NULL;
 
    draw_alloc_temp_verts( &offset->stage, 3 );
 
@@ -176,10 +176,4 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw )
    offset->stage.destroy = offset_destroy;
 
    return &offset->stage;
-
- fail:
-   if (offset)
-      offset->stage.destroy( &offset->stage );
-
-   return NULL;
 }
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index d40c035240..2709957961 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -262,7 +262,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
 	 src_offset = 0;
 	 break;
       case EMIT_4UB:
-	 output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+	 output_format = PIPE_FORMAT_A8R8G8B8_UNORM;
 	 emit_sz = 4 * sizeof(ubyte);
          break;
       default:
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 341353f628..a8cdc57ad9 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -87,9 +87,7 @@ draw_pt_arrays(struct draw_context *draw,
          opt |= PT_CLIPTEST;
       }
       
-      if (!draw->rasterizer->bypass_vs_clip_and_viewport) {
-         opt |= PT_SHADE;
-      }
+      opt |= PT_SHADE;
    }
       
    if (opt == 0) 
@@ -309,9 +307,8 @@ draw_arrays_instanced(struct draw_context *draw,
       tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
       debug_printf("Elements:\n");
       for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
-         debug_printf("  format=%s comps=%u\n",
-                      util_format_name(draw->pt.vertex_element[i].src_format),
-                      draw->pt.vertex_element[i].nr_components);
+         debug_printf("  format=%s\n",
+                      util_format_name(draw->pt.vertex_element[i].src_format));
       }
       debug_printf("Buffers:\n");
       for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h
index 4ca5b52020..3c44f7c11e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_decompose.h
+++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h
@@ -105,40 +105,20 @@ static void FUNC( ARGS,
 
 
    case PIPE_PRIM_QUADS:
-      if (flatfirst) {
-         for (i = 0; i+3 < count; i += 4) {
-            QUAD( (i + 1),
-                  (i + 2),
-                  (i + 3),
-                  (i + 0) );
-         }
-      }
-      else {
-         for (i = 0; i+3 < count; i += 4) {
-            QUAD( (i + 0),
-                  (i + 1),
-                  (i + 2),
-                  (i + 3));
-         }
+      for (i = 0; i+3 < count; i += 4) {
+         QUAD( (i + 0),
+               (i + 1),
+               (i + 2),
+               (i + 3));
       }
       break;
 
    case PIPE_PRIM_QUAD_STRIP:
-      if (flatfirst) {
-         for (i = 0; i+3 < count; i += 2) {
-            QUAD( (i + 1),
-                  (i + 3),
-                  (i + 2),
-                  (i + 0) );
-         }
-      }
-      else {
-         for (i = 0; i+3 < count; i += 2) {
-            QUAD( (i + 2),
-                  (i + 0),
-                  (i + 1),
-                  (i + 3));
-         }
+      for (i = 0; i+3 < count; i += 2) {
+         QUAD( (i + 2),
+               (i + 0),
+               (i + 1),
+               (i + 3));
       }
       break;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 4fb53276bb..ae357b5122 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -112,7 +112,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
 	 src_offset = 0;
 	 break;
       case EMIT_4UB:
-	 output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+	 output_format = PIPE_FORMAT_A8R8G8B8_UNORM;
 	 emit_sz = 4 * sizeof(ubyte);
          break;
       default:
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index c5dfbcfa3c..1aecb51077 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -100,8 +100,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
    fse->key.nr_elements = MAX2(fse->key.nr_outputs,     /* outputs - translate to hw format */
                                fse->key.nr_inputs);     /* inputs - fetch from api format */
 
-   fse->key.viewport = (!draw->rasterizer->bypass_vs_clip_and_viewport &&
-                        !draw->identity_viewport);
+   fse->key.viewport = !draw->identity_viewport;
    fse->key.clip = !draw->bypass_clipping;
    fse->key.const_vbuffers = 0;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 56b69354b2..da5106463a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -96,8 +96,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
     */
    draw_pt_post_vs_prepare( fpme->post_vs,
 			    (boolean)draw->bypass_clipping,
-			    (boolean)(draw->identity_viewport ||
-			    draw->rasterizer->bypass_vs_clip_and_viewport),
+			    (boolean)draw->identity_viewport,
 			    (boolean)draw->rasterizer->gl_rasterization_rules,
 			    (draw->vs.edgeflag_output ? true : false) );    
 
@@ -154,9 +153,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
 		      (char *)pipeline_verts );
 
    /* Run the shader, note that this overwrites the data[] parts of
-    * the pipeline verts.  If there is no shader, eg if
-    * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
-    * already in the correct place.
+    * the pipeline verts.
     */
    if (opt & PT_SHADE)
    {
@@ -239,9 +236,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
                              (char *)pipeline_verts );
 
    /* Run the shader, note that this overwrites the data[] parts of
-    * the pipeline verts.  If there is no shader, ie if
-    * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
-    * already in the correct place.
+    * the pipeline verts.
     */
    if (opt & PT_SHADE)
    {
@@ -319,9 +314,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
                              (char *)pipeline_verts );
 
    /* Run the shader, note that this overwrites the data[] parts of
-    * the pipeline verts.  If there is no shader, ie if
-    * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
-    * already in the correct place.
+    * the pipeline verts.
     */
    if (opt & PT_SHADE)
    {
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
index 62822a3d56..7cba8547f1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -118,39 +118,21 @@ static void FUNC( struct draw_pt_front_end *frontend,
 
    case PIPE_PRIM_QUADS:
       for (i = 0; i+3 < count; i += 4) {
-         if (flatfirst) {
-            QUAD( vcache,
-                  get_elt(elts, i + 0),
-                  get_elt(elts, i + 1),
-                  get_elt(elts, i + 2),
-                  get_elt(elts, i + 3) );
-         }
-         else {
-            QUAD( vcache,
-                  get_elt(elts, i + 0),
-                  get_elt(elts, i + 1),
-                  get_elt(elts, i + 2),
-                  get_elt(elts, i + 3) );
-         }
+         QUAD( vcache,
+               get_elt(elts, i + 0),
+               get_elt(elts, i + 1),
+               get_elt(elts, i + 2),
+               get_elt(elts, i + 3) );
       }
       break;
 
    case PIPE_PRIM_QUAD_STRIP:
       for (i = 0; i+3 < count; i += 2) {
-         if (flatfirst) {
-            QUAD( vcache,
-                  get_elt(elts, i + 0),
-                  get_elt(elts, i + 1),
-                  get_elt(elts, i + 3),
-                  get_elt(elts, i + 2) );
-         }
-         else {
-            QUAD( vcache,
-                  get_elt(elts, i + 2),
-                  get_elt(elts, i + 0),
-                  get_elt(elts, i + 1),
-                  get_elt(elts, i + 3) );
-         }
+         QUAD( vcache,
+               get_elt(elts, i + 2),
+               get_elt(elts, i + 0),
+               get_elt(elts, i + 1),
+               get_elt(elts, i + 3) );
       }
       break;
 
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index a6eb37d128..ece1ddde0c 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -191,7 +191,7 @@ static boolean load_input( struct aos_compilation *cp,
    case PIPE_FORMAT_R32G32B32A32_FLOAT:
       emit_load_R32G32B32A32(cp, dataXMM, src);
       break;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
       emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
       break;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 54b31befe6..e2c6788397 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -614,6 +614,22 @@ lp_build_max(struct lp_build_context *bld,
 
 
 /**
+ * Generate clamp(a, min, max)
+ * Do checks for special cases.
+ */
+LLVMValueRef
+lp_build_clamp(struct lp_build_context *bld,
+               LLVMValueRef a,
+               LLVMValueRef min,
+               LLVMValueRef max)
+{
+   a = lp_build_min(bld, a, max);
+   a = lp_build_max(bld, a, min);
+   return a;
+}
+
+
+/**
  * Generate abs(a)
  */
 LLVMValueRef
@@ -628,13 +644,26 @@ lp_build_abs(struct lp_build_context *bld,
 
    if(type.floating) {
       /* Mask out the sign bit */
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      unsigned long long absMask = ~(1ULL << (type.width - 1));
-      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
-      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-      a = LLVMBuildAnd(bld->builder, a, mask, "");
-      a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
-      return a;
+      if (type.length == 1) {
+         LLVMTypeRef int_type = LLVMIntType(type.width);
+         LLVMTypeRef float_type = LLVMFloatType();
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0);
+         a = LLVMBuildBitCast(bld->builder, a, int_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, float_type, "");
+         return a;
+      }
+      else {
+         /* vector of floats */
+         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
+         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+         return a;
+      }
    }
 
    if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -653,6 +682,14 @@ lp_build_abs(struct lp_build_context *bld,
 
 
 LLVMValueRef
+lp_build_negate(struct lp_build_context *bld,
+                LLVMValueRef a)
+{
+   return LLVMBuildNeg(bld->builder, a, "");
+}
+
+
+LLVMValueRef
 lp_build_sgn(struct lp_build_context *bld,
              LLVMValueRef a)
 {
@@ -693,6 +730,68 @@ lp_build_sgn(struct lp_build_context *bld,
 }
 
 
+/**
+ * Set the sign of float vector 'a' according to 'sign'.
+ * If sign==0, return abs(a).
+ * If sign==1, return -abs(a);
+ * Other values for sign produce undefined results.
+ */
+LLVMValueRef
+lp_build_set_sign(struct lp_build_context *bld,
+                  LLVMValueRef a, LLVMValueRef sign)
+{
+   const struct lp_type type = bld->type;
+   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
+   LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1);
+   LLVMValueRef mask = lp_build_int_const_scalar(type,
+                             ~((unsigned long long) 1 << (type.width - 1)));
+   LLVMValueRef val, res;
+
+   assert(type.floating);
+
+   /* val = reinterpret_cast<int>(a) */
+   val = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+   /* val = val & mask */
+   val = LLVMBuildAnd(bld->builder, val, mask, "");
+   /* sign = sign << shift */
+   sign = LLVMBuildShl(bld->builder, sign, shift, "");
+   /* res = val | sign */
+   res = LLVMBuildOr(bld->builder, val, sign, "");
+   /* res = reinterpret_cast<float>(res) */
+   res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+
+   return res;
+}
+
+
+/**
+ * Convert vector of (or scalar) int to vector of (or scalar) float.
+ */
+LLVMValueRef
+lp_build_int_to_float(struct lp_build_context *bld,
+                      LLVMValueRef a)
+{
+   const struct lp_type type = bld->type;
+
+   assert(type.floating);
+   /*assert(lp_check_value(type, a));*/
+
+   if (type.length == 1) {
+      LLVMTypeRef float_type = LLVMFloatType();
+      return LLVMBuildSIToFP(bld->builder, a, float_type, "");
+   }
+   else {
+      LLVMTypeRef vec_type = lp_build_vec_type(type);
+      /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/
+      LLVMValueRef res;
+      res = LLVMBuildSIToFP(bld->builder, a, vec_type, "");
+      return res;
+   }
+}
+
+
+
 enum lp_build_round_sse41_mode
 {
    LP_BUILD_ROUND_SSE41_NEAREST = 0,
@@ -818,20 +917,39 @@ lp_build_ceil(struct lp_build_context *bld,
 
 
 /**
+ * Return fractional part of 'a' computed as a - floor(f)
+ * Typically used in texture coord arithmetic.
+ */
+LLVMValueRef
+lp_build_fract(struct lp_build_context *bld,
+               LLVMValueRef a)
+{
+   assert(bld->type.floating);
+   return lp_build_sub(bld, a, lp_build_floor(bld, a));
+}
+
+
+/**
  * Convert to integer, through whichever rounding method that's fastest,
- * typically truncating to zero.
+ * typically truncating toward zero.
  */
 LLVMValueRef
 lp_build_itrunc(struct lp_build_context *bld,
                 LLVMValueRef a)
 {
    const struct lp_type type = bld->type;
-   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 
    assert(type.floating);
-   assert(lp_check_value(type, a));
 
-   return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   if (type.length == 1) {
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      return LLVMBuildFPTrunc(bld->builder, a, int_type, "");
+   }
+   else {
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      assert(lp_check_value(type, a));
+      return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   }
 }
 
 
@@ -844,6 +962,15 @@ lp_build_iround(struct lp_build_context *bld,
    LLVMValueRef res;
 
    assert(type.floating);
+
+   if (type.length == 1) {
+      /* scalar float to int */
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      /* XXX we want rounding here! */
+      res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+      return res;
+   }
+
    assert(lp_check_value(type, a));
 
    if(util_cpu_caps.has_sse4_1) {
@@ -1112,6 +1239,7 @@ lp_build_polynomial(struct lp_build_context *bld,
                     unsigned num_coeffs)
 {
    const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
    LLVMValueRef res = NULL;
    unsigned i;
 
@@ -1121,7 +1249,13 @@ lp_build_polynomial(struct lp_build_context *bld,
                    __FUNCTION__);
 
    for (i = num_coeffs; i--; ) {
-      LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
+      LLVMValueRef coeff;
+
+      if (type.length == 1)
+         coeff = LLVMConstReal(float_type, coeffs[i]);
+      else
+         coeff = lp_build_const_scalar(type, coeffs[i]);
+
       if(res)
          res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
       else
@@ -1315,11 +1449,87 @@ lp_build_log2_approx(struct lp_build_context *bld,
 }
 
 
+/** scalar version of above function */
+static void
+lp_build_float_log2_approx(struct lp_build_context *bld,
+                           LLVMValueRef x,
+                           LLVMValueRef *p_exp,
+                           LLVMValueRef *p_floor_log2,
+                           LLVMValueRef *p_log2)
+{
+   const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
+   LLVMTypeRef int_type = LLVMIntType(type.width);
+
+   LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0);
+   LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0);
+   LLVMValueRef one = LLVMConstBitCast(bld->one, int_type);
+
+   LLVMValueRef i = NULL;
+   LLVMValueRef exp = NULL;
+   LLVMValueRef mant = NULL;
+   LLVMValueRef logexp = NULL;
+   LLVMValueRef logmant = NULL;
+   LLVMValueRef res = NULL;
+
+   if(p_exp || p_floor_log2 || p_log2) {
+      /* TODO: optimize the constant case */
+      if(LLVMIsConstant(x))
+         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+                      __FUNCTION__);
+
+      assert(type.floating && type.width == 32);
+
+      i = LLVMBuildBitCast(bld->builder, x, int_type, "");
+
+      /* exp = (float) exponent(x) */
+      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+   }
+
+   if(p_floor_log2 || p_log2) {
+      LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0);
+      LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0);
+      logexp = LLVMBuildLShr(bld->builder, exp, c23, "");
+      logexp = LLVMBuildSub(bld->builder, logexp, c127, "");
+      logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, "");
+   }
+
+   if(p_log2) {
+      /* mant = (float) mantissa(x) */
+      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+      mant = LLVMBuildOr(bld->builder, mant, one, "");
+      mant = LLVMBuildBitCast(bld->builder, mant, float_type, "");
+
+      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+                                    Elements(lp_build_log2_polynomial));
+
+      /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+   }
+
+   if(p_exp)
+      *p_exp = exp;
+
+   if(p_floor_log2)
+      *p_floor_log2 = logexp;
+
+   if(p_log2)
+      *p_log2 = res;
+}
+
+
 LLVMValueRef
 lp_build_log2(struct lp_build_context *bld,
               LLVMValueRef x)
 {
    LLVMValueRef res;
-   lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   if (bld->type.length == 1) {
+      lp_build_float_log2_approx(bld, x, NULL, NULL, &res);
+   }
+   else {
+      lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   }
    return res;
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 62be4b9aee..55385e3a66 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -107,14 +107,32 @@ lp_build_max(struct lp_build_context *bld,
              LLVMValueRef b);
 
 LLVMValueRef
+lp_build_clamp(struct lp_build_context *bld,
+               LLVMValueRef a,
+               LLVMValueRef min,
+               LLVMValueRef max);
+
+LLVMValueRef
 lp_build_abs(struct lp_build_context *bld,
              LLVMValueRef a);
 
 LLVMValueRef
+lp_build_negate(struct lp_build_context *bld,
+                LLVMValueRef a);
+
+LLVMValueRef
 lp_build_sgn(struct lp_build_context *bld,
              LLVMValueRef a);
 
 LLVMValueRef
+lp_build_set_sign(struct lp_build_context *bld,
+                  LLVMValueRef a, LLVMValueRef sign);
+
+LLVMValueRef
+lp_build_int_to_float(struct lp_build_context *bld,
+                      LLVMValueRef a);
+
+LLVMValueRef
 lp_build_round(struct lp_build_context *bld,
                LLVMValueRef a);
 
@@ -131,6 +149,10 @@ lp_build_trunc(struct lp_build_context *bld,
                LLVMValueRef a);
 
 LLVMValueRef
+lp_build_fract(struct lp_build_context *bld,
+               LLVMValueRef a);
+
+LLVMValueRef
 lp_build_ifloor(struct lp_build_context *bld,
                 LLVMValueRef a);
 LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
index c8eaa8c394..53447757e8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -264,10 +264,16 @@ lp_build_one(struct lp_type type)
    for(i = 1; i < type.length; ++i)
       elems[i] = elems[0];
 
-   return LLVMConstVector(elems, type.length);
+   if (type.length == 1)
+      return elems[0];
+   else
+      return LLVMConstVector(elems, type.length);
 }
                
 
+/**
+ * Build constant-valued vector from a scalar value.
+ */
 LLVMValueRef
 lp_build_const_scalar(struct lp_type type,
                       double val)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c
index d438c0e63d..f08f8eb6d8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_depth.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.c
@@ -171,7 +171,7 @@ lp_build_depth_test(LLVMBuilderRef builder,
       unsigned padding_right;
       unsigned chan;
 
-      assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+      assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
       assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
       assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits);
       assert(format_desc->channel[z_swizzle].normalized);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index dfa080b853..a07f7418f2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -70,7 +70,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
    unsigned i;
 
    /* FIXME: Support more formats */
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
    assert(desc->block.width == 1);
    assert(desc->block.height == 1);
    assert(desc->block.bits <= 32);
@@ -189,7 +189,7 @@ lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
    lp_build_context_init(&bld, builder, type);
 
    /* FIXME: Support more formats */
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
    assert(desc->block.width == 1);
    assert(desc->block.height == 1);
    assert(desc->block.bits <= 32);
@@ -303,7 +303,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
    unsigned shift;
    unsigned i, j;
 
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
    assert(desc->block.width == 1);
    assert(desc->block.height == 1);
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 64151d169d..abb27e4c32 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -92,9 +92,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
    unsigned chan;
 
    /* FIXME: Support more formats */
-   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH ||
-          (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY &&
-           format_desc->block.bits == format_desc->channel[0].size));
+   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
    assert(format_desc->block.width == 1);
    assert(format_desc->block.height == 1);
    assert(format_desc->block.bits <= 32);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_init.cpp
index 6e79438ead..067397a520 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.cpp
@@ -26,39 +26,34 @@
  **************************************************************************/
 
 
-#include "pipe/p_config.h"
+#include <llvm/Config/config.h>
+#include <llvm/Target/TargetSelect.h>
+#include <llvm/Target/TargetOptions.h>
 
-#include "lp_bld_misc.h"
+#include "pipe/p_config.h"
 
+#include "lp_bld_init.h"
 
-#ifndef LLVM_NATIVE_ARCH
 
-namespace llvm {
-   extern void LinkInJIT();
-}
+extern "C" void LLVMLinkInJIT();
 
 
-void
-LLVMLinkInJIT(void)
+extern "C" void
+lp_build_init(void)
 {
-   llvm::LinkInJIT();
-}
-
-
-extern "C" int X86TargetMachineModule;
-
-
-int
-LLVMInitializeNativeTarget(void)
-{
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-   X86TargetMachineModule = 1;
+#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86)
+   /*
+    * This is mis-detected on some hardware / software combinations.
+    */
+   llvm::StackAlignment = 4;
+   llvm::RealignStack = true;
 #endif
-   return 0;
-}
 
+   /* Same as LLVMInitializeNativeTarget(); */
+   llvm::InitializeNativeTarget();
 
-#endif
+   LLVMLinkInJIT();
+}
 
 
 /* 
@@ -69,7 +64,6 @@ LLVMInitializeNativeTarget(void)
  */
 #if defined(_MSC_VER) && defined(_DEBUG)
 #include <crtdefs.h>
-extern "C" {
-   _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {}
-}
+extern "C" _CRTIMP void __cdecl
+_invalid_parameter_noinfo(void) {}
 #endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index 0e787e0b9c..07f50d1c43 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -26,26 +26,17 @@
  **************************************************************************/
 
 
-#ifndef LP_BLD_MISC_H
-#define LP_BLD_MISC_H
+#ifndef LP_BLD_INIT_H
+#define LP_BLD_INIT_H
 
 
-#include "llvm/Config/config.h"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 
-#ifndef LLVM_NATIVE_ARCH
-
 void
-LLVMLinkInJIT(void);
-
-int
-LLVMInitializeNativeTarget(void);
-
-#endif /* !LLVM_NATIVE_ARCH */
+lp_build_init(void);
 
 
 #ifdef __cplusplus
@@ -53,4 +44,4 @@ LLVMInitializeNativeTarget(void);
 #endif
 
 
-#endif /* !LP_BLD_MISC_H */
+#endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 41ac81b744..7c585fda78 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -45,6 +45,7 @@
 /**
  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
  * \param func  one of PIPE_FUNC_x
+ * The result values will be 0 for false or ~0 for true.
  */
 LLVMValueRef
 lp_build_compare(LLVMBuilderRef builder,
@@ -197,7 +198,7 @@ lp_build_compare(LLVMBuilderRef builder,
 
          return res;
       }
-   }
+   } /* if (type.width * type.length == 128) */
 #endif
 
    if(type.floating) {
@@ -237,20 +238,25 @@ lp_build_compare(LLVMBuilderRef builder,
       cond = LLVMBuildFCmp(builder, op, a, b, "");
       res = LLVMBuildSelect(builder, cond, ones, zeros, "");
 #else
-      debug_printf("%s: warning: using slow element-wise vector comparison\n",
-                   __FUNCTION__);
       res = LLVMGetUndef(int_vec_type);
-      for(i = 0; i < type.length; ++i) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-         cond = LLVMBuildFCmp(builder, op,
-                              LLVMBuildExtractElement(builder, a, index, ""),
-                              LLVMBuildExtractElement(builder, b, index, ""),
-                              "");
-         cond = LLVMBuildSelect(builder, cond,
-                                LLVMConstExtractElement(ones, index),
-                                LLVMConstExtractElement(zeros, index),
-                                "");
-         res = LLVMBuildInsertElement(builder, res, cond, index, "");
+      if (type.length == 1) {
+         res = LLVMBuildFCmp(builder, op, a, b, "");
+      }
+      else {
+         debug_printf("%s: warning: using slow element-wise float"
+                      " vector comparison\n", __FUNCTION__);
+         for (i = 0; i < type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            cond = LLVMBuildFCmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
       }
 #endif
    }
@@ -285,20 +291,26 @@ lp_build_compare(LLVMBuilderRef builder,
       cond = LLVMBuildICmp(builder, op, a, b, "");
       res = LLVMBuildSelect(builder, cond, ones, zeros, "");
 #else
-      debug_printf("%s: warning: using slow element-wise int vector comparison\n",
-                   __FUNCTION__);
       res = LLVMGetUndef(int_vec_type);
-      for(i = 0; i < type.length; ++i) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-         cond = LLVMBuildICmp(builder, op,
-                              LLVMBuildExtractElement(builder, a, index, ""),
-                              LLVMBuildExtractElement(builder, b, index, ""),
-                              "");
-         cond = LLVMBuildSelect(builder, cond,
-                                LLVMConstExtractElement(ones, index),
-                                LLVMConstExtractElement(zeros, index),
-                                "");
-         res = LLVMBuildInsertElement(builder, res, cond, index, "");
+      if (type.length == 1) {
+         res = LLVMBuildICmp(builder, op, a, b, "");
+      }
+      else {
+         debug_printf("%s: warning: using slow element-wise int"
+                      " vector comparison\n", __FUNCTION__);
+
+         for(i = 0; i < type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            cond = LLVMBuildICmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
       }
 #endif
    }
@@ -311,6 +323,7 @@ lp_build_compare(LLVMBuilderRef builder,
 /**
  * Build code to compare two values 'a' and 'b' using the given func.
  * \param func  one of PIPE_FUNC_x
+ * The result values will be 0 for false or ~0 for true.
  */
 LLVMValueRef
 lp_build_cmp(struct lp_build_context *bld,
@@ -322,6 +335,9 @@ lp_build_cmp(struct lp_build_context *bld,
 }
 
 
+/**
+ * Return mask ? a : b;
+ */
 LLVMValueRef
 lp_build_select(struct lp_build_context *bld,
                 LLVMValueRef mask,
@@ -334,26 +350,31 @@ lp_build_select(struct lp_build_context *bld,
    if(a == b)
       return a;
 
-   if(type.floating) {
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+   if (type.length == 1) {
+      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
    }
+   else {
+      if(type.floating) {
+         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+         b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+      }
 
-   a = LLVMBuildAnd(bld->builder, a, mask, "");
+      a = LLVMBuildAnd(bld->builder, a, mask, "");
 
-   /* This often gets translated to PANDN, but sometimes the NOT is
-    * pre-computed and stored in another constant. The best strategy depends
-    * on available registers, so it is not a big deal -- hopefully LLVM does
-    * the right decision attending the rest of the program.
-    */
-   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+      /* This often gets translated to PANDN, but sometimes the NOT is
+       * pre-computed and stored in another constant. The best strategy depends
+       * on available registers, so it is not a big deal -- hopefully LLVM does
+       * the right decision attending the rest of the program.
+       */
+      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
 
-   res = LLVMBuildOr(bld->builder, a, b, "");
+      res = LLVMBuildOr(bld->builder, a, b, "");
 
-   if(type.floating) {
-      LLVMTypeRef vec_type = lp_build_vec_type(type);
-      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+      if(type.floating) {
+         LLVMTypeRef vec_type = lp_build_vec_type(type);
+         res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+      }
    }
 
    return res;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index bc360ad77a..4c61d10749 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -256,7 +256,9 @@ lp_build_pack2(LLVMBuilderRef builder,
                LLVMValueRef lo,
                LLVMValueRef hi)
 {
+#if !(HAVE_LLVM >= 0x0207)
    LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
+#endif
    LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
    LLVMValueRef shuffle;
    LLVMValueRef res;
@@ -272,11 +274,14 @@ lp_build_pack2(LLVMBuilderRef builder,
       switch(src_type.width) {
       case 32:
          if(dst_type.sign) {
+#if HAVE_LLVM >= 0x0207
+            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", dst_vec_type, lo, hi);
+#else
             res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
+#endif
          }
          else {
             if (util_cpu_caps.has_sse4_1) {
-               /* PACKUSDW is the only instrinsic with a consistent signature */
                return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
             }
             else {
@@ -288,9 +293,17 @@ lp_build_pack2(LLVMBuilderRef builder,
 
       case 16:
          if(dst_type.sign)
+#if HAVE_LLVM >= 0x0207
+            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", dst_vec_type, lo, hi);
+#else
             res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
+#endif
          else
+#if HAVE_LLVM >= 0x0207
+            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", dst_vec_type, lo, hi);
+#else
             res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
+#endif
          break;
 
       default:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index a133b56ac5..311c9f1b9e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -44,6 +44,11 @@
 #include "lp_bld_sample.h"
 
 
+/**
+ * Initialize lp_sampler_static_state object with the gallium sampler
+ * and texture state.
+ * The former is considered to be static and the later dynamic.
+ */
 void
 lp_sampler_static_state(struct lp_sampler_static_state *state,
                         const struct pipe_texture *texture,
@@ -57,6 +62,18 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
    if(!sampler)
       return;
 
+   /*
+    * We don't copy sampler state over unless it is actually enabled, to avoid
+    * spurious recompiles, as the sampler static state is part of the shader
+    * key.
+    *
+    * Ideally the state tracker or cso_cache module would make all state
+    * canonical, but until that happens it's better to be safe than sorry here.
+    *
+    * XXX: Actually there's much more than can be done here, especially
+    * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
+    */
+
    state->format            = texture->format;
    state->target            = texture->target;
    state->pot_width         = util_is_pot(texture->width0);
@@ -69,11 +86,20 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
    state->min_img_filter    = sampler->min_img_filter;
    state->min_mip_filter    = sampler->min_mip_filter;
    state->mag_img_filter    = sampler->mag_img_filter;
+
    state->compare_mode      = sampler->compare_mode;
-   if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
-      state->compare_func      = sampler->compare_func;
+   if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
+      state->compare_func   = sampler->compare_func;
    }
+
    state->normalized_coords = sampler->normalized_coords;
+   state->lod_bias          = sampler->lod_bias;
+   state->min_lod           = sampler->min_lod;
+   state->max_lod           = sampler->max_lod;
+   state->border_color[0]   = sampler->border_color[0];
+   state->border_color[1]   = sampler->border_color[1];
+   state->border_color[2]   = sampler->border_color[2];
+   state->border_color[3]   = sampler->border_color[3];
 }
 
 
@@ -136,8 +162,7 @@ lp_build_sample_offset(struct lp_build_context *bld,
                        const struct util_format_description *format_desc,
                        LLVMValueRef x,
                        LLVMValueRef y,
-                       LLVMValueRef y_stride,
-                       LLVMValueRef data_ptr)
+                       LLVMValueRef y_stride)
 {
    LLVMValueRef x_stride;
    LLVMValueRef offset;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 39edcf13d1..68db91d6fd 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -70,6 +70,8 @@ struct lp_sampler_static_state
    unsigned compare_mode:1;
    unsigned compare_func:3;
    unsigned normalized_coords:1;
+   float lod_bias, min_lod, max_lod;
+   float border_color[4];
 };
 
 
@@ -98,6 +100,18 @@ struct lp_sampler_dynamic_state
               LLVMBuilderRef builder,
               unsigned unit);
 
+   /** Obtain the base texture depth. */
+   LLVMValueRef
+   (*depth)( struct lp_sampler_dynamic_state *state,
+             LLVMBuilderRef builder,
+             unsigned unit);
+
+   /** Obtain the number of mipmap levels (minus one). */
+   LLVMValueRef
+   (*last_level)( struct lp_sampler_dynamic_state *state,
+                  LLVMBuilderRef builder,
+                  unsigned unit);
+
    LLVMValueRef
    (*stride)( struct lp_sampler_dynamic_state *state,
               LLVMBuilderRef builder,
@@ -134,8 +148,7 @@ lp_build_sample_offset(struct lp_build_context *bld,
                        const struct util_format_description *format_desc,
                        LLVMValueRef x,
                        LLVMValueRef y,
-                       LLVMValueRef y_stride,
-                       LLVMValueRef data_ptr);
+                       LLVMValueRef y_stride);
 
 
 void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index e268862282..a965d394f4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -65,11 +65,23 @@ struct lp_build_sample_context
 
    const struct util_format_description *format_desc;
 
+   /** regular scalar float type */
+   struct lp_type float_type;
+   struct lp_build_context float_bld;
+
+   /** regular scalar float type */
+   struct lp_type int_type;
+   struct lp_build_context int_bld;
+
    /** Incoming coordinates type and build context */
    struct lp_type coord_type;
    struct lp_build_context coord_bld;
 
-   /** Integer coordinates */
+   /** Unsigned integer coordinates */
+   struct lp_type uint_coord_type;
+   struct lp_build_context uint_coord_bld;
+
+   /** Signed integer coordinates */
    struct lp_type int_coord_type;
    struct lp_build_context int_coord_bld;
 
@@ -79,36 +91,145 @@ struct lp_build_sample_context
 };
 
 
+/**
+ * Does the given texture wrap mode allow sampling the texture border color?
+ * XXX maybe move this into gallium util code.
+ */
+static boolean
+wrap_mode_uses_border_color(unsigned mode)
+{
+   switch (mode) {
+   case PIPE_TEX_WRAP_REPEAT:
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+      return FALSE;
+   case PIPE_TEX_WRAP_CLAMP:
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      return TRUE;
+   default:
+      assert(0 && "unexpected wrap mode");
+      return FALSE;
+   }
+}
+
+
+static LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+                          LLVMValueRef data_array, LLVMValueRef level)
+{
+   LLVMValueRef indexes[2], data_ptr;
+   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   indexes[1] = level;
+   data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+   data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
+   return data_ptr;
+}
+
+
+static LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+                                LLVMValueRef data_array, int level)
+{
+   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+   return lp_build_get_mipmap_level(bld, data_array, lvl);
+}
+
+
+/**
+ * Gen code to fetch a texel from a texture at int coords (x, y).
+ * The result, texel, will be:
+ *   texel[0] = red values
+ *   texel[1] = green values
+ *   texel[2] = blue values
+ *   texel[3] = alpha values
+ */
 static void
 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+                          LLVMValueRef width,
+                          LLVMValueRef height,
                           LLVMValueRef x,
                           LLVMValueRef y,
                           LLVMValueRef y_stride,
                           LLVMValueRef data_ptr,
                           LLVMValueRef *texel)
 {
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef offset;
    LLVMValueRef packed;
+   LLVMValueRef use_border = NULL;
+
+   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
+   if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
+      LLVMValueRef b1, b2;
+      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
+      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
+      use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
+   }
+
+   if (wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
+      LLVMValueRef b1, b2;
+      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
+      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
+      if (use_border) {
+         use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
+         use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
+      }
+      else {
+         use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
+      }
+   }
 
-   offset = lp_build_sample_offset(&bld->int_coord_bld,
+   /*
+    * Note: if we find an app which frequently samples the texture border
+    * we might want to implement a true conditional here to avoid sampling
+    * the texture whenever possible (since that's quite a bit of code).
+    * Ex:
+    *   if (use_border) {
+    *      texel = border_color;
+    *   }
+    *   else {
+    *      texel = sample_texture(coord);
+    *   }
+    * As it is now, we always sample the texture, then selectively replace
+    * the texel color results with the border color.
+    */
+
+   /* convert x,y coords to linear offset from start of texture, in bytes */
+   offset = lp_build_sample_offset(&bld->uint_coord_bld,
                                    bld->format_desc,
-                                   x, y, y_stride,
-                                   data_ptr);
+                                   x, y, y_stride);
 
    assert(bld->format_desc->block.width == 1);
    assert(bld->format_desc->block.height == 1);
    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 
+   /* gather the texels from the texture */
    packed = lp_build_gather(bld->builder,
                             bld->texel_type.length,
                             bld->format_desc->block.bits,
                             bld->texel_type.width,
                             data_ptr, offset);
 
+   /* convert texels to float rgba */
    lp_build_unpack_rgba_soa(bld->builder,
                             bld->format_desc,
                             bld->texel_type,
                             packed, texel);
+
+   if (use_border) {
+      /* select texel color or border color depending on use_border */
+      int chan;
+      for (chan = 0; chan < 4; chan++) {
+         LLVMValueRef border_chan =
+            lp_build_const_scalar(bld->texel_type,
+                                  bld->static_state->border_color[chan]);
+         texel[chan] = lp_build_select(&bld->texel_bld, use_border,
+                                       border_chan, texel[chan]);
+      }
+   }
 }
 
 
@@ -117,19 +238,22 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
                        LLVMValueRef x,
                        LLVMValueRef y,
                        LLVMValueRef y_stride,
-                       LLVMValueRef data_ptr)
+                       LLVMValueRef data_array)
 {
    LLVMValueRef offset;
+   LLVMValueRef data_ptr;
 
-   offset = lp_build_sample_offset(&bld->int_coord_bld,
+   offset = lp_build_sample_offset(&bld->uint_coord_bld,
                                    bld->format_desc,
-                                   x, y, y_stride,
-                                   data_ptr);
+                                   x, y, y_stride);
 
    assert(bld->format_desc->block.width == 1);
    assert(bld->format_desc->block.height == 1);
    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
    return lp_build_gather(bld->builder,
                           bld->texel_type.length,
                           bld->format_desc->block.bits,
@@ -138,17 +262,77 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
 }
 
 
+/**
+ * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
+ */
+static LLVMValueRef
+lp_build_coord_mirror(struct lp_build_sample_context *bld,
+                      LLVMValueRef coord)
+{
+   struct lp_build_context *coord_bld = &bld->coord_bld;
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   LLVMValueRef fract, flr, isOdd;
+
+   /* fract = coord - floor(coord) */
+   fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
+
+   /* flr = ifloor(coord); */
+   flr = lp_build_ifloor(coord_bld, coord);
+
+   /* isOdd = flr & 1 */
+   isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
+
+   /* make coord positive or negative depending on isOdd */
+   coord = lp_build_set_sign(coord_bld, fract, isOdd);
+
+   /* convert isOdd to float */
+   isOdd = lp_build_int_to_float(coord_bld, isOdd);
+
+   /* add isOdd to coord */
+   coord = lp_build_add(coord_bld, coord, isOdd);
+
+   return coord;
+}
+
+
+/**
+ * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
+ * Return whether the given mode is supported by that function.
+ */
+static boolean
+is_simple_wrap_mode(unsigned mode)
+{
+   switch (mode) {
+   case PIPE_TEX_WRAP_REPEAT:
+   case PIPE_TEX_WRAP_CLAMP:
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      return TRUE;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+   default:
+      return FALSE;
+   }
+}
+
+
+/**
+ * Build LLVM code for texture wrap mode, for scaled integer texcoords.
+ * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length  the texture size along one dimension
+ * \param is_pot  if TRUE, length is a power of two
+ * \param wrap_mode  one of PIPE_TEX_WRAP_x
+ */
 static LLVMValueRef
-lp_build_sample_wrap(struct lp_build_sample_context *bld,
-                     LLVMValueRef coord,
-                     LLVMValueRef length,
-                     boolean is_pot,
-                     unsigned wrap_mode)
+lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
+                         LLVMValueRef coord,
+                         LLVMValueRef length,
+                         boolean is_pot,
+                         unsigned wrap_mode)
 {
+   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef length_minus_one;
 
-   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 
    switch(wrap_mode) {
    case PIPE_TEX_WRAP_REPEAT:
@@ -161,12 +345,12 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
       break;
 
    case PIPE_TEX_WRAP_CLAMP:
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
       break;
 
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
@@ -174,8 +358,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
       /* FIXME */
       _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
                     util_dump_tex_wrap(wrap_mode, TRUE));
-      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
-      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+      coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
+      coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
       break;
 
    default:
@@ -186,6 +370,583 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
 }
 
 
+/**
+ * Build LLVM code for texture wrap mode for linear filtering.
+ * \param x0_out  returns first integer texcoord
+ * \param x1_out  returns second integer texcoord
+ * \param weight_out  returns linear interpolation weight
+ */
+static void
+lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
+                            LLVMValueRef coord,
+                            LLVMValueRef length,
+                            boolean is_pot,
+                            unsigned wrap_mode,
+                            LLVMValueRef *x0_out,
+                            LLVMValueRef *x1_out,
+                            LLVMValueRef *weight_out)
+{
+   struct lp_build_context *coord_bld = &bld->coord_bld;
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
+   LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
+   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
+   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+   LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
+   LLVMValueRef coord0, coord1, weight;
+
+   switch(wrap_mode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      /* mul by size and subtract 0.5 */
+      coord = lp_build_mul(coord_bld, coord, length_f);
+      coord = lp_build_sub(coord_bld, coord, half);
+      /* convert to int */
+      coord0 = lp_build_ifloor(coord_bld, coord);
+      coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
+      /* compute lerp weight */
+      weight = lp_build_fract(coord_bld, coord);
+      /* repeat wrap */
+      if (is_pot) {
+         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
+         coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
+      }
+      else {
+         /* Signed remainder won't give the right results for negative
+          * dividends but unsigned remainder does.*/
+         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
+         coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
+      }
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP:
+      if (bld->static_state->normalized_coords) {
+         coord = lp_build_mul(coord_bld, coord, length_f);
+      }
+      weight = lp_build_fract(coord_bld, coord);
+      coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
+                              length_f_minus_one);
+      coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
+      coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
+                              length_f_minus_one);
+      coord0 = lp_build_ifloor(coord_bld, coord0);
+      coord1 = lp_build_ifloor(coord_bld, coord1);
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      if (bld->static_state->normalized_coords) {
+         /* clamp to [0,1] */
+         coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
+         /* mul by tex size and subtract 0.5 */
+         coord = lp_build_mul(coord_bld, coord, length_f);
+         coord = lp_build_sub(coord_bld, coord, half);
+      }
+      else {
+         LLVMValueRef min, max;
+         /* clamp to [0.5, length - 0.5] */
+         min = lp_build_const_scalar(coord_bld->type, 0.5F);
+         max = lp_build_sub(coord_bld, length_f, min);
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+      }
+      /* compute lerp weight */
+      weight = lp_build_fract(coord_bld, coord);
+      /* coord0 = floor(coord); */
+      coord0 = lp_build_ifloor(coord_bld, coord);
+      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+      /* coord0 = max(coord0, 0) */
+      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
+      /* coord1 = min(coord1, length-1) */
+      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      {
+         LLVMValueRef min, max;
+         if (bld->static_state->normalized_coords) {
+            /* min = -1.0 / (2 * length) = -0.5 / length */
+            min = lp_build_mul(coord_bld,
+                               lp_build_const_scalar(coord_bld->type, -0.5F),
+                               lp_build_rcp(coord_bld, length_f));
+            /* max = 1.0 - min */
+            max = lp_build_sub(coord_bld, coord_bld->one, min);
+            /* coord = clamp(coord, min, max) */
+            coord = lp_build_clamp(coord_bld, coord, min, max);
+            /* scale coord to length (and sub 0.5?) */
+            coord = lp_build_mul(coord_bld, coord, length_f);
+            coord = lp_build_sub(coord_bld, coord, half);
+         }
+         else {
+            /* clamp to [-0.5, length + 0.5] */
+            min = lp_build_const_scalar(coord_bld->type, -0.5F);
+            max = lp_build_sub(coord_bld, length_f, min);
+            coord = lp_build_clamp(coord_bld, coord, min, max);
+            coord = lp_build_sub(coord_bld, coord, half);
+         }
+         /* compute lerp weight */
+         weight = lp_build_fract(coord_bld, coord);
+         /* convert to int */
+         coord0 = lp_build_ifloor(coord_bld, coord);
+         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+      }
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      /* compute mirror function */
+      coord = lp_build_coord_mirror(bld, coord);
+
+      /* scale coord to length */
+      coord = lp_build_mul(coord_bld, coord, length_f);
+      coord = lp_build_sub(coord_bld, coord, half);
+
+      /* compute lerp weight */
+      weight = lp_build_fract(coord_bld, coord);
+
+      /* convert to int coords */
+      coord0 = lp_build_ifloor(coord_bld, coord);
+      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+
+      /* coord0 = max(coord0, 0) */
+      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
+      /* coord1 = min(coord1, length-1) */
+      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+      {
+         LLVMValueRef min, max;
+         /* min = 1.0 / (2 * length) */
+         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+         /* max = 1.0 - min */
+         max = lp_build_sub(coord_bld, coord_bld->one, min);
+
+         coord = lp_build_abs(coord_bld, coord);
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+         coord = lp_build_mul(coord_bld, coord, length_f);
+         if(0)coord = lp_build_sub(coord_bld, coord, half);
+         weight = lp_build_fract(coord_bld, coord);
+         coord0 = lp_build_ifloor(coord_bld, coord);
+         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+      }
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+      {
+         LLVMValueRef min, max;
+         /* min = 1.0 / (2 * length) */
+         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+         /* max = 1.0 - min */
+         max = lp_build_sub(coord_bld, coord_bld->one, min);
+
+         coord = lp_build_abs(coord_bld, coord);
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+         coord = lp_build_mul(coord_bld, coord, length_f);
+         coord = lp_build_sub(coord_bld, coord, half);
+         weight = lp_build_fract(coord_bld, coord);
+         coord0 = lp_build_ifloor(coord_bld, coord);
+         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+      }
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      {
+         LLVMValueRef min, max;
+         /* min = -1.0 / (2 * length) = -0.5 / length */
+         min = lp_build_mul(coord_bld,
+                            lp_build_const_scalar(coord_bld->type, -0.5F),
+                            lp_build_rcp(coord_bld, length_f));
+         /* max = 1.0 - min */
+         max = lp_build_sub(coord_bld, coord_bld->one, min);
+
+         coord = lp_build_abs(coord_bld, coord);
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+         coord = lp_build_mul(coord_bld, coord, length_f);
+         coord = lp_build_sub(coord_bld, coord, half);
+         weight = lp_build_fract(coord_bld, coord);
+         coord0 = lp_build_ifloor(coord_bld, coord);
+         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+      }
+      break;
+
+   default:
+      assert(0);
+      coord0 = NULL;
+      coord1 = NULL;
+      weight = NULL;
+   }
+
+   *x0_out = coord0;
+   *x1_out = coord1;
+   *weight_out = weight;
+}
+
+
+/**
+ * Build LLVM code for texture wrap mode for nearest filtering.
+ * \param coord  the incoming texcoord (nominally in [0,1])
+ * \param length  the texture size along one dimension, as int
+ * \param is_pot  if TRUE, length is a power of two
+ * \param wrap_mode  one of PIPE_TEX_WRAP_x
+ */
+static LLVMValueRef
+lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
+                             LLVMValueRef coord,
+                             LLVMValueRef length,
+                             boolean is_pot,
+                             unsigned wrap_mode)
+{
+   struct lp_build_context *coord_bld = &bld->coord_bld;
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
+   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
+   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+   LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
+   LLVMValueRef icoord;
+   
+   switch(wrap_mode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      coord = lp_build_mul(coord_bld, coord, length_f);
+      icoord = lp_build_ifloor(coord_bld, coord);
+      if (is_pot)
+         icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
+      else
+         /* Signed remainder won't give the right results for negative
+          * dividends but unsigned remainder does.*/
+         icoord = LLVMBuildURem(bld->builder, icoord, length, "");
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP:
+      /* mul by size */
+      if (bld->static_state->normalized_coords) {
+         coord = lp_build_mul(coord_bld, coord, length_f);
+      }
+      /* floor */
+      icoord = lp_build_ifloor(coord_bld, coord);
+      /* clamp to [0, size-1].  Note: int coord builder type */
+      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
+                              length_minus_one);
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      {
+         LLVMValueRef min, max;
+         if (bld->static_state->normalized_coords) {
+            /* min = 1.0 / (2 * length) */
+            min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+            /* max = length - min */
+            max = lp_build_sub(coord_bld, length_f, min);
+            /* scale coord to length */
+            coord = lp_build_mul(coord_bld, coord, length_f);
+         }
+         else {
+            /* clamp to [0.5, length - 0.5] */
+            min = lp_build_const_scalar(coord_bld->type, 0.5F);
+            max = lp_build_sub(coord_bld, length_f, min);
+         }
+         /* coord = clamp(coord, min, max) */
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+         icoord = lp_build_ifloor(coord_bld, coord);
+      }
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
+      {
+         LLVMValueRef min, max;
+         if (bld->static_state->normalized_coords) {
+            /* min = -1.0 / (2 * length) = -0.5 / length */
+            min = lp_build_mul(coord_bld,
+                               lp_build_const_scalar(coord_bld->type, -0.5F),
+                               lp_build_rcp(coord_bld, length_f));
+            /* max = length - min */
+            max = lp_build_sub(coord_bld, length_f, min);
+            /* scale coord to length */
+            coord = lp_build_mul(coord_bld, coord, length_f);
+         }
+         else {
+            /* clamp to [-0.5, length + 0.5] */
+            min = lp_build_const_scalar(coord_bld->type, -0.5F);
+            max = lp_build_sub(coord_bld, length_f, min);
+         }
+         /* coord = clamp(coord, min, max) */
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+         icoord = lp_build_ifloor(coord_bld, coord);
+      }
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      {
+         LLVMValueRef min, max;
+         /* min = 1.0 / (2 * length) */
+         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+         /* max = length - min */
+         max = lp_build_sub(coord_bld, length_f, min);
+
+         /* compute mirror function */
+         coord = lp_build_coord_mirror(bld, coord);
+
+         /* scale coord to length */
+         coord = lp_build_mul(coord_bld, coord, length_f);
+
+         /* coord = clamp(coord, min, max) */
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+         icoord = lp_build_ifloor(coord_bld, coord);
+      }
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+      coord = lp_build_abs(coord_bld, coord);
+      coord = lp_build_mul(coord_bld, coord, length_f);
+      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
+      icoord = lp_build_ifloor(coord_bld, coord);
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+      {
+         LLVMValueRef min, max;
+         /* min = 1.0 / (2 * length) */
+         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+         /* max = length - min */
+         max = lp_build_sub(coord_bld, length_f, min);
+
+         coord = lp_build_abs(coord_bld, coord);
+         coord = lp_build_mul(coord_bld, coord, length_f);
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+         icoord = lp_build_ifloor(coord_bld, coord);
+      }
+      break;
+
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      {
+         LLVMValueRef min, max;
+         /* min = 1.0 / (2 * length) */
+         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+         min = lp_build_negate(coord_bld, min);
+         /* max = length - min */
+         max = lp_build_sub(coord_bld, length_f, min);
+
+         coord = lp_build_abs(coord_bld, coord);
+         coord = lp_build_mul(coord_bld, coord, length_f);
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+         icoord = lp_build_ifloor(coord_bld, coord);
+      }
+      break;
+
+   default:
+      assert(0);
+      icoord = NULL;
+   }
+
+   return icoord;
+}
+
+
+/**
+ * Codegen equivalent for u_minify().
+ * Return max(1, base_size >> level);
+ */
+static LLVMValueRef
+lp_build_minify(struct lp_build_sample_context *bld,
+                LLVMValueRef base_size,
+                LLVMValueRef level)
+{
+   LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+   size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
+   return size;
+}
+
+
+static int
+texture_dims(enum pipe_texture_target tex)
+{
+   switch (tex) {
+   case PIPE_TEXTURE_1D:
+      return 1;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_CUBE:
+      return 2;
+   case PIPE_TEXTURE_3D:
+      return 3;
+   default:
+      assert(0 && "bad texture target in texture_dims()");
+      return 2;
+   }
+}
+
+
+/**
+ * Generate code to compute texture level of detail (lambda).
+ * \param s  vector of texcoord s values
+ * \param t  vector of texcoord t values
+ * \param r  vector of texcoord r values
+ * \param width  scalar int texture width
+ * \param height  scalar int texture height
+ * \param depth  scalar int texture depth
+ */
+static LLVMValueRef
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+                      LLVMValueRef s,
+                      LLVMValueRef t,
+                      LLVMValueRef r,
+                      LLVMValueRef width,
+                      LLVMValueRef height,
+                      LLVMValueRef depth)
+
+{
+   const int dims = texture_dims(bld->static_state->target);
+   struct lp_build_context *coord_bld = &bld->coord_bld;
+   struct lp_build_context *float_bld = &bld->float_bld;
+   LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
+   LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
+   LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
+
+   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+
+   LLVMValueRef s0, s1, s2;
+   LLVMValueRef t0, t1, t2;
+   LLVMValueRef r0, r1, r2;
+   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+   LLVMValueRef rho, lod;
+
+   /*
+    * dsdx = abs(s[1] - s[0]);
+    * dsdy = abs(s[2] - s[0]);
+    * dtdx = abs(t[1] - t[0]);
+    * dtdy = abs(t[2] - t[0]);
+    * drdx = abs(r[1] - r[0]);
+    * drdy = abs(r[2] - r[0]);
+    * XXX we're assuming a four-element quad in 2x2 layout here.
+    */
+   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
+   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
+   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
+   dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
+   dsdx = lp_build_abs(float_bld, dsdx);
+   dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
+   dsdy = lp_build_abs(float_bld, dsdy);
+   if (dims > 1) {
+      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
+      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
+      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
+      dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
+      dtdx = lp_build_abs(float_bld, dtdx);
+      dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
+      dtdy = lp_build_abs(float_bld, dtdy);
+      if (dims > 2) {
+         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
+         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
+         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
+         drdx = LLVMBuildSub(bld->builder, r1, r0, "");
+         drdx = lp_build_abs(float_bld, drdx);
+         drdy = LLVMBuildSub(bld->builder, r2, r0, "");
+         drdy = lp_build_abs(float_bld, drdy);
+      }
+   }
+
+   /* Compute rho = max of all partial derivatives scaled by texture size.
+    * XXX this could be vectorized somewhat
+    */
+   rho = LLVMBuildMul(bld->builder,
+                      lp_build_max(float_bld, dsdx, dsdy),
+                      lp_build_int_to_float(float_bld, width), "");
+   if (dims > 1) {
+      LLVMValueRef max;
+      max = LLVMBuildMul(bld->builder,
+                         lp_build_max(float_bld, dtdx, dtdy),
+                         lp_build_int_to_float(float_bld, height), "");
+      rho = lp_build_max(float_bld, rho, max);
+      if (dims > 2) {
+         max = LLVMBuildMul(bld->builder,
+                            lp_build_max(float_bld, drdx, drdy),
+                            lp_build_int_to_float(float_bld, depth), "");
+         rho = lp_build_max(float_bld, rho, max);
+      }
+   }
+
+   /* compute lod = log2(rho) */
+   lod = lp_build_log2(float_bld, rho);
+
+   /* add lod bias */
+   lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+
+   /* clamp lod */
+   lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+
+   return lod;
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
+ * mipmap level index.
+ * Note: this is all scalar code.
+ * \param lod  scalar float texture level of detail
+ * \param level_out  returns integer 
+ */
+static void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod,
+                           LLVMValueRef *level_out)
+{
+   struct lp_build_context *float_bld = &bld->float_bld;
+   struct lp_build_context *int_bld = &bld->int_bld;
+   LLVMValueRef last_level, level;
+
+   LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+                                               bld->builder, unit);
+
+   /* convert float lod to integer */
+   level = lp_build_iround(float_bld, lod);
+
+   /* clamp level to legal range of levels */
+   *level_out = lp_build_clamp(int_bld, level, zero, last_level);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
+ * two (adjacent) mipmap level indexes.  Later, we'll sample from those
+ * two mipmap levels and interpolate between them.
+ */
+static void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod,
+                           LLVMValueRef *level0_out,
+                           LLVMValueRef *level1_out,
+                           LLVMValueRef *weight_out)
+{
+   struct lp_build_context *coord_bld = &bld->coord_bld;
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   LLVMValueRef last_level, level;
+
+   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+                                               bld->builder, unit);
+
+   /* convert float lod to integer */
+   level = lp_build_ifloor(coord_bld, lod);
+
+   /* compute level 0 and clamp to legal range of levels */
+   *level0_out = lp_build_clamp(int_coord_bld, level,
+                                int_coord_bld->zero,
+                                last_level);
+   /* compute level 1 and clamp to legal range of levels */
+   *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
+   *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero);
+
+   *weight_out = lp_build_fract(coord_bld, lod);
+}
+
+
+
+/**
+ * Sample 2D texture with nearest filtering, no mipmapping.
+ */
 static void
 lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
                                LLVMValueRef s,
@@ -193,26 +954,85 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
                                LLVMValueRef width,
                                LLVMValueRef height,
                                LLVMValueRef stride,
-                               LLVMValueRef data_ptr,
+                               LLVMValueRef data_array,
                                LLVMValueRef *texel)
 {
-   LLVMValueRef x;
-   LLVMValueRef y;
+   LLVMValueRef x, y;
+   LLVMValueRef data_ptr;
+
+   x = lp_build_sample_wrap_nearest(bld, s, width,
+                                    bld->static_state->pot_width,
+                                    bld->static_state->wrap_s);
+   y = lp_build_sample_wrap_nearest(bld, t, height,
+                                    bld->static_state->pot_height,
+                                    bld->static_state->wrap_t);
+
+   lp_build_name(x, "tex.x.wrapped");
+   lp_build_name(y, "tex.y.wrapped");
+
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
+}
+
+
+/**
+ * Sample 2D texture with nearest filtering, nearest mipmap.
+ */
+static void
+lp_build_sample_2d_nearest_mip_nearest_soa(struct lp_build_sample_context *bld,
+                                           unsigned unit,
+                                           LLVMValueRef s,
+                                           LLVMValueRef t,
+                                           LLVMValueRef width,
+                                           LLVMValueRef height,
+                                           LLVMValueRef width_vec,
+                                           LLVMValueRef height_vec,
+                                           LLVMValueRef stride,
+                                           LLVMValueRef data_array,
+                                           LLVMValueRef *texel)
+{
+   LLVMValueRef x, y;
+   LLVMValueRef lod, ilevel, ilevel_vec;
+   LLVMValueRef data_ptr;
+
+   /* compute float LOD */
+   lod = lp_build_lod_selector(bld, s, t, NULL, width, height, NULL);
+
+   /* convert LOD to int */
+   lp_build_nearest_mip_level(bld, unit, lod, &ilevel);
 
-   x = lp_build_ifloor(&bld->coord_bld, s);
-   y = lp_build_ifloor(&bld->coord_bld, t);
-   lp_build_name(x, "tex.x.floor");
-   lp_build_name(y, "tex.y.floor");
+   ilevel_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel);
+
+   /* compute width_vec, height at mipmap level 'ilevel' */
+   width_vec = lp_build_minify(bld, width_vec, ilevel_vec);
+   height_vec = lp_build_minify(bld, height_vec, ilevel_vec);
+   stride = lp_build_minify(bld, stride, ilevel_vec);
+
+   x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+                                    bld->static_state->pot_width,
+                                    bld->static_state->wrap_s);
+   y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+                                    bld->static_state->pot_height,
+                                    bld->static_state->wrap_t);
 
-   x = lp_build_sample_wrap(bld, x, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
-   y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
    lp_build_name(x, "tex.x.wrapped");
    lp_build_name(y, "tex.y.wrapped");
 
-   lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel);
+   /* get pointer to mipmap level [ilevel] data */
+   if (0)
+      data_ptr = lp_build_get_mipmap_level(bld, data_array, ilevel);
+   else
+      data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width_vec, height_vec, x, y, stride, data_ptr, texel);
 }
 
 
+/**
+ * Sample 2D texture with bilinear filtering.
+ */
 static void
 lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
                               LLVMValueRef s,
@@ -220,45 +1040,29 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
                               LLVMValueRef width,
                               LLVMValueRef height,
                               LLVMValueRef stride,
-                              LLVMValueRef data_ptr,
+                              LLVMValueRef data_array,
                               LLVMValueRef *texel)
 {
-   LLVMValueRef half;
-   LLVMValueRef s_ipart;
-   LLVMValueRef t_ipart;
    LLVMValueRef s_fpart;
    LLVMValueRef t_fpart;
    LLVMValueRef x0, x1;
    LLVMValueRef y0, y1;
    LLVMValueRef neighbors[2][2][4];
+   LLVMValueRef data_ptr;
    unsigned chan;
 
-   half = lp_build_const_scalar(bld->coord_type, 0.5);
-   s = lp_build_sub(&bld->coord_bld, s, half);
-   t = lp_build_sub(&bld->coord_bld, t, half);
-
-   s_ipart = lp_build_floor(&bld->coord_bld, s);
-   t_ipart = lp_build_floor(&bld->coord_bld, t);
-
-   s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
-   t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
-
-   x0 = lp_build_itrunc(&bld->coord_bld, s_ipart);
-   y0 = lp_build_itrunc(&bld->coord_bld, t_ipart);
-
-   x0 = lp_build_sample_wrap(bld, x0, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
-   y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
-
-   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
-   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+   lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
+                               bld->static_state->wrap_s, &x0, &x1, &s_fpart);
+   lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
+                               bld->static_state->wrap_t, &y0, &y1, &t_fpart);
 
-   x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
-   y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 
-   lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
-   lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
-   lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
-   lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
+   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
+   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
+   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);
 
    /* TODO: Don't interpolate missing channels */
    for(chan = 0; chan < 4; ++chan) {
@@ -309,7 +1113,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
                               LLVMValueRef width,
                               LLVMValueRef height,
                               LLVMValueRef stride,
-                              LLVMValueRef data_ptr,
+                              LLVMValueRef data_array,
                               LLVMValueRef *texel)
 {
    LLVMBuilderRef builder = bld->builder;
@@ -326,7 +1130,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMValueRef packed, packed_lo, packed_hi;
    LLVMValueRef unswizzled[4];
 
-   lp_build_context_init(&i32, builder, lp_type_int(32));
+   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
 
@@ -334,20 +1138,33 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    h16_vec_type = lp_build_vec_type(h16.type);
    u8n_vec_type = lp_build_vec_type(u8n.type);
 
+   if (bld->static_state->normalized_coords) {
+      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
+      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
+      LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
+      s = lp_build_mul(&bld->coord_bld, s, fp_width);
+      t = lp_build_mul(&bld->coord_bld, t, fp_height);
+   }
+
+   /* scale coords by 256 (8 fractional bits) */
    s = lp_build_mul_imm(&bld->coord_bld, s, 256);
    t = lp_build_mul_imm(&bld->coord_bld, t, 256);
 
+   /* convert float to int */
    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
 
+   /* subtract 0.5 (add -128) */
    i32_c128 = lp_build_int_const_scalar(i32.type, -128);
    s = LLVMBuildAdd(builder, s, i32_c128, "");
    t = LLVMBuildAdd(builder, t, i32_c128, "");
 
+   /* compute floor (shift right 8) */
    i32_c8 = lp_build_int_const_scalar(i32.type, 8);
    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
    t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
 
+   /* compute fractional part (AND with 0xff) */
    i32_c255 = lp_build_int_const_scalar(i32.type, 255);
    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
@@ -355,14 +1172,18 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    x0 = s_ipart;
    y0 = t_ipart;
 
-   x0 = lp_build_sample_wrap(bld, x0, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
-   y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
-
    x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
    y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
 
-   x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
-   y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+   x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
+                                 bld->static_state->wrap_s);
+   y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
+                                 bld->static_state->wrap_t);
+
+   x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
+                                 bld->static_state->wrap_s);
+   y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
+                                 bld->static_state->wrap_t);
 
    /*
     * Transform 4 x i32 in
@@ -432,10 +1253,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
     * The higher 8 bits of the resulting elements will be zero.
     */
 
-   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
-   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
-   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
-   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
+   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
+   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
+   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
+   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
 
    neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
    neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
@@ -518,6 +1339,12 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
 }
 
 
+/**
+ * Build texture sampling code.
+ * 'texel' will return a vector of four LLVMValueRefs corresponding to
+ * R, G, B, A.
+ * \param type  vector float type to use for coords, etc.
+ */
 void
 lp_build_sample_soa(LLVMBuilderRef builder,
                     const struct lp_sampler_static_state *static_state,
@@ -530,13 +1357,19 @@ lp_build_sample_soa(LLVMBuilderRef builder,
                     LLVMValueRef *texel)
 {
    struct lp_build_sample_context bld;
-   LLVMValueRef width;
-   LLVMValueRef height;
-   LLVMValueRef stride;
-   LLVMValueRef data_ptr;
+   LLVMValueRef width, width_vec;
+   LLVMValueRef height, height_vec;
+   LLVMValueRef stride, stride_vec;
+   LLVMValueRef data_array;
    LLVMValueRef s;
    LLVMValueRef t;
-   LLVMValueRef p;
+   LLVMValueRef r;
+   boolean done = FALSE;
+
+   (void) lp_build_lod_selector;   /* temporary to silence warning */
+   (void) lp_build_nearest_mip_level;
+   (void) lp_build_linear_mip_levels;
+   (void) lp_build_minify;
 
    /* Setup our build context */
    memset(&bld, 0, sizeof bld);
@@ -544,10 +1377,18 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    bld.static_state = static_state;
    bld.dynamic_state = dynamic_state;
    bld.format_desc = util_format_description(static_state->format);
+
+   bld.float_type = lp_type_float(32);
+   bld.int_type = lp_type_int(32);
    bld.coord_type = type;
+   bld.uint_coord_type = lp_uint_type(type);
    bld.int_coord_type = lp_int_type(type);
    bld.texel_type = type;
+
+   lp_build_context_init(&bld.float_bld, builder, bld.float_type);
+   lp_build_context_init(&bld.int_bld, builder, bld.int_type);
    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
+   lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
    lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
 
@@ -555,43 +1396,67 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    width = dynamic_state->width(dynamic_state, builder, unit);
    height = dynamic_state->height(dynamic_state, builder, unit);
    stride = dynamic_state->stride(dynamic_state, builder, unit);
-   data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
+   data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
+   /* Note that data_array is an array[level] of pointers to texture images */
 
    s = coords[0];
    t = coords[1];
-   p = coords[2];
+   r = coords[2];
 
-   width = lp_build_broadcast_scalar(&bld.int_coord_bld, width);
-   height = lp_build_broadcast_scalar(&bld.int_coord_bld, height);
-   stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride);
+   width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
+   height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
+   stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
 
    if(static_state->target == PIPE_TEXTURE_1D)
       t = bld.coord_bld.zero;
 
-   if(static_state->normalized_coords) {
-      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type);
-      LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, "");
-      LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, "");
-      s = lp_build_mul(&bld.coord_bld, s, fp_width);
-      t = lp_build_mul(&bld.coord_bld, t, fp_height);
-   }
+   switch (static_state->min_mip_filter) {
+   case PIPE_TEX_MIPFILTER_NONE:
+      break;
+   case PIPE_TEX_MIPFILTER_NEAREST:
+
+      switch (static_state->min_img_filter) {
+      case PIPE_TEX_FILTER_NEAREST:
+         lp_build_sample_2d_nearest_mip_nearest_soa(&bld, unit,
+                                                    s, t,
+                                                    width, height,
+                                                    width_vec, height_vec,
+                                                    stride_vec,
+                                                    data_array, texel);
+         done = TRUE;
+         break;
+      }
 
-   switch (static_state->min_img_filter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
       break;
-   case PIPE_TEX_FILTER_LINEAR:
-      if(lp_format_is_rgba8(bld.format_desc))
-         lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
-      else
-         lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+   case PIPE_TEX_MIPFILTER_LINEAR:
       break;
    default:
-      assert(0);
+      assert(0 && "invalid mip filter");
+   }
+
+   if (!done) {
+      switch (static_state->min_img_filter) {
+      case PIPE_TEX_FILTER_NEAREST:
+         lp_build_sample_2d_nearest_soa(&bld, s, t, width_vec, height_vec,
+                                        stride_vec, data_array, texel);
+         break;
+      case PIPE_TEX_FILTER_LINEAR:
+         if(lp_format_is_rgba8(bld.format_desc) &&
+            is_simple_wrap_mode(static_state->wrap_s) &&
+            is_simple_wrap_mode(static_state->wrap_t))
+            lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
+                                          stride_vec, data_array, texel);
+         else
+            lp_build_sample_2d_linear_soa(&bld, s, t, width_vec, height_vec,
+                                          stride_vec, data_array, texel);
+         break;
+      default:
+         assert(0);
+      }
    }
 
    /* FIXME: respect static_state->min_mip_filter */;
    /* FIXME: respect static_state->mag_img_filter */;
 
-   lp_build_sample_compare(&bld, p, texel);
+   lp_build_sample_compare(&bld, r, texel);
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 4cf28a9f93..fbb664d43a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -52,6 +52,7 @@
 #include "lp_bld_swizzle.h"
 #include "lp_bld_flow.h"
 #include "lp_bld_tgsi.h"
+#include "lp_bld_debug.h"
 
 
 #define LP_MAX_TEMPS 256
@@ -81,6 +82,21 @@
 #define QUAD_BOTTOM_LEFT  2
 #define QUAD_BOTTOM_RIGHT 3
 
+#define LP_TGSI_MAX_NESTING 16
+
+struct lp_exec_mask {
+   struct lp_build_context *bld;
+
+   boolean has_mask;
+
+   LLVMTypeRef int_vec_type;
+
+   LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
+   int cond_stack_size;
+   LLVMValueRef cond_mask;
+
+   LLVMValueRef exec_mask;
+};
 
 struct lp_build_tgsi_soa_context
 {
@@ -97,9 +113,9 @@ struct lp_build_tgsi_soa_context
    LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
 
    struct lp_build_mask_context *mask;
+   struct lp_exec_mask exec_mask;
 };
 
-
 static const unsigned char
 swizzle_left[4] = {
    QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
@@ -124,6 +140,72 @@ swizzle_bottom[4] = {
    QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
 };
 
+static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
+{
+   mask->bld = bld;
+   mask->has_mask = FALSE;
+   mask->cond_stack_size = 0;
+
+   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
+}
+
+static void lp_exec_mask_update(struct lp_exec_mask *mask)
+{
+   mask->exec_mask = mask->cond_mask;
+   mask->has_mask = (mask->cond_stack_size > 0);
+}
+
+static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
+                                   LLVMValueRef val)
+{
+   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
+   mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
+                                      mask->int_vec_type, "");
+
+   lp_exec_mask_update(mask);
+}
+
+static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
+{
+   LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
+   LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
+                                        mask->cond_mask, "");
+
+   /* means that we didn't have any mask before and that
+    * we were fully enabled */
+   if (mask->cond_stack_size <= 1) {
+      prev_mask = LLVMConstAllOnes(mask->int_vec_type);
+   }
+
+   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
+                                  inv_mask,
+                                  prev_mask, "");
+   lp_exec_mask_update(mask);
+}
+
+static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
+{
+   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
+   lp_exec_mask_update(mask);
+}
+
+static void lp_exec_mask_store(struct lp_exec_mask *mask,
+                               LLVMValueRef val,
+                               LLVMValueRef dst)
+{
+   if (mask->has_mask) {
+      LLVMValueRef real_val, dst_val;
+
+      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
+      real_val = lp_build_select(mask->bld,
+                                 mask->exec_mask,
+                                 val, dst_val);
+
+      LLVMBuildStore(mask->bld->builder, real_val, dst);
+   } else
+      LLVMBuildStore(mask->bld->builder, val, dst);
+}
+
 
 static LLVMValueRef
 emit_ddx(struct lp_build_tgsi_soa_context *bld,
@@ -287,13 +369,13 @@ emit_store(
 
    switch( reg->Register.File ) {
    case TGSI_FILE_OUTPUT:
-      LLVMBuildStore(bld->base.builder, value,
-                     bld->outputs[reg->Register.Index][chan_index]);
+      lp_exec_mask_store(&bld->exec_mask, value,
+                         bld->outputs[reg->Register.Index][chan_index]);
       break;
 
    case TGSI_FILE_TEMPORARY:
-      LLVMBuildStore(bld->base.builder, value,
-                     bld->temps[reg->Register.Index][chan_index]);
+      lp_exec_mask_store(&bld->exec_mask, value,
+                         bld->temps[reg->Register.Index][chan_index]);
       break;
 
    case TGSI_FILE_ADDRESS:
@@ -301,6 +383,11 @@ emit_store(
       assert(0);
       break;
 
+   case TGSI_FILE_PREDICATE:
+      /* FIXME */
+      assert(0);
+      break;
+
    default:
       assert( 0 );
    }
@@ -498,6 +585,17 @@ emit_instruction(
    if (indirect_temp_reference(inst))
       return FALSE;
 
+   /*
+    * Stores and write masks are handled in a general fashion after the long
+    * instruction opcode switch statement.
+    *
+    * Although not stricitly necessary, we avoid generating instructions for
+    * channels which won't be stored, in cases where's that easy. For some
+    * complex instructions, like texture sampling, it is more convenient to
+    * assume a full writemask and then let LLVM optimization passes eliminate
+    * redundant code.
+    */
+
    assert(info->num_dst <= 1);
    if(info->num_dst) {
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
@@ -1272,8 +1370,8 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_IF:
-      /* FIXME */
-      return 0;
+      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
+      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
       break;
 
    case TGSI_OPCODE_BGNFOR:
@@ -1289,13 +1387,11 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_ELSE:
-      /* FIXME */
-      return 0;
+      lp_exec_mask_cond_invert(&bld->exec_mask);
       break;
 
    case TGSI_OPCODE_ENDIF:
-      /* FIXME */
-      return 0;
+      lp_exec_mask_cond_pop(&bld->exec_mask);
       break;
 
    case TGSI_OPCODE_ENDFOR:
@@ -1458,6 +1554,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
    bld.consts_ptr = consts_ptr;
    bld.sampler = sampler;
 
+   lp_exec_mask_init(&bld.exec_mask, &bld.base);
+
    tgsi_parse_init( &parse, tokens );
 
    while( !tgsi_parse_end_of_tokens( &parse ) ) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c
index 8270cd057f..c327ba045a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c
@@ -178,6 +178,25 @@ lp_build_int32_vec4_type(void)
 }
 
 
+/**
+ * Create unsigned integer type variation of given type.
+ */
+struct lp_type
+lp_uint_type(struct lp_type type)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.width = type.width;
+   res_type.length = type.length;
+
+   return res_type;
+}
+
+
+/**
+ * Create signed integer type variation of given type.
+ */
 struct lp_type
 lp_int_type(struct lp_type type)
 {
@@ -186,6 +205,7 @@ lp_int_type(struct lp_type type)
    memset(&res_type, 0, sizeof res_type);
    res_type.width = type.width;
    res_type.length = type.length;
+   res_type.sign = 1;
 
    return res_type;
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 62ee05be4d..4daa904e63 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -103,7 +103,7 @@ struct lp_type {
    unsigned width:14;
 
    /**
-    * Vector length.
+    * Vector length.  If length==1, this is a scalar (float/int) type.
     *
     * width*length should be a power of two greater or equal to eight.
     *
@@ -139,6 +139,7 @@ struct lp_build_context
 };
 
 
+/** Create scalar float type */
 static INLINE struct lp_type
 lp_type_float(unsigned width)
 {
@@ -148,12 +149,29 @@ lp_type_float(unsigned width)
    res_type.floating = TRUE;
    res_type.sign = TRUE;
    res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector of float type */
+static INLINE struct lp_type
+lp_type_float_vec(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.floating = TRUE;
+   res_type.sign = TRUE;
+   res_type.width = width;
    res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
 
    return res_type;
 }
 
 
+/** Create scalar int type */
 static INLINE struct lp_type
 lp_type_int(unsigned width)
 {
@@ -162,12 +180,28 @@ lp_type_int(unsigned width)
    memset(&res_type, 0, sizeof res_type);
    res_type.sign = TRUE;
    res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector int type */
+static INLINE struct lp_type
+lp_type_int_vec(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.sign = TRUE;
+   res_type.width = width;
    res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
 
    return res_type;
 }
 
 
+/** Create scalar uint type */
 static INLINE struct lp_type
 lp_type_uint(unsigned width)
 {
@@ -175,6 +209,20 @@ lp_type_uint(unsigned width)
 
    memset(&res_type, 0, sizeof res_type);
    res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector uint type */
+static INLINE struct lp_type
+lp_type_uint_vec(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.width = width;
    res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
 
    return res_type;
@@ -257,6 +305,10 @@ lp_build_int32_vec4_type(void);
 
 
 struct lp_type
+lp_uint_type(struct lp_type type);
+
+
+struct lp_type
 lp_int_type(struct lp_type type);
 
 
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 95eb5f6563..d97f749b6e 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -696,7 +696,7 @@ fenced_buffer_map(struct pb_buffer *buf,
        * Don't wait for the GPU to finish accessing it, if blocking is forbidden.
        */
       if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) &&
-          ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
+          ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) {
          goto done;
       }
 
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 53bc019a20..86f9266c95 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -294,7 +294,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
       LIST_DEL(&buf->head);
       pipe_mutex_unlock(mgr->mutex);
       /* Increase refcount */
-      pipe_reference(NULL, &buf->base.base.reference);
+      pipe_reference_init(&buf->base.base.reference, 1);
       return &buf->base;
    }
    
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index c2593cf165..a5dbded2bc 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -179,7 +179,9 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf)
 {
    uint8_t *map;
    
-   map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+   map = pb_map(buf->buffer,
+                PIPE_BUFFER_USAGE_CPU_READ |
+                PIPE_BUFFER_USAGE_UNSYNCHRONIZED);
    assert(map);
    if(map) {
       boolean underflow, overflow;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index c445cb578b..24e2820f88 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -483,11 +483,15 @@ pb_slab_range_manager_create_buffer(struct pb_manager *_mgr,
 {
    struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr);
    pb_size bufSize;
+   pb_size reqSize = size;
    unsigned i;
 
+   if(desc->alignment > reqSize)
+	   reqSize = desc->alignment;
+
    bufSize = mgr->minBufSize;
    for (i = 0; i < mgr->numBuckets; ++i) {
-      if(bufSize >= size)
+      if(bufSize >= reqSize)
 	 return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc);
       bufSize *= 2;
    }
diff --git a/src/gallium/auxiliary/target-helpers/wrap_screen.c b/src/gallium/auxiliary/target-helpers/wrap_screen.c
new file mode 100644
index 0000000000..5fe3013938
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/wrap_screen.c
@@ -0,0 +1,65 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * 
+ **************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell
+ */
+
+#include "target-helpers/wrap_screen.h"
+#include "trace/tr_public.h"
+#include "identity/id_public.h"
+#include "util/u_debug.h"
+
+
+/* Centralized code to inject common wrapping layers:
+ */
+struct pipe_screen *
+gallium_wrap_screen( struct pipe_screen *screen )
+{
+   /* Screen wrapping functions are required not to fail.  If it is
+    * impossible to wrap a screen, the unwrapped screen should be
+    * returned instead.  Any failure condition should be returned in
+    * an OUT argument.
+    *
+    * Otherwise it is really messy trying to clean up in this code.
+    */
+   if (debug_get_bool_option("GALLIUM_WRAP", FALSE)) {
+      screen = identity_screen_create(screen);
+   }
+
+   if (debug_get_bool_option("GALLIUM_TRACE", FALSE)) {
+      screen = trace_screen_create( screen );
+   }
+
+   return screen;
+}
+
+
+
+
diff --git a/src/gallium/auxiliary/target-helpers/wrap_screen.h b/src/gallium/auxiliary/target-helpers/wrap_screen.h
new file mode 100644
index 0000000000..7e76beb7c5
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/wrap_screen.h
@@ -0,0 +1,16 @@
+#ifndef WRAP_SCREEN_HELPER_H
+#define WRAP_SCREEN_HELPER_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_screen;
+
+/* Centralized code to inject common wrapping layers.  Other layers
+ * can be introduced by specific targets, but these are the generally
+ * helpful ones we probably want everywhere.
+ */
+struct pipe_screen *
+gallium_wrap_screen( struct pipe_screen *screen );
+
+
+#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index fdbf2b436e..f853ea2820 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -110,6 +110,42 @@ micro_ceil(union tgsi_exec_channel *dst,
 }
 
 static void
+micro_clamp(union tgsi_exec_channel *dst,
+            const union tgsi_exec_channel *src0,
+            const union tgsi_exec_channel *src1,
+            const union tgsi_exec_channel *src2)
+{
+   dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
+   dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
+   dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
+   dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
+}
+
+static void
+micro_cmp(union tgsi_exec_channel *dst,
+          const union tgsi_exec_channel *src0,
+          const union tgsi_exec_channel *src1,
+          const union tgsi_exec_channel *src2)
+{
+   dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
+   dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
+   dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
+   dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
+}
+
+static void
+micro_cnd(union tgsi_exec_channel *dst,
+          const union tgsi_exec_channel *src0,
+          const union tgsi_exec_channel *src1,
+          const union tgsi_exec_channel *src2)
+{
+   dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0];
+   dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1];
+   dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2];
+   dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3];
+}
+
+static void
 micro_cos(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src)
 {
@@ -961,16 +997,6 @@ micro_pow(
 }
 
 static void
-micro_sqrt( union tgsi_exec_channel *dst,
-            const union tgsi_exec_channel *src )
-{
-   dst->f[0] = sqrtf( src->f[0] );
-   dst->f[1] = sqrtf( src->f[1] );
-   dst->f[2] = sqrtf( src->f[2] );
-   dst->f[3] = sqrtf( src->f[3] );
-}
-
-static void
 micro_sub(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
@@ -2663,15 +2689,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_CND:
-      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-         FETCH(&r[2], 2, chan_index);
-         micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
-      }
-      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
-         STORE(&d[chan_index], 0, chan_index);
-      }
+      exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_DP2A:
@@ -2683,16 +2701,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_CLAMP:
-      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-         micro_max(&r[0], &r[0], &r[1]);
-         FETCH(&r[1], 2, chan_index);
-         micro_min(&d[chan_index], &r[0], &r[1]);
-      }
-      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
-         STORE(&d[chan_index], 0, chan_index);
-      }
+      exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_FLR:
@@ -3086,15 +3095,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_CMP:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-         FETCH(&r[2], 2, chan_index);
-         micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
-      }
-      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
-         STORE(&d[chan_index], 0, chan_index);
-      }
+      exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_SCS:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 91e1b27da1..371f690b29 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -321,6 +321,9 @@ iter_instruction(
          reg,
          "destination",
          FALSE );
+      if (!inst->Dst[i].Register.WriteMask) {
+         report_error(ctx, "Destination register has empty writemask");
+      }
    }
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
       scan_register *reg = create_scan_register_src(&inst->Src[i]);
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 24727d4988..c9ec2b32bf 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -392,10 +392,10 @@ static fetch_func get_fetch_func( enum pipe_format format )
    case PIPE_FORMAT_R8G8B8A8_SSCALED:
       return &fetch_R8G8B8A8_SSCALED;
 
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
       return &fetch_A8R8G8B8_UNORM;
 
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       return &fetch_B8G8R8A8_UNORM;
 
    case PIPE_FORMAT_R32_FIXED:
@@ -551,10 +551,10 @@ static emit_func get_emit_func( enum pipe_format format )
    case PIPE_FORMAT_R8G8B8A8_SSCALED:
       return &emit_R8G8B8A8_SSCALED;
 
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
       return &emit_A8R8G8B8_UNORM;
 
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       return &emit_B8G8R8A8_UNORM;
 
    default:
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index c13e742738..03e093c11e 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -336,7 +336,7 @@ static boolean translate_attr( struct translate_sse *p,
    case PIPE_FORMAT_R32G32B32A32_FLOAT:
       emit_load_R32G32B32A32(p, dataXMM, srcECX);
       break;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
       emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
       break;
@@ -360,7 +360,7 @@ static boolean translate_attr( struct translate_sse *p,
    case PIPE_FORMAT_R32G32B32A32_FLOAT:
       emit_store_R32G32B32A32(p, dstEAX, dataXMM);
       break;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
       emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
       break;
diff --git a/src/gallium/auxiliary/util/.gitignore b/src/gallium/auxiliary/util/.gitignore
index 29c586c9b5..448d2f304f 100644
--- a/src/gallium/auxiliary/util/.gitignore
+++ b/src/gallium/auxiliary/util/.gitignore
@@ -1,2 +1,3 @@
 u_format_access.c
 u_format_table.c
+u_format_pack.h
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index f0bc58a558..4d0737ccd3 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -62,6 +62,8 @@ struct blit_state
    struct pipe_rasterizer_state rasterizer;
    struct pipe_sampler_state sampler;
    struct pipe_viewport_state viewport;
+   struct pipe_clip_state clip;
+   struct pipe_vertex_element velem[2];
 
    void *vs;
    void *fs[TGSI_WRITEMASK_XYZW + 1];
@@ -101,7 +103,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
    ctx->rasterizer.front_winding = PIPE_WINDING_CW;
    ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
-   ctx->rasterizer.bypass_vs_clip_and_viewport = 1;
    ctx->rasterizer.gl_rasterization_rules = 1;
 
    /* samplers */
@@ -114,6 +115,14 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    ctx->sampler.mag_img_filter = 0; /* set later */
    ctx->sampler.normalized_coords = 1;
 
+   /* vertex elements state */
+   memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      ctx->velem[i].src_offset = i * 4 * sizeof(float);
+      ctx->velem[i].instance_divisor = 0;
+      ctx->velem[i].vertex_buffer_index = 0;
+      ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
 
    /* vertex shader - still required to provide the linkage between
     * fragment shader input semantics and vertex_element/buffers.
@@ -266,7 +275,6 @@ regions_overlap(int srcX0, int srcY0,
  * \param writemask  controls which channels in the dest surface are sourced
  *                   from the src surface.  Disabled channels are sourced
  *                   from (0,0,0,1).
- * XXX what about clipping???
  * XXX need some control over blitting Z and/or stencil.
  */
 void
@@ -407,14 +415,19 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_save_rasterizer(ctx->cso);
    cso_save_samplers(ctx->cso);
    cso_save_sampler_textures(ctx->cso);
+   cso_save_viewport(ctx->cso);
    cso_save_framebuffer(ctx->cso);
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
+   cso_save_clip(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+   cso_set_clip(ctx->cso, &ctx->clip);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -422,6 +435,17 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_single_sampler(ctx->cso, 0, &ctx->sampler);
    cso_single_sampler_done(ctx->cso);
 
+   /* viewport */
+   ctx->viewport.scale[0] = 0.5f * dst->width;
+   ctx->viewport.scale[1] = 0.5f * dst->height;
+   ctx->viewport.scale[2] = 0.5f;
+   ctx->viewport.scale[3] = 1.0f;
+   ctx->viewport.translate[0] = 0.5f * dst->width;
+   ctx->viewport.translate[1] = 0.5f * dst->height;
+   ctx->viewport.translate[2] = 0.5f;
+   ctx->viewport.translate[3] = 0.0f;
+   cso_set_viewport(ctx->cso, &ctx->viewport);
+
    /* texture */
    cso_set_sampler_textures(ctx->cso, 1, &tex);
 
@@ -444,8 +468,10 @@ util_blit_pixels_writemask(struct blit_state *ctx,
 
    /* draw quad */
    offset = setup_vertex_data_tex(ctx,
-                                  (float) dstX0, (float) dstY0, 
-                                  (float) dstX1, (float) dstY1,
+                                  (float) dstX0 / dst->width * 2.0f - 1.0f,
+                                  (float) dstY0 / dst->height * 2.0f - 1.0f,
+                                  (float) dstX1 / dst->width * 2.0f - 1.0f,
+                                  (float) dstY1 / dst->height * 2.0f - 1.0f,
                                   s0, t0,
                                   s1, t1,
                                   z);
@@ -461,9 +487,12 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_restore_rasterizer(ctx->cso);
    cso_restore_samplers(ctx->cso);
    cso_restore_sampler_textures(ctx->cso);
+   cso_restore_viewport(ctx->cso);
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
+   cso_restore_clip(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 
    pipe_texture_reference(&tex, NULL);
 }
@@ -547,11 +576,15 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_save_framebuffer(ctx->cso);
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
+   cso_save_clip(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+   cso_set_clip(ctx->cso, &ctx->clip);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -559,6 +592,17 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_single_sampler(ctx->cso, 0, &ctx->sampler);
    cso_single_sampler_done(ctx->cso);
 
+   /* viewport */
+   ctx->viewport.scale[0] = 0.5f * dst->width;
+   ctx->viewport.scale[1] = 0.5f * dst->height;
+   ctx->viewport.scale[2] = 0.5f;
+   ctx->viewport.scale[3] = 1.0f;
+   ctx->viewport.translate[0] = 0.5f * dst->width;
+   ctx->viewport.translate[1] = 0.5f * dst->height;
+   ctx->viewport.translate[2] = 0.5f;
+   ctx->viewport.translate[3] = 0.0f;
+   cso_set_viewport(ctx->cso, &ctx->viewport);
+
    /* texture */
    cso_set_sampler_textures(ctx->cso, 1, &tex);
 
@@ -576,8 +620,10 @@ util_blit_pixels_tex(struct blit_state *ctx,
 
    /* draw quad */
    offset = setup_vertex_data_tex(ctx,
-                                  (float) dstX0, (float) dstY0,
-                                  (float) dstX1, (float) dstY1,
+                                  (float) dstX0 / dst->width * 2.0f - 1.0f,
+                                  (float) dstY0 / dst->height * 2.0f - 1.0f,
+                                  (float) dstX1 / dst->width * 2.0f - 1.0f,
+                                  (float) dstY1 / dst->height * 2.0f - 1.0f,
                                   s0, t0, s1, t1,
                                   z);
 
@@ -596,4 +642,6 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
+   cso_restore_clip(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 }
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 29060a1eff..71cf9525d3 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -89,6 +89,8 @@ struct blitter_context_priv
    void *dsa_write_depth_keep_stencil;
    void *dsa_keep_depth_stencil;
 
+   void *velem_state;
+
    /* Sampler state for clamping to a miplevel. */
    void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
 
@@ -96,6 +98,12 @@ struct blitter_context_priv
    void *rs_state;
 
    struct pipe_sampler_view *sampler_view;
+
+   /* Viewport state. */
+   struct pipe_viewport_state viewport;
+
+   /* Clip state. */
+   struct pipe_clip_state clip;
 };
 
 struct blitter_context *util_blitter_create(struct pipe_context *pipe)
@@ -105,6 +113,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    struct pipe_depth_stencil_alpha_state dsa = { { 0 } };
    struct pipe_rasterizer_state rs_state = { 0 };
    struct pipe_sampler_state *sampler_state;
+   struct pipe_vertex_element velem[2];
    unsigned i;
 
    ctx = CALLOC_STRUCT(blitter_context_priv);
@@ -119,6 +128,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    ctx->blitter.saved_rs_state = INVALID_PTR;
    ctx->blitter.saved_fs = INVALID_PTR;
    ctx->blitter.saved_vs = INVALID_PTR;
+   ctx->blitter.saved_velem_state = INVALID_PTR;
    ctx->blitter.saved_fb_state.nr_cbufs = ~0;
    ctx->blitter.saved_num_sampler_views = ~0;
    ctx->blitter.saved_num_sampler_states = ~0;
@@ -163,11 +173,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    memset(&rs_state, 0, sizeof(rs_state));
    rs_state.front_winding = PIPE_WINDING_CW;
    rs_state.cull_mode = PIPE_WINDING_NONE;
-   rs_state.bypass_vs_clip_and_viewport = 1;
    rs_state.gl_rasterization_rules = 1;
    rs_state.flatshade = 1;
    ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
 
+   /* vertex elements state */
+   memset(&velem[0], 0, sizeof(velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      velem[i].src_offset = i * 4 * sizeof(float);
+      velem[i].instance_divisor = 0;
+      velem[i].vertex_buffer_index = 0;
+      velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+   ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
+
    /* fragment shaders are created on-demand */
 
    /* vertex shaders */
@@ -217,6 +236,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
    pipe->delete_rasterizer_state(pipe, ctx->rs_state);
    pipe->delete_vs_state(pipe, ctx->vs_col);
    pipe->delete_vs_state(pipe, ctx->vs_tex);
+   pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
 
    for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
       if (ctx->fs_texfetch_col[i])
@@ -248,7 +268,8 @@ static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
           ctx->blitter.saved_dsa_state != INVALID_PTR &&
           ctx->blitter.saved_rs_state != INVALID_PTR &&
           ctx->blitter.saved_fs != INVALID_PTR &&
-          ctx->blitter.saved_vs != INVALID_PTR);
+          ctx->blitter.saved_vs != INVALID_PTR &&
+          ctx->blitter.saved_velem_state != INVALID_PTR);
 }
 
 static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
@@ -261,15 +282,20 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
    pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
    pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
    pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
+   pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state);
 
    ctx->blitter.saved_blend_state = INVALID_PTR;
    ctx->blitter.saved_dsa_state = INVALID_PTR;
    ctx->blitter.saved_rs_state = INVALID_PTR;
    ctx->blitter.saved_fs = INVALID_PTR;
    ctx->blitter.saved_vs = INVALID_PTR;
+   ctx->blitter.saved_velem_state = INVALID_PTR;
 
    pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref);
 
+   pipe->set_viewport_state(pipe, &ctx->blitter.saved_viewport);
+   pipe->set_clip_state(pipe, &ctx->blitter.saved_clip);
+
    /* restore the state objects which are required to be saved before copy/fill
     */
    if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) {
@@ -295,25 +321,40 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
 static void blitter_set_rectangle(struct blitter_context_priv *ctx,
                                   unsigned x1, unsigned y1,
                                   unsigned x2, unsigned y2,
+                                  unsigned width, unsigned height,
                                   float depth)
 {
    int i;
 
    /* set vertex positions */
-   ctx->vertices[0][0][0] = x1; /*v0.x*/
-   ctx->vertices[0][0][1] = y1; /*v0.y*/
+   ctx->vertices[0][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v0.x*/
+   ctx->vertices[0][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v0.y*/
 
-   ctx->vertices[1][0][0] = x2; /*v1.x*/
-   ctx->vertices[1][0][1] = y1; /*v1.y*/
+   ctx->vertices[1][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v1.x*/
+   ctx->vertices[1][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v1.y*/
 
-   ctx->vertices[2][0][0] = x2; /*v2.x*/
-   ctx->vertices[2][0][1] = y2; /*v2.y*/
+   ctx->vertices[2][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v2.x*/
+   ctx->vertices[2][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v2.y*/
 
-   ctx->vertices[3][0][0] = x1; /*v3.x*/
-   ctx->vertices[3][0][1] = y2; /*v3.y*/
+   ctx->vertices[3][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v3.x*/
+   ctx->vertices[3][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v3.y*/
 
    for (i = 0; i < 4; i++)
       ctx->vertices[i][0][2] = depth; /*z*/
+
+   /* viewport */
+   ctx->viewport.scale[0] = 0.5f * width;
+   ctx->viewport.scale[1] = 0.5f * height;
+   ctx->viewport.scale[2] = 1.0f;
+   ctx->viewport.scale[3] = 1.0f;
+   ctx->viewport.translate[0] = 0.5f * width;
+   ctx->viewport.translate[1] = 0.5f * height;
+   ctx->viewport.translate[2] = 0.0f;
+   ctx->viewport.translate[3] = 0.0f;
+   ctx->pipe->set_viewport_state(ctx->pipe, &ctx->viewport);
+
+   /* clip */
+   ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip);
 }
 
 static void blitter_set_clear_color(struct blitter_context_priv *ctx,
@@ -553,11 +594,12 @@ void util_blitter_clear(struct blitter_context *blitter,
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
 
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
    pipe->bind_vs_state(pipe, ctx->vs_col);
 
    blitter_set_clear_color(ctx, rgba);
-   blitter_set_rectangle(ctx, 0, 0, width, height, depth);
+   blitter_set_rectangle(ctx, 0, 0, width, height, width, height, depth);
    blitter_draw_quad(ctx);
    blitter_restore_CSOs(ctx);
 }
@@ -631,6 +673,7 @@ static void util_blitter_do_copy(struct blitter_context *blitter,
    pipe->bind_vs_state(pipe, ctx->vs_tex);
    pipe->bind_fragment_sampler_states(pipe, 1,
       blitter_get_sampler_state(ctx, src->level));
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->set_fragment_sampler_views(pipe, 1, &view);
    pipe->set_framebuffer_state(pipe, &fb_state);
 
@@ -653,7 +696,7 @@ static void util_blitter_do_copy(struct blitter_context *blitter,
          assert(0);
    }
 
-   blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0);
+   blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, dst->width, dst->height, 0);
    blitter_draw_quad(ctx);
 
 }
@@ -804,6 +847,7 @@ void util_blitter_fill(struct blitter_context *blitter,
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
    pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
    pipe->bind_vs_state(pipe, ctx->vs_col);
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
    fb_state.width = dst->width;
@@ -814,7 +858,7 @@ void util_blitter_fill(struct blitter_context *blitter,
    pipe->set_framebuffer_state(pipe, &fb_state);
 
    blitter_set_clear_color(ctx, rgba);
-   blitter_set_rectangle(ctx, 0, 0, width, height, 0);
+   blitter_set_rectangle(ctx, 0, 0, width, height, dst->width, dst->height, 0);
    blitter_draw_quad(ctx);
    blitter_restore_CSOs(ctx);
 }
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 9911948560..2ad7201a29 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -43,11 +43,14 @@ struct blitter_context
    /* Private members, really. */
    void *saved_blend_state;   /**< blend state */
    void *saved_dsa_state;     /**< depth stencil alpha state */
+   void *saved_velem_state;   /**< vertex elements state */
    void *saved_rs_state;      /**< rasterizer state */
    void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */
 
    struct pipe_framebuffer_state saved_fb_state;  /**< framebuffer state */
    struct pipe_stencil_ref saved_stencil_ref;     /**< stencil ref */
+   struct pipe_viewport_state saved_viewport;
+   struct pipe_clip_state saved_clip;
 
    int saved_num_sampler_states;
    void *saved_sampler_states[PIPE_MAX_SAMPLERS];
@@ -171,6 +174,13 @@ void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
 }
 
 static INLINE
+void util_blitter_save_vertex_elements(struct blitter_context *blitter,
+                                       void *state)
+{
+   blitter->saved_velem_state = state;
+}
+
+static INLINE
 void util_blitter_save_stencil_ref(struct blitter_context *blitter,
                                    const struct pipe_stencil_ref *state)
 {
@@ -206,6 +216,20 @@ void util_blitter_save_framebuffer(struct blitter_context *blitter,
 }
 
 static INLINE
+void util_blitter_save_viewport(struct blitter_context *blitter,
+                                struct pipe_viewport_state *state)
+{
+   blitter->saved_viewport = *state;
+}
+
+static INLINE
+void util_blitter_save_clip(struct blitter_context *blitter,
+                            struct pipe_clip_state *state)
+{
+   blitter->saved_clip = *state;
+}
+
+static INLINE
 void util_blitter_save_fragment_sampler_states(
                   struct blitter_context *blitter,
                   int num_sampler_states,
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 858d52c6ef..94be682c4b 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -387,7 +387,7 @@ void debug_dump_image(const char *prefix,
 
       /* XXX this is a hack */
       switch (format) {
-      case PIPE_FORMAT_A8R8G8B8_UNORM:
+      case PIPE_FORMAT_B8G8R8A8_UNORM:
          r = 2;
          g = 1;
          b = 0;
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index 14506e8451..8c194102bf 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -45,8 +45,6 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
                         uint num_attribs)
 {
    struct pipe_vertex_buffer vbuffer;
-   struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
-   uint i;
 
    assert(num_attribs <= PIPE_MAX_ATTRIBS);
 
@@ -58,15 +56,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
    vbuffer.max_index = num_verts - 1;
    pipe->set_vertex_buffers(pipe, 1, &vbuffer);
 
-   /* tell pipe about the vertex attributes */
-   for (i = 0; i < num_attribs; i++) {
-      velements[i].src_offset = i * 4 * sizeof(float);
-      velements[i].instance_divisor = 0;
-      velements[i].vertex_buffer_index = 0;
-      velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-      velements[i].nr_components = 4;
-   }
-   pipe->set_vertex_elements(pipe, num_attribs, velements);
+   /* note: vertex elements already set by caller */
 
    /* draw */
    pipe->draw_arrays(pipe, prim_type, 0, num_verts);
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index eaf4ec90f2..52cf3ef4ce 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -318,7 +318,6 @@ util_dump_rasterizer_state(struct os_stream *stream, const struct pipe_rasterize
    util_dump_member(stream, uint, state, line_stipple_factor);
    util_dump_member(stream, uint, state, line_stipple_pattern);
    util_dump_member(stream, bool, state, line_last_pixel);
-   util_dump_member(stream, bool, state, bypass_vs_clip_and_viewport);
    util_dump_member(stream, bool, state, flatshade_first);
    util_dump_member(stream, bool, state, gl_rasterization_rules);
 
@@ -701,7 +700,6 @@ util_dump_vertex_element(struct os_stream *stream, const struct pipe_vertex_elem
    util_dump_member(stream, uint, state, src_offset);
 
    util_dump_member(stream, uint, state, vertex_buffer_index);
-   util_dump_member(stream, uint, state, nr_components);
 
    util_dump_member(stream, format, state, src_format);
 
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
deleted file mode 100644
index e0724a1a8b..0000000000
--- a/src/gallium/auxiliary/util/u_format.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Vmware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "u_format.h"
-
-
-const struct util_format_description *
-util_format_description(enum pipe_format format)
-{
-   const struct util_format_description *desc;
-
-   if (format >= PIPE_FORMAT_COUNT) {
-      return NULL;
-   }
-
-   desc = &util_format_description_table[format];
-   assert(desc->format == format);
-
-   return desc;
-}
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 01f7931aed..96a0fa6550 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -1,109 +1,186 @@
-PIPE_FORMAT_A8R8G8B8_UNORM        , arith , 1, 1, un8 , un8 , un8 , un8 , zyxw, rgb
-PIPE_FORMAT_X8R8G8B8_UNORM        , arith , 1, 1, un8 , un8 , un8 , un8 , zyx1, rgb
-PIPE_FORMAT_B8G8R8A8_UNORM        , arith , 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb
-PIPE_FORMAT_B8G8R8X8_UNORM        , arith , 1, 1, un8 , un8 , un8 , un8 , yzw1, rgb
-PIPE_FORMAT_A1R5G5B5_UNORM        , arith , 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb
-PIPE_FORMAT_A4R4G4B4_UNORM        , arith , 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb
-PIPE_FORMAT_R5G6B5_UNORM          , arith , 1, 1, un5 , un6 , un5 ,     , zyx1, rgb
-PIPE_FORMAT_A2B10G10R10_UNORM     , arith , 1, 1, un10, un10, un10, un2 , xyzw, rgb
-PIPE_FORMAT_L8_UNORM              , arith , 1, 1, un8 ,     ,     ,     , xxx1, rgb
-PIPE_FORMAT_A8_UNORM              , arith , 1, 1, un8 ,     ,     ,     , 000x, rgb
-PIPE_FORMAT_I8_UNORM              , arith , 1, 1, un8 ,     ,     ,     , xxxx, rgb
-PIPE_FORMAT_A8L8_UNORM            , arith , 1, 1, un8 , un8 ,     ,     , xxxy, rgb
-PIPE_FORMAT_L16_UNORM             , arith , 1, 1, un16,     ,     ,     , xxx1, rgb
-PIPE_FORMAT_YCBCR                 , yuv   , 2, 1, x32 ,     ,     ,     , xyz1, yuv
-PIPE_FORMAT_YCBCR_REV             , yuv   , 2, 1, x32 ,     ,     ,     , xyz1, yuv
-PIPE_FORMAT_Z16_UNORM             , array , 1, 1, un16,     ,     ,     , x___, zs 
-PIPE_FORMAT_Z32_UNORM             , array , 1, 1, un32,     ,     ,     , x___, zs 
-PIPE_FORMAT_Z32_FLOAT             , array , 1, 1, f32 ,     ,     ,     , x___, zs 
-PIPE_FORMAT_S8Z24_UNORM           , arith , 1, 1, un24, un8 ,     ,     , xy__, zs 
-PIPE_FORMAT_Z24S8_UNORM           , arith , 1, 1, un8 , un24,     ,     , yx__, zs 
-PIPE_FORMAT_X8Z24_UNORM           , arith , 1, 1, un24, un8 ,     ,     , x___, zs 
-PIPE_FORMAT_Z24X8_UNORM           , arith , 1, 1, un8 , un24,     ,     , y___, zs 
-PIPE_FORMAT_S8_UNORM              , array , 1, 1, un8 ,     ,     ,     , _x__, zs 
-PIPE_FORMAT_R64_FLOAT             , array , 1, 1, f64 ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R64G64_FLOAT          , array , 1, 1, f64 , f64 ,     ,     , xy01, rgb
-PIPE_FORMAT_R64G64B64_FLOAT       , array , 1, 1, f64 , f64 , f64 ,     , xyz1, rgb
-PIPE_FORMAT_R64G64B64A64_FLOAT    , array , 1, 1, f64 , f64 , f64 , f64 , xyzw, rgb
-PIPE_FORMAT_R32_FLOAT             , array , 1, 1, f32 ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R32G32_FLOAT          , array , 1, 1, f32 , f32 ,     ,     , xy01, rgb
-PIPE_FORMAT_R32G32B32_FLOAT       , array , 1, 1, f32 , f32 , f32 ,     , xyz1, rgb
-PIPE_FORMAT_R32G32B32A32_FLOAT    , array , 1, 1, f32 , f32 , f32 , f32 , xyzw, rgb
-PIPE_FORMAT_R32_UNORM             , array , 1, 1, un32,     ,     ,     , x001, rgb
-PIPE_FORMAT_R32G32_UNORM          , array , 1, 1, un32, un32,     ,     , xy01, rgb
-PIPE_FORMAT_R32G32B32_UNORM       , array , 1, 1, un32, un32, un32,     , xyz1, rgb
-PIPE_FORMAT_R32G32B32A32_UNORM    , array , 1, 1, un32, un32, un32, un32, xyzw, rgb
-PIPE_FORMAT_R32_USCALED           , array , 1, 1, u32 ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R32G32_USCALED        , array , 1, 1, u32 , u32 ,     ,     , xy01, rgb
-PIPE_FORMAT_R32G32B32_USCALED     , array , 1, 1, u32 , u32 , u32 ,     , xyz1, rgb
-PIPE_FORMAT_R32G32B32A32_USCALED  , array , 1, 1, u32 , u32 , u32 , u32 , xyzw, rgb
-PIPE_FORMAT_R32_SNORM             , array , 1, 1, sn32,     ,     ,     , x001, rgb
-PIPE_FORMAT_R32G32_SNORM          , array , 1, 1, sn32, sn32,     ,     , xy01, rgb
-PIPE_FORMAT_R32G32B32_SNORM       , array , 1, 1, sn32, sn32, sn32,     , xyz1, rgb
-PIPE_FORMAT_R32G32B32A32_SNORM    , array , 1, 1, sn32, sn32, sn32, sn32, xyzw, rgb
-PIPE_FORMAT_R32_SSCALED           , array , 1, 1, s32 ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R32G32_SSCALED        , array , 1, 1, s32 , s32 ,     ,     , xy01, rgb
-PIPE_FORMAT_R32G32B32_SSCALED     , array , 1, 1, s32 , s32 , s32 ,     , xyz1, rgb
-PIPE_FORMAT_R32G32B32A32_SSCALED  , array , 1, 1, s32 , s32 , s32 , s32 , xyzw, rgb
-PIPE_FORMAT_R16_UNORM             , array , 1, 1, un16,     ,     ,     , x001, rgb
-PIPE_FORMAT_R16G16_UNORM          , array , 1, 1, un16, un16,     ,     , xy01, rgb
-PIPE_FORMAT_R16G16B16_UNORM       , array , 1, 1, un16, un16, un16,     , xyz1, rgb
-PIPE_FORMAT_R16G16B16A16_UNORM    , array , 1, 1, un16, un16, un16, un16, xyzw, rgb
-PIPE_FORMAT_R16_USCALED           , array , 1, 1, u16 ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R16G16_USCALED        , array , 1, 1, u16 , u16 ,     ,     , xy01, rgb
-PIPE_FORMAT_R16G16B16_USCALED     , array , 1, 1, u16 , u16 , u16 ,     , xyz1, rgb
-PIPE_FORMAT_R16G16B16A16_USCALED  , array , 1, 1, u16 , u16 , u16 , u16 , xyzw, rgb
-PIPE_FORMAT_R16_SNORM             , array , 1, 1, sn16,     ,     ,     , x001, rgb
-PIPE_FORMAT_R16G16_SNORM          , array , 1, 1, sn16, sn16,     ,     , xy01, rgb
-PIPE_FORMAT_R16G16B16_SNORM       , array , 1, 1, sn16, sn16, sn16,     , xyz1, rgb
-PIPE_FORMAT_R16G16B16A16_SNORM    , array , 1, 1, sn16, sn16, sn16, sn16, xyzw, rgb
-PIPE_FORMAT_R16_SSCALED           , array , 1, 1, s16 ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R16G16_SSCALED        , array , 1, 1, s16 , s16 ,     ,     , xy01, rgb
-PIPE_FORMAT_R16G16B16_SSCALED     , array , 1, 1, s16 , s16 , s16 ,     , xyz1, rgb
-PIPE_FORMAT_R16G16B16A16_SSCALED  , array , 1, 1, s16 , s16 , s16 , s16 , xyzw, rgb
-PIPE_FORMAT_R8_UNORM              , array , 1, 1, un8 ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R8G8_UNORM            , array , 1, 1, un8 , un8 ,     ,     , yx01, rgb
-PIPE_FORMAT_R8G8B8_UNORM          , array , 1, 1, un8 , un8 , un8 ,     , zyx1, rgb
-PIPE_FORMAT_R8G8B8A8_UNORM        , array , 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb
-PIPE_FORMAT_R8G8B8X8_UNORM        , array , 1, 1, un8 , un8 , un8 , un8 , wzy1, rgb
-PIPE_FORMAT_R8_USCALED            , array , 1, 1, u8  ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R8G8_USCALED          , array , 1, 1, u8  , u8  ,     ,     , xy01, rgb
-PIPE_FORMAT_R8G8B8_USCALED        , array , 1, 1, u8  , u8  , u8  ,     , xyz1, rgb
-PIPE_FORMAT_R8G8B8A8_USCALED      , array , 1, 1, u8  , u8  , u8  , u8  , xyzw, rgb
-PIPE_FORMAT_R8G8B8X8_USCALED      , array , 1, 1, u8  , u8  , u8  , u8  , xyz1, rgb
-PIPE_FORMAT_R8_SNORM              , array , 1, 1, sn8 ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R8G8_SNORM            , array , 1, 1, sn8 , sn8 ,     ,     , xy01, rgb
-PIPE_FORMAT_R8G8B8_SNORM          , array , 1, 1, sn8 , sn8 , sn8 ,     , xyz1, rgb
-PIPE_FORMAT_R8G8B8A8_SNORM        , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb
-PIPE_FORMAT_R8G8B8X8_SNORM        , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyz1, rgb
-PIPE_FORMAT_B6G5R5_SNORM          , arith , 1, 1, sn5 , sn5 , sn6 ,     , xyz1, rgb
-PIPE_FORMAT_A8B8G8R8_SNORM        , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzyx, rgb
-PIPE_FORMAT_X8B8G8R8_SNORM        , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzy1, rgb
-PIPE_FORMAT_R8_SSCALED            , array , 1, 1, s8  ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R8G8_SSCALED          , array , 1, 1, s8  , s8  ,     ,     , xy01, rgb
-PIPE_FORMAT_R8G8B8_SSCALED        , array , 1, 1, s8  , s8  , s8  ,     , xyz1, rgb
-PIPE_FORMAT_R8G8B8A8_SSCALED      , array , 1, 1, s8  , s8  , s8  , s8  , xyzw, rgb
-PIPE_FORMAT_R8G8B8X8_SSCALED      , array , 1, 1, s8  , s8  , s8  , s8  , xyz1, rgb
-PIPE_FORMAT_R32_FIXED             , array , 1, 1, h32 ,     ,     ,     , x001, rgb
-PIPE_FORMAT_R32G32_FIXED          , array , 1, 1, h32 , h32 ,     ,     , xy01, rgb
-PIPE_FORMAT_R32G32B32_FIXED       , array , 1, 1, h32 , h32 , h32 ,     , xyz1, rgb
-PIPE_FORMAT_R32G32B32A32_FIXED    , array , 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb
-PIPE_FORMAT_L8_SRGB               , arith , 1, 1, u8  ,     ,     ,     , xxx1, srgb 
-PIPE_FORMAT_A8L8_SRGB             , arith , 1, 1, u8  , u8  ,     ,     , xxxy, srgb 
-PIPE_FORMAT_R8G8B8_SRGB           , array , 1, 1, u8  , u8  , u8  ,     , xyz1, srgb 
-PIPE_FORMAT_R8G8B8A8_SRGB         , array , 1, 1, u8  , u8  , u8  , u8  , xyzw, srgb 
-PIPE_FORMAT_R8G8B8X8_SRGB         , array , 1, 1, u8  , u8  , u8  , u8  , xyz1, srgb 
-PIPE_FORMAT_A8R8G8B8_SRGB         , array , 1, 1, u8  , u8  , u8  , u8  , yzwx, srgb 
-PIPE_FORMAT_X8R8G8B8_SRGB         , array , 1, 1, u8  , u8  , u8  , u8  , yzw1, srgb 
-PIPE_FORMAT_B8G8R8A8_SRGB         , array , 1, 1, u8  , u8  , u8  , u8  , zyxw, srgb 
-PIPE_FORMAT_B8G8R8X8_SRGB         , array , 1, 1, u8  , u8  , u8  , u8  , zyx1, srgb 
-PIPE_FORMAT_X8UB8UG8SR8S_NORM     , array , 1, 1, sn8 , sn8 , un8 , x8  , wzy1, rgb
-PIPE_FORMAT_B6UG5SR5S_NORM        , arith , 1, 1, sn5 , sn5 , un6 ,     , xyz1, rgb
-PIPE_FORMAT_DXT1_RGB              , dxt   , 4, 4, x64 ,     ,     ,     , xyz1, rgb
-PIPE_FORMAT_DXT1_RGBA             , dxt   , 4, 4, x64 ,     ,     ,     , xyzw, rgb
-PIPE_FORMAT_DXT3_RGBA             , dxt   , 4, 4, x128,     ,     ,     , xyzw, rgb
-PIPE_FORMAT_DXT5_RGBA             , dxt   , 4, 4, x128,     ,     ,     , xyzw, rgb
-PIPE_FORMAT_DXT1_SRGB             , dxt   , 4, 4, x64 ,     ,     ,     , xyz1, srgb
-PIPE_FORMAT_DXT1_SRGBA            , dxt   , 4, 4, x64 ,     ,     ,     , xyzw, srgb
-PIPE_FORMAT_DXT3_SRGBA            , dxt   , 4, 4, x128,     ,     ,     , xyzw, srgb
-PIPE_FORMAT_DXT5_SRGBA            , dxt   , 4, 4, x128,     ,     ,     , xyzw, srgb
+###########################################################################
+# 
+# Copyright 2009-2010 VMware, Inc.
+# All Rights Reserved.
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+# 
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# 
+###########################################################################
+
+# This CSV file has the input data for u_format.h's struct
+# util_format_description.
+#
+# Each format entry contains:
+# - name, per enum pipe_format
+# - layout, per enum util_format_layout, in shortened lower caps
+# - pixel block's width
+# - pixel block's height
+# - channel encoding (only meaningful for plain layout), containing for each
+#   channel the following information:
+#   - type, one of
+#     - 'x': void
+#     - 'u': unsigned
+#     - 's': signed
+#     - 'h': fixed
+#     - 'f': FLOAT
+#   - optionally followed by 'n' if it is normalized
+#   - number of bits
+# - channel swizzle 
+# - color space: rgb, yub, sz
+#
+# See also:
+# - http://msdn.microsoft.com/en-us/library/ee416489.aspx (D3D9)
+# - http://msdn.microsoft.com/en-us/library/ee415668.aspx (D3D9 -> D3D10)
+# - http://msdn.microsoft.com/en-us/library/ee418116.aspx (D3D10)
+#
+# Note that GL doesn't really specify the layout of internal formats. See
+# OpenGL 2.1 specification, Table 3.16, on the "Correspondence of sized
+# internal formats to base in- ternal formats, and desired component
+# resolutions for each sized internal format."
+
+# Typical rendertarget formats
+PIPE_FORMAT_B8G8R8A8_UNORM        , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, rgb
+PIPE_FORMAT_B8G8R8X8_UNORM        , plain, 1, 1, un8 , un8 , un8 , un8 , zyx1, rgb
+PIPE_FORMAT_A8R8G8B8_UNORM        , plain, 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb
+PIPE_FORMAT_X8R8G8B8_UNORM        , plain, 1, 1, un8 , un8 , un8 , un8 , yzw1, rgb
+PIPE_FORMAT_A8B8G8R8_UNORM        , plain, 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb
+PIPE_FORMAT_X8B8G8R8_UNORM        , plain, 1, 1, un8 , un8 , un8 , un8 , wzy1, rgb
+PIPE_FORMAT_B5G5R5A1_UNORM        , plain, 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb
+PIPE_FORMAT_B4G4R4A4_UNORM        , plain, 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb
+PIPE_FORMAT_B5G6R5_UNORM          , plain, 1, 1, un5 , un6 , un5 ,     , zyx1, rgb
+PIPE_FORMAT_R10G10B10A2_UNORM     , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb
+
+# Luminance/Intensity/Alpha formats
+PIPE_FORMAT_L8_UNORM              , plain, 1, 1, un8 ,     ,     ,     , xxx1, rgb
+PIPE_FORMAT_A8_UNORM              , plain, 1, 1, un8 ,     ,     ,     , 000x, rgb
+PIPE_FORMAT_I8_UNORM              , plain, 1, 1, un8 ,     ,     ,     , xxxx, rgb
+PIPE_FORMAT_L8A8_UNORM            , plain, 1, 1, un8 , un8 ,     ,     , xxxy, rgb
+PIPE_FORMAT_L16_UNORM             , plain, 1, 1, un16,     ,     ,     , xxx1, rgb
+
+# SRGB formats
+PIPE_FORMAT_L8_SRGB               , plain, 1, 1, un8 ,     ,     ,     , xxx1, srgb 
+PIPE_FORMAT_L8A8_SRGB             , plain, 1, 1, un8 , un8 ,     ,     , xxxy, srgb 
+PIPE_FORMAT_R8G8B8_SRGB           , plain, 1, 1, un8 , un8 , un8 ,     , xyz1, srgb 
+PIPE_FORMAT_A8B8G8R8_SRGB         , plain, 1, 1, un8 , un8 , un8 , un8 , wzyx, srgb
+PIPE_FORMAT_X8B8G8R8_SRGB         , plain, 1, 1, un8 , un8 , un8 , un8 , wzy1, srgb
+PIPE_FORMAT_B8G8R8A8_SRGB         , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, srgb
+PIPE_FORMAT_B8G8R8X8_SRGB         , plain, 1, 1, un8 , un8 , un8 , un8 , zyx1, srgb
+PIPE_FORMAT_A8R8G8B8_SRGB         , plain, 1, 1, un8 , un8 , un8 , un8 , yzwx, srgb
+PIPE_FORMAT_X8R8G8B8_SRGB         , plain, 1, 1, un8 , un8 , un8 , un8 , yzw1, srgb
+
+# Mixed-sign formats (typically used for bump map textures)
+PIPE_FORMAT_R8SG8SB8UX8U_NORM     , plain, 1, 1, sn8 , sn8 , un8 , x8  , xyz1, rgb
+PIPE_FORMAT_R5SG5SB6U_NORM        , plain, 1, 1, sn5 , sn5 , un6 ,     , xyz1, rgb
+
+# Depth-stencil formats
+PIPE_FORMAT_S8_UNORM              , plain, 1, 1, un8 ,     ,     ,     , _x__, zs 
+PIPE_FORMAT_Z16_UNORM             , plain, 1, 1, un16,     ,     ,     , x___, zs 
+PIPE_FORMAT_Z32_UNORM             , plain, 1, 1, un32,     ,     ,     , x___, zs 
+PIPE_FORMAT_Z32_FLOAT             , plain, 1, 1, f32 ,     ,     ,     , x___, zs 
+PIPE_FORMAT_Z24S8_UNORM           , plain, 1, 1, un24, un8 ,     ,     , xy__, zs 
+PIPE_FORMAT_S8Z24_UNORM           , plain, 1, 1, un8 , un24,     ,     , yx__, zs 
+PIPE_FORMAT_Z24X8_UNORM           , plain, 1, 1, un24, un8 ,     ,     , x___, zs 
+PIPE_FORMAT_X8Z24_UNORM           , plain, 1, 1, un8 , un24,     ,     , y___, zs 
+
+# YUV formats
+# http://www.fourcc.org/yuv.php#UYVY
+PIPE_FORMAT_UYVY                 , subsampled, 2, 1, x32 ,     ,     ,     , xyz1, yuv
+# http://www.fourcc.org/yuv.php#YUYV (a.k.a http://www.fourcc.org/yuv.php#YUY2)
+# XXX: u_tile.c's ycbcr_get_tile_rgba actually interprets it as VYUY but the 
+# intent should be to match D3DFMT_YUY2
+PIPE_FORMAT_YUYV                 , subsampled, 2, 1, x32 ,     ,     ,     , xyz1, yuv
+
+# Compressed formats
+PIPE_FORMAT_DXT1_RGB              , compressed, 4, 4, x64 ,     ,     ,     , xyz1, rgb
+PIPE_FORMAT_DXT1_RGBA             , compressed, 4, 4, x64 ,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_DXT3_RGBA             , compressed, 4, 4, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_DXT5_RGBA             , compressed, 4, 4, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_DXT1_SRGB             , compressed, 4, 4, x64 ,     ,     ,     , xyz1, srgb
+PIPE_FORMAT_DXT1_SRGBA            , compressed, 4, 4, x64 ,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_DXT3_SRGBA            , compressed, 4, 4, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_DXT5_SRGBA            , compressed, 4, 4, x128,     ,     ,     , xyzw, srgb
+
+# Straightforward D3D10-like formats (also used for 
+# vertex buffer element description)
+# 
+# See also:
+# - src/gallium/auxiliary/translate/translate_generic.c
+# - src/mesa/state_tracker/st_draw.c
+PIPE_FORMAT_R64_FLOAT             , plain, 1, 1, f64 ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R64G64_FLOAT          , plain, 1, 1, f64 , f64 ,     ,     , xy01, rgb
+PIPE_FORMAT_R64G64B64_FLOAT       , plain, 1, 1, f64 , f64 , f64 ,     , xyz1, rgb
+PIPE_FORMAT_R64G64B64A64_FLOAT    , plain, 1, 1, f64 , f64 , f64 , f64 , xyzw, rgb
+PIPE_FORMAT_R32_FLOAT             , plain, 1, 1, f32 ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R32G32_FLOAT          , plain, 1, 1, f32 , f32 ,     ,     , xy01, rgb
+PIPE_FORMAT_R32G32B32_FLOAT       , plain, 1, 1, f32 , f32 , f32 ,     , xyz1, rgb
+PIPE_FORMAT_R32G32B32A32_FLOAT    , plain, 1, 1, f32 , f32 , f32 , f32 , xyzw, rgb
+PIPE_FORMAT_R32_UNORM             , plain, 1, 1, un32,     ,     ,     , x001, rgb
+PIPE_FORMAT_R32G32_UNORM          , plain, 1, 1, un32, un32,     ,     , xy01, rgb
+PIPE_FORMAT_R32G32B32_UNORM       , plain, 1, 1, un32, un32, un32,     , xyz1, rgb
+PIPE_FORMAT_R32G32B32A32_UNORM    , plain, 1, 1, un32, un32, un32, un32, xyzw, rgb
+PIPE_FORMAT_R32_USCALED           , plain, 1, 1, u32 ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R32G32_USCALED        , plain, 1, 1, u32 , u32 ,     ,     , xy01, rgb
+PIPE_FORMAT_R32G32B32_USCALED     , plain, 1, 1, u32 , u32 , u32 ,     , xyz1, rgb
+PIPE_FORMAT_R32G32B32A32_USCALED  , plain, 1, 1, u32 , u32 , u32 , u32 , xyzw, rgb
+PIPE_FORMAT_R32_SNORM             , plain, 1, 1, sn32,     ,     ,     , x001, rgb
+PIPE_FORMAT_R32G32_SNORM          , plain, 1, 1, sn32, sn32,     ,     , xy01, rgb
+PIPE_FORMAT_R32G32B32_SNORM       , plain, 1, 1, sn32, sn32, sn32,     , xyz1, rgb
+PIPE_FORMAT_R32G32B32A32_SNORM    , plain, 1, 1, sn32, sn32, sn32, sn32, xyzw, rgb
+PIPE_FORMAT_R32_SSCALED           , plain, 1, 1, s32 ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R32G32_SSCALED        , plain, 1, 1, s32 , s32 ,     ,     , xy01, rgb
+PIPE_FORMAT_R32G32B32_SSCALED     , plain, 1, 1, s32 , s32 , s32 ,     , xyz1, rgb
+PIPE_FORMAT_R32G32B32A32_SSCALED  , plain, 1, 1, s32 , s32 , s32 , s32 , xyzw, rgb
+PIPE_FORMAT_R32_FIXED             , plain, 1, 1, h32 ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R32G32_FIXED          , plain, 1, 1, h32 , h32 ,     ,     , xy01, rgb
+PIPE_FORMAT_R32G32B32_FIXED       , plain, 1, 1, h32 , h32 , h32 ,     , xyz1, rgb
+PIPE_FORMAT_R32G32B32A32_FIXED    , plain, 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb
+PIPE_FORMAT_R16_UNORM             , plain, 1, 1, un16,     ,     ,     , x001, rgb
+PIPE_FORMAT_R16G16_UNORM          , plain, 1, 1, un16, un16,     ,     , xy01, rgb
+PIPE_FORMAT_R16G16B16_UNORM       , plain, 1, 1, un16, un16, un16,     , xyz1, rgb
+PIPE_FORMAT_R16G16B16A16_UNORM    , plain, 1, 1, un16, un16, un16, un16, xyzw, rgb
+PIPE_FORMAT_R16_USCALED           , plain, 1, 1, u16 ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R16G16_USCALED        , plain, 1, 1, u16 , u16 ,     ,     , xy01, rgb
+PIPE_FORMAT_R16G16B16_USCALED     , plain, 1, 1, u16 , u16 , u16 ,     , xyz1, rgb
+PIPE_FORMAT_R16G16B16A16_USCALED  , plain, 1, 1, u16 , u16 , u16 , u16 , xyzw, rgb
+PIPE_FORMAT_R16_SNORM             , plain, 1, 1, sn16,     ,     ,     , x001, rgb
+PIPE_FORMAT_R16G16_SNORM          , plain, 1, 1, sn16, sn16,     ,     , xy01, rgb
+PIPE_FORMAT_R16G16B16_SNORM       , plain, 1, 1, sn16, sn16, sn16,     , xyz1, rgb
+PIPE_FORMAT_R16G16B16A16_SNORM    , plain, 1, 1, sn16, sn16, sn16, sn16, xyzw, rgb
+PIPE_FORMAT_R16_SSCALED           , plain, 1, 1, s16 ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R16G16_SSCALED        , plain, 1, 1, s16 , s16 ,     ,     , xy01, rgb
+PIPE_FORMAT_R16G16B16_SSCALED     , plain, 1, 1, s16 , s16 , s16 ,     , xyz1, rgb
+PIPE_FORMAT_R16G16B16A16_SSCALED  , plain, 1, 1, s16 , s16 , s16 , s16 , xyzw, rgb
+PIPE_FORMAT_R8_UNORM              , plain, 1, 1, un8 ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R8G8_UNORM            , plain, 1, 1, un8 , un8 ,     ,     , xy01, rgb
+PIPE_FORMAT_R8G8B8_UNORM          , plain, 1, 1, un8 , un8 , un8 ,     , xyz1, rgb
+PIPE_FORMAT_R8G8B8A8_UNORM        , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb
+PIPE_FORMAT_R8_USCALED            , plain, 1, 1, u8  ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R8G8_USCALED          , plain, 1, 1, u8  , u8  ,     ,     , xy01, rgb
+PIPE_FORMAT_R8G8B8_USCALED        , plain, 1, 1, u8  , u8  , u8  ,     , xyz1, rgb
+PIPE_FORMAT_R8G8B8A8_USCALED      , plain, 1, 1, u8  , u8  , u8  , u8  , xyzw, rgb
+PIPE_FORMAT_R8_SNORM              , plain, 1, 1, sn8 ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R8G8_SNORM            , plain, 1, 1, sn8 , sn8 ,     ,     , xy01, rgb
+PIPE_FORMAT_R8G8B8_SNORM          , plain, 1, 1, sn8 , sn8 , sn8 ,     , xyz1, rgb
+PIPE_FORMAT_R8G8B8A8_SNORM        , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb
+PIPE_FORMAT_R8_SSCALED            , plain, 1, 1, s8  ,     ,     ,     , x001, rgb
+PIPE_FORMAT_R8G8_SSCALED          , plain, 1, 1, s8  , s8  ,     ,     , xy01, rgb
+PIPE_FORMAT_R8G8B8_SSCALED        , plain, 1, 1, s8  , s8  , s8  ,     , xyz1, rgb
+PIPE_FORMAT_R8G8B8A8_SSCALED      , plain, 1, 1, s8  , s8  , s8  , s8  , xyzw, rgb
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 2fbbb83d4b..c08fdcafcc 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2009 Vmware, Inc.
+ * Copyright 2009-2010 Vmware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -39,47 +39,32 @@ extern "C" {
 
 
 /**
- * Describe how to best pack/unpack pixels into/from the prescribed format.
+ * Describe how to pack/unpack pixels into/from the prescribed format.
  *
- * These are used for automatic code generation of pixel packing and unpacking
- * routines (in compile time, e.g., u_format_access.py, or in runtime, like
- * llvmpipe does).
- *
- * Thumb rule is: if you're not code generating pixel packing/unpacking then
- * these are irrelevant for you.
- *
- * Note that this can be deduced from other values in util_format_description
- * structure. This is by design, to make code generation of pixel
- * packing/unpacking/sampling routines simple and efficient.
- *
- * XXX: This should be renamed to something like util_format_pack.
+ * XXX: This could be renamed to something like util_format_pack, or broke down
+ * in flags inside util_format_block that said exactly what we want.
  */
 enum util_format_layout {
    /**
-    * Single scalar component.
-    */
-   UTIL_FORMAT_LAYOUT_SCALAR = 0,
-
-   /**
-    * One or more components of mixed integer formats, arithmetically encoded
-    * in a word up to 32bits.
+    * Formats with util_format_block::width == util_format_block::height == 1
+    * that can be described as an ordinary data structure.
     */
-   UTIL_FORMAT_LAYOUT_ARITH = 1,
+   UTIL_FORMAT_LAYOUT_PLAIN = 0,
 
    /**
-    * One or more components, no mixed formats, each with equal power of two
-    * number of bytes.
+    * Formats with sub-sampled channels.
+    *
+    * This is for formats like YV12 where there is less than one sample per
+    * pixel.
+    *
+    * XXX: This could actually b
     */
-   UTIL_FORMAT_LAYOUT_ARRAY = 2,
+   UTIL_FORMAT_LAYOUT_SUBSAMPLED = 3,
 
    /**
-    * XXX: Not used yet. These might go away and be replaced by a single entry,
-    * for formats where multiple pixels have to be
-    * read in order to determine a single pixel value (i.e., block.width > 1
-    * || block.height > 1)
+    * An unspecified compression algorithm.
     */
-   UTIL_FORMAT_LAYOUT_YUV = 3,
-   UTIL_FORMAT_LAYOUT_DXT = 4
+   UTIL_FORMAT_LAYOUT_COMPRESSED = 4
 };
 
 
@@ -135,11 +120,66 @@ struct util_format_channel_description
 struct util_format_description
 {
    enum pipe_format format;
+
    const char *name;
+
+   /**
+    * Short name, striped of the prefix, lower case.
+    */
+   const char *short_name;
+
+   /**
+    * Pixel block dimensions.
+    */
    struct util_format_block block;
+
    enum util_format_layout layout;
+
+   /**
+    * The number of channels.
+    */
+   unsigned nr_channels:3;
+
+   /**
+    * Whether all channels have the same number of (whole) bytes.
+    */
+   unsigned is_array:1;
+
+   /**
+    * Whether the pixel format can be described as a bitfield structure.
+    *
+    * In particular:
+    * - pixel depth must be 8, 16, or 32 bits;
+    * - all channels must be unsigned, signed, or void
+    */
+   unsigned is_bitmask:1;
+
+   /**
+    * Whether channels have mixed types (ignoring UTIL_FORMAT_TYPE_VOID).
+    */
+   unsigned is_mixed:1;
+
+   /**
+    * Input channel description.
+    *
+    * Only valid for UTIL_FORMAT_LAYOUT_PLAIN formats.
+    */
    struct util_format_channel_description channel[4];
+
+   /**
+    * Output channel swizzle.
+    *
+    * The order is either:
+    * - RGBA
+    * - YUV(A)
+    * - ZS
+    * depending on the colorspace.
+    */
    unsigned char swizzle[4];
+
+   /**
+    * Colorspace transformation.
+    */
    enum util_format_colorspace colorspace;
 };
 
@@ -179,7 +219,7 @@ util_format_is_compressed(enum pipe_format format)
       return FALSE;
    }
 
-   return desc->layout == UTIL_FORMAT_LAYOUT_DXT ? TRUE : FALSE;
+   return desc->layout == UTIL_FORMAT_LAYOUT_COMPRESSED ? TRUE : FALSE;
 }
 
 static INLINE boolean 
@@ -253,14 +293,7 @@ util_format_get_blockwidth(enum pipe_format format)
       return 1;
    }
 
-   switch (desc->layout) {
-   case UTIL_FORMAT_LAYOUT_YUV:
-      return 2;
-   case UTIL_FORMAT_LAYOUT_DXT:
-      return 4;
-   default:
-      return 1;
-   }
+   return desc->block.width;
 }
 
 static INLINE uint
@@ -273,12 +306,7 @@ util_format_get_blockheight(enum pipe_format format)
       return 1;
    }
 
-   switch (desc->layout) {
-   case UTIL_FORMAT_LAYOUT_DXT:
-      return 4;
-   default:
-      return 1;
-   }
+   return desc->block.height;
 }
 
 static INLINE unsigned
@@ -373,37 +401,30 @@ util_format_has_alpha(enum pipe_format format)
       return FALSE;
    }
 
-   switch (desc->layout) {
-   case UTIL_FORMAT_LAYOUT_SCALAR:
-   case UTIL_FORMAT_LAYOUT_ARITH:
-   case UTIL_FORMAT_LAYOUT_ARRAY:
-      /* FIXME: pf_get_component_bits( PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_COMP_A ) should not return 0 right? */
-      if (format == PIPE_FORMAT_A8_UNORM ||
-          format == PIPE_FORMAT_A8L8_UNORM ||
-          format == PIPE_FORMAT_A8L8_SRGB) {
-         return TRUE;
-      }
-      return util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3) != 0;
-   case UTIL_FORMAT_LAYOUT_YUV:
+   switch (desc->colorspace) {
+   case UTIL_FORMAT_COLORSPACE_RGB:
+   case UTIL_FORMAT_COLORSPACE_SRGB:
+      return desc->swizzle[3] != UTIL_FORMAT_SWIZZLE_1;
+   case UTIL_FORMAT_COLORSPACE_YUV:
+      return FALSE;
+   case UTIL_FORMAT_COLORSPACE_ZS:
       return FALSE;
-   case UTIL_FORMAT_LAYOUT_DXT:
-      switch (format) {
-      case PIPE_FORMAT_DXT1_RGBA:
-      case PIPE_FORMAT_DXT3_RGBA:
-      case PIPE_FORMAT_DXT5_RGBA:
-      case PIPE_FORMAT_DXT1_SRGBA:
-      case PIPE_FORMAT_DXT3_SRGBA:
-      case PIPE_FORMAT_DXT5_SRGBA:
-         return TRUE;
-      default:
-         return FALSE;
-      }
    default:
       assert(0);
       return FALSE;
    }
 }
 
+/**
+ * Return the number of components stored.
+ * Formats with block size != 1x1 will always have 1 component (the block).
+ */
+static INLINE unsigned
+util_format_get_nr_components(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   return desc->nr_channels;
+}
 
 /*
  * Format access functions.
diff --git a/src/gallium/auxiliary/util/u_format_access.py b/src/gallium/auxiliary/util/u_format_access.py
index 0b05ddb931..00424779d2 100644
--- a/src/gallium/auxiliary/util/u_format_access.py
+++ b/src/gallium/auxiliary/util/u_format_access.py
@@ -37,20 +37,10 @@
 '''
 
 
+import math
 import sys
 
-from u_format_parse import *
-
-
-def short_name(format):
-    '''Make up a short norm for a format, suitable to be used as suffix in
-    function names.'''
-
-    name = format.name
-    if name.startswith('PIPE_FORMAT_'):
-        name = name[len('PIPE_FORMAT_'):]
-    name = name.lower()
-    return name
+from u_format_pack import *
 
 
 def is_format_supported(format):
@@ -63,16 +53,16 @@ def is_format_supported(format):
     if format.colorspace not in ('rgb', 'zs'):
         return False
 
-    if format.layout not in (ARITH, ARRAY):
+    if format.layout != PLAIN:
         return False
 
     for i in range(4):
-        type = format.in_types[i]
-        if type.kind not in (VOID, UNSIGNED, FLOAT):
+        channel = format.channels[i]
+        if channel.type not in (VOID, UNSIGNED, FLOAT):
             return False
 
     # We can only read a color from a depth/stencil format if the depth channel is present
-    if format.colorspace == 'zs' and format.out_swizzle[0] == SWIZZLE_NONE:
+    if format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE:
         return False
 
     return True
@@ -81,187 +71,48 @@ def is_format_supported(format):
 def native_type(format):
     '''Get the native appropriate for a format.'''
 
-    if format.layout == ARITH:
-        # For arithmetic pixel formats return the integer type that matches the whole pixel
-        return 'uint%u_t' % format.block_size()
-    elif format.layout == ARRAY:
-        # For array pixel formats return the integer type that matches the color channel
-        type = format.in_types[0]
-        if type.kind == UNSIGNED:
-            return 'uint%u_t' % type.size
-        elif type.kind == SIGNED:
-            return 'int%u_t' % type.size
-        elif type.kind == FLOAT:
-            if type.size == 32:
-                return 'float'
-            elif type.size == 64:
-                return 'double'
+    if format.layout == PLAIN:
+        if not format.is_array():
+            # For arithmetic pixel formats return the integer type that matches the whole pixel
+            return 'uint%u_t' % format.block_size()
+        else:
+            # For array pixel formats return the integer type that matches the color channel
+            channel = format.channels[0]
+            if channel.type == UNSIGNED:
+                return 'uint%u_t' % channel.size
+            elif channel.type == SIGNED:
+                return 'int%u_t' % channel.size
+            elif channel.type == FLOAT:
+                if channel.size == 32:
+                    return 'float'
+                elif channel.size == 64:
+                    return 'double'
+                else:
+                    assert False
             else:
                 assert False
-        else:
-            assert False
-    else:
-        assert False
-
-
-def intermediate_native_type(bits, sign):
-    '''Find a native type adequate to hold intermediate results of the request bit size.'''
-
-    bytes = 4 # don't use anything smaller than 32bits
-    while bytes * 8 < bits:
-        bytes *= 2
-    bits = bytes*8
-
-    if sign:
-        return 'int%u_t' % bits
-    else:
-        return 'uint%u_t' % bits
-
-
-def get_one_shift(type):
-    '''Get the number of the bit that matches unity for this type.'''
-    if type.kind == 'FLOAT':
-        assert False
-    if not type.norm:
-        return 0
-    if type.kind == UNSIGNED:
-        return type.size
-    if type.kind == SIGNED:
-        return type.size - 1
-    if type.kind == FIXED:
-        return type.size / 2
-    assert False
-
-
-def get_one(type):
-    '''Get the value of unity for this type.'''
-    if type.kind == 'FLOAT' or not type.norm:
-        return 1
-    else:
-        return (1 << get_one_shift(type)) - 1
-
-
-def generate_clamp():
-    '''Code generate the clamping functions for each type.
-
-    We don't use a macro so that arguments with side effects, 
-    like *src_pixel++ are correctly handled.
-    '''
-
-    for suffix, native_type in [
-        ('', 'double'),
-        ('f', 'float'),
-        ('ui', 'unsigned int'),
-        ('si', 'int'),
-    ]:
-        print 'static INLINE %s' % native_type
-        print 'clamp%s(%s value, %s lbound, %s ubound)' % (suffix, native_type, native_type, native_type)
-        print '{'
-        print '   if(value < lbound)'
-        print '      return lbound;'
-        print '   if(value > ubound)'
-        print '      return ubound;'
-        print '   return value;'
-        print '}'
-        print
-
-
-def clamp_expr(src_type, dst_type, dst_native_type, value):
-    '''Generate the expression to clamp the value in the source type to the
-    destination type range.'''
-
-    if src_type == dst_type:
-        return value
-
-    # Pick the approriate clamp function
-    if src_type.kind == FLOAT:
-        if src_type.size == 32:
-            func = 'clampf'
-        elif src_type.size == 64:
-            func = 'clamp'
-        else:
-            assert False
-    elif src_type.kind == UNSIGNED:
-        func = 'clampui'
-    elif src_type.kind == SIGNED:
-        func = 'clampsi'
     else:
         assert False
 
-    # Clamp floats to [-1, 1] or [0, 1] range
-    if src_type.kind == FLOAT and dst_type.norm:
-        max = 1
-        if src_type.sign and dst_type.sign:
-            min = -1
-        else:
-            min = 0
-        return '%s(%s, %s, %s)' % (func, value, min, max)
-                
-    # FIXME: Also clamp scaled values
-
-    return value
-
-
-def conversion_expr(src_type, dst_type, dst_native_type, value):
-    '''Generate the expression to convert a value between two types.'''
-
-    if src_type == dst_type:
-        return value
 
-    if src_type.kind == FLOAT and dst_type.kind == FLOAT:
-        return '(%s)%s' % (dst_native_type, value)
-    
-    if not src_type.norm and not dst_type.norm:
-        return '(%s)%s' % (dst_native_type, value)
-
-    value = clamp_expr(src_type, dst_type, dst_native_type, value)
-
-    if dst_type.kind == FLOAT:
-        if src_type.norm:
-            one = get_one(src_type)
-            if src_type.size <= 23:
-                scale = '(1.0f/0x%x)' % one
-            else:
-                # bigger than single precision mantissa, use double
-                scale = '(1.0/0x%x)' % one
-            value = '(%s * %s)' % (value, scale)
-        return '(%s)%s' % (dst_native_type, value)
-
-    if src_type.kind == FLOAT:
-        if dst_type.norm:
-            dst_one = get_one(dst_type)
-            if dst_type.size <= 23:
-                scale = '0x%x' % dst_one
-            else:
-                # bigger than single precision mantissa, use double
-                scale = '(double)0x%x' % dst_one
-            value = '(%s * %s)' % (value, scale)
-        return '(%s)%s' % (dst_native_type, value)
-
-    if src_type.kind == dst_type.kind:
-        src_one = get_one(src_type)
-        dst_one = get_one(dst_type)
-
-        if src_one > dst_one and src_type.norm and dst_type.norm:
-            # We can just bitshift
-            src_shift = get_one_shift(src_type)
-            dst_shift = get_one_shift(dst_type)
-            value = '(%s >> %s)' % (value, src_shift - dst_shift)
-        else:
-            # We need to rescale using an intermediate type big enough to hold the multiplication of both
-            tmp_native_type = intermediate_native_type(src_type.size + dst_type.size, src_type.sign and dst_type.sign)
-            value = '(%s)%s' % (tmp_native_type, value)
-            value = '%s * 0x%x / 0x%x' % (value, dst_one, src_one)
-        value = '(%s)%s' % (dst_native_type, value)
-        return value
-
-    assert False
+def generate_srgb_tables():
+    print 'static ubyte srgb_to_linear[256] = {'
+    for i in range(256):
+        print '   %s,' % (int(math.pow((i / 255.0 + 0.055) / 1.055, 2.4) * 255))
+    print '};'
+    print
+    print 'static ubyte linear_to_srgb[256] = {'
+    print '   0,'
+    for i in range(1, 256):
+        print '   %s,' % (int((1.055 * math.pow(i / 255.0, 0.41666) - 0.055) * 255))
+    print '};'
+    print
 
 
-def generate_format_read(format, dst_type, dst_native_type, dst_suffix):
+def generate_format_read(format, dst_channel, dst_native_type, dst_suffix):
     '''Generate the function to read pixels from a particular format'''
 
-    name = short_name(format)
+    name = format.short_name()
 
     src_native_type = native_type(format)
 
@@ -279,11 +130,11 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix):
     names = ['']*4
     if format.colorspace == 'rgb':
         for i in range(4):
-            swizzle = format.out_swizzle[i]
+            swizzle = format.swizzles[i]
             if swizzle < 4:
                 names[swizzle] += 'rgba'[i]
     elif format.colorspace == 'zs':
-        swizzle = format.out_swizzle[0]
+        swizzle = format.swizzles[0]
         if swizzle < 4:
             names[swizzle] = 'z'
         else:
@@ -291,64 +142,66 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix):
     else:
         assert False
 
-    if format.layout == ARITH:
-        print '         %s pixel = *src_pixel++;' % src_native_type
-        shift = 0;
-        for i in range(4):
-            src_type = format.in_types[i]
-            width = src_type.size
-            if names[i]:
-                value = 'pixel'
-                mask = (1 << width) - 1
-                if shift:
-                    value = '(%s >> %u)' % (value, shift)
-                if shift + width < format.block_size():
-                    value = '(%s & 0x%x)' % (value, mask)
-                value = conversion_expr(src_type, dst_type, dst_native_type, value)
-                print '         %s %s = %s;' % (dst_native_type, names[i], value)
-            shift += width
-    elif format.layout == ARRAY:
-        for i in range(4):
-            src_type = format.in_types[i]
-            if names[i]:
-                value = '(*src_pixel++)'
-                value = conversion_expr(src_type, dst_type, dst_native_type, value)
-                print '         %s %s = %s;' % (dst_native_type, names[i], value)
+    if format.layout == PLAIN:
+        if not format.is_array():
+            print '         %s pixel = *src_pixel++;' % src_native_type
+            shift = 0;
+            for i in range(4):
+                src_channel = format.channels[i]
+                width = src_channel.size
+                if names[i]:
+                    value = 'pixel'
+                    mask = (1 << width) - 1
+                    if shift:
+                        value = '(%s >> %u)' % (value, shift)
+                    if shift + width < format.block_size():
+                        value = '(%s & 0x%x)' % (value, mask)
+                    value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+                    print '         %s %s = %s;' % (dst_native_type, names[i], value)
+                shift += width
+        else:
+            for i in range(4):
+                src_channel = format.channels[i]
+                if names[i]:
+                    value = 'src_pixel[%u]' % i
+                    value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+                    print '         %s %s = %s;' % (dst_native_type, names[i], value)
+            print '         src_pixel += %u;' % (format.nr_channels())
     else:
         assert False
 
     for i in range(4):
         if format.colorspace == 'rgb':
-            swizzle = format.out_swizzle[i]
+            swizzle = format.swizzles[i]
             if swizzle < 4:
                 value = names[swizzle]
             elif swizzle == SWIZZLE_0:
                 value = '0'
             elif swizzle == SWIZZLE_1:
-                value = get_one(dst_type)
+                value = get_one(dst_channel)
             else:
                 assert False
         elif format.colorspace == 'zs':
             if i < 3:
                 value = 'z'
             else:
-                value = get_one(dst_type)
+                value = get_one(dst_channel)
         else:
             assert False
         print '         *dst_pixel++ = %s; /* %s */' % (value, 'rgba'[i])
 
     print '      }'
     print '      src_row += src_stride;'
-    print '      dst_row += dst_stride/sizeof(%s);' % dst_native_type
+    print '      dst_row += dst_stride/sizeof(*dst_row);'
     print '   }'
     print '}'
     print
     
 
-def generate_format_write(format, src_type, src_native_type, src_suffix):
+def generate_format_write(format, src_channel, src_native_type, src_suffix):
     '''Generate the function to write pixels to a particular format'''
 
-    name = short_name(format)
+    name = format.short_name()
 
     dst_native_type = native_type(format)
 
@@ -363,58 +216,48 @@ def generate_format_write(format, src_type, src_native_type, src_suffix):
     print '      const %s *src_pixel = src_row;' %src_native_type
     print '      for (x = 0; x < w; ++x) {'
 
-    inv_swizzle = [None]*4
-    if format.colorspace == 'rgb':
-        for i in range(4):
-            swizzle = format.out_swizzle[i]
-            if swizzle < 4:
-                inv_swizzle[swizzle] = i
-    elif format.colorspace == 'zs':
-        swizzle = format.out_swizzle[0]
-        if swizzle < 4:
-            inv_swizzle[swizzle] = 0
-    else:
-        assert False
-
-    if format.layout == ARITH:
-        print '         %s pixel = 0;' % dst_native_type
-        shift = 0;
-        for i in range(4):
-            dst_type = format.in_types[i]
-            width = dst_type.size
-            if inv_swizzle[i] is not None:
-                value = 'src_pixel[%u]' % inv_swizzle[i]
-                value = conversion_expr(src_type, dst_type, dst_native_type, value)
-                if shift:
-                    value = '(%s << %u)' % (value, shift)
-                print '         pixel |= %s;' % value
-            shift += width
-        print '         *dst_pixel++ = pixel;'
-    elif format.layout == ARRAY:
-        for i in range(4):
-            dst_type = format.in_types[i]
-            if inv_swizzle[i] is not None:
-                value = 'src_pixel[%u]' % inv_swizzle[i]
-                value = conversion_expr(src_type, dst_type, dst_native_type, value)
-                print '         *dst_pixel++ = %s;' % value
+    inv_swizzle = format.inv_swizzles()
+
+    if format.layout == PLAIN:
+        if not format.is_array():
+            print '         %s pixel = 0;' % dst_native_type
+            shift = 0;
+            for i in range(4):
+                dst_channel = format.channels[i]
+                width = dst_channel.size
+                if inv_swizzle[i] is not None:
+                    value = 'src_pixel[%u]' % inv_swizzle[i]
+                    value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+                    if shift:
+                        value = '(%s << %u)' % (value, shift)
+                    print '         pixel |= %s;' % value
+                shift += width
+            print '         *dst_pixel++ = pixel;'
+        else:
+            for i in range(4):
+                dst_channel = format.channels[i]
+                if inv_swizzle[i] is not None:
+                    value = 'src_pixel[%u]' % inv_swizzle[i]
+                    value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+                    print '         *dst_pixel++ = %s;' % value
     else:
         assert False
     print '         src_pixel += 4;'
 
     print '      }'
     print '      dst_row += dst_stride;'
-    print '      src_row += src_stride/sizeof(%s);' % src_native_type
+    print '      src_row += src_stride/sizeof(*src_row);'
     print '   }'
     print '}'
     print
     
 
-def generate_read(formats, dst_type, dst_native_type, dst_suffix):
+def generate_read(formats, dst_channel, dst_native_type, dst_suffix):
     '''Generate the dispatch function to read pixels from any format'''
 
     for format in formats:
         if is_format_supported(format):
-            generate_format_read(format, dst_type, dst_native_type, dst_suffix)
+            generate_format_read(format, dst_channel, dst_native_type, dst_suffix)
 
     print 'void'
     print 'util_format_read_%s(enum pipe_format format, %s *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
@@ -424,7 +267,7 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix):
     for format in formats:
         if is_format_supported(format):
             print '   case %s:' % format.name
-            print '      func = &util_format_%s_read_%s;' % (short_name(format), dst_suffix)
+            print '      func = &util_format_%s_read_%s;' % (format.short_name(), dst_suffix)
             print '      break;'
     print '   default:'
     print '      debug_printf("unsupported format\\n");'
@@ -435,12 +278,12 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix):
     print
 
 
-def generate_write(formats, src_type, src_native_type, src_suffix):
+def generate_write(formats, src_channel, src_native_type, src_suffix):
     '''Generate the dispatch function to write pixels to any format'''
 
     for format in formats:
         if is_format_supported(format):
-            generate_format_write(format, src_type, src_native_type, src_suffix)
+            generate_format_write(format, src_channel, src_native_type, src_suffix)
 
     print 'void'
     print 'util_format_write_%s(enum pipe_format format, const %s *src, unsigned src_stride, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
@@ -451,7 +294,7 @@ def generate_write(formats, src_type, src_native_type, src_suffix):
     for format in formats:
         if is_format_supported(format):
             print '   case %s:' % format.name
-            print '      func = &util_format_%s_write_%s;' % (short_name(format), src_suffix)
+            print '      func = &util_format_%s_write_%s;' % (format.short_name(), src_suffix)
             print '      break;'
     print '   default:'
     print '      debug_printf("unsupported format\\n");'
@@ -473,20 +316,20 @@ def main():
     print __doc__.strip()
     print
     print '#include "pipe/p_compiler.h"'
-    print '#include "u_format.h"'
     print '#include "u_math.h"'
+    print '#include "u_format_pack.h"'
     print
 
-    generate_clamp()
+    generate_srgb_tables()
 
-    type = Type(FLOAT, False, 32)
+    type = Channel(FLOAT, False, 32)
     native_type = 'float'
     suffix = '4f'
 
     generate_read(formats, type, native_type, suffix)
     generate_write(formats, type, native_type, suffix)
 
-    type = Type(UNSIGNED, True, 8)
+    type = Channel(UNSIGNED, True, 8)
     native_type = 'uint8_t'
     suffix = '4ub'
 
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
new file mode 100644
index 0000000000..409d024c63
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -0,0 +1,589 @@
+#!/usr/bin/env python
+
+'''
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Pixel format packing and unpacking functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+'''
+
+
+import sys
+
+from u_format_parse import *
+
+
+def generate_format_type(format):
+    '''Generate a structure that describes the format.'''
+
+    print 'union util_format_%s {' % format.short_name()
+    if format.is_bitmask():
+        print '   uint%u_t value;' % (format.block_size(),)
+    print '   struct {'
+    for channel in format.channels:
+        if format.is_bitmask() and not format.is_array():
+            if channel.type == VOID:
+                if channel.size:
+                    print '      unsigned %s:%u;' % (channel.name, channel.size)
+            elif channel.type == UNSIGNED:
+                print '      unsigned %s:%u;' % (channel.name, channel.size)
+            elif channel.type == SIGNED:
+                print '      int %s:%u;' % (channel.name, channel.size)
+            else:
+                assert 0
+        else:
+            assert channel.size % 8 == 0 and is_pot(channel.size)
+            if channel.type == VOID:
+                if channel.size:
+                    print '      uint%u_t %s;' % (channel.size, channel.name)
+            elif channel.type == UNSIGNED:
+                print '      uint%u_t %s;' % (channel.size, channel.name)
+            elif channel.type in (SIGNED, FIXED):
+                print '      int%u_t %s;' % (channel.size, channel.name)
+            elif channel.type == FLOAT:
+                if channel.size == 64:
+                    print '      double %s;' % (channel.name)
+                elif channel.size == 32:
+                    print '      float %s;' % (channel.name)
+                elif channel.size == 16:
+                    print '      uint16_t %s;' % (channel.name)
+                else:
+                    assert 0
+            else:
+                assert 0
+    print '   } chan;'
+    print '};'
+    print
+
+
+def bswap_format(format):
+    '''Generate a structure that describes the format.'''
+
+    if format.is_bitmask() and not format.is_array():
+        print '#ifdef PIPE_ARCH_BIG_ENDIAN'
+        print '   pixel.value = util_bswap%u(pixel.value);' % format.block_size()
+        print '#endif'
+
+
+def is_format_supported(format):
+    '''Determines whether we actually have the plumbing necessary to generate the 
+    to read/write to/from this format.'''
+
+    # FIXME: Ideally we would support any format combination here.
+
+    if format.layout != PLAIN:
+        return False
+
+    for i in range(4):
+        channel = format.channels[i]
+        if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT):
+            return False
+
+    # We can only read a color from a depth/stencil format if the depth channel is present
+    if format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE:
+        return False
+
+    return True
+
+
+def native_type(format):
+    '''Get the native appropriate for a format.'''
+
+    if format.layout == PLAIN:
+        if not format.is_array():
+            # For arithmetic pixel formats return the integer type that matches the whole pixel
+            return 'uint%u_t' % format.block_size()
+        else:
+            # For array pixel formats return the integer type that matches the color channel
+            type = format.channels[0]
+            if type.type == UNSIGNED:
+                return 'uint%u_t' % type.size
+            elif type.type == SIGNED:
+                return 'int%u_t' % type.size
+            elif type.type == FLOAT:
+                if type.size == 32:
+                    return 'float'
+                elif type.size == 64:
+                    return 'double'
+                else:
+                    assert False
+            else:
+                assert False
+    else:
+        assert False
+
+
+def intermediate_native_type(bits, sign):
+    '''Find a native type adequate to hold intermediate results of the request bit size.'''
+
+    bytes = 4 # don't use anything smaller than 32bits
+    while bytes * 8 < bits:
+        bytes *= 2
+    bits = bytes*8
+
+    if sign:
+        return 'int%u_t' % bits
+    else:
+        return 'uint%u_t' % bits
+
+
+def get_one_shift(type):
+    '''Get the number of the bit that matches unity for this type.'''
+    if type.type == 'FLOAT':
+        assert False
+    if not type.norm:
+        return 0
+    if type.type == UNSIGNED:
+        return type.size
+    if type.type == SIGNED:
+        return type.size - 1
+    if type.type == FIXED:
+        return type.size / 2
+    assert False
+
+
+def get_one(type):
+    '''Get the value of unity for this type.'''
+    if type.type == 'FLOAT' or not type.norm:
+        return 1
+    else:
+        return (1 << get_one_shift(type)) - 1
+
+
+def generate_clamp():
+    '''Code generate the clamping functions for each type.
+
+    We don't use a macro so that arguments with side effects, 
+    like *src_pixel++ are correctly handled.
+    '''
+
+    for suffix, native_type in [
+        ('', 'double'),
+        ('f', 'float'),
+        ('ui', 'unsigned int'),
+        ('si', 'int'),
+    ]:
+        print 'static INLINE %s' % native_type
+        print 'clamp%s(%s value, %s lbound, %s ubound)' % (suffix, native_type, native_type, native_type)
+        print '{'
+        print '   if(value < lbound)'
+        print '      return lbound;'
+        print '   if(value > ubound)'
+        print '      return ubound;'
+        print '   return value;'
+        print '}'
+        print
+
+
+def clamp_expr(src_channel, dst_channel, dst_native_type, value):
+    '''Generate the expression to clamp the value in the source type to the
+    destination type range.'''
+
+    if src_channel == dst_channel:
+        return value
+
+    # Pick the approriate clamp function
+    if src_channel.type == FLOAT:
+        if src_channel.size == 32:
+            func = 'clampf'
+        elif src_channel.size == 64:
+            func = 'clamp'
+        else:
+            assert False
+    elif src_channel.type == UNSIGNED:
+        func = 'clampui'
+    elif src_channel.type == SIGNED:
+        func = 'clampsi'
+    else:
+        assert False
+
+    src_min = src_channel.min()
+    src_max = src_channel.max()
+    dst_min = dst_channel.min()
+    dst_max = dst_channel.max()
+
+    if src_min < dst_min and src_max > dst_max:
+        return 'CLAMP(%s, %s, %s)' % (value, dst_min, dst_max)
+
+    if src_max > dst_max:
+        return 'MIN2(%s, %s)' % (value, dst_max)
+        
+    if src_min < dst_min:
+        return 'MAX2(%s, %s)' % (value, dst_min)
+
+    return value
+
+
+def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True):
+    '''Generate the expression to convert a value between two types.'''
+
+    if src_channel == dst_channel:
+        return value
+
+    if src_channel.type == FLOAT and dst_channel.type == FLOAT:
+        return '(%s)%s' % (dst_native_type, value)
+    
+    if clamp:
+        value = clamp_expr(src_channel, dst_channel, dst_native_type, value)
+
+    if dst_channel.type == FLOAT:
+        if src_channel.norm:
+            one = get_one(src_channel)
+            if src_channel.size <= 23:
+                scale = '(1.0f/0x%x)' % one
+            else:
+                # bigger than single precision mantissa, use double
+                scale = '(1.0/0x%x)' % one
+            value = '(%s * %s)' % (value, scale)
+        return '(%s)%s' % (dst_native_type, value)
+
+    if src_channel.type == FLOAT:
+        if dst_channel.norm:
+            dst_one = get_one(dst_channel)
+            if dst_channel.size <= 23:
+                scale = '0x%x' % dst_one
+            else:
+                # bigger than single precision mantissa, use double
+                scale = '(double)0x%x' % dst_one
+            value = '(%s * %s)' % (value, scale)
+        return '(%s)%s' % (dst_native_type, value)
+
+    if src_channel.type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED):
+        if not src_channel.norm and not dst_channel.norm:
+            # neither is normalized -- just cast
+            return '(%s)%s' % (dst_native_type, value)
+
+        src_one = get_one(src_channel)
+        dst_one = get_one(dst_channel)
+
+        if src_one > dst_one and src_channel.norm and dst_channel.norm:
+            # We can just bitshift
+            src_shift = get_one_shift(src_channel)
+            dst_shift = get_one_shift(dst_channel)
+            value = '(%s >> %s)' % (value, src_shift - dst_shift)
+        else:
+            # We need to rescale using an intermediate type big enough to hold the multiplication of both
+            tmp_native_type = intermediate_native_type(src_channel.size + dst_channel.size, src_channel.sign and dst_channel.sign)
+            value = '((%s)%s)' % (tmp_native_type, value)
+            value = '(%s * 0x%x / 0x%x)' % (value, dst_one, src_one)
+        value = '(%s)%s' % (dst_native_type, value)
+        return value
+
+    assert False
+
+
+def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix):
+    '''Generate the function to unpack pixels from a particular format'''
+
+    assert format.layout == PLAIN
+
+    name = format.short_name()
+
+    src_native_type = native_type(format)
+
+    print 'static INLINE void'
+    print 'util_format_%s_unpack_%s(%s *dst, const void *src)' % (name, dst_suffix, dst_native_type)
+    print '{'
+    
+    if format.is_bitmask():
+        depth = format.block_size()
+        print '   uint%u_t value = *(uint%u_t *)src;' % (depth, depth) 
+
+        # Declare the intermediate variables
+        for i in range(format.nr_channels()):
+            src_channel = format.channels[i]
+            if src_channel.type == UNSIGNED:
+                print '   uint%u_t %s;' % (depth, src_channel.name)
+            elif src_channel.type == SIGNED:
+                print '   int%u_t %s;' % (depth, src_channel.name)
+
+        print '#ifdef PIPE_ARCH_BIG_ENDIAN'
+        print '   value = util_bswap%u(value);' % depth
+        print '#endif'
+
+        # Compute the intermediate unshifted values 
+        shift = 0
+        for i in range(format.nr_channels()):
+            src_channel = format.channels[i]
+            value = 'value'
+            if src_channel.type == UNSIGNED:
+                if shift:
+                    value = '%s >> %u' % (value, shift)
+                if shift + src_channel.size < depth:
+                    value = '(%s) & 0x%x' % (value, (1 << src_channel.size) - 1)
+            elif src_channel.type == SIGNED:
+                if shift + src_channel.size < depth:
+                    # Align the sign bit
+                    lshift = depth - (shift + src_channel.size)
+                    value = '%s << %u' % (value, lshift)
+                # Cast to signed
+                value = '(int%u_t)(%s) ' % (depth, value)
+                if src_channel.size < depth:
+                    # Align the LSB bit
+                    rshift = depth - src_channel.size
+                    value = '(%s) >> %u' % (value, rshift)
+            else:
+                value = None
+                
+            if value is not None:
+                print '   %s = %s;' % (src_channel.name, value)
+                
+            shift += src_channel.size
+
+        # Convert, swizzle, and store final values
+        for i in range(4):
+            swizzle = format.swizzles[i]
+            if swizzle < 4:
+                src_channel = format.channels[swizzle]
+                value = src_channel.name 
+                value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+            elif swizzle == SWIZZLE_0:
+                value = '0'
+            elif swizzle == SWIZZLE_1:
+                value = get_one(dst_channel)
+            elif swizzle == SWIZZLE_NONE:
+                value = '0'
+            else:
+                assert False
+            if format.colorspace == ZS:
+                if i == 3:
+                    value = get_one(dst_channel)
+                elif i >= 1:
+                    value = 'dst[0]'
+            print '   dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i])
+        
+    else:
+        print '   union util_format_%s pixel;' % format.short_name()
+        print '   memcpy(&pixel, src, sizeof pixel);'
+        bswap_format(format)
+    
+        for i in range(4):
+            swizzle = format.swizzles[i]
+            if swizzle < 4:
+                src_channel = format.channels[swizzle]
+                value = 'pixel.chan.%s' % src_channel.name 
+                value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+            elif swizzle == SWIZZLE_0:
+                value = '0'
+            elif swizzle == SWIZZLE_1:
+                value = get_one(dst_channel)
+            elif swizzle == SWIZZLE_NONE:
+                value = '0'
+            else:
+                assert False
+            if format.colorspace == ZS:
+                if i == 3:
+                    value = get_one(dst_channel)
+                elif i >= 1:
+                    value = 'dst[0]'
+            print '   dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i])
+
+    print '}'
+    print
+    
+
+def generate_format_pack(format, src_channel, src_native_type, src_suffix):
+    '''Generate the function to pack pixels to a particular format'''
+
+    name = format.short_name()
+
+    dst_native_type = native_type(format)
+
+    assert format.layout == PLAIN
+
+    inv_swizzle = format.inv_swizzles()
+    
+    print 'static INLINE void'
+    print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type)
+    print '{'
+    
+    if format.is_bitmask():
+        depth = format.block_size()
+        print '   uint%u_t value = 0;' % depth 
+
+        shift = 0
+        for i in range(4):
+            dst_channel = format.channels[i]
+            if inv_swizzle[i] is not None:
+                value = 'rgba'[inv_swizzle[i]]
+                value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+                if format.colorspace == ZS:
+                    if i == 3:
+                        value = get_one(dst_channel)
+                    elif i >= 1:
+                        value = '0'
+                if dst_channel.type in (UNSIGNED, SIGNED):
+                    if shift + dst_channel.size < depth:
+                        value = '(%s) & 0x%x' % (value, (1 << dst_channel.size) - 1)
+                    if shift:
+                        value = '(%s) << %u' % (value, shift)
+                    if dst_channel.type == SIGNED:
+                        # Cast to unsigned
+                        value = '(uint%u_t)(%s) ' % (depth, value)
+                else:
+                    value = None
+                if value is not None:
+                    print '   value |= %s;' % (value)
+                
+            shift += dst_channel.size
+
+        print '#ifdef PIPE_ARCH_BIG_ENDIAN'
+        print '   value = util_bswap%u(value);' % depth
+        print '#endif'
+        
+        print '   *(uint%u_t *)dst = value;' % depth 
+
+    else:
+        print '   union util_format_%s pixel;' % format.short_name()
+    
+        for i in range(4):
+            dst_channel = format.channels[i]
+            width = dst_channel.size
+            if inv_swizzle[i] is None:
+                continue
+            value = 'rgba'[inv_swizzle[i]]
+            value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+            if format.colorspace == ZS:
+                if i == 3:
+                    value = get_one(dst_channel)
+                elif i >= 1:
+                    value = '0'
+            print '   pixel.chan.%s = %s;' % (dst_channel.name, value)
+    
+        bswap_format(format)
+        print '   memcpy(dst, &pixel, sizeof pixel);'
+        
+    print '}'
+    print
+    
+
+def generate_unpack(formats, dst_channel, dst_native_type, dst_suffix):
+    '''Generate the dispatch function to unpack pixels from any format'''
+
+    for format in formats:
+        if is_format_supported(format):
+            generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix)
+
+    print 'static INLINE void'
+    print 'util_format_unpack_%s(enum pipe_format format, %s *dst, const void *src)' % (dst_suffix, dst_native_type)
+    print '{'
+    print '   void (*func)(%s *dst, const void *src);' % dst_native_type
+    print '   switch(format) {'
+    for format in formats:
+        if is_format_supported(format):
+            print '   case %s:' % format.name
+            print '      func = &util_format_%s_unpack_%s;' % (format.short_name(), dst_suffix)
+            print '      break;'
+    print '   default:'
+    print '      debug_printf("unsupported format\\n");'
+    print '      return;'
+    print '   }'
+    print '   func(dst, src);'
+    print '}'
+    print
+
+
+def generate_pack(formats, src_channel, src_native_type, src_suffix):
+    '''Generate the dispatch function to pack pixels to any format'''
+
+    for format in formats:
+        if is_format_supported(format):
+            generate_format_pack(format, src_channel, src_native_type, src_suffix)
+
+    print 'static INLINE void'
+    print 'util_format_pack_%s(enum pipe_format format, void *dst, %s r, %s g, %s b, %s a)' % (src_suffix, src_native_type, src_native_type, src_native_type, src_native_type)
+    print '{'
+    print '   void (*func)(void *dst, %s r, %s g, %s b, %s a);' % (src_native_type, src_native_type, src_native_type, src_native_type)
+    print '   switch(format) {'
+    for format in formats:
+        if is_format_supported(format):
+            print '   case %s:' % format.name
+            print '      func = &util_format_%s_pack_%s;' % (format.short_name(), src_suffix)
+            print '      break;'
+    print '   default:'
+    print '      debug_printf("%s: unsupported format\\n", __FUNCTION__);'
+    print '      return;'
+    print '   }'
+    print '   func(dst, r, g, b, a);'
+    print '}'
+    print
+
+
+def main():
+    formats = []
+    for arg in sys.argv[1:]:
+        formats.extend(parse(arg))
+
+    print '/* This file is autogenerated by u_format_pack.py from u_format.csv. Do not edit directly. */'
+    print
+    # This will print the copyright message on the top of this file
+    print __doc__.strip()
+
+    print
+    print '#ifndef U_FORMAT_PACK_H'
+    print '#define U_FORMAT_PACK_H'
+    print
+    print '#include "pipe/p_compiler.h"'
+    print '#include "u_math.h"'
+    print '#include "u_format.h"'
+    print
+
+    generate_clamp()
+
+    for format in formats:
+        if format.layout == PLAIN:
+            generate_format_type(format)
+
+    channel = Channel(FLOAT, False, 32)
+    native_type = 'float'
+    suffix = '4f'
+
+    generate_unpack(formats, channel, native_type, suffix)
+    generate_pack(formats, channel, native_type, suffix)
+
+    channel = Channel(UNSIGNED, True, 8)
+    native_type = 'uint8_t'
+    suffix = '4ub'
+
+    generate_unpack(formats, channel, native_type, suffix)
+    generate_pack(formats, channel, native_type, suffix)
+
+    print
+    print '#ifdef __cplusplus'
+    print '}'
+    print '#endif'
+    print
+    print '#endif /* ! U_FORMAT_PACK_H */'
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/gallium/auxiliary/util/u_format_parse.py b/src/gallium/auxiliary/util/u_format_parse.py
index 493aff7112..f74dc5e88a 100755
--- a/src/gallium/auxiliary/util/u_format_parse.py
+++ b/src/gallium/auxiliary/util/u_format_parse.py
@@ -30,64 +30,163 @@
 '''
 
 
-import sys
-
-
 VOID, UNSIGNED, SIGNED, FIXED, FLOAT = range(5)
 
 SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_0, SWIZZLE_1, SWIZZLE_NONE, = range(7)
 
-ARITH = 'arith'
-ARRAY = 'array'
+PLAIN = 'plain'
+
+RGB = 'rgb'
+SRGB = 'srgb'
+YUV = 'yuv'
+ZS = 'zs'
+
 
+def is_pot(x):
+   return (x & (x - 1)) == 0;
 
-class Type:
-    '''Describe the type of a color channel.'''
+
+VERY_LARGE = 99999999999999999999999
+
+
+class Channel:
+    '''Describe the channel of a color channel.'''
     
-    def __init__(self, kind, norm, size):
-        self.kind = kind
+    def __init__(self, type, norm, size, name = ''):
+        self.type = type
         self.norm = norm
         self.size = size
-        self.sign = kind in (SIGNED, FIXED, FLOAT)
+        self.sign = type in (SIGNED, FIXED, FLOAT)
+        self.name = name
 
     def __str__(self):
-        s = str(self.kind)
+        s = str(self.type)
         if self.norm:
             s += 'n'
         s += str(self.size)
         return s
 
     def __eq__(self, other):
-        return self.kind == other.kind and self.norm == other.norm and self.size == other.size
+        return self.type == other.type and self.norm == other.norm and self.size == other.size
+
+    def max(self):
+        '''Maximum representable number.'''
+        if self.type == FLOAT:
+            return VERY_LARGE
+        if self.norm:
+            return 1
+        if self.type == UNSIGNED:
+            return (1 << self.size) - 1
+        if self.type == SIGNED:
+            return (1 << (self.size - 1)) - 1
+        assert False
+    
+    def min(self):
+        '''Minimum representable number.'''
+        if self.type == FLOAT:
+            return -VERY_LARGE
+        if self.type == UNSIGNED:
+            return 0
+        if self.norm:
+            return -1
+        if self.type == SIGNED:
+            return -(1 << (self.size - 1))
+        assert False
 
 
 class Format:
     '''Describe a pixel format.'''
 
-    def __init__(self, name, layout, block_width, block_height, in_types, out_swizzle, colorspace):
+    def __init__(self, name, layout, block_width, block_height, channels, swizzles, colorspace):
         self.name = name
         self.layout = layout
         self.block_width = block_width
         self.block_height = block_height
-        self.in_types = in_types
-        self.out_swizzle = out_swizzle
+        self.channels = channels
+        self.swizzles = swizzles
         self.name = name
         self.colorspace = colorspace
 
     def __str__(self):
         return self.name
 
+    def short_name(self):
+        '''Make up a short norm for a format, suitable to be used as suffix in
+        function names.'''
+
+        name = self.name
+        if name.startswith('PIPE_FORMAT_'):
+            name = name[len('PIPE_FORMAT_'):]
+        name = name.lower()
+        return name
+
     def block_size(self):
         size = 0
-        for type in self.in_types:
-            size += type.size
+        for channel in self.channels:
+            size += channel.size
         return size
 
+    def nr_channels(self):
+        nr_channels = 0
+        for channel in self.channels:
+            if channel.size:
+                nr_channels += 1
+        return nr_channels
+
+    def is_array(self):
+        ref_channel = self.channels[0]
+        for channel in self.channels[1:]:
+            if channel.size and (channel.size != ref_channel.size or channel.size % 8):
+                return False
+        return True
+
+    def is_mixed(self):
+        ref_channel = self.channels[0]
+        for channel in self.channels[1:]:
+            if channel.type != VOID:
+                if channel.type != ref_channel.type:
+                    return True
+                if channel.norm != ref_channel.norm:
+                    return True
+        return False
+
+    def is_pot(self):
+        return is_pot(self.block_size())
+
+    def is_int(self):
+        for channel in self.channels:
+            if channel.type not in (VOID, UNSIGNED, SIGNED):
+                return False
+        return True
+
+    def is_float(self):
+        for channel in self.channels:
+            if channel.type not in (VOID, FLOAT):
+                return False
+        return True
+
+    def is_bitmask(self):
+        if self.block_size() not in (8, 16, 32):
+            return False
+        for channel in self.channels:
+            if channel.type not in (VOID, UNSIGNED, SIGNED):
+                return False
+        return True
+
+    def inv_swizzles(self):
+        '''Return an array[4] of inverse swizzle terms'''
+        inv_swizzle = [None]*4
+        for i in range(4):
+            swizzle = self.swizzles[i]
+            if swizzle < 4:
+                inv_swizzle[swizzle] = i
+        return inv_swizzle
+
     def stride(self):
         return self.block_size()/8
 
 
-_kind_parse_map = {
+_type_parse_map = {
     '':  VOID,
     'x': VOID,
     'u': UNSIGNED,
@@ -108,20 +207,55 @@ _swizzle_parse_map = {
 
 def parse(filename):
     '''Parse the format descrition in CSV format in terms of the 
-    Type and Format classes above.'''
+    Channel and Format classes above.'''
 
     stream = open(filename)
     formats = []
     for line in stream:
-        line = line.rstrip()
+        try:
+            comment = line.index('#')
+        except ValueError:
+            pass
+        else:
+            line = line[:comment]
+        line = line.strip()
+        if not line:
+            continue
+
         fields = [field.strip() for field in line.split(',')]
+        
         name = fields[0]
         layout = fields[1]
         block_width, block_height = map(int, fields[2:4])
-        in_types = []
-        for field in fields[4:8]:
+
+        swizzles = [_swizzle_parse_map[swizzle] for swizzle in fields[8]]
+        colorspace = fields[9]
+        
+        if layout == PLAIN:
+            names = ['']*4
+            if colorspace in (RGB, SRGB):
+                for i in range(4):
+                    swizzle = swizzles[i]
+                    if swizzle < 4:
+                        names[swizzle] += 'rgba'[i]
+            elif colorspace == ZS:
+                for i in range(4):
+                    swizzle = swizzles[i]
+                    if swizzle < 4:
+                        names[swizzle] += 'zs'[i]
+            else:
+                assert False
+            for i in range(4):
+                if names[i] == '':
+                    names[i] = 'x'
+        else:
+            names = ['x', 'y', 'z', 'w']
+
+        channels = []
+        for i in range(0, 4):
+            field = fields[4 + i]
             if field:
-                kind = _kind_parse_map[field[0]]
+                type = _type_parse_map[field[0]]
                 if field[1] == 'n':
                     norm = True
                     size = int(field[2:])
@@ -129,13 +263,13 @@ def parse(filename):
                     norm = False
                     size = int(field[1:])
             else:
-                kind = VOID
+                type = VOID
                 norm = False
                 size = 0
-            in_type = Type(kind, norm, size)
-            in_types.append(in_type)
-        out_swizzle = [_swizzle_parse_map[swizzle] for swizzle in fields[8]]
-        colorspace = fields[9]
-        formats.append(Format(name, layout, block_width, block_height, in_types, out_swizzle, colorspace))
+            channel = Channel(type, norm, size, names[i])
+            channels.append(channel)
+
+        format = Format(name, layout, block_width, block_height, channels, swizzles, colorspace)
+        formats.append(format)
     return formats
 
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index 571cab55dc..fb68852a53 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -51,7 +51,7 @@ colorspace_channels_map = {
 }
 
 
-kind_map = {
+type_map = {
     VOID:     "UTIL_FORMAT_TYPE_VOID",
     UNSIGNED: "UTIL_FORMAT_TYPE_UNSIGNED",
     SIGNED:   "UTIL_FORMAT_TYPE_SIGNED",
@@ -87,35 +87,48 @@ def write_format_table(formats):
     print '#include "u_format.h"'
     print
     print 'const struct util_format_description'
-    print 'util_format_description_table[] = '
-    print "{"
-    print "   {"
-    print "      PIPE_FORMAT_NONE,"
-    print "      \"PIPE_FORMAT_NONE\","
-    print "      {0, 0, 0},"
-    print "      0,"
-    print "      {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}},"
-    print "      {0, 0, 0, 0},"
-    print "      0"
-    print "   },"
+    print 'util_format_none_description = {'
+    print "   PIPE_FORMAT_NONE,"
+    print "   \"PIPE_FORMAT_NONE\","
+    print "   \"none\","
+    print "   {0, 0, 0},"
+    print "   0,"
+    print "   0,"
+    print "   0,"
+    print "   0,"
+    print "   0,"
+    print "   {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}},"
+    print "   {0, 0, 0, 0},"
+    print "   0"
+    print "};"
+    print
     for format in formats:
+        print 'const struct util_format_description'
+        print 'util_format_%s_description = {' % (format.short_name(),)
+        print "   %s," % (format.name,)
+        print "   \"%s\"," % (format.name,)
+        print "   \"%s\"," % (format.short_name(),)
+        print "   {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size())
+        print "   %s," % (layout_map(format.layout),)
+        print "   %u,\t/* nr_channels */" % (format.nr_channels(),)
+        print "   %s,\t/* is_array */" % (bool_map(format.is_array()),)
+        print "   %s,\t/* is_bitmask */" % (bool_map(format.is_bitmask()),)
+        print "   %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),)
         print "   {"
-        print "      %s," % (format.name,)
-        print "      \"%s\"," % (format.name,)
-        print "      {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size())
-        print "      %s," % (layout_map(format.layout),)
-        print "      {"
         for i in range(4):
-            type = format.in_types[i]
+            channel = format.channels[i]
             if i < 3:
                 sep = ","
             else:
                 sep = ""
-            print "         {%s, %s, %u}%s\t/* %s */" % (kind_map[type.kind], bool_map(type.norm), type.size, sep, "xyzw"[i])
-        print "      },"
-        print "      {"
+            if channel.size:
+                print "      {%s, %s, %u}%s\t/* %s = %s */" % (type_map[channel.type], bool_map(channel.norm), channel.size, sep, "xyzw"[i], channel.name)
+            else:
+                print "      {0, 0, 0}%s" % (sep,)
+        print "   },"
+        print "   {"
         for i in range(4):
-            swizzle = format.out_swizzle[i]
+            swizzle = format.swizzles[i]
             if i < 3:
                 sep = ","
             else:
@@ -124,11 +137,30 @@ def write_format_table(formats):
                 comment = colorspace_channels_map[format.colorspace][i]
             except (KeyError, IndexError):
                 comment = 'ignored'
-            print "         %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment)
-        print "      },"
-        print "      %s," % (colorspace_map(format.colorspace),)
+            print "      %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment)
         print "   },"
-    print "};"
+        print "   %s," % (colorspace_map(format.colorspace),)
+        print "};"
+        print
+    print "const struct util_format_description *"
+    print "util_format_description(enum pipe_format format)"
+    print "{"
+    print "   if (format >= PIPE_FORMAT_COUNT) {"
+    print "      return NULL;"
+    print "   }"
+    print
+    print "   switch (format) {"
+    print "   case PIPE_FORMAT_NONE:"
+    print "      return &util_format_none_description;"
+    for format in formats:
+        print "   case %s:" % format.name
+        print "      return &util_format_%s_description;" % (format.short_name(),)
+    print "   default:"
+    print "      assert(0);"
+    print "      return NULL;"
+    print "   }"
+    print "}"
+    print
 
 
 def main():
diff --git a/src/gallium/auxiliary/util/u_format_tests.c b/src/gallium/auxiliary/util/u_format_tests.c
new file mode 100644
index 0000000000..182a474044
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_tests.c
@@ -0,0 +1,544 @@
+/**************************************************************************
+ *
+ * Copyright 2009-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "u_memory.h"
+#include "u_format_tests.h"
+
+
+/*
+ * Helper macros to create the packed bytes for longer words.
+ */
+
+#define PACKED_1x8(x)          {x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_2x8(x, y)       {x, y, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_3x8(x, y, z)    {x, y, z, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_4x8(x, y, z, w) {x, y, z, w, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+#define PACKED_1x16(x)          {(x) & 0xff, (x) >> 8,          0,        0,          0,        0,          0,        0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_2x16(x, y)       {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8,          0,        0,          0,        0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_3x16(x, y, z)    {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, (z) & 0xff, (z) >> 8,          0,        0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_4x16(x, y, z, w) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, (z) & 0xff, (z) >> 8, (w) & 0xff, (w) >> 8, 0, 0, 0, 0, 0, 0, 0, 0}
+
+#define PACKED_1x32(x)          {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24,          0,                 0,                  0,         0,          0,                 0,                  0,         0,          0,                 0,                  0,         0}
+#define PACKED_2x32(x, y)       {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24,          0,                 0,                  0,         0,          0,                 0,                  0,         0}
+#define PACKED_3x32(x, y, z)    {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, (z) & 0xff, ((z) >> 8) & 0xff, ((z) >> 16) & 0xff, (z) >> 24,          0,                 0,                  0,         0}
+#define PACKED_4x32(x, y, z, w) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, (z) & 0xff, ((z) >> 8) & 0xff, ((z) >> 16) & 0xff, (z) >> 24, (w) & 0xff, ((w) >> 8) & 0xff, ((w) >> 16) & 0xff, (w) >> 24}
+
+
+/**
+ * Test cases.
+ *
+ * These were manually entered. We could generate these
+ *
+ * To keep this to a we cover only the corner cases, which should produce
+ * good enough coverage since that pixel format transformations are afine for
+ * non SRGB formats.
+ */
+const struct util_format_test_case
+util_format_test_cases[] =
+{
+
+   /*
+    * 32-bit rendertarget formats
+    */
+
+   {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000ff), {0.0, 0.0, 1.0, 1.0}},
+   {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000ff00), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ff0000), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xff000000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x000000ff), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), {0.0, 0.0, 1.0, 1.0}},
+   {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x000000ff), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), {0.0, 0.0, 1.0, 1.0}},
+   {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000003ff), {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000ffc00), {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x3ff00000), {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}},
+
+   /*
+    * 16-bit rendertarget formats
+    */
+
+   {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x001f), {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x03e0), {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x7c00), {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x000f), {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x00f0), {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0f00), {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x001f), {0.0, 0.0, 1.0, 1.0}},
+   {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x07e0), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf800), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}},
+
+   /*
+    * Luminance/intensity/alpha formats
+    */
+
+   {PIPE_FORMAT_L8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_L8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_A8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), {0.0, 0.0, 0.0, 1.0}},
+
+   {PIPE_FORMAT_I8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_I8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x00ff), {1.0, 1.0, 1.0, 0.0}},
+   {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xff00), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_L16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_L16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}},
+
+   /*
+    * TODO: SRGB formats
+    */
+
+   /*
+    * Mixed-signed formats
+    */
+
+   {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x00, 0x00, 0x00), { 0.0,  0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), { 1.0,  0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x81, 0x00, 0x00, 0x00), {-1.0,  0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), { 0.0,  1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x81, 0x00, 0x00), { 0.0, -1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x00, 0xff, 0x00), { 0.0,  0.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), { 0.0,  0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x000f), { 1.0,  0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0011), {-1.0,  0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x01e0), { 0.0,  1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0220), { 0.0, -1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0xfc00), { 0.0,  0.0, 1.0, 1.0}},
+
+   /*
+    * TODO: Depth-stencil formats
+    */
+
+   /*
+    * TODO: YUV formats
+    */
+
+   /*
+    * TODO: Compressed formats
+    */
+
+   /*
+    * Standard 8-bit integer formats
+    */
+
+   {PIPE_FORMAT_R8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), {1.0, 0.0, 0.0, 1.0}},
+
+   {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0x00), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0xff), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0xff), {1.0, 1.0, 0.0, 1.0}},
+
+   {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), {0.0, 0.0, 1.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_R8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), {  0.0,   0.0,   0.0, 1.0}},
+   {PIPE_FORMAT_R8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0xff), {255.0,   0.0,   0.0, 1.0}},
+
+   {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), {  0.0,   0.0,   0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0x00), {255.0,   0.0,   0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0xff), {  0.0, 255.0,   0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0xff), {255.0, 255.0,   0.0, 1.0}},
+
+   {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), {  0.0,   0.0,   0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), {255.0,   0.0,   0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), {  0.0, 255.0,   0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), {  0.0,   0.0, 255.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), {255.0, 255.0, 255.0, 1.0}},
+
+   {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), {  0.0,   0.0,   0.0,   0.0}},
+   {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), {255.0,   0.0,   0.0,   0.0}},
+   {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), {  0.0, 255.0,   0.0,   0.0}},
+   {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), {  0.0,   0.0, 255.0,   0.0}},
+   {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), {  0.0,   0.0,   0.0, 255.0}},
+   {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), {255.0, 255.0, 255.0, 255.0}},
+
+   {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), { 0.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x7f), { 1.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x81), {-1.0,  0.0,  0.0,  1.0}},
+
+   {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), { 0.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), { 1.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x81, 0x00), {-1.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), { 0.0,  1.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x81), { 0.0, -1.0,  0.0,  1.0}},
+
+   {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), { 0.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x7f, 0x00, 0x00), { 1.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x81, 0x00, 0x00), {-1.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x7f, 0x00), { 0.0,  1.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x81, 0x00), { 0.0, -1.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x7f), { 0.0,  0.0,  1.0,  1.0}},
+   {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x81), { 0.0,  0.0, -1.0,  1.0}},
+
+   {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), { 0.0,  0.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), { 1.0,  0.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x81, 0x00, 0x00, 0x00), {-1.0,  0.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), { 0.0,  1.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x81, 0x00, 0x00), { 0.0, -1.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x7f, 0x00), { 0.0,  0.0,  1.0,  0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x81, 0x00), { 0.0,  0.0, -1.0,  0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x7f), { 0.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x81), { 0.0,  0.0,  0.0, -1.0}},
+
+   {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), {   0.0,    0.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x7f), { 127.0,    0.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x80), {-128.0,    0.0,    0.0, 1.0}},
+
+   {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), {   0.0,    0.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), { 127.0,    0.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x80, 0x00), {-128.0,    0.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), {   0.0,  127.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x80), {   0.0, -128.0,    0.0, 1.0}},
+
+   {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), {   0.0,    0.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x7f, 0x00, 0x00), { 127.0,    0.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x80, 0x00, 0x00), {-128.0,    0.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x7f, 0x00), {   0.0,  127.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x80, 0x00), {   0.0, -128.0,    0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x7f), {   0.0,    0.0,  127.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x80), {   0.0,    0.0, -128.0, 1.0}},
+
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), {   0.0,    0.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), { 127.0,    0.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0x00, 0x00, 0x00), {-128.0,    0.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), {   0.0,  127.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x80, 0x00, 0x00), {   0.0, -128.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x7f, 0x00), {   0.0,    0.0,  127.0,    0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x80, 0x00), {   0.0,    0.0, -128.0,    0.0}},
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x7f), {   0.0,    0.0,    0.0,  127.0}},
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x80), {   0.0,    0.0,    0.0, -128.0}},
+
+   /*
+    * Standard 16-bit integer formats
+    */
+
+   {PIPE_FORMAT_R16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 0.0, 0.0, 1.0}},
+
+   {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0x0000), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xffff), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0xffff), {1.0, 1.0, 0.0, 1.0}},
+
+   {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0x0000, 0x0000), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xffff, 0x0000), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xffff), {0.0, 0.0, 1.0, 1.0}},
+   {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0xffff, 0xffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0x0000, 0x0000, 0x0000), {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xffff, 0x0000, 0x0000), {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xffff, 0x0000), {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xffff), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_R16_USCALED, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {    0.0,     0.0,     0.0,   1.0}},
+   {PIPE_FORMAT_R16_USCALED, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {65535.0,     0.0,     0.0,   1.0}},
+
+   {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), {    0.0,     0.0,     0.0,   1.0}},
+   {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0x0000), {65535.0,     0.0,     0.0,   1.0}},
+   {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xffff), {    0.0, 65535.0,     0.0,   1.0}},
+   {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0xffff), {65535.0, 65535.0,     0.0,   1.0}},
+
+   {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), {    0.0,     0.0,     0.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0x0000, 0x0000), {65535.0,     0.0,     0.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xffff, 0x0000), {    0.0, 65535.0,     0.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xffff), {    0.0,     0.0, 65535.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0xffff, 0xffff), {65535.0, 65535.0, 65535.0,   1.0}},
+
+   {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), {    0.0,     0.0,     0.0,     0.0}},
+   {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0x0000, 0x0000, 0x0000), {65535.0,     0.0,     0.0,     0.0}},
+   {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xffff, 0x0000, 0x0000), {    0.0, 65535.0,     0.0,     0.0}},
+   {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xffff, 0x0000), {    0.0,     0.0, 65535.0,     0.0}},
+   {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xffff), {    0.0,     0.0,     0.0, 65535.0}},
+   {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), {65535.0, 65535.0, 65535.0, 65535.0}},
+
+   {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {   0.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), {   1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8001), {  -1.0,    0.0,    0.0,    1.0}},
+
+   {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), {   0.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x7fff, 0x0000), {   1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x8001, 0x0000), {  -1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x7fff), {   0.0,    1.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x8001), {   0.0,   -1.0,    0.0,    1.0}},
+
+   {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), {   0.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x7fff, 0x0000, 0x0000), {   1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x8001, 0x0000, 0x0000), {  -1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x7fff, 0x0000), {   0.0,    1.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x8001, 0x0000), {   0.0,   -1.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x7fff), {   0.0,    0.0,    1.0,    1.0}},
+   {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x8001), {   0.0,    0.0,   -1.0,    1.0}},
+
+   {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), {   0.0,    0.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x7fff, 0x0000, 0x0000, 0x0000), {   1.0,    0.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x8001, 0x0000, 0x0000, 0x0000), {  -1.0,    0.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x7fff, 0x0000, 0x0000), {   0.0,    1.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x8001, 0x0000, 0x0000), {   0.0,   -1.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x7fff, 0x0000), {   0.0,    0.0,    1.0,    0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x8001, 0x0000), {   0.0,    0.0,   -1.0,    0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x7fff), {   0.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x8001), {   0.0,    0.0,    0.0,   -1.0}},
+
+   {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {     0.0,      0.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), { 32767.0,      0.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x8000), {-32768.0,      0.0,      0.0,   1.0}},
+
+   {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), {     0.0,      0.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x7fff, 0x0000), { 32767.0,      0.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x8000, 0x0000), {-32768.0,      0.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x7fff), {     0.0,  32767.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x8000), {     0.0, -32768.0,      0.0,   1.0}},
+
+   {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), {     0.0,      0.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x7fff, 0x0000, 0x0000), { 32767.0,      0.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x8000, 0x0000, 0x0000), {-32768.0,      0.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x7fff, 0x0000), {     0.0,  32767.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x8000, 0x0000), {     0.0, -32768.0,      0.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x7fff), {     0.0,      0.0,  32767.0,   1.0}},
+   {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x8000), {     0.0,      0.0, -32768.0,   1.0}},
+
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), {     0.0,      0.0,      0.0,      0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x7fff, 0x0000, 0x0000, 0x0000), { 32767.0,      0.0,      0.0,      0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x8000, 0x0000, 0x0000, 0x0000), {-32768.0,      0.0,      0.0,      0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x7fff, 0x0000, 0x0000), {     0.0,  32767.0,      0.0,      0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x8000, 0x0000, 0x0000), {     0.0, -32768.0,      0.0,      0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x7fff, 0x0000), {     0.0,      0.0,  32767.0,      0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x8000, 0x0000), {     0.0,      0.0, -32768.0,      0.0}},
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x7fff), {     0.0,      0.0,      0.0,  32767.0}},
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x8000), {     0.0,      0.0,      0.0, -32768.0}},
+
+   /*
+    * Standard 32-bit integer formats
+    *
+    * NOTE: We can't accurately represent integers larger than +/-0x1000000
+    * with single precision floats, so that's as far as we test.
+    */
+
+   {PIPE_FORMAT_R32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 0.0, 0.0, 1.0}},
+
+   {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffffffff, 0x00000000), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xffffffff), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffffffff, 0xffffffff), {1.0, 1.0, 0.0, 1.0}},
+
+   {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffffffff, 0x00000000, 0x00000000), {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xffffffff, 0x00000000), {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xffffffff), {0.0, 0.0, 1.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffffffff, 0x00000000, 0x00000000, 0x00000000), {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xffffffff, 0x00000000, 0x00000000), {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xffffffff, 0x00000000), {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xffffffff), {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_R32_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {       0.0,        0.0,        0.0,   1.0}},
+   {PIPE_FORMAT_R32_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x01000000), {16777216.0,        0.0,        0.0,   1.0}},
+
+   {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), {       0.0,        0.0,        0.0,   1.0}},
+   {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x00000000), {16777216.0,        0.0,        0.0,   1.0}},
+   {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x01000000), {       0.0, 16777216.0,        0.0,   1.0}},
+   {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x01000000), {16777216.0, 16777216.0,        0.0,   1.0}},
+
+   {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), {       0.0,        0.0,        0.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x00000000, 0x00000000), {16777216.0,        0.0,        0.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x01000000, 0x00000000), {       0.0, 16777216.0,        0.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x01000000), {       0.0,        0.0, 16777216.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x01000000, 0x01000000), {16777216.0, 16777216.0, 16777216.0,   1.0}},
+
+   {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), {       0.0,        0.0,        0.0,        0.0}},
+   {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x00000000, 0x00000000, 0x00000000), {16777216.0,        0.0,        0.0,        0.0}},
+   {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x01000000, 0x00000000, 0x00000000), {       0.0, 16777216.0,        0.0,        0.0}},
+   {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x01000000, 0x00000000), {       0.0,        0.0, 16777216.0,        0.0}},
+   {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x01000000), {       0.0,        0.0,        0.0, 16777216.0}},
+   {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x01000000, 0x01000000, 0x01000000), {16777216.0, 16777216.0, 16777216.0, 16777216.0}},
+
+   {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {   0.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x7fffffff), {   1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x80000001), {  -1.0,    0.0,    0.0,    1.0}},
+
+   {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), {   0.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x7fffffff, 0x00000000), {   1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x80000001, 0x00000000), {  -1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x7fffffff), {   0.0,    1.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x80000001), {   0.0,   -1.0,    0.0,    1.0}},
+
+   {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), {   0.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x7fffffff, 0x00000000, 0x00000000), {   1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x80000001, 0x00000000, 0x00000000), {  -1.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x7fffffff, 0x00000000), {   0.0,    1.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x80000001, 0x00000000), {   0.0,   -1.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x7fffffff), {   0.0,    0.0,    1.0,    1.0}},
+   {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x80000001), {   0.0,    0.0,   -1.0,    1.0}},
+
+   {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), {   0.0,    0.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x7fffffff, 0x00000000, 0x00000000, 0x00000000), {   1.0,    0.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x80000001, 0x00000000, 0x00000000, 0x00000000), {  -1.0,    0.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x7fffffff, 0x00000000, 0x00000000), {   0.0,    1.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x80000001, 0x00000000, 0x00000000), {   0.0,   -1.0,    0.0,    0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x7fffffff, 0x00000000), {   0.0,    0.0,    1.0,    0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x80000001, 0x00000000), {   0.0,    0.0,   -1.0,    0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x7fffffff), {   0.0,    0.0,    0.0,    1.0}},
+   {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x80000001), {   0.0,    0.0,    0.0,   -1.0}},
+
+   {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {        0.0,         0.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x01000000), { 16777216.0,         0.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), {-16777216.0,         0.0,         0.0,   1.0}},
+
+   {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), {        0.0,         0.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x00000000), { 16777216.0,         0.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xff000000, 0x00000000), {-16777216.0,         0.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x01000000), {        0.0,  16777216.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xff000000), {        0.0, -16777216.0,         0.0,   1.0}},
+
+   {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), {        0.0,         0.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x00000000, 0x00000000), { 16777216.0,         0.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xff000000, 0x00000000, 0x00000000), {-16777216.0,         0.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x01000000, 0x00000000), {        0.0,  16777216.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xff000000, 0x00000000), {        0.0, -16777216.0,         0.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x01000000), {        0.0,         0.0,  16777216.0,   1.0}},
+   {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xff000000), {        0.0,         0.0, -16777216.0,   1.0}},
+
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), {        0.0,         0.0,         0.0,         0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x00000000, 0x00000000, 0x00000000), { 16777216.0,         0.0,         0.0,         0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xff000000, 0x00000000, 0x00000000, 0x00000000), {-16777216.0,         0.0,         0.0,         0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x01000000, 0x00000000, 0x00000000), {        0.0,  16777216.0,         0.0,         0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xff000000, 0x00000000, 0x00000000), {        0.0, -16777216.0,         0.0,         0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x01000000, 0x00000000), {        0.0,         0.0,  16777216.0,         0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xff000000, 0x00000000), {        0.0,         0.0, -16777216.0,         0.0}},
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x01000000), {        0.0,         0.0,         0.0,  16777216.0}},
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xff000000), {        0.0,         0.0,         0.0, -16777216.0}},
+
+   /*
+    * Standard 32-bit float formats
+    */
+
+   {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {  0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x3f800000), {  1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0xbf800000), { -1.0, 0.0, 0.0, 1.0}},
+
+   {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), { 0.0,  0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x3f800000, 0x00000000), { 1.0,  0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xbf800000, 0x00000000), {-1.0,  0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x3f800000), { 0.0,  1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xbf800000), { 0.0, -1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x3f800000, 0x3f800000), { 1.0,  1.0, 0.0, 1.0}},
+
+   {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), { 0.0,  0.0,  0.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x3f800000, 0x00000000, 0x00000000), { 1.0,  0.0,  0.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xbf800000, 0x00000000, 0x00000000), {-1.0,  0.0,  0.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x3f800000, 0x00000000), { 0.0,  1.0,  0.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xbf800000, 0x00000000), { 0.0, -1.0,  0.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x3f800000), { 0.0,  0.0,  1.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xbf800000), { 0.0,  0.0, -1.0, 1.0}},
+   {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x3f800000, 0x3f800000, 0x3f800000), { 1.0,  1.0,  1.0, 1.0}},
+
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), { 0.0,  0.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x3f800000, 0x00000000, 0x00000000, 0x00000000), { 1.0,  0.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xbf800000, 0x00000000, 0x00000000, 0x00000000), {-1.0,  0.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x3f800000, 0x00000000, 0x00000000), { 0.0,  1.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xbf800000, 0x00000000, 0x00000000), { 0.0, -1.0,  0.0,  0.0}},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x3f800000, 0x00000000), { 0.0,  0.0,  1.0,  0.0}},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xbf800000, 0x00000000), { 0.0,  0.0, -1.0,  0.0}},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x3f800000), { 0.0,  0.0,  0.0,  1.0}},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xbf800000), { 0.0,  0.0,  0.0, -1.0}},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000), { 1.0,  1.0,  1.0,  1.0}},
+};
+
+
+const unsigned util_format_nr_test_cases = Elements(util_format_test_cases);
diff --git a/src/gallium/auxiliary/util/u_format_tests.h b/src/gallium/auxiliary/util/u_format_tests.h
new file mode 100644
index 0000000000..2d4d9d5fa9
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_tests.h
@@ -0,0 +1,69 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef U_FORMAT_TESTS_H_
+#define U_FORMAT_TESTS_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+
+
+#define UTIL_FORMAT_MAX_PACKED_BYTES 16
+
+
+/**
+ * A (packed, unpacked) color pair.
+ */
+struct util_format_test_case
+{
+   enum pipe_format format;
+
+   /**
+    * Mask of the bits that actually meaningful data. Used to mask out the
+    * "X" channels.
+    */
+   uint8_t mask[UTIL_FORMAT_MAX_PACKED_BYTES];
+
+   uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES];
+
+   /**
+    * RGBA.
+    */
+   double unpacked[4];
+};
+
+
+extern const struct util_format_test_case
+util_format_test_cases[];
+
+
+extern const unsigned util_format_nr_test_cases;
+
+
+#endif /* U_FORMAT_TESTS_H_ */
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 4e358d3938..d421bee8ef 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -61,6 +61,8 @@ struct gen_mipmap_state
    struct pipe_depth_stencil_alpha_state depthstencil;
    struct pipe_rasterizer_state rasterizer;
    struct pipe_sampler_state sampler;
+   struct pipe_clip_state clip;
+   struct pipe_vertex_element velem[2];
 
    void *vs;
    void *fs2d, *fsCube;
@@ -922,29 +924,29 @@ format_to_type_comps(enum pipe_format pformat,
 {
    /* XXX I think this could be implemented in terms of the pf_*() functions */
    switch (pformat) {
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
-   case PIPE_FORMAT_X8R8G8B8_UNORM:
    case PIPE_FORMAT_B8G8R8A8_UNORM:
    case PIPE_FORMAT_B8G8R8X8_UNORM:
-   case PIPE_FORMAT_R8G8B8A8_SRGB:
-   case PIPE_FORMAT_R8G8B8X8_SRGB:
-   case PIPE_FORMAT_A8R8G8B8_SRGB:
-   case PIPE_FORMAT_X8R8G8B8_SRGB:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_SRGB:
+   case PIPE_FORMAT_X8B8G8R8_SRGB:
    case PIPE_FORMAT_B8G8R8A8_SRGB:
    case PIPE_FORMAT_B8G8R8X8_SRGB:
+   case PIPE_FORMAT_A8R8G8B8_SRGB:
+   case PIPE_FORMAT_X8R8G8B8_SRGB:
    case PIPE_FORMAT_R8G8B8_SRGB:
       *datatype = DTYPE_UBYTE;
       *comps = 4;
       return;
-   case PIPE_FORMAT_A1R5G5B5_UNORM:
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
       *datatype = DTYPE_USHORT_1_5_5_5_REV;
       *comps = 4;
       return;
-   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_B4G4R4A4_UNORM:
       *datatype = DTYPE_USHORT_4_4_4_4;
       *comps = 4;
       return;
-   case PIPE_FORMAT_R5G6B5_UNORM:
+   case PIPE_FORMAT_B5G6R5_UNORM:
       *datatype = DTYPE_USHORT_5_6_5;
       *comps = 3;
       return;
@@ -955,8 +957,8 @@ format_to_type_comps(enum pipe_format pformat,
       *datatype = DTYPE_UBYTE;
       *comps = 1;
       return;
-   case PIPE_FORMAT_A8L8_UNORM:
-   case PIPE_FORMAT_A8L8_SRGB:
+   case PIPE_FORMAT_L8A8_UNORM:
+   case PIPE_FORMAT_L8A8_SRGB:
       *datatype = DTYPE_UBYTE;
       *comps = 2;
       return;
@@ -1069,7 +1071,7 @@ reduce_3d(enum pipe_format pformat,
     */
 
    /*
-   _mesa_printf("mip3d %d x %d x %d  ->  %d x %d x %d\n",
+   printf("mip3d %d x %d x %d  ->  %d x %d x %d\n",
           srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth);
    */
 
@@ -1296,7 +1298,6 @@ util_create_gen_mipmap(struct pipe_context *pipe,
    memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
    ctx->rasterizer.front_winding = PIPE_WINDING_CW;
    ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
-   ctx->rasterizer.bypass_vs_clip_and_viewport = 1;
    ctx->rasterizer.gl_rasterization_rules = 1;
 
    /* sampler state */
@@ -1307,6 +1308,15 @@ util_create_gen_mipmap(struct pipe_context *pipe,
    ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
    ctx->sampler.normalized_coords = 1;
 
+   /* vertex elements state */
+   memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      ctx->velem[i].src_offset = i * 4 * sizeof(float);
+      ctx->velem[i].instance_divisor = 0;
+      ctx->velem[i].vertex_buffer_index = 0;
+      ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    /* vertex shader - still needed to specify mapping from fragment
     * shader input semantics to vertex elements 
     */
@@ -1361,25 +1371,25 @@ get_next_slot(struct gen_mipmap_state *ctx)
 static unsigned
 set_vertex_data(struct gen_mipmap_state *ctx,
                 enum pipe_texture_target tex_target,
-                uint face, float width, float height)
+                uint face)
 {
    unsigned offset;
 
    /* vert[0].position */
-   ctx->vertices[0][0][0] = 0.0f; /*x*/
-   ctx->vertices[0][0][1] = 0.0f; /*y*/
+   ctx->vertices[0][0][0] = -1.0f; /*x*/
+   ctx->vertices[0][0][1] = -1.0f; /*y*/
 
    /* vert[1].position */
-   ctx->vertices[1][0][0] = width;
-   ctx->vertices[1][0][1] = 0.0f;
+   ctx->vertices[1][0][0] = 1.0f;
+   ctx->vertices[1][0][1] = -1.0f;
 
    /* vert[2].position */
-   ctx->vertices[2][0][0] = width;
-   ctx->vertices[2][0][1] = height;
+   ctx->vertices[2][0][0] = 1.0f;
+   ctx->vertices[2][0][1] = 1.0f;
 
    /* vert[3].position */
-   ctx->vertices[3][0][0] = 0.0f;
-   ctx->vertices[3][0][1] = height;
+   ctx->vertices[3][0][0] = -1.0f;
+   ctx->vertices[3][0][1] = 1.0f;
 
    /* Setup vertex texcoords.  This is a little tricky for cube maps. */
    if (tex_target == PIPE_TEXTURE_CUBE) {
@@ -1499,11 +1509,16 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
    cso_save_framebuffer(ctx->cso);
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
+   cso_save_viewport(ctx->cso);
+   cso_save_clip(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* bind our state */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+   cso_set_clip(ctx->cso, &ctx->clip);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    cso_set_fragment_shader_handle(ctx->cso, fs);
    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
@@ -1522,6 +1537,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
     */
    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
       const uint srcLevel = dstLevel - 1;
+      struct pipe_viewport_state vp;
 
       struct pipe_surface *surf = 
          screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
@@ -1535,6 +1551,17 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
       fb.height = u_minify(pt->height0, dstLevel);
       cso_set_framebuffer(ctx->cso, &fb);
 
+      /* viewport */
+      vp.scale[0] = 0.5f * fb.width;
+      vp.scale[1] = 0.5f * fb.height;
+      vp.scale[2] = 1.0f;
+      vp.scale[3] = 1.0f;
+      vp.translate[0] = 0.5f * fb.width;
+      vp.translate[1] = 0.5f * fb.height;
+      vp.translate[2] = 0.0f;
+      vp.translate[3] = 0.0f;
+      cso_set_viewport(ctx->cso, &vp);
+
       /*
        * Setup sampler state
        * Note: we should only have to set the min/max LOD clamps to ensure
@@ -1549,12 +1576,10 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
 
       cso_set_sampler_textures(ctx->cso, 1, &pt);
 
-      /* quad coords in window coords (bypassing vs, clip and viewport) */
+      /* quad coords in clip coords */
       offset = set_vertex_data(ctx,
                                pt->target,
-                               face,
-                               (float) u_minify(pt->width0, dstLevel),
-                               (float) u_minify(pt->height0, dstLevel));
+                               face);
 
       util_draw_vertex_buffer(ctx->pipe, 
                               ctx->vbuf,
@@ -1578,4 +1603,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
+   cso_restore_viewport(ctx->cso);
+   cso_restore_clip(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 }
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index f8dfea4e51..84d849aa90 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -90,7 +90,10 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
 static INLINE void
 pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf)
 {
-   struct pipe_buffer *old_buf = *ptr;
+   struct pipe_buffer *old_buf;
+
+   assert(ptr);
+   old_buf = *ptr;
 
    if (pipe_reference(&(*ptr)->reference, &buf->reference))
       old_buf->screen->buffer_destroy(old_buf);
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index b2969a210a..d1ec13def3 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -532,6 +532,17 @@ util_bswap32(uint32_t n)
 
 
 /**
+ * Reverse byte order of a 16 bit word.
+ */
+static INLINE uint16_t
+util_bswap16(uint16_t n)
+{
+   return (n >> 8) |
+          (n << 8);
+}
+
+
+/**
  * Clamp X to [MIN, MAX].
  * This is a macro to allow float, int, uint, etc. types.
  */
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 0ab53c75dd..50f1b1670b 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -57,47 +57,47 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a,
                    enum pipe_format format, union util_color *uc)
 {
    switch (format) {
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:
       {
          uc->ui = (r << 24) | (g << 16) | (b << 8) | a;
       }
       return;
-   case PIPE_FORMAT_R8G8B8X8_UNORM:
+   case PIPE_FORMAT_X8B8G8R8_UNORM:
       {
          uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff;
       }
       return;
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
       {
          uc->ui = (a << 24) | (r << 16) | (g << 8) | b;
       }
       return;
-   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8X8_UNORM:
       {
          uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b;
       }
       return;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       {
          uc->ui = (b << 24) | (g << 16) | (r << 8) | a;
       }
       return;
-   case PIPE_FORMAT_B8G8R8X8_UNORM:
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
       {
          uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff;
       }
       return;
-   case PIPE_FORMAT_R5G6B5_UNORM:
+   case PIPE_FORMAT_B5G6R5_UNORM:
       {
          uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
       }
       return;
-   case PIPE_FORMAT_A1R5G5B5_UNORM:
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
       {
          uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
       }
       return;
-   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_B4G4R4A4_UNORM:
       {
          uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
       }
@@ -153,7 +153,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
                      ubyte *r, ubyte *g, ubyte *b, ubyte *a)
 {
    switch (format) {
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:
       {
          uint p = uc->ui;
          *r = (ubyte) ((p >> 24) & 0xff);
@@ -162,7 +162,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
          *a = (ubyte) ((p >>  0) & 0xff);
       }
       return;
-   case PIPE_FORMAT_R8G8B8X8_UNORM:
+   case PIPE_FORMAT_X8B8G8R8_UNORM:
       {
          uint p = uc->ui;
          *r = (ubyte) ((p >> 24) & 0xff);
@@ -171,7 +171,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
          *a = (ubyte) 0xff;
       }
       return;
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
       {
          uint p = uc->ui;
          *r = (ubyte) ((p >> 16) & 0xff);
@@ -180,7 +180,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
          *a = (ubyte) ((p >> 24) & 0xff);
       }
       return;
-   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8X8_UNORM:
       {
          uint p = uc->ui;
          *r = (ubyte) ((p >> 16) & 0xff);
@@ -189,7 +189,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
          *a = (ubyte) 0xff;
       }
       return;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       {
          uint p = uc->ui;
          *r = (ubyte) ((p >>  8) & 0xff);
@@ -198,7 +198,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
          *a = (ubyte) ((p >>  0) & 0xff);
       }
       return;
-   case PIPE_FORMAT_B8G8R8X8_UNORM:
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
       {
          uint p = uc->ui;
          *r = (ubyte) ((p >>  8) & 0xff);
@@ -207,7 +207,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
          *a = (ubyte) 0xff;
       }
       return;
-   case PIPE_FORMAT_R5G6B5_UNORM:
+   case PIPE_FORMAT_B5G6R5_UNORM:
       {
          ushort p = uc->us;
          *r = (ubyte) (((p >> 8) & 0xf8) | ((p >> 13) & 0x7));
@@ -216,7 +216,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
          *a = (ubyte) 0xff;
       }
       return;
-   case PIPE_FORMAT_A1R5G5B5_UNORM:
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
       {
          ushort p = uc->us;
          *r = (ubyte) (((p >>  7) & 0xf8) | ((p >> 12) & 0x7));
@@ -225,7 +225,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
          *a = (ubyte) (0xff * (p >> 15));
       }
       return;
-   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_B4G4R4A4_UNORM:
       {
          ushort p = uc->us;
          *r = (ubyte) (((p >> 4) & 0xf0) | ((p >>  8) & 0xf));
@@ -326,47 +326,47 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
    }
 
    switch (format) {
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:
       {
          uc->ui = (r << 24) | (g << 16) | (b << 8) | a;
       }
       return;
-   case PIPE_FORMAT_R8G8B8X8_UNORM:
+   case PIPE_FORMAT_X8B8G8R8_UNORM:
       {
          uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff;
       }
       return;
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
       {
          uc->ui = (a << 24) | (r << 16) | (g << 8) | b;
       }
       return;
-   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8X8_UNORM:
       {
          uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b;
       }
       return;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       {
          uc->ui = (b << 24) | (g << 16) | (r << 8) | a;
       }
       return;
-   case PIPE_FORMAT_B8G8R8X8_UNORM:
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
       {
          uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff;
       }
       return;
-   case PIPE_FORMAT_R5G6B5_UNORM:
+   case PIPE_FORMAT_B5G6R5_UNORM:
       {
          uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
       }
       return;
-   case PIPE_FORMAT_A1R5G5B5_UNORM:
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
       {
          uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
       }
       return;
-   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_B4G4R4A4_UNORM:
       {
          uc->ub = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
       }
@@ -427,13 +427,13 @@ util_pack_z(enum pipe_format format, double z)
       return (uint) (z * 0xffffffff);
    case PIPE_FORMAT_Z32_FLOAT:
       return (uint)z;
-   case PIPE_FORMAT_S8Z24_UNORM:
-   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
       if (z == 1.0)
          return 0xffffff;
       return (uint) (z * 0xffffff);
-   case PIPE_FORMAT_Z24S8_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
       if (z == 1.0)
          return 0xffffff00;
       return ((uint) (z * 0xffffff)) << 8;
@@ -458,10 +458,10 @@ util_pack_z_stencil(enum pipe_format format, double z, uint s)
    unsigned packed = util_pack_z(format, z);
 
    switch (format) {
-   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_Z24S8_UNORM:
       packed |= s << 24;
       break;
-   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
       packed |= s;
       break;
    case PIPE_FORMAT_S8_UNORM:
diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c
index 53f3c16dbc..9203cb6580 100644
--- a/src/gallium/auxiliary/util/u_simple_screen.c
+++ b/src/gallium/auxiliary/util/u_simple_screen.c
@@ -59,22 +59,7 @@ pass_user_buffer_create(struct pipe_screen *screen,
    return buffer;
 }
 
-static struct pipe_buffer *
-pass_surface_buffer_create(struct pipe_screen *screen,
-                           unsigned width, unsigned height,
-                           enum pipe_format format,
-                           unsigned usage,
-                           unsigned tex_usage,
-                           unsigned *stride)
-{
-   struct pipe_buffer *buffer =
-      screen->winsys->surface_buffer_create(screen->winsys, width, height,
-                                            format, usage, tex_usage, stride);
 
-   buffer->screen = screen;
-
-   return buffer;
-}
 
 static void *
 pass_buffer_map(struct pipe_screen *screen,
@@ -135,7 +120,6 @@ u_simple_screen_init(struct pipe_screen *screen)
 {
    screen->buffer_create = pass_buffer_create;
    screen->user_buffer_create = pass_user_buffer_create;
-   screen->surface_buffer_create = pass_surface_buffer_create;
 
    screen->buffer_map = pass_buffer_map;
    screen->buffer_unmap = pass_buffer_unmap;
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index c9f1c9c210..33306bbc2a 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -54,9 +54,9 @@ util_create_rgba_surface(struct pipe_screen *screen,
                          struct pipe_surface **surfaceOut)
 {
    static const enum pipe_format rgbaFormats[] = {
-      PIPE_FORMAT_A8R8G8B8_UNORM,
       PIPE_FORMAT_B8G8R8A8_UNORM,
-      PIPE_FORMAT_R8G8B8A8_UNORM,
+      PIPE_FORMAT_A8R8G8B8_UNORM,
+      PIPE_FORMAT_A8B8G8R8_UNORM,
       PIPE_FORMAT_NONE
    };
    const uint target = PIPE_TEXTURE_2D;
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 813ce4eb7d..024d9577bc 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -108,7 +108,7 @@ pipe_put_tile_raw(struct pipe_transfer *pt,
 
 
 
-/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
+/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
 
 static void
 a8r8g8b8_get_tile_rgba(const unsigned *src,
@@ -155,7 +155,7 @@ a8r8g8b8_put_tile_rgba(unsigned *dst,
 }
 
 
-/*** PIPE_FORMAT_X8R8G8B8_UNORM ***/
+/*** PIPE_FORMAT_B8G8R8X8_UNORM ***/
 
 static void
 x8r8g8b8_get_tile_rgba(const unsigned *src,
@@ -201,7 +201,7 @@ x8r8g8b8_put_tile_rgba(unsigned *dst,
 }
 
 
-/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
+/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
 
 static void
 b8g8r8a8_get_tile_rgba(const unsigned *src,
@@ -248,7 +248,7 @@ b8g8r8a8_put_tile_rgba(unsigned *dst,
 }
 
 
-/*** PIPE_FORMAT_R8G8B8A8_UNORM ***/
+/*** PIPE_FORMAT_A8B8G8R8_UNORM ***/
 
 static void
 r8g8b8a8_get_tile_rgba(const unsigned *src,
@@ -295,7 +295,7 @@ r8g8b8a8_put_tile_rgba(unsigned *dst,
 }
 
 
-/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/
+/*** PIPE_FORMAT_B5G5R5A1_UNORM ***/
 
 static void
 a1r5g5b5_get_tile_rgba(const ushort *src,
@@ -346,7 +346,7 @@ a1r5g5b5_put_tile_rgba(ushort *dst,
 }
 
 
-/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/
+/*** PIPE_FORMAT_B4G4R4A4_UNORM ***/
 
 static void
 a4r4g4b4_get_tile_rgba(const ushort *src,
@@ -397,7 +397,7 @@ a4r4g4b4_put_tile_rgba(ushort *dst,
 }
 
 
-/*** PIPE_FORMAT_R5G6B5_UNORM ***/
+/*** PIPE_FORMAT_B5G6R5_UNORM ***/
 
 static void
 r5g6b5_get_tile_rgba(const ushort *src,
@@ -691,7 +691,7 @@ r16g16b16a16_put_tile_rgba(short *dst,
 }
 
 
-/*** PIPE_FORMAT_R8G8B8A8_SRGB ***/
+/*** PIPE_FORMAT_A8B8G8R8_SRGB ***/
 
 /**
  * Convert an 8-bit sRGB value from non-linear space to a
@@ -784,7 +784,7 @@ a8r8g8b8_srgb_put_tile_rgba(unsigned *dst,
 }
 
 
-/*** PIPE_FORMAT_A8L8_SRGB ***/
+/*** PIPE_FORMAT_L8A8_SRGB ***/
 
 static void
 a8l8_srgb_get_tile_rgba(const ushort *src,
@@ -913,7 +913,7 @@ i8_put_tile_rgba(ubyte *dst,
 }
 
 
-/*** PIPE_FORMAT_A8L8_UNORM ***/
+/*** PIPE_FORMAT_L8A8_UNORM ***/
 
 static void
 a8l8_get_tile_rgba(const ushort *src,
@@ -987,7 +987,7 @@ z32_get_tile_rgba(const unsigned *src,
 }
 
 
-/*** PIPE_FORMAT_S8Z24_UNORM ***/
+/*** PIPE_FORMAT_Z24S8_UNORM ***/
 
 /**
  * Return Z component as four float in [0,1].  Stencil part ignored.
@@ -1014,7 +1014,7 @@ s8z24_get_tile_rgba(const unsigned *src,
 }
 
 
-/*** PIPE_FORMAT_Z24S8_UNORM ***/
+/*** PIPE_FORMAT_S8Z24_UNORM ***/
 
 /**
  * Return Z component as four float in [0,1].  Stencil part ignored.
@@ -1067,7 +1067,7 @@ z32f_get_tile_rgba(const float *src,
 }
 
 
-/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/
+/*** PIPE_FORMAT_UYVY / PIPE_FORMAT_YUYV ***/
 
 /**
  * Convert YCbCr (or YCrCb) to RGBA.
@@ -1162,25 +1162,25 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
                       float *dst, unsigned dst_stride)
 {
    switch (format) {
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
       a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8X8_UNORM:
       x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:
       r8g8b8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_A1R5G5B5_UNORM:
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
       a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_B4G4R4A4_UNORM:
       a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_R5G6B5_UNORM:
+   case PIPE_FORMAT_B5G6R5_UNORM:
       r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
       break;
    case PIPE_FORMAT_R8G8B8_UNORM:
@@ -1195,7 +1195,7 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
    case PIPE_FORMAT_I8_UNORM:
       i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_A8L8_UNORM:
+   case PIPE_FORMAT_L8A8_UNORM:
       a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
       break;
    case PIPE_FORMAT_R16_SNORM:
@@ -1204,10 +1204,10 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
    case PIPE_FORMAT_R16G16B16A16_SNORM:
       r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_A8R8G8B8_SRGB:
+   case PIPE_FORMAT_B8G8R8A8_SRGB:
       a8r8g8b8_srgb_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_A8L8_SRGB:
+   case PIPE_FORMAT_L8A8_SRGB:
       a8l8_srgb_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
       break;
    case PIPE_FORMAT_L8_SRGB:
@@ -1219,21 +1219,21 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
    case PIPE_FORMAT_Z32_UNORM:
       z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_S8Z24_UNORM:
-   case PIPE_FORMAT_X8Z24_UNORM:
-      s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
-      break;
    case PIPE_FORMAT_Z24S8_UNORM:
    case PIPE_FORMAT_Z24X8_UNORM:
+      s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
       z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
       break;
    case PIPE_FORMAT_Z32_FLOAT:
       z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride);
       break;
-   case PIPE_FORMAT_YCBCR:
+   case PIPE_FORMAT_UYVY:
       ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE);
       break;
-   case PIPE_FORMAT_YCBCR_REV:
+   case PIPE_FORMAT_YUYV:
       ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE);
       break;
    default:
@@ -1262,7 +1262,7 @@ pipe_get_tile_rgba(struct pipe_transfer *pt,
    if (!packed)
       return;
 
-   if(format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV)
+   if(format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV)
       assert((x & 1) == 0);
 
    pipe_get_tile_raw(pt, x, y, w, h, packed, 0);
@@ -1333,28 +1333,28 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
       return;
 
    switch (format) {
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
       a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
       break;
-   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8X8_UNORM:
       x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
       break;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
       b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
       break;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:
       r8g8b8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
       break;
-   case PIPE_FORMAT_A1R5G5B5_UNORM:
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
       a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
       break;
-   case PIPE_FORMAT_R5G6B5_UNORM:
+   case PIPE_FORMAT_B5G6R5_UNORM:
       r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
       break;
    case PIPE_FORMAT_R8G8B8_UNORM:
       r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
       break;
-   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_B4G4R4A4_UNORM:
       a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
       break;
    case PIPE_FORMAT_L8_UNORM:
@@ -1366,7 +1366,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
    case PIPE_FORMAT_I8_UNORM:
       i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
       break;
-   case PIPE_FORMAT_A8L8_UNORM:
+   case PIPE_FORMAT_L8A8_UNORM:
       a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
       break;
    case PIPE_FORMAT_R16_SNORM:
@@ -1375,10 +1375,10 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
    case PIPE_FORMAT_R16G16B16A16_SNORM:
       r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride);
       break;
-   case PIPE_FORMAT_A8R8G8B8_SRGB:
+   case PIPE_FORMAT_B8G8R8A8_SRGB:
       a8r8g8b8_srgb_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
       break;
-   case PIPE_FORMAT_A8L8_SRGB:
+   case PIPE_FORMAT_L8A8_SRGB:
       a8l8_srgb_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
       break;
    case PIPE_FORMAT_L8_SRGB:
@@ -1390,12 +1390,12 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
    case PIPE_FORMAT_Z32_UNORM:
       /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
       break;
-   case PIPE_FORMAT_S8Z24_UNORM:
-   case PIPE_FORMAT_X8Z24_UNORM:
-      /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
-      break;
    case PIPE_FORMAT_Z24S8_UNORM:
    case PIPE_FORMAT_Z24X8_UNORM:
+      /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
+      break;
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
       /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
       break;
    default:
@@ -1447,8 +1447,8 @@ pipe_get_tile_z(struct pipe_transfer *pt,
          }
       }
       break;
-   case PIPE_FORMAT_S8Z24_UNORM:
-   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
       {
          const uint *ptrc
             = (const uint *)(map + y * pt->stride + x*4);
@@ -1462,8 +1462,8 @@ pipe_get_tile_z(struct pipe_transfer *pt,
          }
       }
       break;
-   case PIPE_FORMAT_Z24S8_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
       {
          const uint *ptrc
             = (const uint *)(map + y * pt->stride + x*4);
@@ -1531,7 +1531,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
          }
       }
       break;
-   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_Z24S8_UNORM:
       {
          uint *pDest = (uint *) (map + y * pt->stride + x*4);
          assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);
@@ -1545,7 +1545,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
          }
       }
       break;
-   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
       {
          uint *pDest = (uint *) (map + y * pt->stride + x*4);
          for (i = 0; i < h; i++) {
@@ -1558,7 +1558,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
          }
       }
       break;
-   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
       {
          uint *pDest = (uint *) (map + y * pt->stride + x*4);
          assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);
@@ -1572,7 +1572,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
          }
       }
       break;
-   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
       {
          uint *pDest = (uint *) (map + y * pt->stride + x*4);
          for (i = 0; i < h; i++) {
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index ba23435f69..6d461cb880 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -230,6 +230,7 @@ static bool
 init_pipe_state(struct vl_compositor *c)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_vertex_element vertex_elems[2];
 
    assert(c);
 
@@ -251,15 +252,27 @@ init_pipe_state(struct vl_compositor *c)
    /*sampler.border_color[i] = ;*/
    /*sampler.max_anisotropy = ;*/
    c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
-	
+
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[1].src_offset = 0;
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 1;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   c->vertex_elems = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);
+
+
    return true;
 }
 
 static void cleanup_pipe_state(struct vl_compositor *c)
 {
    assert(c);
-	
+
    c->pipe->delete_sampler_state(c->pipe, c->sampler);
+   c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems);
 }
 
 static bool
@@ -314,12 +327,6 @@ init_buffers(struct vl_compositor *c)
 
    pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
 
-   c->vertex_elems[0].src_offset = 0;
-   c->vertex_elems[0].instance_divisor = 0;
-   c->vertex_elems[0].vertex_buffer_index = 0;
-   c->vertex_elems[0].nr_components = 2;
-   c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    /*
     * Create our texcoord buffer and texcoord buffer element
     * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
@@ -344,12 +351,6 @@ init_buffers(struct vl_compositor *c)
 
    pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
 
-   c->vertex_elems[1].src_offset = 0;
-   c->vertex_elems[1].instance_divisor = 0;
-   c->vertex_elems[1].vertex_buffer_index = 1;
-   c->vertex_elems[1].nr_components = 2;
-   c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    /*
     * Create our vertex shader's constant buffer
     * Const buffer contains scaling and translation vectors
@@ -483,7 +484,7 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
    compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
    compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
-   compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
+   compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 6a9a3fd7af..51755554da 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -43,10 +43,10 @@ struct vl_compositor
    void *sampler;
    void *vertex_shader;
    void *fragment_shader;
+   void *vertex_elems;
    struct pipe_viewport_state viewport;
    struct pipe_scissor_state scissor;
    struct pipe_vertex_buffer vertex_bufs[2];
-   struct pipe_vertex_element vertex_elems[2];
    struct pipe_buffer *vs_const_buf, *fs_const_buf;
 };
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index f323de0ea5..0763b5bb0e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -708,6 +708,7 @@ static bool
 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_vertex_element vertex_elems[8];
    unsigned filters[5];
    unsigned i;
 
@@ -771,6 +772,59 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
       r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
    }
 
+   /* Position element */
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Luma, texcoord element */
+   vertex_elems[1].src_offset = sizeof(struct vertex2f);
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 0;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cr texcoord element */
+   vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+   vertex_elems[2].instance_divisor = 0;
+   vertex_elems[2].vertex_buffer_index = 0;
+   vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cb texcoord element */
+   vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+   vertex_elems[3].instance_divisor = 0;
+   vertex_elems[3].vertex_buffer_index = 0;
+   vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface top field texcoord element */
+   vertex_elems[4].src_offset = 0;
+   vertex_elems[4].instance_divisor = 0;
+   vertex_elems[4].vertex_buffer_index = 1;
+   vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface bottom field texcoord element */
+   vertex_elems[5].src_offset = sizeof(struct vertex2f);
+   vertex_elems[5].instance_divisor = 0;
+   vertex_elems[5].vertex_buffer_index = 1;
+   vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface top field texcoord element */
+   vertex_elems[6].src_offset = 0;
+   vertex_elems[6].instance_divisor = 0;
+   vertex_elems[6].vertex_buffer_index = 2;
+   vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface bottom field texcoord element */
+   vertex_elems[7].src_offset = sizeof(struct vertex2f);
+   vertex_elems[7].instance_divisor = 0;
+   vertex_elems[7].vertex_buffer_index = 2;
+   vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* need versions with 4,6 and 8 vertex elems */
+   r->vertex_elems[0] = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
+   r->vertex_elems[1] = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
+   r->vertex_elems[2] = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
+
    return true;
 }
 
@@ -783,6 +837,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 
    for (i = 0; i < 5; ++i)
       r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+   for (i = 0; i < 3; i++)
+      r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems[i]);
 }
 
 static bool
@@ -888,62 +944,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       );
    }
 
-   /* Position element */
-   r->vertex_elems[0].src_offset = 0;
-   r->vertex_elems[0].instance_divisor = 0;
-   r->vertex_elems[0].vertex_buffer_index = 0;
-   r->vertex_elems[0].nr_components = 2;
-   r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Luma, texcoord element */
-   r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[1].instance_divisor = 0;
-   r->vertex_elems[1].vertex_buffer_index = 0;
-   r->vertex_elems[1].nr_components = 2;
-   r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Chroma Cr texcoord element */
-   r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
-   r->vertex_elems[2].instance_divisor = 0;
-   r->vertex_elems[2].vertex_buffer_index = 0;
-   r->vertex_elems[2].nr_components = 2;
-   r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Chroma Cb texcoord element */
-   r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
-   r->vertex_elems[3].instance_divisor = 0;
-   r->vertex_elems[3].vertex_buffer_index = 0;
-   r->vertex_elems[3].nr_components = 2;
-   r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* First ref surface top field texcoord element */
-   r->vertex_elems[4].src_offset = 0;
-   r->vertex_elems[4].instance_divisor = 0;
-   r->vertex_elems[4].vertex_buffer_index = 1;
-   r->vertex_elems[4].nr_components = 2;
-   r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* First ref surface bottom field texcoord element */
-   r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[5].instance_divisor = 0;
-   r->vertex_elems[5].vertex_buffer_index = 1;
-   r->vertex_elems[5].nr_components = 2;
-   r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface top field texcoord element */
-   r->vertex_elems[6].src_offset = 0;
-   r->vertex_elems[6].instance_divisor = 0;
-   r->vertex_elems[6].vertex_buffer_index = 2;
-   r->vertex_elems[6].nr_components = 2;
-   r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface bottom field texcoord element */
-   r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[7].instance_divisor = 0;
-   r->vertex_elems[7].vertex_buffer_index = 2;
-   r->vertex_elems[7].nr_components = 2;
-   r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    r->vs_const_buf = pipe_buffer_create
    (
       r->pipe->screen,
@@ -1307,7 +1307,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[0]);
       r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->i_vs);
@@ -1320,7 +1320,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->past;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1334,7 +1334,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->past;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1348,7 +1348,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1362,7 +1362,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1376,7 +1376,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
       r->textures.individual.ref[0] = r->past;
       r->textures.individual.ref[1] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
@@ -1391,7 +1391,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
       r->textures.individual.ref[0] = r->past;
       r->textures.individual.ref[1] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index f00b8c7b8b..a11a3e7307 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -66,8 +66,8 @@ struct vl_mpeg12_mc_renderer
    struct pipe_buffer *vs_const_buf;
    struct pipe_buffer *fs_const_buf;
    struct pipe_framebuffer_state fb_state;
-   struct pipe_vertex_element vertex_elems[8];
-	
+   void *vertex_elems[3];
+
    union
    {
       void *all[5];
author	Michal Krol <michal@vmware.com>	2010-03-10 15:49:30 +0100
committer	Michal Krol <michal@vmware.com>	2010-03-10 15:49:30 +0100
commit	3ce4375912c8ea488460e593e07c5bb15b92dca9 (patch)
tree	1011fa439bd829fd46a44fd99478135848800e73 /src/gallium/auxiliary
parent	f59f28093ea827bd234d8e1a36bdd56a9fce5f09 (diff)
parent	9b348d0ed125a22be3f318ac60cef6f201edfdab (diff)