From 8be72bb7646d430e66cb36e09c13c13bee030d53 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 6 Sep 2009 11:20:14 +0100
Subject: llvmpipe: Further abstract the texture sampling generation from TGSI
 translation.

---
 src/gallium/drivers/llvmpipe/lp_bld_tgsi.h     |  35 +++++--
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c |  16 +--
 src/gallium/drivers/llvmpipe/lp_state_fs.c     | 115 ++-------------------
 src/gallium/drivers/llvmpipe/lp_tex_sample.c   | 133 +++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_tex_sample.h   |  12 +++
 5 files changed, 186 insertions(+), 125 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 912db24aec..10c251c416 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -44,14 +44,30 @@ struct lp_build_context;
 struct lp_build_mask_context;
 
 
-typedef void
-(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder,
-                                void *context,
-                                unsigned unit,
-                                unsigned num_coords,
-                                const LLVMValueRef *coords,
-                                LLVMValueRef lodbias,
-                                LLVMValueRef *texel);
+/**
+ * Sampler code generation interface.
+ *
+ * Although texture sampling is a requirement for TGSI translation, it is
+ * a very different problem with several different approaches to it. This
+ * structure establishes an interface for texture sampling code generation, so
+ * that we can easily use different texture sampling strategies.
+ */
+struct lp_build_sampler_soa
+{
+   void
+   (*destroy)( struct lp_build_sampler_soa *sampler );
+
+   void
+   (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler,
+                        LLVMBuilderRef builder,
+                        union lp_type type,
+                        unsigned unit,
+                        unsigned num_coords,
+                        const LLVMValueRef *coords,
+                        LLVMValueRef lodbias,
+                        LLVMValueRef *texel);
+};
+
 
 void
 lp_build_tgsi_soa(LLVMBuilderRef builder,
@@ -62,8 +78,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const LLVMValueRef *pos,
                   const LLVMValueRef (*inputs)[4],
                   LLVMValueRef (*outputs)[4],
-                  lp_emit_fetch_texel_soa_callback emit_fetch_texel,
-                  void *emit_fetch_texel_context);
+                  struct lp_build_sampler_soa *sampler);
 
 
 #endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index d4d18febec..3ce379de12 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -88,8 +88,7 @@ struct lp_build_tgsi_soa_context
    const LLVMValueRef (*inputs)[NUM_CHANNELS];
    LLVMValueRef (*outputs)[NUM_CHANNELS];
 
-   lp_emit_fetch_texel_soa_callback emit_fetch_texel;
-   void *emit_fetch_texel_context;
+   struct lp_build_sampler_soa *sampler;
 
    LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
    LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
@@ -289,8 +288,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
          coords[i] = lp_build_mul(&bld->base, coords[i], oow);
    }
 
-   bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context,
-                         unit, num_coords, coords, lodbias, texel);
+   bld->sampler->emit_fetch_texel(bld->sampler,
+                                  bld->base.builder,
+                                  bld->base.type,
+                                  unit, num_coords, coords, lodbias,
+                                  texel);
 
    FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
       emit_store( bld, inst, 0, i, texel[i] );
@@ -1283,8 +1285,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const LLVMValueRef *pos,
                   const LLVMValueRef (*inputs)[NUM_CHANNELS],
                   LLVMValueRef (*outputs)[NUM_CHANNELS],
-                  lp_emit_fetch_texel_soa_callback emit_fetch_texel,
-                  void *emit_fetch_texel_context)
+                  struct lp_build_sampler_soa *sampler)
 {
    struct lp_build_tgsi_soa_context bld;
    struct tgsi_parse_context parse;
@@ -1299,8 +1300,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
    bld.inputs = inputs;
    bld.outputs = outputs;
    bld.consts_ptr = consts_ptr;
-   bld.emit_fetch_texel = emit_fetch_texel;
-   bld.emit_fetch_texel_context = emit_fetch_texel_context;
+   bld.sampler = sampler;
 
    tgsi_parse_init( &parse, tokens );
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 94170bd716..80f705c871 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -85,6 +85,7 @@
 #include "lp_context.h"
 #include "lp_state.h"
 #include "lp_quad.h"
+#include "lp_tex_sample.h"
 
 
 static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
@@ -173,107 +174,6 @@ generate_depth(struct llvmpipe_context *lp,
 }
 
 
-struct build_fetch_texel_context
-{
-   LLVMValueRef context_ptr;
-
-   LLVMValueRef samplers_ptr;
-
-   /** Coords/texels store */
-   LLVMValueRef store_ptr;
-};
-
-
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
-                    uint32_t unit,
-                    float *store )
-{
-   struct tgsi_sampler *sampler = samplers[unit];
-
-#if 0
-   uint j;
-
-   debug_printf("%s sampler: %p (%p) store: %p\n",
-                __FUNCTION__,
-                sampler, *sampler,
-                store );
-
-   debug_printf("lodbias %f\n", store[12]);
-
-   for (j = 0; j < 4; j++)
-      debug_printf("sample %d texcoord %f %f\n",
-                   j,
-                   store[0+j],
-                   store[4+j]);
-#endif
-
-   {
-      float rgba[NUM_CHANNELS][QUAD_SIZE];
-      sampler->get_samples(sampler,
-                           &store[0],
-                           &store[4],
-                           &store[8],
-                           0.0f, /*store[12],  lodbias */
-                           rgba);
-      memcpy(store, rgba, sizeof rgba);
-   }
-
-#if 0
-   for (j = 0; j < 4; j++)
-      debug_printf("sample %d result %f %f %f %f\n",
-                   j,
-                   store[0+j],
-                   store[4+j],
-                   store[8+j],
-                   store[12+j]);
-#endif
-}
-
-
-static void
-emit_fetch_texel( LLVMBuilderRef builder,
-                  void *context,
-                  unsigned unit,
-                  unsigned num_coords,
-                  const LLVMValueRef *coords,
-                  LLVMValueRef lodbias,
-                  LLVMValueRef *texel)
-{
-   struct build_fetch_texel_context *bld = context;
-   LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
-   LLVMValueRef args[3];
-   unsigned i;
-
-   if(!bld->samplers_ptr)
-      bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr);
-
-   if(!bld->store_ptr)
-      bld->store_ptr = LLVMBuildArrayAlloca(builder,
-                                            vec_type,
-                                            LLVMConstInt(LLVMInt32Type(), 4, 0),
-                                            "texel_store");
-
-   for (i = 0; i < num_coords; i++) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, "");
-      LLVMBuildStore(builder, coords[i], coord_ptr);
-   }
-
-   args[0] = bld->samplers_ptr;
-   args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
-   args[2] = bld->store_ptr;
-
-   lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
-
-   for (i = 0; i < NUM_CHANNELS; ++i) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, "");
-      texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
-   }
-}
-
-
 /**
  * Generate the fragment shader, depth/stencil test, and alpha tests.
  */
@@ -286,7 +186,7 @@ generate_fs(struct llvmpipe_context *lp,
             LLVMValueRef context_ptr,
             unsigned i,
             const struct lp_build_interp_soa_context *interp,
-            struct build_fetch_texel_context *sampler,
+            struct lp_build_sampler_soa *sampler,
             LLVMValueRef *pmask,
             LLVMValueRef *color,
             LLVMValueRef depth_ptr)
@@ -327,7 +227,7 @@ generate_fs(struct llvmpipe_context *lp,
 
    lp_build_tgsi_soa(builder, tokens, type, &mask,
                      consts_ptr, interp->pos, interp->inputs,
-                     outputs, emit_fetch_texel, sampler);
+                     outputs, sampler);
 
    for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -462,7 +362,7 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMBuilderRef builder;
    LLVMValueRef x0;
    LLVMValueRef y0;
-   struct build_fetch_texel_context sampler;
+   struct lp_build_sampler_soa *sampler;
    struct lp_build_interp_soa_context interp;
    LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
    LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
@@ -586,8 +486,7 @@ generate_fragment(struct llvmpipe_context *lp,
                             a0_ptr, dadx_ptr, dady_ptr,
                             x0, y0, 2, 0);
 
-   memset(&sampler, 0, sizeof sampler);
-   sampler.context_ptr = context_ptr;
+   sampler = lp_c_sampler_soa_create(context_ptr);
 
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
@@ -606,7 +505,7 @@ generate_fragment(struct llvmpipe_context *lp,
                   context_ptr,
                   i,
                   &interp,
-                  &sampler,
+                  sampler,
                   &fs_mask[i],
                   out_color,
                   depth_ptr_i);
@@ -615,6 +514,8 @@ generate_fragment(struct llvmpipe_context *lp,
          fs_out_color[chan][i] = out_color[chan];
    }
 
+   sampler->destroy(sampler);
+
    /* 
     * Convert the fs's output color and mask to fit to the blending type. 
     */
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
index 94eb6dad5a..9a876f404d 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
@@ -1578,3 +1578,136 @@ out:
    tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
 }
 
+
+void PIPE_CDECL
+lp_fetch_texel_soa( struct tgsi_sampler **samplers,
+                    uint32_t unit,
+                    float *store )
+{
+   struct tgsi_sampler *sampler = samplers[unit];
+
+#if 0
+   uint j;
+
+   debug_printf("%s sampler: %p (%p) store: %p\n",
+                __FUNCTION__,
+                sampler, *sampler,
+                store );
+
+   debug_printf("lodbias %f\n", store[12]);
+
+   for (j = 0; j < 4; j++)
+      debug_printf("sample %d texcoord %f %f\n",
+                   j,
+                   store[0+j],
+                   store[4+j]);
+#endif
+
+   {
+      float rgba[NUM_CHANNELS][QUAD_SIZE];
+      sampler->get_samples(sampler,
+                           &store[0],
+                           &store[4],
+                           &store[8],
+                           0.0f, /*store[12],  lodbias */
+                           rgba);
+      memcpy(store, rgba, sizeof rgba);
+   }
+
+#if 0
+   for (j = 0; j < 4; j++)
+      debug_printf("sample %d result %f %f %f %f\n",
+                   j,
+                   store[0+j],
+                   store[4+j],
+                   store[8+j],
+                   store[12+j]);
+#endif
+}
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_tgsi.h"
+
+
+struct lp_c_sampler_soa
+{
+   struct lp_build_sampler_soa base;
+
+   LLVMValueRef context_ptr;
+
+   LLVMValueRef samplers_ptr;
+
+   /** Coords/texels store */
+   LLVMValueRef store_ptr;
+};
+
+
+static void
+lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+   FREE(sampler);
+}
+
+
+static void
+lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
+                                  LLVMBuilderRef builder,
+                                  union lp_type type,
+                                  unsigned unit,
+                                  unsigned num_coords,
+                                  const LLVMValueRef *coords,
+                                  LLVMValueRef lodbias,
+                                  LLVMValueRef *texel)
+{
+   struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
+   LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
+   LLVMValueRef args[3];
+   unsigned i;
+
+   if(!sampler->samplers_ptr)
+      sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
+
+   if(!sampler->store_ptr)
+      sampler->store_ptr = LLVMBuildArrayAlloca(builder,
+                                            vec_type,
+                                            LLVMConstInt(LLVMInt32Type(), 4, 0),
+                                            "texel_store");
+
+   for (i = 0; i < num_coords; i++) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+      LLVMBuildStore(builder, coords[i], coord_ptr);
+   }
+
+   args[0] = sampler->samplers_ptr;
+   args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+   args[2] = sampler->store_ptr;
+
+   lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
+
+   for (i = 0; i < NUM_CHANNELS; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+      texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
+   }
+}
+
+
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr)
+{
+   struct lp_c_sampler_soa *sampler;
+
+   sampler = CALLOC_STRUCT(lp_c_sampler_soa);
+   if(!sampler)
+      return NULL;
+
+   sampler->base.destroy = lp_c_sampler_soa_destroy;
+   sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
+   sampler->context_ptr = context_ptr;
+
+   return &sampler->base;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 628ec3f1ef..7d1e565885 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -29,6 +29,8 @@
 #define LP_TEX_SAMPLE_H
 
 
+#include <llvm-c/Core.h>
+
 #include "tgsi/tgsi_exec.h"
 
 
@@ -75,4 +77,14 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
                float rgba[NUM_CHANNELS][QUAD_SIZE]);
 
 
+/**
+ * Texture sampling code generator that just calls lp_get_samples C function
+ * for the actual sampling computation.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr);
+
+
 #endif /* LP_TEX_SAMPLE_H */
-- 
cgit v1.2.3


From 866fbacf2bf93282f622f1f455250491d0b3b63f Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:24:31 +0100
Subject: llvmpipe: SoA pixel unpacking specialization.

---
 src/gallium/drivers/llvmpipe/Makefile            |   1 +
 src/gallium/drivers/llvmpipe/SConscript          |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_format.h     |  62 +++++---
 src/gallium/drivers/llvmpipe/lp_bld_format_aos.c |  30 ++--
 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 193 +++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_test_format.c    |   4 +-
 6 files changed, 252 insertions(+), 39 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 6e63a0c2b7..54bb3a0e73 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -15,6 +15,7 @@ C_SOURCES = \
 	lp_bld_depth.c \
 	lp_bld_flow.c \
 	lp_bld_format_aos.c \
+	lp_bld_format_soa.c \
 	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_logic.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 5c29bdac56..3dcd5cb994 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -23,6 +23,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_depth.c',
 		'lp_bld_flow.c',
 		'lp_bld_format_aos.c',
+		'lp_bld_format_soa.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
 		'lp_bld_struct.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 01c8a752d1..5ee0656093 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -31,20 +31,14 @@
 
 /**
  * @file
- * LLVM IR building helpers interfaces.
- *
- * We use LLVM-C bindings for now. They are not documented, but follow the C++
- * interfaces very closely, and appear to be complete enough for code
- * genration. See
- * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- * for a standalone example.
+ * Pixel format helpers.
  */
 
 #include <llvm-c/Core.h>  
- 
-#include "pipe/p_format.h"
 
+#include "pipe/p_format.h"
 
+struct util_format_description;
 union lp_type;
 
 
@@ -56,9 +50,9 @@ union lp_type;
  * @return RGBA in a 4 floats vector.
  */
 LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
-                     enum pipe_format format, 
-                     LLVMValueRef packed);
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+                         enum pipe_format format,
+                         LLVMValueRef packed);
 
 
 /**
@@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
  * @param rgba 4 float vector with the unpacked components.
  */
 LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format,
-                   LLVMValueRef rgba);
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+                       enum pipe_format format,
+                       LLVMValueRef rgba);
 
 
 /**
@@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
  * @return RGBA in a 4 floats vector.
  */
 LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format, 
-                   LLVMValueRef ptr);
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+                       enum pipe_format format,
+                       LLVMValueRef ptr);
 
 
 /**
@@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder,
  * @param rgba 4 float vector with the unpacked components.
  */
 void 
-lp_build_store_rgba(LLVMBuilderRef builder,
-                    enum pipe_format format,
-                    LLVMValueRef ptr,
-                    LLVMValueRef rgba);
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+                        enum pipe_format format,
+                        LLVMValueRef ptr,
+                        LLVMValueRef rgba);
 
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+                unsigned length,
+                unsigned src_width,
+                unsigned dst_width,
+                LLVMValueRef base_ptr,
+                LLVMValueRef offsets);
+
+
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+                         const struct util_format_description *format_desc,
+                         union lp_type type,
+                         LLVMValueRef packed,
+                         LLVMValueRef *rgba);
+
+
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+                       const struct util_format_description *format_desc,
+                       union lp_type type,
+                       LLVMValueRef base_ptr,
+                       LLVMValueRef offsets,
+                       LLVMValueRef *rgba);
 
 #endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index dcbc0076c7..b9b5d84bed 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -32,9 +32,9 @@
 
 
 LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
-                     enum pipe_format format,
-                     LLVMValueRef packed)
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+                         enum pipe_format format,
+                         LLVMValueRef packed)
 {
    const struct util_format_description *desc;
    LLVMTypeRef type;
@@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
 
 
 LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format,
-                   LLVMValueRef rgba)
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+                       enum pipe_format format,
+                       LLVMValueRef rgba)
 {
    const struct util_format_description *desc;
    LLVMTypeRef type;
@@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
 
 
 LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format,
-                   LLVMValueRef ptr)
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+                       enum pipe_format format,
+                       LLVMValueRef ptr)
 {
    const struct util_format_description *desc;
    LLVMTypeRef type;
@@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder,
 
    packed = LLVMBuildLoad(builder, ptr, "");
 
-   return lp_build_unpack_rgba(builder, format, packed);
+   return lp_build_unpack_rgba_aos(builder, format, packed);
 }
 
 
 void
-lp_build_store_rgba(LLVMBuilderRef builder,
-                    enum pipe_format format,
-                    LLVMValueRef ptr,
-                    LLVMValueRef rgba)
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+                        enum pipe_format format,
+                        LLVMValueRef ptr,
+                        LLVMValueRef rgba)
 {
    const struct util_format_description *desc;
    LLVMTypeRef type;
@@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder,
 
    type = LLVMIntType(desc->block.bits);
 
-   packed = lp_build_pack_rgba(builder, format, rgba);
+   packed = lp_build_pack_rgba_aos(builder, format, rgba);
 
    ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
new file mode 100644
index 0000000000..36bac06d2e
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -0,0 +1,193 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_format.h"
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ *
+ * @param src_width src element width
+ * @param dst_width result element width (source will be expanded to fit)
+ * @param length length of the offsets,
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+                unsigned length,
+                unsigned src_width,
+                unsigned dst_width,
+                LLVMValueRef base_ptr,
+                LLVMValueRef offsets)
+{
+   LLVMTypeRef src_type = LLVMIntType(src_width);
+   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+   LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+   LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+   LLVMValueRef res;
+   unsigned i;
+
+   res = LLVMGetUndef(dst_vec_type);
+   for(i = 0; i < length; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef elem_offset;
+      LLVMValueRef elem_ptr;
+      LLVMValueRef elem;
+
+      elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+      elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+      elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
+      elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+      assert(src_width <= dst_width);
+      if(src_width > dst_width)
+         elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+      if(src_width < dst_width)
+         elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+      res = LLVMBuildInsertElement(builder, res, elem, index, "");
+   }
+
+   return res;
+}
+
+
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+                         const struct util_format_description *format_desc,
+                         union lp_type type,
+                         LLVMValueRef packed,
+                         LLVMValueRef *rgba)
+{
+   LLVMValueRef inputs[4];
+   unsigned start;
+   unsigned chan;
+
+   /* FIXME: Support more formats */
+   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+   assert(format_desc->block.bits <= 32);
+
+   /* Decode the input vector components */
+   start = 0;
+   for (chan = 0; chan < 4; ++chan) {
+      unsigned width = format_desc->channel[chan].size;
+      unsigned stop = start + width;
+      LLVMValueRef input;
+
+      input = packed;
+
+      switch(format_desc->channel[chan].type) {
+      case UTIL_FORMAT_TYPE_VOID:
+         input = NULL;
+         break;
+
+      case UTIL_FORMAT_TYPE_UNSIGNED:
+         if(type.floating) {
+            if(start)
+               input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), "");
+            if(stop < format_desc->block.bits) {
+               unsigned mask = ((unsigned long long)1 << width) - 1;
+               input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), "");
+            }
+
+            if(format_desc->channel[chan].normalized)
+               input = lp_build_unsigned_norm_to_float(builder, width, type, input);
+            else
+               input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), "");
+         }
+         else {
+            /* FIXME */
+            assert(0);
+            input = lp_build_undef(type);
+         }
+         break;
+
+      default:
+         /* fall through */
+         input = lp_build_undef(type);
+         break;
+      }
+
+      inputs[chan] = input;
+
+      start = stop;
+   }
+
+   for (chan = 0; chan < 4; ++chan) {
+      enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+
+      switch (swizzle) {
+      case UTIL_FORMAT_SWIZZLE_X:
+      case UTIL_FORMAT_SWIZZLE_Y:
+      case UTIL_FORMAT_SWIZZLE_Z:
+      case UTIL_FORMAT_SWIZZLE_W:
+         rgba[chan] = inputs[swizzle];
+         break;
+      case UTIL_FORMAT_SWIZZLE_0:
+         rgba[chan] = lp_build_zero(type);
+         break;
+      case UTIL_FORMAT_SWIZZLE_1:
+         rgba[chan] = lp_build_one(type);
+         break;
+      case UTIL_FORMAT_SWIZZLE_NONE:
+         rgba[chan] = lp_build_undef(type);
+         break;
+      }
+   }
+}
+
+
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+                       const struct util_format_description *format_desc,
+                       union lp_type type,
+                       LLVMValueRef base_ptr,
+                       LLVMValueRef offsets,
+                       LLVMValueRef *rgba)
+{
+   LLVMValueRef packed;
+
+   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+   assert(format_desc->block.bits <= 32);
+
+   packed = lp_build_gather(builder,
+                            type.length, format_desc->block.bits, type.width,
+                            base_ptr, offsets);
+
+   lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 1d192355ee..d8455e5649 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module,
 
    lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
 
-   rgba = lp_build_load_rgba(builder, format, ptr);
+   rgba = lp_build_load_rgba_aos(builder, format, ptr);
    LLVMBuildStore(builder, rgba, rgba_ptr);
 
    lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
@@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module,
 
    rgba = LLVMBuildLoad(builder, rgba_ptr, "");
 
-   lp_build_store_rgba(builder, format, ptr, rgba);
+   lp_build_store_rgba_aos(builder, format, ptr, rgba);
 
    LLVMBuildRetVoid(builder);
 
-- 
cgit v1.2.3


From b1eff018c7a07502c673032ef6426f49364146be Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:25:02 +0100
Subject: llvmpipe: Utility function to get the pointer to a structure member.

---
 src/gallium/drivers/llvmpipe/lp_bld_struct.c | 21 +++++++++++++++++----
 src/gallium/drivers/llvmpipe/lp_bld_struct.h | 12 ++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
index 14d2b10df9..3998ac374f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
@@ -41,18 +41,31 @@
 #include "lp_bld_struct.h"
 
 
+LLVMValueRef
+lp_build_struct_get_ptr(LLVMBuilderRef builder,
+                        LLVMValueRef ptr,
+                        unsigned member,
+                        const char *name)
+{
+   LLVMValueRef indices[2];
+   LLVMValueRef member_ptr;
+   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
+   member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+   lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name);
+   return member_ptr;
+}
+
+
 LLVMValueRef
 lp_build_struct_get(LLVMBuilderRef builder,
                     LLVMValueRef ptr,
                     unsigned member,
                     const char *name)
 {
-   LLVMValueRef indices[2];
    LLVMValueRef member_ptr;
    LLVMValueRef res;
-   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
-   indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
-   member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+   member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name);
    res = LLVMBuildLoad(builder, member_ptr, "");
    lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name);
    return res;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
index cbefdc9f81..740392f561 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
@@ -53,6 +53,18 @@
              offsetof(_ctype, _cmember))
 
 
+/**
+ * Get value pointer to a structure member.
+ */
+LLVMValueRef
+lp_build_struct_get_ptr(LLVMBuilderRef builder,
+                        LLVMValueRef ptr,
+                        unsigned member,
+                        const char *name);
+
+/**
+ * Get the value of a structure member.
+ */
 LLVMValueRef
 lp_build_struct_get(LLVMBuilderRef builder,
                     LLVMValueRef ptr,
-- 
cgit v1.2.3


From 4da20234f3ea9b1606522fd0a9f4ef5c4b903906 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:26:30 +0100
Subject: llvmpipe: Correct implementation of floor.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 83 ++++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_arit.h | 12 +++++
 2 files changed, 95 insertions(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 09a57ff33d..eb9e579bdd 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -587,6 +587,89 @@ lp_build_abs(struct lp_build_context *bld,
 }
 
 
+enum lp_build_round_sse41_mode
+{
+   LP_BUILD_ROUND_SSE41_NEAREST = 0,
+   LP_BUILD_ROUND_SSE41_FLOOR = 1,
+   LP_BUILD_ROUND_SSE41_CEIL = 2,
+   LP_BUILD_ROUND_SSE41_TRUNCATE = 3
+};
+
+
+static INLINE LLVMValueRef
+lp_build_round_sse41(struct lp_build_context *bld,
+                     LLVMValueRef a,
+                     enum lp_build_round_sse41_mode mode)
+{
+   const union lp_type type = bld->type;
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
+   const char *intrinsic;
+
+   assert(type.floating);
+   assert(type.width*type.length == 128);
+
+   switch(type.width) {
+   case 32:
+      intrinsic = "llvm.x86.sse41.round.ps";
+      break;
+   case 64:
+      intrinsic = "llvm.x86.sse41.round.pd";
+      break;
+   default:
+      assert(0);
+      return bld->undef;
+   }
+
+   return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
+                                    LLVMConstInt(LLVMInt32Type(), mode, 0));
+}
+
+
+LLVMValueRef
+lp_build_floor(struct lp_build_context *bld,
+               LLVMValueRef a)
+{
+   const union lp_type type = bld->type;
+
+   assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+#endif
+
+   /* FIXME */
+   assert(0);
+   return bld->undef;
+}
+
+
+/**
+ * Convert to integer, through whichever rounding method that's fastest,
+ * typically truncating to zero.
+ */
+LLVMValueRef
+lp_build_int(struct lp_build_context *bld,
+             LLVMValueRef a)
+{
+   const union lp_type type = bld->type;
+   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+
+   assert(type.floating);
+
+   return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+}
+
+
+LLVMValueRef
+lp_build_ifloor(struct lp_build_context *bld,
+                LLVMValueRef a)
+{
+   a = lp_build_floor(bld, a);
+   a = lp_build_int(bld, a);
+   return a;
+}
+
+
 LLVMValueRef
 lp_build_sqrt(struct lp_build_context *bld,
               LLVMValueRef a)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index fc8cb25966..0732b9fa75 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -85,6 +85,18 @@ LLVMValueRef
 lp_build_abs(struct lp_build_context *bld,
              LLVMValueRef a);
 
+LLVMValueRef
+lp_build_floor(struct lp_build_context *bld,
+               LLVMValueRef a);
+
+LLVMValueRef
+lp_build_int(struct lp_build_context *bld,
+             LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ifloor(struct lp_build_context *bld,
+                LLVMValueRef a);
+
 LLVMValueRef
 lp_build_sqrt(struct lp_build_context *bld,
               LLVMValueRef a);
-- 
cgit v1.2.3


From fa0f4b35be17f68667edd6a2757b89086a11a833 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:27:06 +0100
Subject: llvmpipe: Utility functions for linear and bilinear interpolation.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 25 +++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_arit.h | 20 ++++++++++++++++++++
 2 files changed, 45 insertions(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index eb9e579bdd..a1d8a89774 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld,
 }
 
 
+LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+              LLVMValueRef x,
+              LLVMValueRef v0,
+              LLVMValueRef v1)
+{
+   return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
+}
+
+
+LLVMValueRef
+lp_build_lerp_2d(struct lp_build_context *bld,
+                 LLVMValueRef x,
+                 LLVMValueRef y,
+                 LLVMValueRef v00,
+                 LLVMValueRef v01,
+                 LLVMValueRef v10,
+                 LLVMValueRef v11)
+{
+   LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01);
+   LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11);
+   return lp_build_lerp(bld, y, v0, v1);
+}
+
+
 /**
  * Generate min(a, b)
  * Do checks for special cases.
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 0732b9fa75..383c3c3313 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -71,6 +71,26 @@ lp_build_div(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
 
+LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+              LLVMValueRef x,
+              LLVMValueRef v0,
+              LLVMValueRef v1);
+
+/**
+ * Bilinear interpolation.
+ *
+ * Values indices are in v_{yx}.
+ */
+LLVMValueRef
+lp_build_lerp_2d(struct lp_build_context *bld,
+                 LLVMValueRef x,
+                 LLVMValueRef y,
+                 LLVMValueRef v00,
+                 LLVMValueRef v01,
+                 LLVMValueRef v10,
+                 LLVMValueRef v11);
+
 LLVMValueRef
 lp_build_min(struct lp_build_context *bld,
              LLVMValueRef a,
-- 
cgit v1.2.3


From 0c2ea2433833d5eda8a4fefe1412bf0ea40b14bf Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:42:57 +0100
Subject: llvmpipe: Convenience function to obtain the integer type with same
 bitdepth of an arbitrary type.

---
 src/gallium/drivers/llvmpipe/lp_bld_type.c | 11 +++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_type.h |  4 ++++
 2 files changed, 15 insertions(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
index 8e0026fd97..577644b7ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -157,6 +157,17 @@ lp_build_int_vec_type(union lp_type type)
 }
 
 
+union lp_type
+lp_int_type(union lp_type type)
+{
+   union lp_type int_type;
+   int_type.value = 0;
+   int_type.width = type.width;
+   int_type.length = type.length;
+   return int_type;
+}
+
+
 void
 lp_build_context_init(struct lp_build_context *bld,
                       LLVMBuilderRef builder,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index 3ce566be64..9933e0b45c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -165,6 +165,10 @@ LLVMTypeRef
 lp_build_int_vec_type(union lp_type type);
 
 
+union lp_type
+lp_int_type(union lp_type type);
+
+
 void
 lp_build_context_init(struct lp_build_context *bld,
                       LLVMBuilderRef builder,
-- 
cgit v1.2.3


From de8376e2f22a59a0bc18bb7ddab88ee3153678b8 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:43:51 +0100
Subject: llvmpipe: Texture sampling code generation primitives.

Only supports single level 2d textures, with neareast and bilinear
filtering for now.
---
 src/gallium/drivers/llvmpipe/Makefile            |   1 +
 src/gallium/drivers/llvmpipe/SConscript          |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_sample.h     | 134 +++++++++
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 342 +++++++++++++++++++++++
 4 files changed, 478 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample.h
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 54bb3a0e73..c0033dea34 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -19,6 +19,7 @@ C_SOURCES = \
 	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_logic.c \
+	lp_bld_sample_soa.c \
 	lp_bld_swizzle.c \
 	lp_bld_struct.c \
 	lp_bld_tgsi_soa.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 3dcd5cb994..b74c9c4b6e 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -26,6 +26,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_format_soa.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
+		'lp_bld_sample_soa.c',
 		'lp_bld_struct.c',
 		'lp_bld_logic.c',
 		'lp_bld_swizzle.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
new file mode 100644
index 0000000000..5798f191fe
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -0,0 +1,134 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_SAMPLE_H
+#define LP_BLD_SAMPLE_H
+
+
+#include <llvm-c/Core.h>
+
+struct pipe_texture;
+struct pipe_sampler_state;
+union lp_type;
+
+
+/**
+ * Sampler static state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are embedded in the generated code.
+ */
+struct lp_sampler_static_state
+{
+   /* pipe_texture's state */
+   enum pipe_format format;
+   unsigned pot_width:1;
+   unsigned pot_height:1;
+   unsigned pot_depth:1;
+
+   /* pipe_sampler_state's state */
+   unsigned wrap_s:3;
+   unsigned wrap_t:3;
+   unsigned wrap_r:3;
+   unsigned min_img_filter:2;
+   unsigned min_mip_filter:2;
+   unsigned mag_img_filter:2;
+   unsigned compare_mode:1;
+   unsigned compare_func:3;
+   unsigned normalized_coords:1;
+   unsigned prefilter:4;
+};
+
+
+/**
+ * Sampler dynamic state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are computed in runtime.
+ *
+ * There are obtained through callbacks, as we don't want to tie the texture
+ * sampling code generation logic to any particular texture layout or pipe
+ * driver.
+ */
+struct lp_sampler_dynamic_state
+{
+
+   /** Obtain the base texture width. */
+   LLVMValueRef
+   (*width)( struct lp_sampler_dynamic_state *state,
+             LLVMBuilderRef builder,
+             unsigned unit);
+
+   /** Obtain the base texture height. */
+   LLVMValueRef
+   (*height)( struct lp_sampler_dynamic_state *state,
+              LLVMBuilderRef builder,
+              unsigned unit);
+
+   LLVMValueRef
+   (*stride)( struct lp_sampler_dynamic_state *state,
+              LLVMBuilderRef builder,
+              unsigned unit);
+
+   LLVMValueRef
+   (*data_ptr)( struct lp_sampler_dynamic_state *state,
+                LLVMBuilderRef builder,
+                unsigned unit);
+
+};
+
+
+/**
+ * Derive the sampler static state.
+ */
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+                        const struct pipe_texture *texture,
+                        const struct pipe_sampler_state *sampler);
+
+
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+                    const struct lp_sampler_static_state *static_state,
+                    struct lp_sampler_dynamic_state *dynamic_state,
+                    union lp_type fp_type,
+                    unsigned unit,
+                    unsigned num_coords,
+                    const LLVMValueRef *coords,
+                    LLVMValueRef lodbias,
+                    LLVMValueRef *texel);
+
+
+
+#endif /* LP_BLD_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
new file mode 100644
index 0000000000..25c8d84501
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -0,0 +1,342 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_format.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+                        const struct pipe_texture *texture,
+                        const struct pipe_sampler_state *sampler)
+{
+   memset(state, 0, sizeof *state);
+
+   if(!texture)
+      return;
+
+   if(!sampler)
+      return;
+
+   state->format            = texture->format;
+   state->pot_width         = util_is_pot(texture->width[0]);
+   state->pot_height        = util_is_pot(texture->height[0]);
+   state->pot_depth         = util_is_pot(texture->depth[0]);
+
+   state->wrap_s            = sampler->wrap_s;
+   state->wrap_t            = sampler->wrap_t;
+   state->wrap_r            = sampler->wrap_r;
+   state->min_img_filter    = sampler->min_img_filter;
+   state->min_mip_filter    = sampler->min_mip_filter;
+   state->mag_img_filter    = sampler->mag_img_filter;
+   state->compare_mode      = sampler->compare_mode;
+   state->compare_func      = sampler->compare_func;
+   state->normalized_coords = sampler->normalized_coords;
+   state->prefilter         = sampler->prefilter;
+}
+
+
+
+/**
+ * Keep all information for sampling code generation in a single place.
+ */
+struct lp_build_sample_context
+{
+   LLVMBuilderRef builder;
+
+   const struct lp_sampler_static_state *static_state;
+
+   struct lp_sampler_dynamic_state *dynamic_state;
+
+   const struct util_format_description *format_desc;
+
+   /** Incoming coordinates type and build context */
+   union lp_type coord_type;
+   struct lp_build_context coord_bld;
+
+   /** Integer coordinates */
+   union lp_type int_coord_type;
+   struct lp_build_context int_coord_bld;
+
+   /** Output texels type and build context */
+   union lp_type texel_type;
+   struct lp_build_context texel_bld;
+};
+
+
+static void
+lp_build_sample_texel(struct lp_build_sample_context *bld,
+                      LLVMValueRef x,
+                      LLVMValueRef y,
+                      LLVMValueRef y_stride,
+                      LLVMValueRef data_ptr,
+                      LLVMValueRef *texel)
+{
+   LLVMValueRef x_stride;
+   LLVMValueRef x_offset;
+   LLVMValueRef y_offset;
+   LLVMValueRef offset;
+
+   x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
+
+   x_offset = lp_build_mul(&bld->int_coord_bld, x, x_stride);
+   y_offset = lp_build_mul(&bld->int_coord_bld, y, y_stride);
+
+   offset = lp_build_add(&bld->int_coord_bld, x_offset, y_offset);
+
+   lp_build_load_rgba_soa(bld->builder,
+                          bld->format_desc,
+                          bld->texel_type,
+                          data_ptr,
+                          offset,
+                          texel);
+}
+
+
+static LLVMValueRef
+lp_build_sample_wrap(struct lp_build_sample_context *bld,
+                     LLVMValueRef coord,
+                     LLVMValueRef length,
+                     boolean is_pot,
+                     unsigned wrap_mode)
+{
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   LLVMValueRef length_minus_one;
+
+   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+   switch(wrap_mode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      if(is_pot)
+         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
+      else
+         /* Signed remainder won't give the right results for negative
+          * dividends but unsigned remainder does.*/
+         coord = LLVMBuildURem(bld->builder, coord, length, "");
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP:
+      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+   default:
+      assert(0);
+   }
+
+   return coord;
+}
+
+
+static void
+lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
+                               LLVMValueRef s,
+                               LLVMValueRef t,
+                               LLVMValueRef width,
+                               LLVMValueRef height,
+                               LLVMValueRef stride,
+                               LLVMValueRef data_ptr,
+                               LLVMValueRef *texel)
+{
+   LLVMValueRef x;
+   LLVMValueRef y;
+
+   x = lp_build_ifloor(&bld->coord_bld, s);
+   y = lp_build_ifloor(&bld->coord_bld, t);
+
+   x = lp_build_sample_wrap(bld, x, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+}
+
+
+static void
+lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
+                              LLVMValueRef s,
+                              LLVMValueRef t,
+                              LLVMValueRef width,
+                              LLVMValueRef height,
+                              LLVMValueRef stride,
+                              LLVMValueRef data_ptr,
+                              LLVMValueRef *texel)
+{
+   LLVMValueRef half;
+   LLVMValueRef s_ipart;
+   LLVMValueRef t_ipart;
+   LLVMValueRef s_fpart;
+   LLVMValueRef t_fpart;
+   LLVMValueRef x0, x1;
+   LLVMValueRef y0, y1;
+   LLVMValueRef neighbors[2][2][4];
+   unsigned chan;
+
+   half = lp_build_const_scalar(bld->coord_type, 0.5);
+   s = lp_build_sub(&bld->coord_bld, s, half);
+   t = lp_build_sub(&bld->coord_bld, t, half);
+
+   s_ipart = lp_build_floor(&bld->coord_bld, s);
+   t_ipart = lp_build_floor(&bld->coord_bld, t);
+
+   s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
+   t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
+
+   x0 = lp_build_int(&bld->coord_bld, s_ipart);
+   y0 = lp_build_int(&bld->coord_bld, t_ipart);
+
+   x0 = lp_build_sample_wrap(bld, x0, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+   x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+   lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+   lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+   lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+
+   /* TODO: Don't interpolate missing channels */
+   for(chan = 0; chan < 4; ++chan) {
+      switch(bld->format_desc->swizzle[chan]) {
+      case UTIL_FORMAT_SWIZZLE_X:
+      case UTIL_FORMAT_SWIZZLE_Y:
+      case UTIL_FORMAT_SWIZZLE_Z:
+      case UTIL_FORMAT_SWIZZLE_W:
+         texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
+                                        s_fpart, t_fpart,
+                                        neighbors[0][0][chan],
+                                        neighbors[0][1][chan],
+                                        neighbors[1][0][chan],
+                                        neighbors[1][1][chan]);
+         break;
+      case UTIL_FORMAT_SWIZZLE_0:
+         texel[chan] = bld->texel_bld.zero;
+         break;
+      case UTIL_FORMAT_SWIZZLE_1:
+         texel[chan] = bld->texel_bld.one;
+         break;
+      default:
+         assert(0);
+         texel[chan] = bld->texel_bld.undef;
+         break;
+      }
+   }
+}
+
+
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+                    const struct lp_sampler_static_state *static_state,
+                    struct lp_sampler_dynamic_state *dynamic_state,
+                    union lp_type type,
+                    unsigned unit,
+                    unsigned num_coords,
+                    const LLVMValueRef *coords,
+                    LLVMValueRef lodbias,
+                    LLVMValueRef *texel)
+{
+   struct lp_build_sample_context bld;
+   LLVMValueRef width;
+   LLVMValueRef height;
+   LLVMValueRef stride;
+   LLVMValueRef data_ptr;
+   LLVMValueRef s;
+   LLVMValueRef t;
+   LLVMValueRef p;
+
+   /* Setup our build context */
+   memset(&bld, 0, sizeof bld);
+   bld.builder = builder;
+   bld.static_state = static_state;
+   bld.dynamic_state = dynamic_state;
+   bld.format_desc = util_format_description(static_state->format);
+   bld.coord_type = type;
+   bld.int_coord_type = lp_int_type(type);
+   bld.texel_type = type;
+   lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
+   lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
+   lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
+
+   /* Get the dynamic state */
+   width = dynamic_state->width(dynamic_state, builder, unit);
+   height = dynamic_state->height(dynamic_state, builder, unit);
+   stride = dynamic_state->stride(dynamic_state, builder, unit);
+   data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
+
+   s = coords[0];
+   t = coords[1];
+   p = coords[2];
+
+   width = lp_build_broadcast_scalar(&bld.int_coord_bld, width);
+   height = lp_build_broadcast_scalar(&bld.int_coord_bld, height);
+   stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride);
+
+   if(static_state->normalized_coords) {
+      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type);
+      LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, "");
+      LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, "");
+      s = lp_build_mul(&bld.coord_bld, s, fp_width);
+      t = lp_build_mul(&bld.coord_bld, t, fp_height);
+   }
+
+   switch (static_state->min_img_filter) {
+   case PIPE_TEX_FILTER_NEAREST:
+      lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+   case PIPE_TEX_FILTER_ANISO:
+      lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+      break;
+   }
+}
-- 
cgit v1.2.3


From e4c76c02f77ed6e86537b546f4200f8f8132d114 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:52:39 +0100
Subject: llvmpipe: Code generate the texture sampling inside the shader.

Finally a substantial performance improvement: framerates of apps using
texturing tripled, and furthermore, enabling/disabling texturing only
affects around 15% of the framerate, which means the bottleneck is now
somewhere else.

Generated texture sampling code is not complete though -- we always
sample from the base level -- so final figures will be different.
---
 src/gallium/drivers/llvmpipe/Makefile             |    3 +-
 src/gallium/drivers/llvmpipe/SConscript           |    3 +-
 src/gallium/drivers/llvmpipe/lp_jit.c             |   37 +-
 src/gallium/drivers/llvmpipe/lp_jit.h             |   27 +
 src/gallium/drivers/llvmpipe/lp_state.h           |    6 +-
 src/gallium/drivers/llvmpipe/lp_state_derived.c   |    4 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c        |   15 +-
 src/gallium/drivers/llvmpipe/lp_state_sampler.c   |   10 +
 src/gallium/drivers/llvmpipe/lp_tex_sample.c      | 1713 ---------------------
 src/gallium/drivers/llvmpipe/lp_tex_sample.h      |   11 +
 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c    | 1713 +++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c |  196 +++
 12 files changed, 2019 insertions(+), 1719 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index c0033dea34..06c586e6bb 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -46,7 +46,8 @@ C_SOURCES = \
 	lp_state_vs.c \
 	lp_surface.c \
 	lp_tex_cache.c \
-	lp_tex_sample.c \
+	lp_tex_sample_c.c \
+	lp_tex_sample_llvm.c \
 	lp_texture.c \
 	lp_tile_cache.c \
 	lp_tile_soa.c
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index b74c9c4b6e..ac1b5d6d1d 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -54,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_state_vs.c',
 		'lp_surface.c',
 		'lp_tex_cache.c',
-		'lp_tex_sample.c',
+		'lp_tex_sample_c.c',
+		'lp_tex_sample_llvm.c',
 		'lp_texture.c',
 		'lp_tile_cache.c',
 		'lp_tile_soa.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index d288460a1b..9465f763d5 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -44,15 +44,47 @@
 static void
 lp_jit_init_globals(struct llvmpipe_screen *screen)
 {
-   /* struct lp_jit_context */
+   LLVMTypeRef texture_type;
+
+   /* struct lp_jit_texture */
    {
       LLVMTypeRef elem_types[4];
+
+      elem_types[LP_JIT_TEXTURE_WIDTH]  = LLVMInt32Type();
+      elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
+      elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
+      elem_types[LP_JIT_TEXTURE_DATA]   = LLVMPointerType(LLVMInt8Type(), 0);
+
+      texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
+                             screen->target, texture_type,
+                             LP_JIT_TEXTURE_WIDTH);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
+                             screen->target, texture_type,
+                             LP_JIT_TEXTURE_HEIGHT);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride,
+                             screen->target, texture_type,
+                             LP_JIT_TEXTURE_STRIDE);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data,
+                             screen->target, texture_type,
+                             LP_JIT_TEXTURE_DATA);
+      LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
+                           screen->target, texture_type);
+
+      LLVMAddTypeName(screen->module, "texture", texture_type);
+   }
+
+   /* struct lp_jit_context */
+   {
+      LLVMTypeRef elem_types[5];
       LLVMTypeRef context_type;
 
       elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */
       elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
       elem_types[2] = LLVMFloatType();                     /* alpha_ref_value */
       elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0);  /* blend_color */
+      elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
 
       context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
 
@@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
                              screen->target, context_type, 2);
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
                              screen->target, context_type, 3);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
+                             screen->target, context_type,
+                             LP_JIT_CONTEXT_TEXTURES_INDEX);
       LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
                            screen->target, context_type);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index a7fb60f9f5..c3e3e1af67 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -38,11 +38,31 @@
 
 #include "lp_bld_struct.h"
 
+#include "pipe/p_state.h"
+
 
 struct tgsi_sampler;
 struct llvmpipe_screen;
 
 
+struct lp_jit_texture
+{
+   uint32_t width;
+   uint32_t height;
+   uint32_t stride;
+   const void *data;
+};
+
+
+enum {
+   LP_JIT_TEXTURE_WIDTH = 0,
+   LP_JIT_TEXTURE_HEIGHT,
+   LP_JIT_TEXTURE_STRIDE,
+   LP_JIT_TEXTURE_DATA
+};
+
+
+
 /**
  * This structure is passed directly to the generated fragment shader.
  *
@@ -65,6 +85,8 @@ struct lp_jit_context
 
    /* TODO: blend constant color */
    uint8_t *blend_color;
+
+   struct lp_jit_texture textures[PIPE_MAX_SAMPLERS];
 };
 
 
@@ -80,6 +102,11 @@ struct lp_jit_context
 #define lp_jit_context_blend_color(_builder, _ptr) \
    lp_build_struct_get(_builder, _ptr, 3, "blend_color")
 
+#define LP_JIT_CONTEXT_TEXTURES_INDEX 4
+
+#define lp_jit_context_textures(_builder, _ptr) \
+   lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+
 
 typedef void
 (*lp_jit_frag_func)(struct lp_jit_context *context,
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index fb10329887..0b846ecb13 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -36,6 +36,7 @@
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
 #include "lp_jit.h"
+#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */
 
 
 #define LP_NEW_VIEWPORT      0x1
@@ -57,7 +58,8 @@
 
 struct tgsi_sampler;
 struct vertex_info;
-
+struct pipe_context;
+struct llvmpipe_context;
 
 struct lp_fragment_shader;
 
@@ -67,6 +69,8 @@ struct lp_fragment_shader_variant_key
    struct pipe_depth_state depth;
    struct pipe_alpha_state alpha;
    struct pipe_blend_state blend;
+
+   struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 6fbb057937..e87976b9f3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -250,7 +250,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
 
    if (llvmpipe->dirty & (LP_NEW_FS |
                           LP_NEW_BLEND |
-                          LP_NEW_DEPTH_STENCIL_ALPHA))
+                          LP_NEW_DEPTH_STENCIL_ALPHA |
+                          LP_NEW_SAMPLER |
+                          LP_NEW_TEXTURE))
       llvmpipe_update_fs( llvmpipe );
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 80f705c871..1a3e168245 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -486,7 +486,13 @@ generate_fragment(struct llvmpipe_context *lp,
                             a0_ptr, dadx_ptr, dady_ptr,
                             x0, y0, 2, 0);
 
+#if 0
+   /* C texture sampling */
    sampler = lp_c_sampler_soa_create(context_ptr);
+#else
+   /* code generated texture sampling */
+   sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
+#endif
 
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
@@ -666,8 +672,11 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
  */
 static void
 make_variant_key(struct llvmpipe_context *lp,
+                 struct lp_fragment_shader *shader,
                  struct lp_fragment_shader_variant_key *key)
 {
+   unsigned i;
+
    memset(key, 0, sizeof *key);
 
    memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
@@ -678,6 +687,10 @@ make_variant_key(struct llvmpipe_context *lp,
    /* alpha.ref_value is passed in jit_context */
 
    memcpy(&key->blend, lp->blend, sizeof key->blend);
+
+   for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
+      if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
+         lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]);
 }
 
 
@@ -688,7 +701,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
    struct lp_fragment_shader_variant_key key;
    struct lp_fragment_shader_variant *variant;
 
-   make_variant_key(lp, &key);
+   make_variant_key(lp, shader, &key);
 
    variant = shader->variants;
    while(variant) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 4fef541b1e..c69d90c723 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
 
       pipe_texture_reference(&llvmpipe->texture[i], tex);
       lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex);
+
+      if(tex) {
+         struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
+         struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i];
+         jit_tex->width = tex->width[0];
+         jit_tex->height = tex->height[0];
+         jit_tex->stride = lp_tex->stride[0];
+         if(!lp_tex->dt)
+            jit_tex->data = lp_tex->data;
+      }
    }
 
    llvmpipe->num_textures = num;
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
deleted file mode 100644
index 9a876f404d..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ /dev/null
@@ -1,1713 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2008 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Texture sampling
- *
- * Authors:
- *   Brian Paul
- */
-
-#include "lp_context.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_texture.h"
-#include "lp_tex_sample.h"
-#include "lp_tex_cache.h"
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-
-
-/*
- * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes
- * see 1-pixel bands of improperly weighted linear-filtered textures.
- * The tests/texwrap.c demo is a good test.
- * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
- * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
- */
-#define FRAC(f)  ((f) - util_ifloor(f))
-
-
-/**
- * Linear interpolation macro
- */
-static INLINE float
-lerp(float a, float v0, float v1)
-{
-   return v0 + a * (v1 - v0);
-}
-
-
-/**
- * Do 2D/biliner interpolation of float values.
- * v00, v10, v01 and v11 are typically four texture samples in a square/box.
- * a and b are the horizontal and vertical interpolants.
- * It's important that this function is inlined when compiled with
- * optimization!  If we find that's not true on some systems, convert
- * to a macro.
- */
-static INLINE float
-lerp_2d(float a, float b,
-        float v00, float v10, float v01, float v11)
-{
-   const float temp0 = lerp(a, v00, v10);
-   const float temp1 = lerp(a, v01, v11);
-   return lerp(b, temp0, temp1);
-}
-
-
-/**
- * As above, but 3D interpolation of 8 values.
- */
-static INLINE float
-lerp_3d(float a, float b, float c,
-        float v000, float v100, float v010, float v110,
-        float v001, float v101, float v011, float v111)
-{
-   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
-   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
-   return lerp(c, temp0, temp1);
-}
-
-
-
-/**
- * If A is a signed integer, A % B doesn't give the right value for A < 0
- * (in terms of texture repeat).  Just casting to unsigned fixes that.
- */
-#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
-
-
-/**
- * Apply texture coord wrapping mode and return integer texture indexes
- * for a vector of four texcoords (S or T or P).
- * \param wrapMode  PIPE_TEX_WRAP_x
- * \param s  the incoming texcoords
- * \param size  the texture image size
- * \param icoord  returns the integer texcoords
- * \return  integer texture index
- */
-static INLINE void
-nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
-                   int icoord[4])
-{
-   uint ch;
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_REPEAT:
-      /* s limited to [0,1) */
-      /* i limited to [0,size-1] */
-      for (ch = 0; ch < 4; ch++) {
-         int i = util_ifloor(s[ch] * size);
-         icoord[ch] = REMAINDER(i, size);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP:
-      /* s limited to [0,1] */
-      /* i limited to [0,size-1] */
-      for (ch = 0; ch < 4; ch++) {
-         if (s[ch] <= 0.0F)
-            icoord[ch] = 0;
-         else if (s[ch] >= 1.0F)
-            icoord[ch] = size - 1;
-         else
-            icoord[ch] = util_ifloor(s[ch] * size);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [0, size-1] */
-         const float min = 1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            if (s[ch] < min)
-               icoord[ch] = 0;
-            else if (s[ch] > max)
-               icoord[ch] = size - 1;
-            else
-               icoord[ch] = util_ifloor(s[ch] * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [-1, size] */
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            if (s[ch] <= min)
-               icoord[ch] = -1;
-            else if (s[ch] >= max)
-               icoord[ch] = size;
-            else
-               icoord[ch] = util_ifloor(s[ch] * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_REPEAT:
-      {
-         const float min = 1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            const int flr = util_ifloor(s[ch]);
-            float u;
-            if (flr & 1)
-               u = 1.0F - (s[ch] - (float) flr);
-            else
-               u = s[ch] - (float) flr;
-            if (u < min)
-               icoord[ch] = 0;
-            else if (u > max)
-               icoord[ch] = size - 1;
-            else
-               icoord[ch] = util_ifloor(u * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         /* s limited to [0,1] */
-         /* i limited to [0,size-1] */
-         const float u = fabsf(s[ch]);
-         if (u <= 0.0F)
-            icoord[ch] = 0;
-         else if (u >= 1.0F)
-            icoord[ch] = size - 1;
-         else
-            icoord[ch] = util_ifloor(u * size);
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [0, size-1] */
-         const float min = 1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            const float u = fabsf(s[ch]);
-            if (u < min)
-               icoord[ch] = 0;
-            else if (u > max)
-               icoord[ch] = size - 1;
-            else
-               icoord[ch] = util_ifloor(u * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [0, size-1] */
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            const float u = fabsf(s[ch]);
-            if (u < min)
-               icoord[ch] = -1;
-            else if (u > max)
-               icoord[ch] = size;
-            else
-               icoord[ch] = util_ifloor(u * size);
-         }
-      }
-      return;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * Used to compute texel locations for linear sampling for four texcoords.
- * \param wrapMode  PIPE_TEX_WRAP_x
- * \param s  the texcoords
- * \param size  the texture image size
- * \param icoord0  returns first texture indexes
- * \param icoord1  returns second texture indexes (usually icoord0 + 1)
- * \param w  returns blend factor/weight between texture indexes
- * \param icoord  returns the computed integer texture coords
- */
-static INLINE void
-linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
-                  int icoord0[4], int icoord1[4], float w[4])
-{
-   uint ch;
-
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_REPEAT:
-      for (ch = 0; ch < 4; ch++) {
-         float u = s[ch] * size - 0.5F;
-         icoord0[ch] = REMAINDER(util_ifloor(u), size);
-         icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         float u = CLAMP(s[ch], 0.0F, 1.0F);
-         u = u * size - 0.5f;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      for (ch = 0; ch < 4; ch++) {
-         float u = CLAMP(s[ch], 0.0F, 1.0F);
-         u = u * size - 0.5f;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord0[ch] < 0)
-            icoord0[ch] = 0;
-         if (icoord1[ch] >= (int) size)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      {
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            float u = CLAMP(s[ch], min, max);
-            u = u * size - 0.5f;
-            icoord0[ch] = util_ifloor(u);
-            icoord1[ch] = icoord0[ch] + 1;
-            w[ch] = FRAC(u);
-         }
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_REPEAT:
-      for (ch = 0; ch < 4; ch++) {
-         const int flr = util_ifloor(s[ch]);
-         float u;
-         if (flr & 1)
-            u = 1.0F - (s[ch] - (float) flr);
-         else
-            u = s[ch] - (float) flr;
-         u = u * size - 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord0[ch] < 0)
-            icoord0[ch] = 0;
-         if (icoord1[ch] >= (int) size)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         float u = fabsf(s[ch]);
-         if (u >= 1.0F)
-            u = (float) size;
-         else
-            u *= size;
-         u -= 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-      for (ch = 0; ch < 4; ch++) {
-         float u = fabsf(s[ch]);
-         if (u >= 1.0F)
-            u = (float) size;
-         else
-            u *= size;
-         u -= 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord0[ch] < 0)
-            icoord0[ch] = 0;
-         if (icoord1[ch] >= (int) size)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-      {
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            float u = fabsf(s[ch]);
-            if (u <= min)
-               u = min * size;
-            else if (u >= max)
-               u = max * size;
-            else
-               u *= size;
-            u -= 0.5F;
-            icoord0[ch] = util_ifloor(u);
-            icoord1[ch] = icoord0[ch] + 1;
-            w[ch] = FRAC(u);
-         }
-      }
-      break;;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * For RECT textures / unnormalized texcoords
- * Only a subset of wrap modes supported.
- */
-static INLINE void
-nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
-                          int icoord[4])
-{
-   uint ch;
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         int i = util_ifloor(s[ch]);
-         icoord[ch]= CLAMP(i, 0, (int) size-1);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      /* fall-through */
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      for (ch = 0; ch < 4; ch++) {
-         icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
-      }
-      return;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * For RECT textures / unnormalized texcoords.
- * Only a subset of wrap modes supported.
- */
-static INLINE void
-linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
-                         int icoord0[4], int icoord1[4], float w[4])
-{
-   uint ch;
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         /* Not exactly what the spec says, but it matches NVIDIA output */
-         float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         w[ch] = FRAC(u);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      /* fall-through */
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      for (ch = 0; ch < 4; ch++) {
-         float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
-         u -= 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord1[ch] > (int) size - 1)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-static unsigned
-choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
-{
-   /*
-      major axis
-      direction     target                             sc     tc    ma
-      ----------    -------------------------------    ---    ---   ---
-       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
-       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
-       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
-       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
-       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
-       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
-   */
-   const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
-   unsigned face;
-   float sc, tc, ma;
-
-   if (arx > ary && arx > arz) {
-      if (rx >= 0.0F) {
-         face = PIPE_TEX_FACE_POS_X;
-         sc = -rz;
-         tc = -ry;
-         ma = arx;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_X;
-         sc = rz;
-         tc = -ry;
-         ma = arx;
-      }
-   }
-   else if (ary > arx && ary > arz) {
-      if (ry >= 0.0F) {
-         face = PIPE_TEX_FACE_POS_Y;
-         sc = rx;
-         tc = rz;
-         ma = ary;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_Y;
-         sc = rx;
-         tc = -rz;
-         ma = ary;
-      }
-   }
-   else {
-      if (rz > 0.0F) {
-         face = PIPE_TEX_FACE_POS_Z;
-         sc = rx;
-         tc = -ry;
-         ma = arz;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_Z;
-         sc = -rx;
-         tc = -ry;
-         ma = arz;
-      }
-   }
-
-   *newS = ( sc / ma + 1.0F ) * 0.5F;
-   *newT = ( tc / ma + 1.0F ) * 0.5F;
-
-   return face;
-}
-
-
-/**
- * Examine the quad's texture coordinates to compute the partial
- * derivatives w.r.t X and Y, then compute lambda (level of detail).
- *
- * This is only done for fragment shaders, not vertex shaders.
- */
-static float
-compute_lambda(struct tgsi_sampler *tgsi_sampler,
-               const float s[QUAD_SIZE],
-               const float t[QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               float lodbias)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   float rho, lambda;
-
-   if (samp->processor == TGSI_PROCESSOR_VERTEX)
-      return lodbias;
-
-   assert(sampler->normalized_coords);
-
-   assert(s);
-   {
-      float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
-      float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
-      dsdx = fabsf(dsdx);
-      dsdy = fabsf(dsdy);
-      rho = MAX2(dsdx, dsdy) * texture->width[0];
-   }
-   if (t) {
-      float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
-      float dtdy = t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT];
-      float max;
-      dtdx = fabsf(dtdx);
-      dtdy = fabsf(dtdy);
-      max = MAX2(dtdx, dtdy) * texture->height[0];
-      rho = MAX2(rho, max);
-   }
-   if (p) {
-      float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
-      float dpdy = p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT];
-      float max;
-      dpdx = fabsf(dpdx);
-      dpdy = fabsf(dpdy);
-      max = MAX2(dpdx, dpdy) * texture->depth[0];
-      rho = MAX2(rho, max);
-   }
-
-   lambda = util_fast_log2(rho);
-   lambda += lodbias + sampler->lod_bias;
-   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
-   return lambda;
-}
-
-
-/**
- * Do several things here:
- * 1. Compute lambda from the texcoords, if needed
- * 2. Determine if we're minifying or magnifying
- * 3. If minifying, choose mipmap levels
- * 4. Return image filter to use within mipmap images
- * \param level0  Returns first mipmap level to sample from
- * \param level1  Returns second mipmap level to sample from
- * \param levelBlend  Returns blend factor between levels, in [0,1]
- * \param imgFilter  Returns either the min or mag filter, depending on lambda
- */
-static void
-choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler,
-                     const float s[QUAD_SIZE],
-                     const float t[QUAD_SIZE],
-                     const float p[QUAD_SIZE],
-                     float lodbias,
-                     unsigned *level0, unsigned *level1, float *levelBlend,
-                     unsigned *imgFilter)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-
-   if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
-      /* no mipmap selection needed */
-      *level0 = *level1 = CLAMP((int) sampler->min_lod,
-                                0, (int) texture->last_level);
-
-      if (sampler->min_img_filter != sampler->mag_img_filter) {
-         /* non-mipmapped texture, but still need to determine if doing
-          * minification or magnification.
-          */
-         float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-         if (lambda <= 0.0) {
-            *imgFilter = sampler->mag_img_filter;
-         }
-         else {
-            *imgFilter = sampler->min_img_filter;
-         }
-      }
-      else {
-         *imgFilter = sampler->mag_img_filter;
-      }
-   }
-   else {
-      float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-
-      if (lambda <= 0.0) { /* XXX threshold depends on the filter */
-         /* magnifying */
-         *imgFilter = sampler->mag_img_filter;
-         *level0 = *level1 = 0;
-      }
-      else {
-         /* minifying */
-         *imgFilter = sampler->min_img_filter;
-
-         /* choose mipmap level(s) and compute the blend factor between them */
-         if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
-            /* Nearest mipmap level */
-            const int lvl = (int) (lambda + 0.5);
-            *level0 =
-            *level1 = CLAMP(lvl, 0, (int) texture->last_level);
-         }
-         else {
-            /* Linear interpolation between mipmap levels */
-            const int lvl = (int) lambda;
-            *level0 = CLAMP(lvl,     0, (int) texture->last_level);
-            *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
-            *levelBlend = FRAC(lambda);  /* blending weight between levels */
-         }
-      }
-   }
-}
-
-
-/**
- * Get a texel from a texture, using the texture tile cache.
- *
- * \param face  the cube face in 0..5
- * \param level  the mipmap level
- * \param x  the x coord of texel within 2D image
- * \param y  the y coord of texel within 2D image
- * \param z  which slice of a 3D texture
- * \param rgba  the quad to put the texel/color into
- * \param j  which element of the rgba quad to write to
- *
- * XXX maybe move this into lp_tile_cache.c and merge with the
- * lp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
- */
-static void
-get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
-                  unsigned face, unsigned level, int x, int y, 
-                  const uint8_t *out[4])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-
-   const struct llvmpipe_cached_tex_tile *tile
-      = lp_get_cached_tex_tile(samp->cache,
-                               tex_tile_address(x, y, 0, face, level));
-
-   y %= TEX_TILE_SIZE;
-   x %= TEX_TILE_SIZE;
-      
-   out[0] = &tile->color[y  ][x  ][0];
-   out[1] = &tile->color[y  ][x+1][0];
-   out[2] = &tile->color[y+1][x  ][0];
-   out[3] = &tile->color[y+1][x+1][0];
-}
-
-static INLINE const uint8_t *
-get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
-                 unsigned face, unsigned level, int x, int y)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-
-   const struct llvmpipe_cached_tex_tile *tile
-      = lp_get_cached_tex_tile(samp->cache,
-                               tex_tile_address(x, y, 0, face, level));
-
-   y %= TEX_TILE_SIZE;
-   x %= TEX_TILE_SIZE;
-
-   return &tile->color[y][x][0];
-}
-
-
-static void
-get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
-                     unsigned face, unsigned level, 
-                     int x0, int y0, 
-                     int x1, int y1,
-                     const uint8_t *out[4])
-{
-   unsigned i;
-
-   for (i = 0; i < 4; i++) {
-      unsigned tx = (i & 1) ? x1 : x0;
-      unsigned ty = (i >> 1) ? y1 : y0;
-
-      out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty );
-   }
-}
-
-static void
-get_texel(const struct tgsi_sampler *tgsi_sampler,
-                 unsigned face, unsigned level, int x, int y, int z,
-                 float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-
-   if (x < 0 || x >= (int) texture->width[level] ||
-       y < 0 || y >= (int) texture->height[level] ||
-       z < 0 || z >= (int) texture->depth[level]) {
-      rgba[0][j] = sampler->border_color[0];
-      rgba[1][j] = sampler->border_color[1];
-      rgba[2][j] = sampler->border_color[2];
-      rgba[3][j] = sampler->border_color[3];
-   }
-   else {
-      const unsigned tx = x % TEX_TILE_SIZE;
-      const unsigned ty = y % TEX_TILE_SIZE;
-      const struct llvmpipe_cached_tex_tile *tile;
-
-      tile = lp_get_cached_tex_tile(samp->cache,
-                                    tex_tile_address(x, y, z, face, level));
-
-      rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]);
-      rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]);
-      rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]);
-      rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]);
-      if (0)
-      {
-         debug_printf("Get texel %f %f %f %f from %s\n",
-                      rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
-                      pf_name(texture->format));
-      }
-   }
-}
-
-
-/**
- * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
- * When we sampled the depth texture, the depth value was put into all
- * RGBA channels.  We look at the red channel here.
- * \param rgba  quad of (depth) texel values
- * \param p  texture 'P' components for four pixels in quad
- * \param j  which pixel in the quad to test [0..3]
- */
-static INLINE void
-shadow_compare(const struct pipe_sampler_state *sampler,
-               float rgba[NUM_CHANNELS][QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               uint j)
-{
-   int k;
-   switch (sampler->compare_func) {
-   case PIPE_FUNC_LESS:
-      k = p[j] < rgba[0][j];
-      break;
-   case PIPE_FUNC_LEQUAL:
-      k = p[j] <= rgba[0][j];
-      break;
-   case PIPE_FUNC_GREATER:
-      k = p[j] > rgba[0][j];
-      break;
-   case PIPE_FUNC_GEQUAL:
-      k = p[j] >= rgba[0][j];
-      break;
-   case PIPE_FUNC_EQUAL:
-      k = p[j] == rgba[0][j];
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      k = p[j] != rgba[0][j];
-      break;
-   case PIPE_FUNC_ALWAYS:
-      k = 1;
-      break;
-   case PIPE_FUNC_NEVER:
-      k = 0;
-      break;
-   default:
-      k = 0;
-      assert(0);
-      break;
-   }
-
-   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
-   rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
-   rgba[3][j] = 1.0F;
-}
-
-
-/**
- * As above, but do four z/texture comparisons.
- */
-static INLINE void
-shadow_compare4(const struct pipe_sampler_state *sampler,
-                float rgba[NUM_CHANNELS][QUAD_SIZE],
-                const float p[QUAD_SIZE])
-{
-   int j, k0, k1, k2, k3;
-   float val;
-
-   /* compare four texcoords vs. four texture samples */
-   switch (sampler->compare_func) {
-   case PIPE_FUNC_LESS:
-      k0 = p[0] < rgba[0][0];
-      k1 = p[1] < rgba[0][1];
-      k2 = p[2] < rgba[0][2];
-      k3 = p[3] < rgba[0][3];
-      break;
-   case PIPE_FUNC_LEQUAL:
-      k0 = p[0] <= rgba[0][0];
-      k1 = p[1] <= rgba[0][1];
-      k2 = p[2] <= rgba[0][2];
-      k3 = p[3] <= rgba[0][3];
-      break;
-   case PIPE_FUNC_GREATER:
-      k0 = p[0] > rgba[0][0];
-      k1 = p[1] > rgba[0][1];
-      k2 = p[2] > rgba[0][2];
-      k3 = p[3] > rgba[0][3];
-      break;
-   case PIPE_FUNC_GEQUAL:
-      k0 = p[0] >= rgba[0][0];
-      k1 = p[1] >= rgba[0][1];
-      k2 = p[2] >= rgba[0][2];
-      k3 = p[3] >= rgba[0][3];
-      break;
-   case PIPE_FUNC_EQUAL:
-      k0 = p[0] == rgba[0][0];
-      k1 = p[1] == rgba[0][1];
-      k2 = p[2] == rgba[0][2];
-      k3 = p[3] == rgba[0][3];
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      k0 = p[0] != rgba[0][0];
-      k1 = p[1] != rgba[0][1];
-      k2 = p[2] != rgba[0][2];
-      k3 = p[3] != rgba[0][3];
-      break;
-   case PIPE_FUNC_ALWAYS:
-      k0 = k1 = k2 = k3 = 1;
-      break;
-   case PIPE_FUNC_NEVER:
-      k0 = k1 = k2 = k3 = 0;
-      break;
-   default:
-      k0 = k1 = k2 = k3 = 0;
-      assert(0);
-      break;
-   }
-
-   /* convert four pass/fail values to an intensity in [0,1] */
-   val = 0.25F * (k0 + k1 + k2 + k3);
-
-   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
-   for (j = 0; j < 4; j++) {
-      rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
-      rgba[3][j] = 1.0F;
-   }
-}
-
-
-
-static void
-lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
-                                    const float s[QUAD_SIZE],
-                                    const float t[QUAD_SIZE],
-                                    const float p[QUAD_SIZE],
-                                    float lodbias,
-                                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   unsigned  j;
-   unsigned level = samp->level;
-   unsigned xpot = 1 << (samp->xpot - level);
-   unsigned ypot = 1 << (samp->ypot - level);
-   unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
-   unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
-      
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int c;
-
-      float u = s[j] * xpot - 0.5F;
-      float v = t[j] * ypot - 0.5F;
-
-      int uflr = util_ifloor(u);
-      int vflr = util_ifloor(v);
-
-      float xw = u - (float)uflr;
-      float yw = v - (float)vflr;
-
-      int x0 = uflr & (xpot - 1);
-      int y0 = vflr & (ypot - 1);
-
-      const uint8_t *tx[4];
-      
-
-      /* Can we fetch all four at once:
-       */
-      if (x0 < xmax && y0 < ymax)
-      {
-         get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
-      }
-      else 
-      {
-         unsigned x1 = (x0 + 1) & (xpot - 1);
-         unsigned y1 = (y0 + 1) & (ypot - 1);
-         get_texel_quad_2d_mt(tgsi_sampler, 0, level, 
-                              x0, y0, x1, y1, tx);
-      }
-
-
-      /* interpolate R, G, B, A */
-      for (c = 0; c < 4; c++) {
-         rgba[c][j] = lerp_2d(xw, yw, 
-                              ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]),
-                              ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c]));
-      }
-   }
-}
-
-
-static void
-lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
-                                     const float s[QUAD_SIZE],
-                                     const float t[QUAD_SIZE],
-                                     const float p[QUAD_SIZE],
-                                     float lodbias,
-                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   unsigned  j;
-   unsigned level = samp->level;
-   unsigned xpot = 1 << (samp->xpot - level);
-   unsigned ypot = 1 << (samp->ypot - level);
-
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int c;
-
-      float u = s[j] * xpot;
-      float v = t[j] * ypot;
-
-      int uflr = util_ifloor(u);
-      int vflr = util_ifloor(v);
-
-      int x0 = uflr & (xpot - 1);
-      int y0 = vflr & (ypot - 1);
-
-      const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
-
-      for (c = 0; c < 4; c++) {
-         rgba[c][j] = ubyte_to_float(out[c]);
-      }
-   }
-}
-
-
-static void
-lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
-                                     const float s[QUAD_SIZE],
-                                     const float t[QUAD_SIZE],
-                                     const float p[QUAD_SIZE],
-                                     float lodbias,
-                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   unsigned  j;
-   unsigned level = samp->level;
-   unsigned xpot = 1 << (samp->xpot - level);
-   unsigned ypot = 1 << (samp->ypot - level);
-
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int c;
-
-      float u = s[j] * xpot;
-      float v = t[j] * ypot;
-
-      int x0, y0;
-      const uint8_t *out;
-
-      x0 = util_ifloor(u);
-      if (x0 < 0) 
-         x0 = 0;
-      else if (x0 > xpot - 1)
-         x0 = xpot - 1;
-
-      y0 = util_ifloor(v);
-      if (y0 < 0) 
-         y0 = 0;
-      else if (y0 > ypot - 1)
-         y0 = ypot - 1;
-      
-      out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
-
-      for (c = 0; c < 4; c++) {
-         rgba[c][j] = ubyte_to_float(out[c]);
-      }
-   }
-}
-
-
-static void
-lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
-                                               const float s[QUAD_SIZE],
-                                               const float t[QUAD_SIZE],
-                                               const float p[QUAD_SIZE],
-                                               float lodbias,
-                                               float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   int level0;
-   float lambda;
-
-   lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-   level0 = (int)lambda;
-
-   if (lambda < 0.0) { 
-      samp->level = 0;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba );
-   }
-   else if (level0 >= texture->last_level) {
-      samp->level = texture->last_level;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba );
-   }
-   else {
-      float levelBlend = lambda - level0;
-      float rgba0[4][4];
-      float rgba1[4][4];
-      int c,j;
-
-      samp->level = level0;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba0 );
-
-      samp->level = level0+1;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba1 );
-
-      for (j = 0; j < QUAD_SIZE; j++) {
-         for (c = 0; c < 4; c++) {
-            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
-         }
-      }
-   }
-}
-
-/**
- * Common code for sampling 1D/2D/cube textures.
- * Could probably extend for 3D...
- */
-static void
-lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
-                         const float s[QUAD_SIZE],
-                         const float t[QUAD_SIZE],
-                         const float p[QUAD_SIZE],
-                         float lodbias,
-                         float rgba[NUM_CHANNELS][QUAD_SIZE],
-                         const unsigned faces[4])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   unsigned level0, level1, j, imgFilter;
-   int width, height;
-   float levelBlend;
-
-   choose_mipmap_levels(tgsi_sampler, s, t, p, 
-                        lodbias,
-                        &level0, &level1, &levelBlend, &imgFilter);
-
-   assert(sampler->normalized_coords);
-
-   width = texture->width[level0];
-   height = texture->height[level0];
-
-   assert(width > 0);
-
-   switch (imgFilter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      {
-         int x[4], y[4];
-         nearest_texcoord_4(sampler->wrap_s, s, width, x);
-         nearest_texcoord_4(sampler->wrap_t, t, height, y);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare(sampler, rgba, p, j);
-            }
-
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-               unsigned c;
-               x[j] /= 2;
-               y[j] /= 2;
-               get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
-                         rgba2, j);
-               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
-                  shadow_compare(sampler, rgba2, p, j);
-               }
-
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
-      {
-         int x0[4], y0[4], x1[4], y1[4];
-         float xw[4], yw[4]; /* weights */
-
-         linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
-         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            float tx[4][4]; /* texels */
-            int c;
-            get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
-            get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
-            get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
-            get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare4(sampler, tx, p);
-            }
-
-            /* interpolate R, G, B, A */
-            for (c = 0; c < 4; c++) {
-               rgba[c][j] = lerp_2d(xw[j], yw[j],
-                                    tx[c][0], tx[c][1],
-                                    tx[c][2], tx[c][3]);
-            }
-
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-
-               /* XXX: This is incorrect -- will often end up with (x0
-                *  == x1 && y0 == y1), meaning that we fetch the same
-                *  texel four times and linearly interpolate between
-                *  identical values.  The correct approach would be to
-                *  call linear_texcoord again for the second level.
-                */
-               x0[j] /= 2;
-               y0[j] /= 2;
-               x1[j] /= 2;
-               y1[j] /= 2;
-               get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
-               get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
-               get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
-               get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
-               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
-                  shadow_compare4(sampler, tx, p);
-               }
-
-               /* interpolate R, G, B, A */
-               for (c = 0; c < 4; c++) {
-                  rgba2[c][j] = lerp_2d(xw[j], yw[j],
-                                        tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
-               }
-
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-static INLINE void
-lp_get_samples_1d(struct tgsi_sampler *sampler,
-                  const float s[QUAD_SIZE],
-                  const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias,
-                  float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   static const unsigned faces[4] = {0, 0, 0, 0};
-   static const float tzero[4] = {0, 0, 0, 0};
-   lp_get_samples_2d_common(sampler, s, tzero, NULL,
-                            lodbias, rgba, faces);
-}
-
-
-static INLINE void
-lp_get_samples_2d(struct tgsi_sampler *sampler,
-                  const float s[QUAD_SIZE],
-                  const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias,
-                  float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   static const unsigned faces[4] = {0, 0, 0, 0};
-   lp_get_samples_2d_common(sampler, s, t, p,
-                            lodbias, rgba, faces);
-}
-
-
-static INLINE void
-lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
-                  const float s[QUAD_SIZE],
-                  const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias,
-                  float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   /* get/map pipe_surfaces corresponding to 3D tex slices */
-   unsigned level0, level1, j, imgFilter;
-   int width, height, depth;
-   float levelBlend;
-   const uint face = 0;
-
-   choose_mipmap_levels(tgsi_sampler, s, t, p, 
-                        lodbias,
-                        &level0, &level1, &levelBlend, &imgFilter);
-
-   assert(sampler->normalized_coords);
-
-   width = texture->width[level0];
-   height = texture->height[level0];
-   depth = texture->depth[level0];
-
-   assert(width > 0);
-   assert(height > 0);
-   assert(depth > 0);
-
-   switch (imgFilter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      {
-         int x[4], y[4], z[4];
-         nearest_texcoord_4(sampler->wrap_s, s, width, x);
-         nearest_texcoord_4(sampler->wrap_t, t, height, y);
-         nearest_texcoord_4(sampler->wrap_r, p, depth, z);
-         for (j = 0; j < QUAD_SIZE; j++) {
-            get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-               unsigned c;
-               x[j] /= 2;
-               y[j] /= 2;
-               z[j] /= 2;
-               get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
-      {
-         int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
-         float xw[4], yw[4], zw[4]; /* interpolation weights */
-         linear_texcoord_4(sampler->wrap_s, s, width,  x0, x1, xw);
-         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
-         linear_texcoord_4(sampler->wrap_r, p, depth,  z0, z1, zw);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            int c;
-            float tx0[4][4], tx1[4][4];
-            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
-            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
-            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
-            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
-            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
-            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
-            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
-            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
-
-            /* interpolate R, G, B, A */
-            for (c = 0; c < 4; c++) {
-               rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
-                                    tx0[c][0], tx0[c][1],
-                                    tx0[c][2], tx0[c][3],
-                                    tx1[c][0], tx1[c][1],
-                                    tx1[c][2], tx1[c][3]);
-            }
-
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-               x0[j] /= 2;
-               y0[j] /= 2;
-               z0[j] /= 2;
-               x1[j] /= 2;
-               y1[j] /= 2;
-               z1[j] /= 2;
-               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
-               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
-               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
-               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
-               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
-               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
-               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
-               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
-
-               /* interpolate R, G, B, A */
-               for (c = 0; c < 4; c++) {
-                  rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
-                                        tx0[c][0], tx0[c][1],
-                                        tx0[c][2], tx0[c][3],
-                                        tx1[c][0], tx1[c][1],
-                                        tx1[c][2], tx1[c][3]);
-               }
-
-               /* blend mipmap levels */
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-static void
-lp_get_samples_cube(struct tgsi_sampler *sampler,
-                    const float s[QUAD_SIZE],
-                    const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias,
-                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   unsigned faces[QUAD_SIZE], j;
-   float ssss[4], tttt[4];
-   for (j = 0; j < QUAD_SIZE; j++) {
-      faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
-   }
-   lp_get_samples_2d_common(sampler, ssss, tttt, NULL,
-                            lodbias, rgba, faces);
-}
-
-
-static void
-lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
-                    const float s[QUAD_SIZE],
-                    const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias,
-                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   const uint face = 0;
-   unsigned level0, level1, j, imgFilter;
-   int width, height;
-   float levelBlend;
-
-   choose_mipmap_levels(tgsi_sampler, s, t, p, 
-                        lodbias,
-                        &level0, &level1, &levelBlend, &imgFilter);
-
-   /* texture RECTS cannot be mipmapped */
-   assert(level0 == level1);
-
-   width = texture->width[level0];
-   height = texture->height[level0];
-
-   assert(width > 0);
-
-   switch (imgFilter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      {
-         int x[4], y[4];
-         nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
-         nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
-         for (j = 0; j < QUAD_SIZE; j++) {
-            get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare(sampler, rgba, p, j);
-            }
-         }
-      }
-      break;
-   case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
-      {
-         int x0[4], y0[4], x1[4], y1[4];
-         float xw[4], yw[4]; /* weights */
-         linear_texcoord_unnorm_4(sampler->wrap_s, s, width,  x0, x1, xw);
-         linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
-         for (j = 0; j < QUAD_SIZE; j++) {
-            float tx[4][4]; /* texels */
-            int c;
-            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
-            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
-            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
-            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare4(sampler, tx, p);
-            }
-            for (c = 0; c < 4; c++) {
-               rgba[c][j] = lerp_2d(xw[j], yw[j],
-                                    tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
-            }
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * Error condition handler
- */
-static INLINE void
-lp_get_samples_null(struct tgsi_sampler *tgsi_sampler,
-                    const float s[QUAD_SIZE],
-                    const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias,
-                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   int i,j;
-
-   for (i = 0; i < 4; i++)
-      for (j = 0; j < 4; j++)
-         rgba[i][j] = 1.0;
-}
-
-/**
- * Called via tgsi_sampler::get_samples() when using a sampler for the
- * first time.  Determine the actual sampler function, link it in and
- * call it.
- */
-void
-lp_get_samples(struct tgsi_sampler *tgsi_sampler,
-               const float s[QUAD_SIZE],
-               const float t[QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               float lodbias,
-               float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-
-   /* Default to the 'undefined' case:
-    */
-   tgsi_sampler->get_samples = lp_get_samples_null;
-
-   if (!texture) {
-      assert(0);                /* is this legal?? */
-      goto out;
-   }
-
-   if (!sampler->normalized_coords) {
-      assert (texture->target == PIPE_TEXTURE_2D);
-      tgsi_sampler->get_samples = lp_get_samples_rect;
-      goto out;
-   }
-
-   switch (texture->target) {
-   case PIPE_TEXTURE_1D:
-      tgsi_sampler->get_samples = lp_get_samples_1d;
-      break;
-   case PIPE_TEXTURE_2D:
-      tgsi_sampler->get_samples = lp_get_samples_2d;
-      break;
-   case PIPE_TEXTURE_3D:
-      tgsi_sampler->get_samples = lp_get_samples_3d;
-      break;
-   case PIPE_TEXTURE_CUBE:
-      tgsi_sampler->get_samples = lp_get_samples_cube;
-      break;
-   default:
-      assert(0);
-      break;
-   }
-
-   /* Do this elsewhere: 
-    */
-   samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
-   samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
-
-   /* Try to hook in a faster sampler.  Ultimately we'll have to
-    * code-generate these.  Luckily most of this looks like it is
-    * orthogonal state within the sampler.
-    */
-   if (texture->target == PIPE_TEXTURE_2D &&
-       sampler->min_img_filter == sampler->mag_img_filter &&
-       sampler->wrap_s == sampler->wrap_t &&
-       sampler->compare_mode == FALSE &&
-       sampler->normalized_coords) 
-   {
-      if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
-         samp->level = CLAMP((int) sampler->min_lod,
-                             0, (int) texture->last_level);
-
-         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
-            switch (sampler->min_img_filter) {
-            case PIPE_TEX_FILTER_NEAREST:
-               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT;
-               break;
-            case PIPE_TEX_FILTER_LINEAR:
-               tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT;
-               break;
-            default:
-               break;
-            }
-         } 
-         else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) {
-            switch (sampler->min_img_filter) {
-            case PIPE_TEX_FILTER_NEAREST:
-               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT;
-               break;
-            default:
-               break;
-            }
-         }
-      }
-      else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
-            switch (sampler->min_img_filter) {
-            case PIPE_TEX_FILTER_LINEAR:
-               tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT;
-               break;
-            default:
-               break;
-            }
-         } 
-      }
-   }
-   else if (0) {
-      _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n",
-                    texture->target, PIPE_TEXTURE_2D,
-                    sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
-                    sampler->min_img_filter, sampler->mag_img_filter,
-                    sampler->wrap_s, sampler->wrap_t,
-                    sampler->compare_mode, FALSE,
-                    sampler->normalized_coords, TRUE);
-   }
-
-out:
-   tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
-}
-
-
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
-                    uint32_t unit,
-                    float *store )
-{
-   struct tgsi_sampler *sampler = samplers[unit];
-
-#if 0
-   uint j;
-
-   debug_printf("%s sampler: %p (%p) store: %p\n",
-                __FUNCTION__,
-                sampler, *sampler,
-                store );
-
-   debug_printf("lodbias %f\n", store[12]);
-
-   for (j = 0; j < 4; j++)
-      debug_printf("sample %d texcoord %f %f\n",
-                   j,
-                   store[0+j],
-                   store[4+j]);
-#endif
-
-   {
-      float rgba[NUM_CHANNELS][QUAD_SIZE];
-      sampler->get_samples(sampler,
-                           &store[0],
-                           &store[4],
-                           &store[8],
-                           0.0f, /*store[12],  lodbias */
-                           rgba);
-      memcpy(store, rgba, sizeof rgba);
-   }
-
-#if 0
-   for (j = 0; j < 4; j++)
-      debug_printf("sample %d result %f %f %f %f\n",
-                   j,
-                   store[0+j],
-                   store[4+j],
-                   store[8+j],
-                   store[12+j]);
-#endif
-}
-
-
-#include "lp_bld_type.h"
-#include "lp_bld_intr.h"
-#include "lp_bld_tgsi.h"
-
-
-struct lp_c_sampler_soa
-{
-   struct lp_build_sampler_soa base;
-
-   LLVMValueRef context_ptr;
-
-   LLVMValueRef samplers_ptr;
-
-   /** Coords/texels store */
-   LLVMValueRef store_ptr;
-};
-
-
-static void
-lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
-{
-   FREE(sampler);
-}
-
-
-static void
-lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
-                                  LLVMBuilderRef builder,
-                                  union lp_type type,
-                                  unsigned unit,
-                                  unsigned num_coords,
-                                  const LLVMValueRef *coords,
-                                  LLVMValueRef lodbias,
-                                  LLVMValueRef *texel)
-{
-   struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
-   LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
-   LLVMValueRef args[3];
-   unsigned i;
-
-   if(!sampler->samplers_ptr)
-      sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
-
-   if(!sampler->store_ptr)
-      sampler->store_ptr = LLVMBuildArrayAlloca(builder,
-                                            vec_type,
-                                            LLVMConstInt(LLVMInt32Type(), 4, 0),
-                                            "texel_store");
-
-   for (i = 0; i < num_coords; i++) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
-      LLVMBuildStore(builder, coords[i], coord_ptr);
-   }
-
-   args[0] = sampler->samplers_ptr;
-   args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
-   args[2] = sampler->store_ptr;
-
-   lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
-
-   for (i = 0; i < NUM_CHANNELS; ++i) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
-      texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
-   }
-}
-
-
-struct lp_build_sampler_soa *
-lp_c_sampler_soa_create(LLVMValueRef context_ptr)
-{
-   struct lp_c_sampler_soa *sampler;
-
-   sampler = CALLOC_STRUCT(lp_c_sampler_soa);
-   if(!sampler)
-      return NULL;
-
-   sampler->base.destroy = lp_c_sampler_soa_destroy;
-   sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
-   sampler->context_ptr = context_ptr;
-
-   return &sampler->base;
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 7d1e565885..9ad1bde956 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -35,6 +35,7 @@
 
 
 struct llvmpipe_tex_tile_cache;
+struct lp_sampler_static_state;
 
 
 /**
@@ -87,4 +88,14 @@ struct lp_build_sampler_soa *
 lp_c_sampler_soa_create(LLVMValueRef context_ptr);
 
 
+/**
+ * Pure-LLVM texture sampling code generator.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key,
+                           LLVMValueRef context_ptr);
+
+
 #endif /* LP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
new file mode 100644
index 0000000000..9a876f404d
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -0,0 +1,1713 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2008 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * Texture sampling
+ *
+ * Authors:
+ *   Brian Paul
+ */
+
+#include "lp_context.h"
+#include "lp_quad.h"
+#include "lp_surface.h"
+#include "lp_texture.h"
+#include "lp_tex_sample.h"
+#include "lp_tex_cache.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+
+
+/*
+ * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes
+ * see 1-pixel bands of improperly weighted linear-filtered textures.
+ * The tests/texwrap.c demo is a good test.
+ * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
+ * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
+ */
+#define FRAC(f)  ((f) - util_ifloor(f))
+
+
+/**
+ * Linear interpolation macro
+ */
+static INLINE float
+lerp(float a, float v0, float v1)
+{
+   return v0 + a * (v1 - v0);
+}
+
+
+/**
+ * Do 2D/biliner interpolation of float values.
+ * v00, v10, v01 and v11 are typically four texture samples in a square/box.
+ * a and b are the horizontal and vertical interpolants.
+ * It's important that this function is inlined when compiled with
+ * optimization!  If we find that's not true on some systems, convert
+ * to a macro.
+ */
+static INLINE float
+lerp_2d(float a, float b,
+        float v00, float v10, float v01, float v11)
+{
+   const float temp0 = lerp(a, v00, v10);
+   const float temp1 = lerp(a, v01, v11);
+   return lerp(b, temp0, temp1);
+}
+
+
+/**
+ * As above, but 3D interpolation of 8 values.
+ */
+static INLINE float
+lerp_3d(float a, float b, float c,
+        float v000, float v100, float v010, float v110,
+        float v001, float v101, float v011, float v111)
+{
+   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
+   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
+   return lerp(c, temp0, temp1);
+}
+
+
+
+/**
+ * If A is a signed integer, A % B doesn't give the right value for A < 0
+ * (in terms of texture repeat).  Just casting to unsigned fixes that.
+ */
+#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
+
+
+/**
+ * Apply texture coord wrapping mode and return integer texture indexes
+ * for a vector of four texcoords (S or T or P).
+ * \param wrapMode  PIPE_TEX_WRAP_x
+ * \param s  the incoming texcoords
+ * \param size  the texture image size
+ * \param icoord  returns the integer texcoords
+ * \return  integer texture index
+ */
+static INLINE void
+nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+                   int icoord[4])
+{
+   uint ch;
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      /* s limited to [0,1) */
+      /* i limited to [0,size-1] */
+      for (ch = 0; ch < 4; ch++) {
+         int i = util_ifloor(s[ch] * size);
+         icoord[ch] = REMAINDER(i, size);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP:
+      /* s limited to [0,1] */
+      /* i limited to [0,size-1] */
+      for (ch = 0; ch < 4; ch++) {
+         if (s[ch] <= 0.0F)
+            icoord[ch] = 0;
+         else if (s[ch] >= 1.0F)
+            icoord[ch] = size - 1;
+         else
+            icoord[ch] = util_ifloor(s[ch] * size);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      {
+         /* s limited to [min,max] */
+         /* i limited to [0, size-1] */
+         const float min = 1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            if (s[ch] < min)
+               icoord[ch] = 0;
+            else if (s[ch] > max)
+               icoord[ch] = size - 1;
+            else
+               icoord[ch] = util_ifloor(s[ch] * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      {
+         /* s limited to [min,max] */
+         /* i limited to [-1, size] */
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            if (s[ch] <= min)
+               icoord[ch] = -1;
+            else if (s[ch] >= max)
+               icoord[ch] = size;
+            else
+               icoord[ch] = util_ifloor(s[ch] * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      {
+         const float min = 1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            const int flr = util_ifloor(s[ch]);
+            float u;
+            if (flr & 1)
+               u = 1.0F - (s[ch] - (float) flr);
+            else
+               u = s[ch] - (float) flr;
+            if (u < min)
+               icoord[ch] = 0;
+            else if (u > max)
+               icoord[ch] = size - 1;
+            else
+               icoord[ch] = util_ifloor(u * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         /* s limited to [0,1] */
+         /* i limited to [0,size-1] */
+         const float u = fabsf(s[ch]);
+         if (u <= 0.0F)
+            icoord[ch] = 0;
+         else if (u >= 1.0F)
+            icoord[ch] = size - 1;
+         else
+            icoord[ch] = util_ifloor(u * size);
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+      {
+         /* s limited to [min,max] */
+         /* i limited to [0, size-1] */
+         const float min = 1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            const float u = fabsf(s[ch]);
+            if (u < min)
+               icoord[ch] = 0;
+            else if (u > max)
+               icoord[ch] = size - 1;
+            else
+               icoord[ch] = util_ifloor(u * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      {
+         /* s limited to [min,max] */
+         /* i limited to [0, size-1] */
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            const float u = fabsf(s[ch]);
+            if (u < min)
+               icoord[ch] = -1;
+            else if (u > max)
+               icoord[ch] = size;
+            else
+               icoord[ch] = util_ifloor(u * size);
+         }
+      }
+      return;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * Used to compute texel locations for linear sampling for four texcoords.
+ * \param wrapMode  PIPE_TEX_WRAP_x
+ * \param s  the texcoords
+ * \param size  the texture image size
+ * \param icoord0  returns first texture indexes
+ * \param icoord1  returns second texture indexes (usually icoord0 + 1)
+ * \param w  returns blend factor/weight between texture indexes
+ * \param icoord  returns the computed integer texture coords
+ */
+static INLINE void
+linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+                  int icoord0[4], int icoord1[4], float w[4])
+{
+   uint ch;
+
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      for (ch = 0; ch < 4; ch++) {
+         float u = s[ch] * size - 0.5F;
+         icoord0[ch] = REMAINDER(util_ifloor(u), size);
+         icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
+         w[ch] = FRAC(u);
+      }
+      break;;
+   case PIPE_TEX_WRAP_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         float u = CLAMP(s[ch], 0.0F, 1.0F);
+         u = u * size - 0.5f;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         w[ch] = FRAC(u);
+      }
+      break;;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      for (ch = 0; ch < 4; ch++) {
+         float u = CLAMP(s[ch], 0.0F, 1.0F);
+         u = u * size - 0.5f;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         if (icoord0[ch] < 0)
+            icoord0[ch] = 0;
+         if (icoord1[ch] >= (int) size)
+            icoord1[ch] = size - 1;
+         w[ch] = FRAC(u);
+      }
+      break;;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      {
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            float u = CLAMP(s[ch], min, max);
+            u = u * size - 0.5f;
+            icoord0[ch] = util_ifloor(u);
+            icoord1[ch] = icoord0[ch] + 1;
+            w[ch] = FRAC(u);
+         }
+      }
+      break;;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      for (ch = 0; ch < 4; ch++) {
+         const int flr = util_ifloor(s[ch]);
+         float u;
+         if (flr & 1)
+            u = 1.0F - (s[ch] - (float) flr);
+         else
+            u = s[ch] - (float) flr;
+         u = u * size - 0.5F;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         if (icoord0[ch] < 0)
+            icoord0[ch] = 0;
+         if (icoord1[ch] >= (int) size)
+            icoord1[ch] = size - 1;
+         w[ch] = FRAC(u);
+      }
+      break;;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         float u = fabsf(s[ch]);
+         if (u >= 1.0F)
+            u = (float) size;
+         else
+            u *= size;
+         u -= 0.5F;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         w[ch] = FRAC(u);
+      }
+      break;;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+      for (ch = 0; ch < 4; ch++) {
+         float u = fabsf(s[ch]);
+         if (u >= 1.0F)
+            u = (float) size;
+         else
+            u *= size;
+         u -= 0.5F;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         if (icoord0[ch] < 0)
+            icoord0[ch] = 0;
+         if (icoord1[ch] >= (int) size)
+            icoord1[ch] = size - 1;
+         w[ch] = FRAC(u);
+      }
+      break;;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      {
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            float u = fabsf(s[ch]);
+            if (u <= min)
+               u = min * size;
+            else if (u >= max)
+               u = max * size;
+            else
+               u *= size;
+            u -= 0.5F;
+            icoord0[ch] = util_ifloor(u);
+            icoord1[ch] = icoord0[ch] + 1;
+            w[ch] = FRAC(u);
+         }
+      }
+      break;;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * For RECT textures / unnormalized texcoords
+ * Only a subset of wrap modes supported.
+ */
+static INLINE void
+nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+                          int icoord[4])
+{
+   uint ch;
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         int i = util_ifloor(s[ch]);
+         icoord[ch]= CLAMP(i, 0, (int) size-1);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      /* fall-through */
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      for (ch = 0; ch < 4; ch++) {
+         icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
+      }
+      return;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * For RECT textures / unnormalized texcoords.
+ * Only a subset of wrap modes supported.
+ */
+static INLINE void
+linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+                         int icoord0[4], int icoord1[4], float w[4])
+{
+   uint ch;
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         /* Not exactly what the spec says, but it matches NVIDIA output */
+         float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         w[ch] = FRAC(u);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      /* fall-through */
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      for (ch = 0; ch < 4; ch++) {
+         float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
+         u -= 0.5F;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         if (icoord1[ch] > (int) size - 1)
+            icoord1[ch] = size - 1;
+         w[ch] = FRAC(u);
+      }
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+static unsigned
+choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+{
+   /*
+      major axis
+      direction     target                             sc     tc    ma
+      ----------    -------------------------------    ---    ---   ---
+       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
+       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
+       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
+       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
+       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
+       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
+   */
+   const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
+   unsigned face;
+   float sc, tc, ma;
+
+   if (arx > ary && arx > arz) {
+      if (rx >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_X;
+         sc = -rz;
+         tc = -ry;
+         ma = arx;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_X;
+         sc = rz;
+         tc = -ry;
+         ma = arx;
+      }
+   }
+   else if (ary > arx && ary > arz) {
+      if (ry >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_Y;
+         sc = rx;
+         tc = rz;
+         ma = ary;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Y;
+         sc = rx;
+         tc = -rz;
+         ma = ary;
+      }
+   }
+   else {
+      if (rz > 0.0F) {
+         face = PIPE_TEX_FACE_POS_Z;
+         sc = rx;
+         tc = -ry;
+         ma = arz;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Z;
+         sc = -rx;
+         tc = -ry;
+         ma = arz;
+      }
+   }
+
+   *newS = ( sc / ma + 1.0F ) * 0.5F;
+   *newT = ( tc / ma + 1.0F ) * 0.5F;
+
+   return face;
+}
+
+
+/**
+ * Examine the quad's texture coordinates to compute the partial
+ * derivatives w.r.t X and Y, then compute lambda (level of detail).
+ *
+ * This is only done for fragment shaders, not vertex shaders.
+ */
+static float
+compute_lambda(struct tgsi_sampler *tgsi_sampler,
+               const float s[QUAD_SIZE],
+               const float t[QUAD_SIZE],
+               const float p[QUAD_SIZE],
+               float lodbias)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+   float rho, lambda;
+
+   if (samp->processor == TGSI_PROCESSOR_VERTEX)
+      return lodbias;
+
+   assert(sampler->normalized_coords);
+
+   assert(s);
+   {
+      float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
+      float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
+      dsdx = fabsf(dsdx);
+      dsdy = fabsf(dsdy);
+      rho = MAX2(dsdx, dsdy) * texture->width[0];
+   }
+   if (t) {
+      float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
+      float dtdy = t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT];
+      float max;
+      dtdx = fabsf(dtdx);
+      dtdy = fabsf(dtdy);
+      max = MAX2(dtdx, dtdy) * texture->height[0];
+      rho = MAX2(rho, max);
+   }
+   if (p) {
+      float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
+      float dpdy = p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT];
+      float max;
+      dpdx = fabsf(dpdx);
+      dpdy = fabsf(dpdy);
+      max = MAX2(dpdx, dpdy) * texture->depth[0];
+      rho = MAX2(rho, max);
+   }
+
+   lambda = util_fast_log2(rho);
+   lambda += lodbias + sampler->lod_bias;
+   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
+
+   return lambda;
+}
+
+
+/**
+ * Do several things here:
+ * 1. Compute lambda from the texcoords, if needed
+ * 2. Determine if we're minifying or magnifying
+ * 3. If minifying, choose mipmap levels
+ * 4. Return image filter to use within mipmap images
+ * \param level0  Returns first mipmap level to sample from
+ * \param level1  Returns second mipmap level to sample from
+ * \param levelBlend  Returns blend factor between levels, in [0,1]
+ * \param imgFilter  Returns either the min or mag filter, depending on lambda
+ */
+static void
+choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler,
+                     const float s[QUAD_SIZE],
+                     const float t[QUAD_SIZE],
+                     const float p[QUAD_SIZE],
+                     float lodbias,
+                     unsigned *level0, unsigned *level1, float *levelBlend,
+                     unsigned *imgFilter)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+
+   if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+      /* no mipmap selection needed */
+      *level0 = *level1 = CLAMP((int) sampler->min_lod,
+                                0, (int) texture->last_level);
+
+      if (sampler->min_img_filter != sampler->mag_img_filter) {
+         /* non-mipmapped texture, but still need to determine if doing
+          * minification or magnification.
+          */
+         float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
+         if (lambda <= 0.0) {
+            *imgFilter = sampler->mag_img_filter;
+         }
+         else {
+            *imgFilter = sampler->min_img_filter;
+         }
+      }
+      else {
+         *imgFilter = sampler->mag_img_filter;
+      }
+   }
+   else {
+      float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
+
+      if (lambda <= 0.0) { /* XXX threshold depends on the filter */
+         /* magnifying */
+         *imgFilter = sampler->mag_img_filter;
+         *level0 = *level1 = 0;
+      }
+      else {
+         /* minifying */
+         *imgFilter = sampler->min_img_filter;
+
+         /* choose mipmap level(s) and compute the blend factor between them */
+         if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+            /* Nearest mipmap level */
+            const int lvl = (int) (lambda + 0.5);
+            *level0 =
+            *level1 = CLAMP(lvl, 0, (int) texture->last_level);
+         }
+         else {
+            /* Linear interpolation between mipmap levels */
+            const int lvl = (int) lambda;
+            *level0 = CLAMP(lvl,     0, (int) texture->last_level);
+            *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
+            *levelBlend = FRAC(lambda);  /* blending weight between levels */
+         }
+      }
+   }
+}
+
+
+/**
+ * Get a texel from a texture, using the texture tile cache.
+ *
+ * \param face  the cube face in 0..5
+ * \param level  the mipmap level
+ * \param x  the x coord of texel within 2D image
+ * \param y  the y coord of texel within 2D image
+ * \param z  which slice of a 3D texture
+ * \param rgba  the quad to put the texel/color into
+ * \param j  which element of the rgba quad to write to
+ *
+ * XXX maybe move this into lp_tile_cache.c and merge with the
+ * lp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
+ */
+static void
+get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
+                  unsigned face, unsigned level, int x, int y, 
+                  const uint8_t *out[4])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+
+   const struct llvmpipe_cached_tex_tile *tile
+      = lp_get_cached_tex_tile(samp->cache,
+                               tex_tile_address(x, y, 0, face, level));
+
+   y %= TEX_TILE_SIZE;
+   x %= TEX_TILE_SIZE;
+      
+   out[0] = &tile->color[y  ][x  ][0];
+   out[1] = &tile->color[y  ][x+1][0];
+   out[2] = &tile->color[y+1][x  ][0];
+   out[3] = &tile->color[y+1][x+1][0];
+}
+
+static INLINE const uint8_t *
+get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
+                 unsigned face, unsigned level, int x, int y)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+
+   const struct llvmpipe_cached_tex_tile *tile
+      = lp_get_cached_tex_tile(samp->cache,
+                               tex_tile_address(x, y, 0, face, level));
+
+   y %= TEX_TILE_SIZE;
+   x %= TEX_TILE_SIZE;
+
+   return &tile->color[y][x][0];
+}
+
+
+static void
+get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
+                     unsigned face, unsigned level, 
+                     int x0, int y0, 
+                     int x1, int y1,
+                     const uint8_t *out[4])
+{
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      unsigned tx = (i & 1) ? x1 : x0;
+      unsigned ty = (i >> 1) ? y1 : y0;
+
+      out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty );
+   }
+}
+
+static void
+get_texel(const struct tgsi_sampler *tgsi_sampler,
+                 unsigned face, unsigned level, int x, int y, int z,
+                 float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+
+   if (x < 0 || x >= (int) texture->width[level] ||
+       y < 0 || y >= (int) texture->height[level] ||
+       z < 0 || z >= (int) texture->depth[level]) {
+      rgba[0][j] = sampler->border_color[0];
+      rgba[1][j] = sampler->border_color[1];
+      rgba[2][j] = sampler->border_color[2];
+      rgba[3][j] = sampler->border_color[3];
+   }
+   else {
+      const unsigned tx = x % TEX_TILE_SIZE;
+      const unsigned ty = y % TEX_TILE_SIZE;
+      const struct llvmpipe_cached_tex_tile *tile;
+
+      tile = lp_get_cached_tex_tile(samp->cache,
+                                    tex_tile_address(x, y, z, face, level));
+
+      rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]);
+      rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]);
+      rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]);
+      rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]);
+      if (0)
+      {
+         debug_printf("Get texel %f %f %f %f from %s\n",
+                      rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
+                      pf_name(texture->format));
+      }
+   }
+}
+
+
+/**
+ * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
+ * When we sampled the depth texture, the depth value was put into all
+ * RGBA channels.  We look at the red channel here.
+ * \param rgba  quad of (depth) texel values
+ * \param p  texture 'P' components for four pixels in quad
+ * \param j  which pixel in the quad to test [0..3]
+ */
+static INLINE void
+shadow_compare(const struct pipe_sampler_state *sampler,
+               float rgba[NUM_CHANNELS][QUAD_SIZE],
+               const float p[QUAD_SIZE],
+               uint j)
+{
+   int k;
+   switch (sampler->compare_func) {
+   case PIPE_FUNC_LESS:
+      k = p[j] < rgba[0][j];
+      break;
+   case PIPE_FUNC_LEQUAL:
+      k = p[j] <= rgba[0][j];
+      break;
+   case PIPE_FUNC_GREATER:
+      k = p[j] > rgba[0][j];
+      break;
+   case PIPE_FUNC_GEQUAL:
+      k = p[j] >= rgba[0][j];
+      break;
+   case PIPE_FUNC_EQUAL:
+      k = p[j] == rgba[0][j];
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      k = p[j] != rgba[0][j];
+      break;
+   case PIPE_FUNC_ALWAYS:
+      k = 1;
+      break;
+   case PIPE_FUNC_NEVER:
+      k = 0;
+      break;
+   default:
+      k = 0;
+      assert(0);
+      break;
+   }
+
+   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+   rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
+   rgba[3][j] = 1.0F;
+}
+
+
+/**
+ * As above, but do four z/texture comparisons.
+ */
+static INLINE void
+shadow_compare4(const struct pipe_sampler_state *sampler,
+                float rgba[NUM_CHANNELS][QUAD_SIZE],
+                const float p[QUAD_SIZE])
+{
+   int j, k0, k1, k2, k3;
+   float val;
+
+   /* compare four texcoords vs. four texture samples */
+   switch (sampler->compare_func) {
+   case PIPE_FUNC_LESS:
+      k0 = p[0] < rgba[0][0];
+      k1 = p[1] < rgba[0][1];
+      k2 = p[2] < rgba[0][2];
+      k3 = p[3] < rgba[0][3];
+      break;
+   case PIPE_FUNC_LEQUAL:
+      k0 = p[0] <= rgba[0][0];
+      k1 = p[1] <= rgba[0][1];
+      k2 = p[2] <= rgba[0][2];
+      k3 = p[3] <= rgba[0][3];
+      break;
+   case PIPE_FUNC_GREATER:
+      k0 = p[0] > rgba[0][0];
+      k1 = p[1] > rgba[0][1];
+      k2 = p[2] > rgba[0][2];
+      k3 = p[3] > rgba[0][3];
+      break;
+   case PIPE_FUNC_GEQUAL:
+      k0 = p[0] >= rgba[0][0];
+      k1 = p[1] >= rgba[0][1];
+      k2 = p[2] >= rgba[0][2];
+      k3 = p[3] >= rgba[0][3];
+      break;
+   case PIPE_FUNC_EQUAL:
+      k0 = p[0] == rgba[0][0];
+      k1 = p[1] == rgba[0][1];
+      k2 = p[2] == rgba[0][2];
+      k3 = p[3] == rgba[0][3];
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      k0 = p[0] != rgba[0][0];
+      k1 = p[1] != rgba[0][1];
+      k2 = p[2] != rgba[0][2];
+      k3 = p[3] != rgba[0][3];
+      break;
+   case PIPE_FUNC_ALWAYS:
+      k0 = k1 = k2 = k3 = 1;
+      break;
+   case PIPE_FUNC_NEVER:
+      k0 = k1 = k2 = k3 = 0;
+      break;
+   default:
+      k0 = k1 = k2 = k3 = 0;
+      assert(0);
+      break;
+   }
+
+   /* convert four pass/fail values to an intensity in [0,1] */
+   val = 0.25F * (k0 + k1 + k2 + k3);
+
+   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+   for (j = 0; j < 4; j++) {
+      rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
+      rgba[3][j] = 1.0F;
+   }
+}
+
+
+
+static void
+lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+                                    const float s[QUAD_SIZE],
+                                    const float t[QUAD_SIZE],
+                                    const float p[QUAD_SIZE],
+                                    float lodbias,
+                                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   unsigned  j;
+   unsigned level = samp->level;
+   unsigned xpot = 1 << (samp->xpot - level);
+   unsigned ypot = 1 << (samp->ypot - level);
+   unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
+   unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
+      
+   for (j = 0; j < QUAD_SIZE; j++) {
+      int c;
+
+      float u = s[j] * xpot - 0.5F;
+      float v = t[j] * ypot - 0.5F;
+
+      int uflr = util_ifloor(u);
+      int vflr = util_ifloor(v);
+
+      float xw = u - (float)uflr;
+      float yw = v - (float)vflr;
+
+      int x0 = uflr & (xpot - 1);
+      int y0 = vflr & (ypot - 1);
+
+      const uint8_t *tx[4];
+      
+
+      /* Can we fetch all four at once:
+       */
+      if (x0 < xmax && y0 < ymax)
+      {
+         get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
+      }
+      else 
+      {
+         unsigned x1 = (x0 + 1) & (xpot - 1);
+         unsigned y1 = (y0 + 1) & (ypot - 1);
+         get_texel_quad_2d_mt(tgsi_sampler, 0, level, 
+                              x0, y0, x1, y1, tx);
+      }
+
+
+      /* interpolate R, G, B, A */
+      for (c = 0; c < 4; c++) {
+         rgba[c][j] = lerp_2d(xw, yw, 
+                              ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]),
+                              ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c]));
+      }
+   }
+}
+
+
+static void
+lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+                                     const float s[QUAD_SIZE],
+                                     const float t[QUAD_SIZE],
+                                     const float p[QUAD_SIZE],
+                                     float lodbias,
+                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   unsigned  j;
+   unsigned level = samp->level;
+   unsigned xpot = 1 << (samp->xpot - level);
+   unsigned ypot = 1 << (samp->ypot - level);
+
+   for (j = 0; j < QUAD_SIZE; j++) {
+      int c;
+
+      float u = s[j] * xpot;
+      float v = t[j] * ypot;
+
+      int uflr = util_ifloor(u);
+      int vflr = util_ifloor(v);
+
+      int x0 = uflr & (xpot - 1);
+      int y0 = vflr & (ypot - 1);
+
+      const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
+
+      for (c = 0; c < 4; c++) {
+         rgba[c][j] = ubyte_to_float(out[c]);
+      }
+   }
+}
+
+
+static void
+lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
+                                     const float s[QUAD_SIZE],
+                                     const float t[QUAD_SIZE],
+                                     const float p[QUAD_SIZE],
+                                     float lodbias,
+                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   unsigned  j;
+   unsigned level = samp->level;
+   unsigned xpot = 1 << (samp->xpot - level);
+   unsigned ypot = 1 << (samp->ypot - level);
+
+   for (j = 0; j < QUAD_SIZE; j++) {
+      int c;
+
+      float u = s[j] * xpot;
+      float v = t[j] * ypot;
+
+      int x0, y0;
+      const uint8_t *out;
+
+      x0 = util_ifloor(u);
+      if (x0 < 0) 
+         x0 = 0;
+      else if (x0 > xpot - 1)
+         x0 = xpot - 1;
+
+      y0 = util_ifloor(v);
+      if (y0 < 0) 
+         y0 = 0;
+      else if (y0 > ypot - 1)
+         y0 = ypot - 1;
+      
+      out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
+
+      for (c = 0; c < 4; c++) {
+         rgba[c][j] = ubyte_to_float(out[c]);
+      }
+   }
+}
+
+
+static void
+lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+                                               const float s[QUAD_SIZE],
+                                               const float t[QUAD_SIZE],
+                                               const float p[QUAD_SIZE],
+                                               float lodbias,
+                                               float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   int level0;
+   float lambda;
+
+   lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
+   level0 = (int)lambda;
+
+   if (lambda < 0.0) { 
+      samp->level = 0;
+      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
+                                           s, t, p, 0, rgba );
+   }
+   else if (level0 >= texture->last_level) {
+      samp->level = texture->last_level;
+      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
+                                           s, t, p, 0, rgba );
+   }
+   else {
+      float levelBlend = lambda - level0;
+      float rgba0[4][4];
+      float rgba1[4][4];
+      int c,j;
+
+      samp->level = level0;
+      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
+                                           s, t, p, 0, rgba0 );
+
+      samp->level = level0+1;
+      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
+                                           s, t, p, 0, rgba1 );
+
+      for (j = 0; j < QUAD_SIZE; j++) {
+         for (c = 0; c < 4; c++) {
+            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
+         }
+      }
+   }
+}
+
+/**
+ * Common code for sampling 1D/2D/cube textures.
+ * Could probably extend for 3D...
+ */
+static void
+lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
+                         const float s[QUAD_SIZE],
+                         const float t[QUAD_SIZE],
+                         const float p[QUAD_SIZE],
+                         float lodbias,
+                         float rgba[NUM_CHANNELS][QUAD_SIZE],
+                         const unsigned faces[4])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+   unsigned level0, level1, j, imgFilter;
+   int width, height;
+   float levelBlend;
+
+   choose_mipmap_levels(tgsi_sampler, s, t, p, 
+                        lodbias,
+                        &level0, &level1, &levelBlend, &imgFilter);
+
+   assert(sampler->normalized_coords);
+
+   width = texture->width[level0];
+   height = texture->height[level0];
+
+   assert(width > 0);
+
+   switch (imgFilter) {
+   case PIPE_TEX_FILTER_NEAREST:
+      {
+         int x[4], y[4];
+         nearest_texcoord_4(sampler->wrap_s, s, width, x);
+         nearest_texcoord_4(sampler->wrap_t, t, height, y);
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+               shadow_compare(sampler, rgba, p, j);
+            }
+
+            if (level0 != level1) {
+               /* get texels from second mipmap level and blend */
+               float rgba2[4][4];
+               unsigned c;
+               x[j] /= 2;
+               y[j] /= 2;
+               get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
+                         rgba2, j);
+               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+                  shadow_compare(sampler, rgba2, p, j);
+               }
+
+               for (c = 0; c < NUM_CHANNELS; c++) {
+                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+               }
+            }
+         }
+      }
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+   case PIPE_TEX_FILTER_ANISO:
+      {
+         int x0[4], y0[4], x1[4], y1[4];
+         float xw[4], yw[4]; /* weights */
+
+         linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
+         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            float tx[4][4]; /* texels */
+            int c;
+            get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
+            get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
+            get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
+            get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+               shadow_compare4(sampler, tx, p);
+            }
+
+            /* interpolate R, G, B, A */
+            for (c = 0; c < 4; c++) {
+               rgba[c][j] = lerp_2d(xw[j], yw[j],
+                                    tx[c][0], tx[c][1],
+                                    tx[c][2], tx[c][3]);
+            }
+
+            if (level0 != level1) {
+               /* get texels from second mipmap level and blend */
+               float rgba2[4][4];
+
+               /* XXX: This is incorrect -- will often end up with (x0
+                *  == x1 && y0 == y1), meaning that we fetch the same
+                *  texel four times and linearly interpolate between
+                *  identical values.  The correct approach would be to
+                *  call linear_texcoord again for the second level.
+                */
+               x0[j] /= 2;
+               y0[j] /= 2;
+               x1[j] /= 2;
+               y1[j] /= 2;
+               get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
+               get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
+               get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
+               get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
+               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+                  shadow_compare4(sampler, tx, p);
+               }
+
+               /* interpolate R, G, B, A */
+               for (c = 0; c < 4; c++) {
+                  rgba2[c][j] = lerp_2d(xw[j], yw[j],
+                                        tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+               }
+
+               for (c = 0; c < NUM_CHANNELS; c++) {
+                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+               }
+            }
+         }
+      }
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+static INLINE void
+lp_get_samples_1d(struct tgsi_sampler *sampler,
+                  const float s[QUAD_SIZE],
+                  const float t[QUAD_SIZE],
+                  const float p[QUAD_SIZE],
+                  float lodbias,
+                  float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   static const unsigned faces[4] = {0, 0, 0, 0};
+   static const float tzero[4] = {0, 0, 0, 0};
+   lp_get_samples_2d_common(sampler, s, tzero, NULL,
+                            lodbias, rgba, faces);
+}
+
+
+static INLINE void
+lp_get_samples_2d(struct tgsi_sampler *sampler,
+                  const float s[QUAD_SIZE],
+                  const float t[QUAD_SIZE],
+                  const float p[QUAD_SIZE],
+                  float lodbias,
+                  float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   static const unsigned faces[4] = {0, 0, 0, 0};
+   lp_get_samples_2d_common(sampler, s, t, p,
+                            lodbias, rgba, faces);
+}
+
+
+static INLINE void
+lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
+                  const float s[QUAD_SIZE],
+                  const float t[QUAD_SIZE],
+                  const float p[QUAD_SIZE],
+                  float lodbias,
+                  float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+   /* get/map pipe_surfaces corresponding to 3D tex slices */
+   unsigned level0, level1, j, imgFilter;
+   int width, height, depth;
+   float levelBlend;
+   const uint face = 0;
+
+   choose_mipmap_levels(tgsi_sampler, s, t, p, 
+                        lodbias,
+                        &level0, &level1, &levelBlend, &imgFilter);
+
+   assert(sampler->normalized_coords);
+
+   width = texture->width[level0];
+   height = texture->height[level0];
+   depth = texture->depth[level0];
+
+   assert(width > 0);
+   assert(height > 0);
+   assert(depth > 0);
+
+   switch (imgFilter) {
+   case PIPE_TEX_FILTER_NEAREST:
+      {
+         int x[4], y[4], z[4];
+         nearest_texcoord_4(sampler->wrap_s, s, width, x);
+         nearest_texcoord_4(sampler->wrap_t, t, height, y);
+         nearest_texcoord_4(sampler->wrap_r, p, depth, z);
+         for (j = 0; j < QUAD_SIZE; j++) {
+            get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
+            if (level0 != level1) {
+               /* get texels from second mipmap level and blend */
+               float rgba2[4][4];
+               unsigned c;
+               x[j] /= 2;
+               y[j] /= 2;
+               z[j] /= 2;
+               get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
+               for (c = 0; c < NUM_CHANNELS; c++) {
+                  rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
+               }
+            }
+         }
+      }
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+   case PIPE_TEX_FILTER_ANISO:
+      {
+         int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
+         float xw[4], yw[4], zw[4]; /* interpolation weights */
+         linear_texcoord_4(sampler->wrap_s, s, width,  x0, x1, xw);
+         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+         linear_texcoord_4(sampler->wrap_r, p, depth,  z0, z1, zw);
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int c;
+            float tx0[4][4], tx1[4][4];
+            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
+            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
+            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
+            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
+            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
+            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
+            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
+            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
+
+            /* interpolate R, G, B, A */
+            for (c = 0; c < 4; c++) {
+               rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+                                    tx0[c][0], tx0[c][1],
+                                    tx0[c][2], tx0[c][3],
+                                    tx1[c][0], tx1[c][1],
+                                    tx1[c][2], tx1[c][3]);
+            }
+
+            if (level0 != level1) {
+               /* get texels from second mipmap level and blend */
+               float rgba2[4][4];
+               x0[j] /= 2;
+               y0[j] /= 2;
+               z0[j] /= 2;
+               x1[j] /= 2;
+               y1[j] /= 2;
+               z1[j] /= 2;
+               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
+               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
+               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
+               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
+               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
+               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
+               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
+               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
+
+               /* interpolate R, G, B, A */
+               for (c = 0; c < 4; c++) {
+                  rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+                                        tx0[c][0], tx0[c][1],
+                                        tx0[c][2], tx0[c][3],
+                                        tx1[c][0], tx1[c][1],
+                                        tx1[c][2], tx1[c][3]);
+               }
+
+               /* blend mipmap levels */
+               for (c = 0; c < NUM_CHANNELS; c++) {
+                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+               }
+            }
+         }
+      }
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+static void
+lp_get_samples_cube(struct tgsi_sampler *sampler,
+                    const float s[QUAD_SIZE],
+                    const float t[QUAD_SIZE],
+                    const float p[QUAD_SIZE],
+                    float lodbias,
+                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   unsigned faces[QUAD_SIZE], j;
+   float ssss[4], tttt[4];
+   for (j = 0; j < QUAD_SIZE; j++) {
+      faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
+   }
+   lp_get_samples_2d_common(sampler, ssss, tttt, NULL,
+                            lodbias, rgba, faces);
+}
+
+
+static void
+lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
+                    const float s[QUAD_SIZE],
+                    const float t[QUAD_SIZE],
+                    const float p[QUAD_SIZE],
+                    float lodbias,
+                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+   const uint face = 0;
+   unsigned level0, level1, j, imgFilter;
+   int width, height;
+   float levelBlend;
+
+   choose_mipmap_levels(tgsi_sampler, s, t, p, 
+                        lodbias,
+                        &level0, &level1, &levelBlend, &imgFilter);
+
+   /* texture RECTS cannot be mipmapped */
+   assert(level0 == level1);
+
+   width = texture->width[level0];
+   height = texture->height[level0];
+
+   assert(width > 0);
+
+   switch (imgFilter) {
+   case PIPE_TEX_FILTER_NEAREST:
+      {
+         int x[4], y[4];
+         nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
+         nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
+         for (j = 0; j < QUAD_SIZE; j++) {
+            get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+               shadow_compare(sampler, rgba, p, j);
+            }
+         }
+      }
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+   case PIPE_TEX_FILTER_ANISO:
+      {
+         int x0[4], y0[4], x1[4], y1[4];
+         float xw[4], yw[4]; /* weights */
+         linear_texcoord_unnorm_4(sampler->wrap_s, s, width,  x0, x1, xw);
+         linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
+         for (j = 0; j < QUAD_SIZE; j++) {
+            float tx[4][4]; /* texels */
+            int c;
+            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
+            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
+            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
+            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+               shadow_compare4(sampler, tx, p);
+            }
+            for (c = 0; c < 4; c++) {
+               rgba[c][j] = lerp_2d(xw[j], yw[j],
+                                    tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+            }
+         }
+      }
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * Error condition handler
+ */
+static INLINE void
+lp_get_samples_null(struct tgsi_sampler *tgsi_sampler,
+                    const float s[QUAD_SIZE],
+                    const float t[QUAD_SIZE],
+                    const float p[QUAD_SIZE],
+                    float lodbias,
+                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   int i,j;
+
+   for (i = 0; i < 4; i++)
+      for (j = 0; j < 4; j++)
+         rgba[i][j] = 1.0;
+}
+
+/**
+ * Called via tgsi_sampler::get_samples() when using a sampler for the
+ * first time.  Determine the actual sampler function, link it in and
+ * call it.
+ */
+void
+lp_get_samples(struct tgsi_sampler *tgsi_sampler,
+               const float s[QUAD_SIZE],
+               const float t[QUAD_SIZE],
+               const float p[QUAD_SIZE],
+               float lodbias,
+               float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+
+   /* Default to the 'undefined' case:
+    */
+   tgsi_sampler->get_samples = lp_get_samples_null;
+
+   if (!texture) {
+      assert(0);                /* is this legal?? */
+      goto out;
+   }
+
+   if (!sampler->normalized_coords) {
+      assert (texture->target == PIPE_TEXTURE_2D);
+      tgsi_sampler->get_samples = lp_get_samples_rect;
+      goto out;
+   }
+
+   switch (texture->target) {
+   case PIPE_TEXTURE_1D:
+      tgsi_sampler->get_samples = lp_get_samples_1d;
+      break;
+   case PIPE_TEXTURE_2D:
+      tgsi_sampler->get_samples = lp_get_samples_2d;
+      break;
+   case PIPE_TEXTURE_3D:
+      tgsi_sampler->get_samples = lp_get_samples_3d;
+      break;
+   case PIPE_TEXTURE_CUBE:
+      tgsi_sampler->get_samples = lp_get_samples_cube;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* Do this elsewhere: 
+    */
+   samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
+   samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
+
+   /* Try to hook in a faster sampler.  Ultimately we'll have to
+    * code-generate these.  Luckily most of this looks like it is
+    * orthogonal state within the sampler.
+    */
+   if (texture->target == PIPE_TEXTURE_2D &&
+       sampler->min_img_filter == sampler->mag_img_filter &&
+       sampler->wrap_s == sampler->wrap_t &&
+       sampler->compare_mode == FALSE &&
+       sampler->normalized_coords) 
+   {
+      if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+         samp->level = CLAMP((int) sampler->min_lod,
+                             0, (int) texture->last_level);
+
+         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
+            switch (sampler->min_img_filter) {
+            case PIPE_TEX_FILTER_NEAREST:
+               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT;
+               break;
+            case PIPE_TEX_FILTER_LINEAR:
+               tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT;
+               break;
+            default:
+               break;
+            }
+         } 
+         else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) {
+            switch (sampler->min_img_filter) {
+            case PIPE_TEX_FILTER_NEAREST:
+               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT;
+               break;
+            default:
+               break;
+            }
+         }
+      }
+      else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
+            switch (sampler->min_img_filter) {
+            case PIPE_TEX_FILTER_LINEAR:
+               tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT;
+               break;
+            default:
+               break;
+            }
+         } 
+      }
+   }
+   else if (0) {
+      _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n",
+                    texture->target, PIPE_TEXTURE_2D,
+                    sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
+                    sampler->min_img_filter, sampler->mag_img_filter,
+                    sampler->wrap_s, sampler->wrap_t,
+                    sampler->compare_mode, FALSE,
+                    sampler->normalized_coords, TRUE);
+   }
+
+out:
+   tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
+}
+
+
+void PIPE_CDECL
+lp_fetch_texel_soa( struct tgsi_sampler **samplers,
+                    uint32_t unit,
+                    float *store )
+{
+   struct tgsi_sampler *sampler = samplers[unit];
+
+#if 0
+   uint j;
+
+   debug_printf("%s sampler: %p (%p) store: %p\n",
+                __FUNCTION__,
+                sampler, *sampler,
+                store );
+
+   debug_printf("lodbias %f\n", store[12]);
+
+   for (j = 0; j < 4; j++)
+      debug_printf("sample %d texcoord %f %f\n",
+                   j,
+                   store[0+j],
+                   store[4+j]);
+#endif
+
+   {
+      float rgba[NUM_CHANNELS][QUAD_SIZE];
+      sampler->get_samples(sampler,
+                           &store[0],
+                           &store[4],
+                           &store[8],
+                           0.0f, /*store[12],  lodbias */
+                           rgba);
+      memcpy(store, rgba, sizeof rgba);
+   }
+
+#if 0
+   for (j = 0; j < 4; j++)
+      debug_printf("sample %d result %f %f %f %f\n",
+                   j,
+                   store[0+j],
+                   store[4+j],
+                   store[8+j],
+                   store[12+j]);
+#endif
+}
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_tgsi.h"
+
+
+struct lp_c_sampler_soa
+{
+   struct lp_build_sampler_soa base;
+
+   LLVMValueRef context_ptr;
+
+   LLVMValueRef samplers_ptr;
+
+   /** Coords/texels store */
+   LLVMValueRef store_ptr;
+};
+
+
+static void
+lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+   FREE(sampler);
+}
+
+
+static void
+lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
+                                  LLVMBuilderRef builder,
+                                  union lp_type type,
+                                  unsigned unit,
+                                  unsigned num_coords,
+                                  const LLVMValueRef *coords,
+                                  LLVMValueRef lodbias,
+                                  LLVMValueRef *texel)
+{
+   struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
+   LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
+   LLVMValueRef args[3];
+   unsigned i;
+
+   if(!sampler->samplers_ptr)
+      sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
+
+   if(!sampler->store_ptr)
+      sampler->store_ptr = LLVMBuildArrayAlloca(builder,
+                                            vec_type,
+                                            LLVMConstInt(LLVMInt32Type(), 4, 0),
+                                            "texel_store");
+
+   for (i = 0; i < num_coords; i++) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+      LLVMBuildStore(builder, coords[i], coord_ptr);
+   }
+
+   args[0] = sampler->samplers_ptr;
+   args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+   args[2] = sampler->store_ptr;
+
+   lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
+
+   for (i = 0; i < NUM_CHANNELS; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+      texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
+   }
+}
+
+
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr)
+{
+   struct lp_c_sampler_soa *sampler;
+
+   sampler = CALLOC_STRUCT(lp_c_sampler_soa);
+   if(!sampler)
+      return NULL;
+
+   sampler->base.destroy = lp_c_sampler_soa_destroy;
+   sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
+   sampler->context_ptr = context_ptr;
+
+   return &sampler->base;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
new file mode 100644
index 0000000000..7d31705d01
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
@@ -0,0 +1,196 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ *
+ * This file is nothing more than ugly glue between three largely independent
+ * entities:
+ * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
+ * - texture sampling code generation (i.e., lp_build_sample_soa)
+ * - LLVM pipe driver
+ *
+ * All interesting code is in the functions mentioned above. There is really
+ * nothing to see here.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_sample.h"
+#include "lp_bld_tgsi.h"
+#include "lp_state.h"
+#include "lp_tex_sample.h"
+
+
+/**
+ * This provides the bridge between the sampler state store in lp_jit_context
+ * and lp_jit_texture and the sampler code generator. It provides the
+ * texture layout information required by the texture sampler code generator
+ * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime.
+ */
+struct llvmpipe_sampler_dynamic_state
+{
+   struct lp_sampler_dynamic_state base;
+
+   const struct lp_sampler_static_state *static_state;
+
+   LLVMValueRef context_ptr;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ */
+struct lp_llvm_sampler_soa
+{
+   struct lp_build_sampler_soa base;
+
+   struct llvmpipe_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+lp_llvm_texture_member(struct lp_sampler_dynamic_state *base,
+                       LLVMBuilderRef builder,
+                       unsigned unit,
+                       unsigned member_index,
+                       const char *member_name)
+{
+   struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base;
+   LLVMValueRef indices[4];
+   LLVMValueRef ptr;
+   LLVMValueRef res;
+
+   assert(unit < PIPE_MAX_SAMPLERS);
+
+   /* context[0] */
+   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   /* context[0].textures */
+   indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0);
+   /* context[0].textures[unit] */
+   indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+   /* context[0].textures[unit].member */
+   indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+
+   ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), "");
+
+   res = LLVMBuildLoad(builder, ptr, "");
+
+   lp_build_name(res, "context.texture%u.%s", unit, member_name);
+
+   return res;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to fetch
+ * the members of lp_jit_texture to fulfill the sampler code generator requests.
+ *
+ * This complexity is the price we have to pay to keep the texture sampler code
+ * generator a reusable module without dependencies to llvmpipe internals.
+ */
+#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \
+   static LLVMValueRef \
+   lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \
+                            LLVMBuilderRef builder, \
+                            unsigned unit) \
+   { \
+      return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \
+   }
+
+
+LP_LLVM_TEXTURE_MEMBER(width,    LP_JIT_TEXTURE_WIDTH)
+LP_LLVM_TEXTURE_MEMBER(height,   LP_JIT_TEXTURE_HEIGHT)
+LP_LLVM_TEXTURE_MEMBER(stride,   LP_JIT_TEXTURE_STRIDE)
+LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA)
+
+
+static void
+lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+   FREE(sampler);
+}
+
+
+static void
+lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base,
+                                     LLVMBuilderRef builder,
+                                     union lp_type type,
+                                     unsigned unit,
+                                     unsigned num_coords,
+                                     const LLVMValueRef *coords,
+                                     LLVMValueRef lodbias,
+                                     LLVMValueRef *texel)
+{
+   struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
+
+   assert(unit < PIPE_MAX_SAMPLERS);
+
+   lp_build_sample_soa(builder,
+                       &sampler->dynamic_state.static_state[unit],
+                       &sampler->dynamic_state.base,
+                       type,
+                       unit,
+                       num_coords,
+                       coords,
+                       lodbias,
+                       texel);
+}
+
+
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+                           LLVMValueRef context_ptr)
+{
+   struct lp_llvm_sampler_soa *sampler;
+
+   sampler = CALLOC_STRUCT(lp_llvm_sampler_soa);
+   if(!sampler)
+      return NULL;
+
+   sampler->base.destroy = lp_llvm_sampler_soa_destroy;
+   sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel;
+   sampler->dynamic_state.base.width = lp_llvm_texture_width;
+   sampler->dynamic_state.base.height = lp_llvm_texture_height;
+   sampler->dynamic_state.base.stride = lp_llvm_texture_stride;
+   sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr;
+   sampler->dynamic_state.static_state = static_state;
+   sampler->dynamic_state.context_ptr = context_ptr;
+
+   return &sampler->base;
+}
+
-- 
cgit v1.2.3


From 11272010887ce26b0f4162dd311376798c1d3fc3 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:53:15 +0100
Subject: llvmpipe: Better abs for floating points.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index a1d8a89774..be7442d00a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -591,20 +591,31 @@ lp_build_abs(struct lp_build_context *bld,
              LLVMValueRef a)
 {
    const union lp_type type = bld->type;
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
 
    if(!type.sign)
       return a;
 
-   /* XXX: is this really necessary? */
+   if(type.floating) {
+      /* Mask out the sign bit */
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
+      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+      a = LLVMBuildAnd(bld->builder, a, mask, "");
+      a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+      return a;
+   }
+
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-   if(!type.floating && type.width*type.length == 128) {
-      LLVMTypeRef vec_type = lp_build_vec_type(type);
-      if(type.width == 8)
+   if(type.width*type.length == 128) {
+      switch(type.width) {
+      case 8:
          return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
-      if(type.width == 16)
+      case 16:
          return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
-      if(type.width == 32)
+      case 32:
          return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
+      }
    }
 #endif
 
-- 
cgit v1.2.3


From b481fb2c6d8a8def0956acb0bf9083f5441edd07 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:53:26 +0100
Subject: llvmpipe: Silent debug statement.

---
 src/gallium/drivers/llvmpipe/lp_tex_cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index 23a94b5b0d..773e848242 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc)
       if (lpt->timestamp != tc->timestamp) {
          /* texture was modified, invalidate all cached tiles */
          uint i;
-         _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
+         debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
          for (i = 0; i < NUM_ENTRIES; i++) {
             tc->entries[i].addr.bits.invalid = 1;
          }
-- 
cgit v1.2.3


From 79f48c9f9e739a1f6b0810072e41bc826f2b789d Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Mon, 7 Sep 2009 15:16:25 +0100
Subject: scons: Don't set LLVM_VERSION if one of the llvm-config calls fails.

Ubuntu 8.10 has llvm-config version 2.2, which doesn't have
nativecodegen. This triggers an exception.
---
 scons/llvm.py                           | 18 ++++++++++--------
 src/gallium/drivers/llvmpipe/SConscript |  2 +-
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/scons/llvm.py b/scons/llvm.py
index 702f1e354f..46a8d829ca 100644
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -56,15 +56,17 @@ def generate(env):
         env.PrependENVPath('PATH', llvm_bin_dir)
 
     if env.Detect('llvm-config'):
-        try:
-            env['LLVM_VERSION'] = env.backtick('llvm-config --version')
-        except AttributeError:
-            env['LLVM_VERSION'] = 'X.X'
+        version = env.backtick('llvm-config --version').rstrip()
 
-        env.ParseConfig('llvm-config --cppflags')
-        env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
-        env.ParseConfig('llvm-config --ldflags')
-        env['LINK'] = env['CXX']
+        try:
+            env.ParseConfig('llvm-config --cppflags')
+            env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
+            env.ParseConfig('llvm-config --ldflags')
+        except OSError:
+            print 'llvm-config version %s failed' % version
+        else:
+            env['LINK'] = env['CXX']
+            env['LLVM_VERSION'] = version
 
 def exists(env):
     return True
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index ac1b5d6d1d..dea4b703c4 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -3,7 +3,7 @@ Import('*')
 env = env.Clone()
 
 env.Tool('llvm')
-if 'LLVM_VERSION' not in env:
+if env.has_key('LLVM_VERSION') is False:
     print 'warning: LLVM not found: not building llvmpipe'
     Return()
 
-- 
cgit v1.2.3


From 01c831576eb77ec87bff667ed5591a93cbbef97d Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 9 Sep 2009 19:21:22 +0100
Subject: llvmpipe: Debug function to check stack alignment.

Doing alignment check in locus is redundant, as gcc alignment assumptions
will optimize away the check.
---
 src/gallium/drivers/llvmpipe/lp_bld_debug.c | 17 +++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_debug.h |  4 ++++
 src/gallium/drivers/llvmpipe/lp_setup.c     | 11 +++++++----
 3 files changed, 28 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
index 30925b5f41..59d8f492e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -30,10 +30,27 @@
 #include <udis86.h>
 #endif
 
+#include "util/u_math.h"
 #include "util/u_debug.h"
 #include "lp_bld_debug.h"
 
 
+/**
+ * Check alignment.
+ *
+ * It is important that this check is not implemented as a macro or inlined
+ * function, as the compiler assumptions in respect to alignment of global
+ * and stack variables would often make the check a no op, defeating the
+ * whole purpose of the exercise.
+ */
+boolean
+lp_check_alignment(const void *ptr, unsigned alignment)
+{
+   assert(util_is_pot(alignment));
+   return ((uintptr_t)ptr & (alignment - 1)) == 0;
+}
+
+
 void
 lp_disassemble(const void* func)
 {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
index ecdafef76d..583e6132b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
@@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...)
 }
 
 
+boolean
+lp_check_alignment(const void *ptr, unsigned alignment);
+
+
 void
 lp_disassemble(const void* func);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index d145f6d6bb..f9e254efca 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -44,6 +44,7 @@
 #include "pipe/p_thread.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "lp_bld_debug.h"
 #include "lp_tile_cache.h"
 #include "lp_tile_soa.h"
 
@@ -164,10 +165,12 @@ shade_quads(struct llvmpipe_context *llvmpipe,
 
    /* TODO: blend color */
 
-   assert((((uintptr_t)mask) & 0xf) == 0);
-   assert((((uintptr_t)depth) & 0xf) == 0);
-   assert((((uintptr_t)color) & 0xf) == 0);
-   assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0);
+   /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
+   assert(lp_check_alignment(mask, 16));
+
+   assert(lp_check_alignment(depth, 16));
+   assert(lp_check_alignment(color, 16));
+   assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16));
 
    /* run shader */
    fs->current->jit_function( &llvmpipe->jit_context,
-- 
cgit v1.2.3


From cdbbcdf3bdb114d79cf7b9474436c3d26b135592 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 9 Sep 2009 21:16:06 +0100
Subject: llvmpipe: Include zsbuf's format in the fragment shader key.

---
 src/gallium/drivers/llvmpipe/lp_state.h    |  1 +
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 30 ++++++++++++++++--------------
 2 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 0b846ecb13..7b26ce61a3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -66,6 +66,7 @@ struct lp_fragment_shader;
 
 struct lp_fragment_shader_variant_key
 {
+   enum pipe_format zsbuf_format;
    struct pipe_depth_state depth;
    struct pipe_alpha_state alpha;
    struct pipe_blend_state blend;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 1a3e168245..21e675c34e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -131,9 +131,8 @@ generate_pos0(LLVMBuilderRef builder,
  * Generate the depth test.
  */
 static void
-generate_depth(struct llvmpipe_context *lp,
-               LLVMBuilderRef builder,
-               const struct pipe_depth_state *state,
+generate_depth(LLVMBuilderRef builder,
+               const struct lp_fragment_shader_variant_key *key,
                union lp_type src_type,
                struct lp_build_mask_context *mask,
                LLVMValueRef src,
@@ -142,10 +141,10 @@ generate_depth(struct llvmpipe_context *lp,
    const struct util_format_description *format_desc;
    union lp_type dst_type;
 
-   if(!lp->framebuffer.zsbuf)
+   if(!key->depth.enabled)
       return;
 
-   format_desc = util_format_description(lp->framebuffer.zsbuf->format);
+   format_desc = util_format_description(key->zsbuf_format);
    assert(format_desc);
 
    /* Pick the depth type. */
@@ -165,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp,
 #endif
 
    lp_build_depth_test(builder,
-                       state,
+                       &key->depth,
                        dst_type,
                        format_desc,
                        mask,
@@ -212,14 +211,13 @@ generate_fs(struct llvmpipe_context *lp,
    lp_build_mask_begin(&mask, builder, type, *pmask);
 
    early_depth_test =
-      lp->depth_stencil->depth.enabled &&
-      lp->framebuffer.zsbuf &&
-      !lp->depth_stencil->alpha.enabled &&
-      !lp->fs->info.uses_kill &&
-      !lp->fs->info.writes_z;
+      key->depth.enabled &&
+      !key->alpha.enabled &&
+      !shader->info.uses_kill &&
+      !shader->info.writes_z;
 
    if(early_depth_test)
-      generate_depth(lp, builder, &key->depth,
+      generate_depth(builder, key,
                      type, &mask,
                      z, depth_ptr);
 
@@ -268,7 +266,7 @@ generate_fs(struct llvmpipe_context *lp,
    }
 
    if(!early_depth_test)
-      generate_depth(lp, builder, &key->depth,
+      generate_depth(builder, key,
                      type, &mask,
                      z, depth_ptr);
 
@@ -679,7 +677,11 @@ make_variant_key(struct llvmpipe_context *lp,
 
    memset(key, 0, sizeof *key);
 
-   memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+   if(lp->framebuffer.zsbuf &&
+      lp->depth_stencil->depth.enabled) {
+      key->zsbuf_format = lp->framebuffer.zsbuf->format;
+      memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+   }
 
    key->alpha.enabled = lp->depth_stencil->alpha.enabled;
    if(key->alpha.enabled)
-- 
cgit v1.2.3


From abc160b664c3fbd4c18a2cd3402c9a84f5f2d00f Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 9 Sep 2009 21:17:20 +0100
Subject: llvmpipe: Fix depth mask computation.

Fixes depth test for 24bit depth formats.
---
 src/gallium/drivers/llvmpipe/lp_bld_depth.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 2cd6e6b921..3f88a14b5d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder,
       padding_right = 0;
       for(chan = 0; chan < z_swizzle; ++chan)
          padding_right += format_desc->channel[chan].size;
-      padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size;
+      padding_left = format_desc->block.bits -
+                     (padding_right + format_desc->channel[z_swizzle].size);
 
       if(padding_left || padding_right) {
-         const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1;
-         const long long mask_right = ((long long)1 << (padding_right)) - 1;
-         z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right);
+         const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1;
+         const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1;
+         z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right);
       }
 
       if(padding_left)
-- 
cgit v1.2.3


From 4139bc8f4375c1f8961b281b6303bccaab24367a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 9 Sep 2009 21:46:18 +0100
Subject: llvmpipe: Quick hack for 1D textures.

---
 src/gallium/drivers/llvmpipe/lp_bld_sample.h     | 1 +
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
index 5798f191fe..6f565af76d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -53,6 +53,7 @@ struct lp_sampler_static_state
 {
    /* pipe_texture's state */
    enum pipe_format format;
+   unsigned target:2;
    unsigned pot_width:1;
    unsigned pot_height:1;
    unsigned pot_depth:1;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 25c8d84501..bcc9e41c50 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -322,6 +322,9 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    height = lp_build_broadcast_scalar(&bld.int_coord_bld, height);
    stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride);
 
+   if(static_state->target == PIPE_TEXTURE_1D)
+      t = bld.coord_bld.zero;
+
    if(static_state->normalized_coords) {
       LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type);
       LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, "");
-- 
cgit v1.2.3


From bd3b59da632d85a062accab267e18b66274b857a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 10 Sep 2009 09:19:51 +0100
Subject: llvmpipe: Copy the texture target into the sampler static state.

Hunk forgotten in previous commit.
---
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index bcc9e41c50..6ebf107cc2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -61,6 +61,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
       return;
 
    state->format            = texture->format;
+   state->target            = texture->target;
    state->pot_width         = util_is_pot(texture->width[0]);
    state->pot_height        = util_is_pot(texture->height[0]);
    state->pot_depth         = util_is_pot(texture->depth[0]);
-- 
cgit v1.2.3


From 8e6b925d2a963a2d5a403e106d7d25e3dcca0775 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 10 Sep 2009 11:44:03 +0100
Subject: llvmpipe: Proper control flow builders.

New control flow helper functions which keep track of all variables
and generate the correct Phi functions.

This re-enables skipping the fs execution of quads masked out by
the rasterizer, early z testing, and kill opcode.

This yields a performance improvement of around 20%.
---
 src/gallium/drivers/llvmpipe/lp_bld_flow.c | 401 +++++++++++++++++++++++++----
 src/gallium/drivers/llvmpipe/lp_bld_flow.h |  38 ++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c |  47 +++-
 3 files changed, 426 insertions(+), 60 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index 9d99e1a9d9..1252593226 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -32,59 +32,261 @@
  */
 
 #include "util/u_debug.h"
+#include "util/u_memory.h"
 
 #include "lp_bld_type.h"
 #include "lp_bld_flow.h"
 
 
+#define LP_BUILD_FLOW_MAX_VARIABLES 32
+#define LP_BUILD_FLOW_MAX_DEPTH 32
+
+
+/**
+ * Enumeration of all possible flow constructs.
+ */
+enum lp_build_flow_construct_kind {
+   lP_BUILD_FLOW_SCOPE,
+   LP_BUILD_FLOW_SKIP,
+};
+
+
+/**
+ * Variable declaration scope.
+ */
+struct lp_build_flow_scope
+{
+   /** Number of variables declared in this scope */
+   unsigned num_variables;
+};
+
+
+/**
+ * Early exit. Useful to skip to the end of a function or block when
+ * the execution mask becomes zero or when there is an error condition.
+ */
+struct lp_build_flow_skip
+{
+   /** Block to skip to */
+   LLVMBasicBlockRef block;
+
+   /** Number of variables declared at the beginning */
+   unsigned num_variables;
+
+   LLVMValueRef *phi;
+};
+
+
+/**
+ * Union of all possible flow constructs' data
+ */
+union lp_build_flow_construct_data
+{
+   struct lp_build_flow_scope scope;
+   struct lp_build_flow_skip skip;
+};
+
+
+/**
+ * Element of the flow construct stack.
+ */
+struct lp_build_flow_construct
+{
+   enum lp_build_flow_construct_kind kind;
+   union lp_build_flow_construct_data data;
+};
+
+
+/**
+ * All necessary data to generate LLVM control flow constructs.
+ *
+ * Besides keeping track of the control flow construct themselves we also
+ * need to keep track of variables in order to generate SSA Phi values.
+ */
+struct lp_build_flow_context
+{
+   LLVMBuilderRef builder;
+
+   /**
+    * Control flow stack.
+    */
+   struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
+   unsigned num_constructs;
+
+   /**
+    * Variable stack
+    */
+   LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
+   unsigned num_variables;
+};
+
+
+struct lp_build_flow_context *
+lp_build_flow_create(LLVMBuilderRef builder)
+{
+   struct lp_build_flow_context *flow;
+
+   flow = CALLOC_STRUCT(lp_build_flow_context);
+   if(!flow)
+      return NULL;
+
+   flow->builder = builder;
+
+   return flow;
+}
+
+
 void
-lp_build_mask_begin(struct lp_build_mask_context *mask,
-                    LLVMBuilderRef builder,
-                    union lp_type type,
-                    LLVMValueRef value)
+lp_build_flow_destroy(struct lp_build_flow_context *flow)
 {
-   memset(mask, 0, sizeof *mask);
+   assert(flow->num_constructs == 0);
+   assert(flow->num_variables == 0);
+   FREE(flow);
+}
 
-   mask->builder = builder;
-   mask->reg_type = LLVMIntType(type.width * type.length);
-   mask->value = value;
+
+static union lp_build_flow_construct_data *
+lp_build_flow_push(struct lp_build_flow_context *flow,
+                   enum lp_build_flow_construct_kind kind)
+{
+   assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
+   if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
+      return NULL;
+
+   flow->constructs[flow->num_constructs].kind = kind;
+   return &flow->constructs[flow->num_constructs++].data;
+}
+
+
+static union lp_build_flow_construct_data *
+lp_build_flow_peek(struct lp_build_flow_context *flow,
+                   enum lp_build_flow_construct_kind kind)
+{
+   assert(flow->num_constructs);
+   if(!flow->num_constructs)
+      return NULL;
+
+   assert(flow->constructs[flow->num_constructs - 1].kind == kind);
+   if(flow->constructs[flow->num_constructs - 1].kind != kind)
+      return NULL;
+
+   return &flow->constructs[flow->num_constructs - 1].data;
+}
+
+
+static union lp_build_flow_construct_data *
+lp_build_flow_pop(struct lp_build_flow_context *flow,
+                  enum lp_build_flow_construct_kind kind)
+{
+   assert(flow->num_constructs);
+   if(!flow->num_constructs)
+      return NULL;
+
+   assert(flow->constructs[flow->num_constructs - 1].kind == kind);
+   if(flow->constructs[flow->num_constructs - 1].kind != kind)
+      return NULL;
+
+   return &flow->constructs[--flow->num_constructs].data;
 }
 
 
+/**
+ * Begin a variable scope.
+ *
+ *
+ */
 void
-lp_build_mask_update(struct lp_build_mask_context *mask,
-                     LLVMValueRef value)
+lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
 {
+   struct lp_build_flow_scope *scope;
 
-   LLVMValueRef cond;
-   LLVMBasicBlockRef current_block;
-   LLVMBasicBlockRef next_block;
-   LLVMBasicBlockRef new_block;
+   scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope;
+   if(!scope)
+      return;
 
-   if(mask->value)
-      mask->value = LLVMBuildAnd(mask->builder, mask->value, value, "");
-   else
-      mask->value = value;
+   scope->num_variables = 0;
+}
 
-   /* FIXME: disabled until we have proper control flow helpers */
-#if 0
-   cond = LLVMBuildICmp(mask->builder,
-                        LLVMIntEQ,
-                        LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""),
-                        LLVMConstNull(mask->reg_type),
-                        "");
 
-   current_block = LLVMGetInsertBlock(mask->builder);
+/**
+ * Declare a variable.
+ *
+ * A variable is a named entity which can have different LLVMValueRef's at
+ * different points of the program. This is relevant for control flow because
+ * when there are mutiple branches to a same location we need to replace
+ * the variable's value with a Phi function as explained in
+ * http://en.wikipedia.org/wiki/Static_single_assignment_form .
+ *
+ * We keep track of variables by keeping around a pointer to where their
+ * current.
+ *
+ * There are a few cautions to observe:
+ *
+ * - Variable's value must not be NULL. If there is no initial value then
+ *   LLVMGetUndef() should be used.
+ *
+ * - Variable's value must be kept up-to-date. If the variable is going to be
+ *   modified by a function then a pointer should be passed so that its value
+ *   is accurate. Failure to do this will cause some of the variables'
+ *   transient values to be lost, leading to wrong results.
+ *
+ * - A program should be written from top to bottom, by always appending
+ *   instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
+ *   modifying existing statements will most likely lead to wrong results.
+ *
+ */
+void
+lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
+                            LLVMValueRef *variable)
+{
+   struct lp_build_flow_scope *scope;
+
+   scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope;
+   if(!scope)
+      return;
+
+   assert(*variable);
+   if(!*variable)
+      return;
+
+   assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
+   if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
+      return;
+
+   flow->variables[flow->num_variables++] = variable;
+   ++scope->num_variables;
+}
 
-   if(!mask->skip_block) {
-      LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
-      mask->skip_block = LLVMAppendBasicBlock(function, "skip");
 
-      mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), "");
+void
+lp_build_flow_scope_end(struct lp_build_flow_context *flow)
+{
+   struct lp_build_flow_scope *scope;
+
+   scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope;
+   if(!scope)
+      return;
+
+   assert(flow->num_variables >= scope->num_variables);
+   if(flow->num_variables < scope->num_variables) {
+      flow->num_variables = 0;
+      return;
    }
 
+   flow->num_variables -= scope->num_variables;
+}
+
+
+static LLVMBasicBlockRef
+lp_build_flow_insert_block(struct lp_build_flow_context *flow)
+{
+   LLVMBasicBlockRef current_block;
+   LLVMBasicBlockRef next_block;
+   LLVMBasicBlockRef new_block;
+
+   current_block = LLVMGetInsertBlock(flow->builder);
+
    next_block = LLVMGetNextBasicBlock(current_block);
-   assert(next_block);
    if(next_block) {
       new_block = LLVMInsertBasicBlock(next_block, "");
    }
@@ -93,30 +295,139 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
       new_block = LLVMAppendBasicBlock(function, "");
    }
 
-   LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
-   LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block);
+   return new_block;
+}
+
+void
+lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
+{
+   struct lp_build_flow_skip *skip;
+   LLVMBuilderRef builder;
+   unsigned i;
+
+   skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip;
+   if(!skip)
+      return;
+
+   skip->block = lp_build_flow_insert_block(flow);
+   skip->num_variables = flow->num_variables;
+   if(!skip->num_variables) {
+      skip->phi = NULL;
+      return;
+   }
+
+   skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi);
+   if(!skip->phi) {
+      skip->num_variables = 0;
+      return;
+   }
+
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, skip->block);
 
-   LLVMPositionBuilderAtEnd(mask->builder, new_block);
-#endif
+   for(i = 0; i < skip->num_variables; ++i)
+      skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
+
+   LLVMDisposeBuilder(builder);
 }
 
 
-LLVMValueRef
-lp_build_mask_end(struct lp_build_mask_context *mask)
+void
+lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+                              LLVMValueRef cond)
+{
+   struct lp_build_flow_skip *skip;
+   LLVMBasicBlockRef current_block;
+   LLVMBasicBlockRef new_block;
+   unsigned i;
+
+   skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip;
+   if(!skip)
+      return;
+
+   current_block = LLVMGetInsertBlock(flow->builder);
+
+   new_block = lp_build_flow_insert_block(flow);
+
+   for(i = 0; i < skip->num_variables; ++i) {
+      assert(*flow->variables[i]);
+      LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
+   }
+
+   LLVMBuildCondBr(flow->builder, cond, skip->block, new_block);
+
+   LLVMPositionBuilderAtEnd(flow->builder, new_block);
+ }
+
+
+void
+lp_build_flow_skip_end(struct lp_build_flow_context *flow)
 {
-   if(mask->skip_block) {
-      LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder);
+   struct lp_build_flow_skip *skip;
+   LLVMBasicBlockRef current_block;
+   unsigned i;
 
-      LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
-      LLVMBuildBr(mask->builder, mask->skip_block);
+   skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip;
+   if(!skip)
+      return;
 
-      LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block);
+   current_block = LLVMGetInsertBlock(flow->builder);
 
-      mask->value = mask->phi;
-      mask->phi = NULL;
-      mask->skip_block = NULL;
+   for(i = 0; i < skip->num_variables; ++i) {
+      assert(*flow->variables[i]);
+      LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
+      *flow->variables[i] = skip->phi[i];
    }
 
+   LLVMBuildBr(flow->builder, skip->block);
+   LLVMPositionBuilderAtEnd(flow->builder, skip->block);
+
+   FREE(skip->phi);
+}
+
+
+void
+lp_build_mask_begin(struct lp_build_mask_context *mask,
+                    struct lp_build_flow_context *flow,
+                    union lp_type type,
+                    LLVMValueRef value)
+{
+   memset(mask, 0, sizeof *mask);
+
+   mask->flow = flow;
+   mask->reg_type = LLVMIntType(type.width * type.length);
+   mask->value = value;
+
+   lp_build_flow_scope_begin(flow);
+   lp_build_flow_scope_declare(flow, &mask->value);
+   lp_build_flow_skip_begin(flow);
+}
+
+
+void
+lp_build_mask_update(struct lp_build_mask_context *mask,
+                     LLVMValueRef value)
+{
+   LLVMBuilderRef builder = mask->flow->builder;
+   LLVMValueRef cond;
+
+   mask->value = LLVMBuildAnd(builder, mask->value, value, "");
+
+   cond = LLVMBuildICmp(builder,
+                        LLVMIntEQ,
+                        LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
+                        LLVMConstNull(mask->reg_type),
+                        "");
+
+   lp_build_flow_skip_cond_break(mask->flow, cond);
+}
+
+
+LLVMValueRef
+lp_build_mask_end(struct lp_build_mask_context *mask)
+{
+   lp_build_flow_skip_end(mask->flow);
+   lp_build_flow_scope_end(mask->flow);
    return mask->value;
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
index 1b634ff038..9d76e3064d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
@@ -41,23 +41,49 @@
 union lp_type;
 
 
+struct lp_build_flow_context;
+
+
+struct lp_build_flow_context *
+lp_build_flow_create(LLVMBuilderRef builder);
+
+void
+lp_build_flow_destroy(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_scope_begin(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
+                            LLVMValueRef *variable);
+
+void
+lp_build_flow_scope_end(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_skip_begin(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+                              LLVMValueRef cond);
+
+void
+lp_build_flow_skip_end(struct lp_build_flow_context *flow);
+
+
 struct lp_build_mask_context
 {
-   LLVMBuilderRef builder;
+   struct lp_build_flow_context *flow;
 
    LLVMTypeRef reg_type;
 
    LLVMValueRef value;
-
-   LLVMValueRef phi;
-
-   LLVMBasicBlockRef skip_block;
 };
 
 
 void
 lp_build_mask_begin(struct lp_build_mask_context *mask,
-                    LLVMBuilderRef builder,
+                    struct lp_build_flow_context *flow,
                     union lp_type type,
                     LLVMValueRef value);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 21e675c34e..2e60da60cd 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -197,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp,
    LLVMValueRef consts_ptr;
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
    LLVMValueRef z = interp->pos[2];
+   struct lp_build_flow_context *flow;
    struct lp_build_mask_context mask;
    boolean early_depth_test;
    unsigned attrib;
@@ -208,7 +209,33 @@ generate_fs(struct llvmpipe_context *lp,
 
    consts_ptr = lp_jit_context_constants(builder, context_ptr);
 
-   lp_build_mask_begin(&mask, builder, type, *pmask);
+   flow = lp_build_flow_create(builder);
+
+   memset(outputs, 0, sizeof outputs);
+
+   lp_build_flow_scope_begin(flow);
+
+   /* Declare the color and z variables */
+   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         boolean declare = FALSE;
+         switch (shader->info.output_semantic_name[attrib]) {
+         case TGSI_SEMANTIC_COLOR:
+            declare = TRUE;
+            break;
+         case TGSI_SEMANTIC_POSITION:
+            if(chan == 2)
+               declare = TRUE;
+            break;
+         }
+         if(declare) {
+            outputs[attrib][chan] = LLVMGetUndef(vec_type);
+            lp_build_flow_scope_declare(flow, &outputs[attrib][chan]);
+         }
+      }
+   }
+
+   lp_build_mask_begin(&mask, flow, type, *pmask);
 
    early_depth_test =
       key->depth.enabled &&
@@ -221,12 +248,19 @@ generate_fs(struct llvmpipe_context *lp,
                      type, &mask,
                      z, depth_ptr);
 
-   memset(outputs, 0, sizeof outputs);
-
    lp_build_tgsi_soa(builder, tokens, type, &mask,
                      consts_ptr, interp->pos, interp->inputs,
                      outputs, sampler);
 
+   if(!early_depth_test)
+      generate_depth(builder, key,
+                     type, &mask,
+                     z, depth_ptr);
+
+   lp_build_mask_end(&mask);
+
+   lp_build_flow_scope_end(flow);
+
    for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
          if(outputs[attrib][chan]) {
@@ -265,12 +299,7 @@ generate_fs(struct llvmpipe_context *lp,
       }
    }
 
-   if(!early_depth_test)
-      generate_depth(builder, key,
-                     type, &mask,
-                     z, depth_ptr);
-
-   lp_build_mask_end(&mask);
+   lp_build_flow_destroy(flow);
 
    *pmask = mask.value;
 
-- 
cgit v1.2.3


From c3c80c5c22f9ce7fabba90daa5d5142e5fb1c012 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 10 Sep 2009 12:01:42 +0100
Subject: llvmpipe: Skip blending when mask is zero.

This increases quake3 timedemo fps another 10%.
---
 src/gallium/drivers/llvmpipe/lp_bld_flow.c | 31 +++++++++++++++++++-----------
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 12 ++++++++++--
 2 files changed, 30 insertions(+), 13 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index 1252593226..69ed014ff3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -386,6 +386,22 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow)
 }
 
 
+static void
+lp_build_mask_check(struct lp_build_mask_context *mask)
+{
+   LLVMBuilderRef builder = mask->flow->builder;
+   LLVMValueRef cond;
+
+   cond = LLVMBuildICmp(builder,
+                        LLVMIntEQ,
+                        LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
+                        LLVMConstNull(mask->reg_type),
+                        "");
+
+   lp_build_flow_skip_cond_break(mask->flow, cond);
+}
+
+
 void
 lp_build_mask_begin(struct lp_build_mask_context *mask,
                     struct lp_build_flow_context *flow,
@@ -401,6 +417,8 @@ lp_build_mask_begin(struct lp_build_mask_context *mask,
    lp_build_flow_scope_begin(flow);
    lp_build_flow_scope_declare(flow, &mask->value);
    lp_build_flow_skip_begin(flow);
+
+   lp_build_mask_check(mask);
 }
 
 
@@ -408,18 +426,9 @@ void
 lp_build_mask_update(struct lp_build_mask_context *mask,
                      LLVMValueRef value)
 {
-   LLVMBuilderRef builder = mask->flow->builder;
-   LLVMValueRef cond;
-
-   mask->value = LLVMBuildAnd(builder, mask->value, value, "");
-
-   cond = LLVMBuildICmp(builder,
-                        LLVMIntEQ,
-                        LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
-                        LLVMConstNull(mask->reg_type),
-                        "");
+   mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
 
-   lp_build_flow_skip_cond_break(mask->flow, cond);
+   lp_build_mask_check(mask);
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2e60da60cd..354d9916c7 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -319,6 +319,8 @@ generate_blend(const struct pipe_blend_state *blend,
                LLVMValueRef dst_ptr)
 {
    struct lp_build_context bld;
+   struct lp_build_flow_context *flow;
+   struct lp_build_mask_context mask_ctx;
    LLVMTypeRef vec_type;
    LLVMTypeRef int_vec_type;
    LLVMValueRef const_ptr;
@@ -327,11 +329,14 @@ generate_blend(const struct pipe_blend_state *blend,
    LLVMValueRef res[4];
    unsigned chan;
 
+   lp_build_context_init(&bld, builder, type);
+
+   flow = lp_build_flow_create(builder);
+   lp_build_mask_begin(&mask_ctx, flow, type, mask);
+
    vec_type = lp_build_vec_type(type);
    int_vec_type = lp_build_int_vec_type(type);
 
-   lp_build_context_init(&bld, builder, type);
-
    const_ptr = lp_jit_context_blend_color(builder, context_ptr);
    const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                 LLVMPointerType(vec_type, 0), "");
@@ -354,6 +359,9 @@ generate_blend(const struct pipe_blend_state *blend,
       res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
       LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
    }
+
+   lp_build_mask_end(&mask_ctx);
+   lp_build_flow_destroy(flow);
 }
 
 
-- 
cgit v1.2.3


From 48f19c0bcdc56d33ef2873eeabe635380764e968 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 10 Sep 2009 12:14:53 +0100
Subject: llvmpipe: Fix sampling from depth textures. Respect texture compare
 func.

Fixes Mesa shadowtex sample.
---
 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c |  53 ++++++----
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 118 +++++++++++++++++------
 2 files changed, 122 insertions(+), 49 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
index 36bac06d2e..569e8d10a3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -83,6 +83,30 @@ lp_build_gather(LLVMBuilderRef builder,
 }
 
 
+static LLVMValueRef
+lp_build_format_swizzle(union lp_type type,
+                        const LLVMValueRef *inputs,
+                        enum util_format_swizzle swizzle)
+{
+   switch (swizzle) {
+   case UTIL_FORMAT_SWIZZLE_X:
+   case UTIL_FORMAT_SWIZZLE_Y:
+   case UTIL_FORMAT_SWIZZLE_Z:
+   case UTIL_FORMAT_SWIZZLE_W:
+      return inputs[swizzle];
+   case UTIL_FORMAT_SWIZZLE_0:
+      return lp_build_zero(type);
+   case UTIL_FORMAT_SWIZZLE_1:
+      return lp_build_one(type);
+   case UTIL_FORMAT_SWIZZLE_NONE:
+      return lp_build_undef(type);
+   default:
+      assert(0);
+      return lp_build_undef(type);
+   }
+}
+
+
 void
 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                          const struct util_format_description *format_desc,
@@ -146,25 +170,16 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
       start = stop;
    }
 
-   for (chan = 0; chan < 4; ++chan) {
-      enum util_format_swizzle swizzle = format_desc->swizzle[chan];
-
-      switch (swizzle) {
-      case UTIL_FORMAT_SWIZZLE_X:
-      case UTIL_FORMAT_SWIZZLE_Y:
-      case UTIL_FORMAT_SWIZZLE_Z:
-      case UTIL_FORMAT_SWIZZLE_W:
-         rgba[chan] = inputs[swizzle];
-         break;
-      case UTIL_FORMAT_SWIZZLE_0:
-         rgba[chan] = lp_build_zero(type);
-         break;
-      case UTIL_FORMAT_SWIZZLE_1:
-         rgba[chan] = lp_build_one(type);
-         break;
-      case UTIL_FORMAT_SWIZZLE_NONE:
-         rgba[chan] = lp_build_undef(type);
-         break;
+   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+      enum util_format_swizzle swizzle = format_desc->swizzle[0];
+      LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
+      rgba[2] = rgba[1] = rgba[0] = depth;
+      rgba[3] = lp_build_one(type);
+   }
+   else {
+      for (chan = 0; chan < 4; ++chan) {
+         enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+         rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
       }
    }
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 6ebf107cc2..2913b17d3f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -42,6 +42,7 @@
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_format.h"
 #include "lp_bld_sample.h"
@@ -72,8 +73,10 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
    state->min_img_filter    = sampler->min_img_filter;
    state->min_mip_filter    = sampler->min_mip_filter;
    state->mag_img_filter    = sampler->mag_img_filter;
-   state->compare_mode      = sampler->compare_mode;
-   state->compare_func      = sampler->compare_func;
+   if(sampler->compare_mode) {
+      state->compare_mode      = sampler->compare_mode;
+      state->compare_func      = sampler->compare_func;
+   }
    state->normalized_coords = sampler->normalized_coords;
    state->prefilter         = sampler->prefilter;
 }
@@ -115,17 +118,52 @@ lp_build_sample_texel(struct lp_build_sample_context *bld,
                       LLVMValueRef data_ptr,
                       LLVMValueRef *texel)
 {
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef x_stride;
-   LLVMValueRef x_offset;
-   LLVMValueRef y_offset;
    LLVMValueRef offset;
 
    x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
 
-   x_offset = lp_build_mul(&bld->int_coord_bld, x, x_stride);
-   y_offset = lp_build_mul(&bld->int_coord_bld, y, y_stride);
+   if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+      LLVMValueRef x_lo, x_hi;
+      LLVMValueRef y_lo, y_hi;
+      LLVMValueRef x_stride_lo, x_stride_hi;
+      LLVMValueRef y_stride_lo, y_stride_hi;
+      LLVMValueRef x_offset_lo, x_offset_hi;
+      LLVMValueRef y_offset_lo, y_offset_hi;
+      LLVMValueRef offset_lo, offset_hi;
+
+      x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
+      y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
+
+      x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
+      y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
+
+      x_stride_lo = x_stride;
+      y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
+
+      x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
+      y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
+
+      x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
+      y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
+      offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
 
-   offset = lp_build_add(&bld->int_coord_bld, x_offset, y_offset);
+      x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
+      y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
+      offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
+
+      offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
+   }
+   else {
+      LLVMValueRef x_offset;
+      LLVMValueRef y_offset;
+
+      x_offset = lp_build_mul(int_coord_bld, x, x_stride);
+      y_offset = lp_build_mul(int_coord_bld, y, y_stride);
+
+      offset = lp_build_add(int_coord_bld, x_offset, y_offset);
+   }
 
    lp_build_load_rgba_soa(bld->builder,
                           bld->format_desc,
@@ -249,30 +287,48 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
 
    /* TODO: Don't interpolate missing channels */
    for(chan = 0; chan < 4; ++chan) {
-      switch(bld->format_desc->swizzle[chan]) {
-      case UTIL_FORMAT_SWIZZLE_X:
-      case UTIL_FORMAT_SWIZZLE_Y:
-      case UTIL_FORMAT_SWIZZLE_Z:
-      case UTIL_FORMAT_SWIZZLE_W:
-         texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
-                                        s_fpart, t_fpart,
-                                        neighbors[0][0][chan],
-                                        neighbors[0][1][chan],
-                                        neighbors[1][0][chan],
-                                        neighbors[1][1][chan]);
-         break;
-      case UTIL_FORMAT_SWIZZLE_0:
-         texel[chan] = bld->texel_bld.zero;
-         break;
-      case UTIL_FORMAT_SWIZZLE_1:
-         texel[chan] = bld->texel_bld.one;
-         break;
-      default:
-         assert(0);
-         texel[chan] = bld->texel_bld.undef;
-         break;
-      }
+      texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
+                                     s_fpart, t_fpart,
+                                     neighbors[0][0][chan],
+                                     neighbors[0][1][chan],
+                                     neighbors[1][0][chan],
+                                     neighbors[1][1][chan]);
+   }
+}
+
+
+static void
+lp_build_sample_compare(struct lp_build_sample_context *bld,
+                        LLVMValueRef p,
+                        LLVMValueRef *texel)
+{
+   struct lp_build_context *texel_bld = &bld->texel_bld;
+   LLVMValueRef res;
+   unsigned chan;
+
+   if(!bld->static_state->compare_mode)
+      return;
+
+   /* TODO: Compare before swizzling, to avoid redundant computations */
+   res = NULL;
+   for(chan = 0; chan < 4; ++chan) {
+      LLVMValueRef cmp;
+      cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
+      cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
+
+      if(res)
+         res = lp_build_add(texel_bld, res, cmp);
+      else
+         res = cmp;
    }
+
+   assert(res);
+   res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
+
+   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+   for(chan = 0; chan < 3; ++chan)
+      texel[chan] = res;
+   texel[3] = texel_bld->one;
 }
 
 
@@ -343,4 +399,6 @@ lp_build_sample_soa(LLVMBuilderRef builder,
       lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
       break;
    }
+
+   lp_build_sample_compare(&bld, p, texel);
 }
-- 
cgit v1.2.3


From 4c3a48ad0cb36e6d8601535b91f83caed0d07570 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 10 Sep 2009 12:37:44 +0100
Subject: llvmpipe: Mask out color channels not present in the color buffer.

---
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 354d9916c7..ccbccc813f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -354,10 +354,12 @@ generate_blend(const struct pipe_blend_state *blend,
    lp_build_blend_soa(builder, blend, type, src, dst, con, res);
 
    for(chan = 0; chan < 4; ++chan) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
-      lp_build_name(res[chan], "res.%c", "rgba"[chan]);
-      res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
-      LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+      if(blend->colormask & (1 << chan)) {
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
+         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
+         LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+      }
    }
 
    lp_build_mask_end(&mask_ctx);
@@ -725,7 +727,23 @@ make_variant_key(struct llvmpipe_context *lp,
       key->alpha.func = lp->depth_stencil->alpha.func;
    /* alpha.ref_value is passed in jit_context */
 
-   memcpy(&key->blend, lp->blend, sizeof key->blend);
+   if(lp->framebuffer.cbufs[0]) {
+      const struct util_format_description *format_desc;
+      unsigned chan;
+
+      memcpy(&key->blend, lp->blend, sizeof key->blend);
+
+      format_desc = util_format_description(lp->framebuffer.cbufs[0]->format);
+      assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB ||
+             format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB);
+
+      /* mask out color channels not present in the color buffer */
+      for(chan = 0; chan < 4; ++chan) {
+         enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+         if(swizzle > 4)
+            key->blend.colormask &= ~(1 << chan);
+      }
+   }
 
    for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
       if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
-- 
cgit v1.2.3


From 6a405b4a21ac1fa45a93da37ce6b95d98d17f0e2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 10 Sep 2009 13:35:39 +0100
Subject: llvmpipe: Fix alpha test.

---
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 39 ++++++++++--------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index ccbccc813f..618cf1ffb8 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -216,24 +216,11 @@ generate_fs(struct llvmpipe_context *lp,
    lp_build_flow_scope_begin(flow);
 
    /* Declare the color and z variables */
-   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
-      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-         boolean declare = FALSE;
-         switch (shader->info.output_semantic_name[attrib]) {
-         case TGSI_SEMANTIC_COLOR:
-            declare = TRUE;
-            break;
-         case TGSI_SEMANTIC_POSITION:
-            if(chan == 2)
-               declare = TRUE;
-            break;
-         }
-         if(declare) {
-            outputs[attrib][chan] = LLVMGetUndef(vec_type);
-            lp_build_flow_scope_declare(flow, &outputs[attrib][chan]);
-         }
-      }
+   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+      color[chan] = LLVMGetUndef(vec_type);
+      lp_build_flow_scope_declare(flow, &color[chan]);
    }
+   lp_build_flow_scope_declare(flow, &z);
 
    lp_build_mask_begin(&mask, flow, type, *pmask);
 
@@ -252,15 +239,6 @@ generate_fs(struct llvmpipe_context *lp,
                      consts_ptr, interp->pos, interp->inputs,
                      outputs, sampler);
 
-   if(!early_depth_test)
-      generate_depth(builder, key,
-                     type, &mask,
-                     z, depth_ptr);
-
-   lp_build_mask_end(&mask);
-
-   lp_build_flow_scope_end(flow);
-
    for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
          if(outputs[attrib][chan]) {
@@ -299,6 +277,15 @@ generate_fs(struct llvmpipe_context *lp,
       }
    }
 
+   if(!early_depth_test)
+      generate_depth(builder, key,
+                     type, &mask,
+                     z, depth_ptr);
+
+   lp_build_mask_end(&mask);
+
+   lp_build_flow_scope_end(flow);
+
    lp_build_flow_destroy(flow);
 
    *pmask = mask.value;
-- 
cgit v1.2.3


From 1fc41002252419f4688c24ea8c3814553b3d76ad Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 11 Sep 2009 11:24:00 +0100
Subject: llvmpipe: Update status in README and TODO/FIXME comments throughout
 the code.

---
 src/gallium/drivers/llvmpipe/README              | 23 +++++++++++++++--------
 src/gallium/drivers/llvmpipe/lp_bld_conv.c       |  4 ++--
 src/gallium/drivers/llvmpipe/lp_bld_depth.c      |  1 +
 src/gallium/drivers/llvmpipe/lp_bld_logic.c      | 11 ++++-------
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c |  7 +++++++
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c   | 19 ++++++-------------
 src/gallium/drivers/llvmpipe/lp_context.c        |  2 --
 src/gallium/drivers/llvmpipe/lp_jit.h            |  3 +--
 src/gallium/drivers/llvmpipe/lp_setup.c          |  2 --
 9 files changed, 36 insertions(+), 36 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 498d21dea6..89d08834a3 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -8,13 +8,16 @@ Done so far is:
 
  - the whole fragment pipeline is code generated in a single function
  
+   - input interpolation
+   
    - depth testing
  
+   - texture sampling (not all state/formats are supported) 
+   
    - fragment shader TGSI translation
      - same level of support as the TGSI SSE2 exec machine, with the exception
        we don't fallback to TGSI interpretation when an unsupported opcode is
        found, but just ignore it
-     - texture sampling via an intrinsic call
      - done in SoA layout
      - input interpolation also code generated
  
@@ -28,16 +31,17 @@ Done so far is:
      any width and length
    - not all operations are implemented for these types yet though
 
-Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch,
-which includes hand written fast implementations for common cases.
+Most mesa/progs/demos/* work. 
 
 To do (probably by this order):
 
  - code generate stipple and stencil testing
 
- - code generate texture sampling
+ - translate the remaining bits of texture sampling state
 
  - translate TGSI control flow instructions, and all other remaining opcodes
+ 
+ - integrate with the draw module for VS code generation
 
  - code generate the triangle setup and rasterization
 
@@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as
 
   make linux-llvm
 
-but the rest of these instructions assume scons is used.
+but the rest of these instructions assume that scons is used.
 
 
 Using
@@ -108,6 +112,9 @@ or
 
   export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH
 
+For performance evaluation pass debug=no to scons, and use the corresponding
+lib directory without the "-debug" suffix.
+
 
 Unit testing
 ============
@@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe:
  - lp_test_conv: SIMD vector conversion
  - lp_test_format: pixel unpacking/packing
 
-Some of this tests can output results and benchmarks to a tab-seperated-file
+Some of this tests can output results and benchmarks to a tab-separated-file
 for posterior analysis, e.g.:
 
   build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
@@ -133,10 +140,10 @@ Development Notes
   at the top of the lp_bld_*.c functions.  
 
 - All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be 
-  put in a standalone Gallium state -> LLVM IR translation module.
+  put in a stand-alone Gallium state -> LLVM IR translation module.
 
 - We use LLVM-C bindings for now. They are not documented, but follow the C++
   interfaces very closely, and appear to be complete enough for code
   generation. See 
   http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
-  for a standalone example.
+  for a stand-alone example.
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index c8954c8a34..cd01496d54 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
       int shift = dst_width - n;
       res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");
 
-      /* Fill in the empty lower bits for added precision? */
+      /* TODO: Fill in the empty lower bits for additional precision? */
 #if 0
       {
          LLVMValueRef msb;
@@ -244,7 +244,7 @@ lp_build_const_pack_shuffle(unsigned n)
  * Expand the bit width.
  *
  * This will only change the number of bits the values are represented, not the
- * values themselved.
+ * values themselves.
  */
 static void
 lp_build_expand(LLVMBuilderRef builder,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 3f88a14b5d..e5fe81193f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -211,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder,
       LLVMBuildStore(builder, dst, dst_ptr);
    }
 
+   /* FIXME */
    assert(!state->occlusion_count);
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
index 8631efd6c3..d566780e5d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld,
       b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
    }
 
-   /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
-
    a = LLVMBuildAnd(bld->builder, a, mask, "");
 
    /* This often gets translated to PANDN, but sometimes the NOT is
@@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld,
 
       return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
    }
+   else {
 #if 0
-   else if(0) {
-      /* FIXME: Unfortunately select of vectors do not work */
+      /* XXX: Unfortunately select of vectors do not work */
       /* Use a select */
       LLVMTypeRef elem_type = LLVMInt1Type();
       LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
@@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld,
             cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
 
       return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
-   }
-#endif
-   else {
+#else
       LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
       return lp_build_select(bld, mask, a, b);
+#endif
    }
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 2913b17d3f..3ca25b0e76 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -207,6 +207,7 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      /* FIXME */
    default:
       assert(0);
    }
@@ -398,7 +399,13 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    case PIPE_TEX_FILTER_ANISO:
       lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
       break;
+   default:
+      assert(0);
    }
 
+   /* FIXME: respect static_state->min_mip_filter */;
+   /* FIXME: respect static_state->mag_img_filter */;
+   /* FIXME: respect static_state->prefilter */;
+
    lp_build_sample_compare(&bld, p, texel);
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 3ce379de12..4756811dc3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -167,6 +167,7 @@ emit_fetch(
       break;
 
    case TGSI_UTIL_SIGN_SET:
+      /* TODO: Use bitwese OR for floating point */
       res = lp_build_abs( &bld->base, res );
       res = LLVMBuildNeg( bld->base.builder, res, "" );
       break;
@@ -349,14 +350,6 @@ emit_kil(
 }
 
 
-static void
-emit_kilp(
-   struct lp_build_tgsi_soa_context *bld )
-{
-   /* XXX todo / fix me */
-}
-
-
 /**
  * Check if inst src/dest regs use indirect addressing into temporary
  * register file.
@@ -398,6 +391,7 @@ emit_instruction(
    switch (inst->Instruction.Opcode) {
 #if 0
    case TGSI_OPCODE_ARL:
+      /* FIXME */
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
          emit_flr(bld, 0, 0);
@@ -686,6 +680,7 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_CND:
+      /* FIXME */
       return 0;
       break;
 
@@ -849,13 +844,11 @@ emit_instruction(
       return 0;
       break;
 
-#if 0
    case TGSI_OPCODE_KILP:
       /* predicated kill */
-      emit_kilp( bld );
-      return 0; /* XXX fix me */
+      /* FIXME */
+      return 0;
       break;
-#endif
 
    case TGSI_OPCODE_KIL:
       /* conditional kill */
@@ -1309,7 +1302,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
 
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_DECLARATION:
-         /* Input already interpolated */
+         /* Inputs already interpolated */
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 233d1df0e1..a4b2bd8c2a 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -141,8 +141,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
          return PIPE_REFERENCED_FOR_WRITE;
    }
    
-   /* FIXME: we also need to do the same for the texture cache */
-   
    return PIPE_UNREFERENCED;
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index c3e3e1af67..58f716ede2 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -80,10 +80,9 @@ struct lp_jit_context
 
    struct tgsi_sampler **samplers;
 
-   /* TODO: alpha reference value */
    float alpha_ref_value;
 
-   /* TODO: blend constant color */
+   /* FIXME: store (also?) in floats */
    uint8_t *blend_color;
 
    struct lp_jit_texture textures[PIPE_MAX_SAMPLERS];
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index f9e254efca..2d2fc19a65 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -163,8 +163,6 @@ shade_quads(struct llvmpipe_context *llvmpipe,
    else
       depth = NULL;
 
-   /* TODO: blend color */
-
    /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
    assert(lp_check_alignment(mask, 16));
 
-- 
cgit v1.2.3


From 672c5f52d1f97ed20d9f382b33c13919ec811684 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 11 Sep 2009 11:29:24 +0100
Subject: llvmpipe: set dirty_render_cache in llvmpipe_clear()

Based on Brian's softpipe change on
commit 988db641195819c948249a1bb2d59f13577a482f. We don't use the tile
cache for zsbuf though, only for color buffers.
---
 src/gallium/drivers/llvmpipe/lp_clear.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c
index 580cca5b46..bdcff94b9b 100644
--- a/src/gallium/drivers/llvmpipe/lp_clear.c
+++ b/src/gallium/drivers/llvmpipe/lp_clear.c
@@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
          util_pack_color(rgba, ps->format, &cv);
          lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv);
       }
+      llvmpipe->dirty_render_cache = TRUE;
    }
 
    if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
-- 
cgit v1.2.3


From d81086a86bd7c82eae5a8f0d1092a30c23626257 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 11 Sep 2009 13:39:14 -0600
Subject: llvmpipe: asst fixes for 'make linux-llvmpipe'

---
 src/gallium/drivers/llvmpipe/Makefile   | 2 ++
 src/gallium/winsys/xlib/Makefile        | 1 +
 src/gallium/winsys/xlib/xlib_llvmpipe.c | 3 +++
 3 files changed, 6 insertions(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 6e63a0c2b7..5ac09de79e 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -3,6 +3,8 @@ include $(TOP)/configs/current
 
 LIBNAME = llvmpipe
 
+CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
+
 C_SOURCES = \
 	lp_bld_alpha.c \
 	lp_bld_arit.c \
diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile
index 522f6dc5ae..3a1945d92c 100644
--- a/src/gallium/winsys/xlib/Makefile
+++ b/src/gallium/winsys/xlib/Makefile
@@ -34,6 +34,7 @@ XLIB_WINSYS_SOURCES = \
 	xlib_brw_aub.c \
 	xlib_brw_context.c \
 	xlib_brw_screen.c \
+	xlib_llvmpipe.c \
 	xlib_softpipe.c \
 	xlib_trace.c 
 
diff --git a/src/gallium/winsys/xlib/xlib_llvmpipe.c b/src/gallium/winsys/xlib/xlib_llvmpipe.c
index bc876591c0..3dd15e099b 100644
--- a/src/gallium/winsys/xlib/xlib_llvmpipe.c
+++ b/src/gallium/winsys/xlib/xlib_llvmpipe.c
@@ -33,6 +33,8 @@
  */
 
 
+#if defined(GALLIUM_LLVMPIPE)
+
 #include "xm_api.h"
 
 #undef ASSERT
@@ -459,3 +461,4 @@ struct xm_driver xlib_llvmpipe_driver =
 
 
+#endif /* GALLIUM_LLVMPIPE */
-- 
cgit v1.2.3


From d7aa114e166c5f5330ecbe321adad65ad2cd54aa Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 13 Sep 2009 13:45:48 +0100
Subject: llvmpipe: Rename function to free up lp_build_trunc to the usual
 arithmetic meaning.

---
 src/gallium/drivers/llvmpipe/lp_bld_conv.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index cd01496d54..c5a71d2c72 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder,
  * TODO: Handle saturation consistently.
  */
 static LLVMValueRef
-lp_build_trunc(LLVMBuilderRef builder,
-               union lp_type src_type,
-               union lp_type dst_type,
-               boolean clamped,
-               const LLVMValueRef *src, unsigned num_srcs)
+lp_build_pack(LLVMBuilderRef builder,
+              union lp_type src_type,
+              union lp_type dst_type,
+              boolean clamped,
+              const LLVMValueRef *src, unsigned num_srcs)
 {
    LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
    unsigned i;
@@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder,
 
    if(tmp_type.width > dst_type.width) {
       assert(num_dsts == 1);
-      tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
+      tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
       tmp_type.width = dst_type.width;
       tmp_type.length = dst_type.length;
       num_tmps = 1;
@@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder,
 
    if(src_type.width > dst_type.width) {
       assert(num_dsts == 1);
-      dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs);
+      dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
    }
    else if(src_type.width < dst_type.width) {
       assert(num_srcs == 1);
-- 
cgit v1.2.3


From 00dd0156e08d2801aa2bc5454f94692bf65a33a6 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 13 Sep 2009 13:50:19 +0100
Subject: llvmpipe: Add a few more common arithmetic functions.

We are relying on SSE4.1 for round/trunc/ceil/floor. We'll need to
eventually find alternatives for the rest of the world.
---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 95 ++++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_arit.h | 16 +++++
 2 files changed, 111 insertions(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index be7442d00a..ce3e5f91c0 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -623,6 +623,47 @@ lp_build_abs(struct lp_build_context *bld,
 }
 
 
+LLVMValueRef
+lp_build_sgn(struct lp_build_context *bld,
+             LLVMValueRef a)
+{
+   const union lp_type type = bld->type;
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
+   LLVMValueRef cond;
+   LLVMValueRef res;
+
+   /* Handle non-zero case */
+   if(!type.sign) {
+      /* if not zero then sign must be positive */
+      res = bld->one;
+   }
+   else if(type.floating) {
+      /* Take the sign bit and add it to 1 constant */
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+      LLVMValueRef sign;
+      LLVMValueRef one;
+      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+      one = LLVMConstBitCast(bld->one, int_vec_type);
+      res = LLVMBuildOr(bld->builder, sign, one, "");
+      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+   }
+   else
+   {
+      LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
+      cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
+      res = lp_build_select(bld, cond, bld->one, minus_one);
+   }
+
+   /* Handle zero */
+   cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
+   res = lp_build_select(bld, cond, bld->zero, bld->one);
+
+   return res;
+}
+
+
 enum lp_build_round_sse41_mode
 {
    LP_BUILD_ROUND_SSE41_NEAREST = 0,
@@ -661,6 +702,24 @@ lp_build_round_sse41(struct lp_build_context *bld,
 }
 
 
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+               LLVMValueRef a)
+{
+   const union lp_type type = bld->type;
+
+   assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+#endif
+
+   /* FIXME */
+   assert(0);
+   return bld->undef;
+}
+
+
 LLVMValueRef
 lp_build_floor(struct lp_build_context *bld,
                LLVMValueRef a)
@@ -679,6 +738,42 @@ lp_build_floor(struct lp_build_context *bld,
 }
 
 
+LLVMValueRef
+lp_build_ceil(struct lp_build_context *bld,
+              LLVMValueRef a)
+{
+   const union lp_type type = bld->type;
+
+   assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+#endif
+
+   /* FIXME */
+   assert(0);
+   return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_trunc(struct lp_build_context *bld,
+               LLVMValueRef a)
+{
+   const union lp_type type = bld->type;
+
+   assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+#endif
+
+   /* FIXME */
+   assert(0);
+   return bld->undef;
+}
+
+
 /**
  * Convert to integer, through whichever rounding method that's fastest,
  * typically truncating to zero.
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 383c3c3313..5e083b847f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -105,10 +105,26 @@ LLVMValueRef
 lp_build_abs(struct lp_build_context *bld,
              LLVMValueRef a);
 
+LLVMValueRef
+lp_build_sgn(struct lp_build_context *bld,
+             LLVMValueRef a);
+
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+               LLVMValueRef a);
+
 LLVMValueRef
 lp_build_floor(struct lp_build_context *bld,
                LLVMValueRef a);
 
+LLVMValueRef
+lp_build_ceil(struct lp_build_context *bld,
+              LLVMValueRef a);
+
+LLVMValueRef
+lp_build_trunc(struct lp_build_context *bld,
+               LLVMValueRef a);
+
 LLVMValueRef
 lp_build_int(struct lp_build_context *bld,
              LLVMValueRef a);
-- 
cgit v1.2.3


From 873773ee2b034e8df72ddfacc764915b8a76ebe2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 13 Sep 2009 13:55:08 +0100
Subject: llvmpipe: Translate more TGSI opcodes.

Basically cover all low hanging fruit, and mark the still missing opcodes
as "fixme" or deprecated.
---
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 139 +++++++++++++++++++++----
 1 file changed, 116 insertions(+), 23 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 4756811dc3..c6488d073c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -680,8 +680,15 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_CND:
-      /* FIXME */
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         src0 = emit_fetch( bld, inst, 0, chan_index );
+         src1 = emit_fetch( bld, inst, 1, chan_index );
+         src2 = emit_fetch( bld, inst, 2, chan_index );
+         tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
+         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
+         dst0 = lp_build_select( &bld->base, tmp0, src0, src1 );
+         emit_store( bld, inst, 0, chan_index, dst0);
+      }
       break;
 
    case TGSI_OPCODE_DP2A:
@@ -699,23 +706,30 @@ emit_instruction(
       }
       break;
 
-#if 0
    case TGSI_OPCODE_FRC:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         emit_frc( bld, 0, 0 );
+         src0 = emit_fetch( bld, inst, 0, chan_index );
+         tmp0 = lp_build_floor(&bld->base, src0);
+         tmp0 = lp_build_sub(&bld->base, tmp0, src0);
          emit_store( bld, inst, 0, chan_index, tmp0);
       }
       break;
 
    case TGSI_OPCODE_CLAMP:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         tmp0 = emit_fetch( bld, inst, 0, chan_index );
+         src1 = emit_fetch( bld, inst, 1, chan_index );
+         src2 = emit_fetch( bld, inst, 2, chan_index );
+         tmp0 = lp_build_max(&bld->base, tmp0, src1);
+         tmp0 = lp_build_min(&bld->base, tmp0, src2);
+         emit_store( bld, inst, 0, chan_index, tmp0);
+      }
       break;
 
    case TGSI_OPCODE_FLR:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         emit_flr( bld, 0, 0 );
+         tmp0 = lp_build_floor(&bld->base, tmp0);
          emit_store( bld, inst, 0, chan_index, tmp0);
       }
       break;
@@ -723,11 +737,10 @@ emit_instruction(
    case TGSI_OPCODE_ROUND:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         emit_rnd( bld, 0, 0 );
+         tmp0 = lp_build_round(&bld->base, tmp0);
          emit_store( bld, inst, 0, chan_index, tmp0);
       }
       break;
-#endif
 
    case TGSI_OPCODE_EX2: {
       tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
@@ -806,8 +819,9 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_RCC:
+      /* deprecated? */
+      assert(0);
       return 0;
-      break;
 
    case TGSI_OPCODE_DPH:
       tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
@@ -837,10 +851,12 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_DDX:
+      /* FIXME */
       return 0;
       break;
 
    case TGSI_OPCODE_DDY:
+      /* FIXME */
       return 0;
       break;
 
@@ -886,7 +902,10 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_SFL:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         dst0 = bld->base.zero;
+         emit_store( bld, inst, 0, chan_index, dst0);
+      }
       break;
 
    case TGSI_OPCODE_SGT:
@@ -928,7 +947,10 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_STR:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         dst0 = bld->base.one;
+         emit_store( bld, inst, 0, chan_index, dst0);
+      }
       break;
 
    case TGSI_OPCODE_TEX:
@@ -936,35 +958,49 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_TXD:
+      /* FIXME */
       return 0;
       break;
 
    case TGSI_OPCODE_UP2H:
+      /* deprecated */
+      assert (0);
       return 0;
       break;
 
    case TGSI_OPCODE_UP2US:
+      /* deprecated */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_UP4B:
+      /* deprecated */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_UP4UB:
+      /* deprecated */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_X2D:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_ARA:
+      /* deprecated */
+      assert(0);
       return 0;
       break;
 
 #if 0
    case TGSI_OPCODE_ARR:
+      /* FIXME */
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
          emit_rnd( bld, 0, 0 );
@@ -975,32 +1011,33 @@ emit_instruction(
 #endif
 
    case TGSI_OPCODE_BRA:
+      /* deprecated */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_CAL:
+      /* FIXME */
       return 0;
       break;
 
-#if 0
    case TGSI_OPCODE_RET:
-      emit_ret( bld );
+      /* FIXME */
+      return 0;
       break;
-#endif
 
    case TGSI_OPCODE_END:
       break;
 
-#if 0
    case TGSI_OPCODE_SSG:
    /* TGSI_OPCODE_SGN */
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         emit_sgn( bld, 0, 0 );
+         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+         tmp0 = lp_build_sgn( &bld->base, tmp0 );
          emit_store( bld, inst, 0, chan_index, tmp0);
       }
       break;
-#endif
 
    case TGSI_OPCODE_CMP:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
@@ -1126,6 +1163,8 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_DIV:
+      /* deprecated */
+      assert( 0 );
       return 0;
       break;
 
@@ -1151,105 +1190,146 @@ emit_instruction(
       break;
       
    case TGSI_OPCODE_BRK:
+      /* FIXME */
       return 0;
       break;
 
    case TGSI_OPCODE_IF:
+      /* FIXME */
       return 0;
       break;
 
    case TGSI_OPCODE_BGNFOR:
+      /* deprecated */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_REP:
+      /* deprecated */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_ELSE:
+      /* FIXME */
       return 0;
       break;
 
    case TGSI_OPCODE_ENDIF:
+      /* FIXME */
       return 0;
       break;
 
    case TGSI_OPCODE_ENDFOR:
+      /* deprecated */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_ENDREP:
+      /* deprecated */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_PUSHA:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_POPA:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_CEIL:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         tmp0 = emit_fetch( bld, inst, 0, chan_index );
+         tmp0 = lp_build_ceil(&bld->base, tmp0);
+         emit_store( bld, inst, 0, chan_index, tmp0);
+      }
       break;
 
    case TGSI_OPCODE_I2F:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_NOT:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
-#if 0
    case TGSI_OPCODE_TRUNC:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         emit_f2it( bld, 0 );
-         emit_i2f( bld, 0 );
+         tmp0 = lp_build_trunc(&bld->base, tmp0);
          emit_store( bld, inst, 0, chan_index, tmp0);
       }
       break;
-#endif
 
    case TGSI_OPCODE_SHL:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_SHR:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_AND:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_OR:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_MOD:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_XOR:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_SAD:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_TXF:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_TXQ:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
    case TGSI_OPCODE_CONT:
+      /* deprecated? */
+      assert(0);
       return 0;
       break;
 
@@ -1261,6 +1341,19 @@ emit_instruction(
       return 0;
       break;
 
+   case TGSI_OPCODE_NOISE1:
+   case TGSI_OPCODE_NOISE2:
+   case TGSI_OPCODE_NOISE3:
+   case TGSI_OPCODE_NOISE4:
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         tmp0 = bld->base.zero;
+         emit_store( bld, inst, 0, chan_index, tmp0);
+      }
+      break;
+
+   case TGSI_OPCODE_NOP:
+      break;
+
    default:
       return 0;
    }
-- 
cgit v1.2.3


From faec23387e035bcdd413b7364933d36a8ec22dba Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 13 Sep 2009 14:42:52 +0100
Subject: llvmpipe: Delay storing into the dst register to prevent clobbering
 the src registers.

How I'm thankful for regular expressions -- just a couple of them were
all that was needed to do this otherwise tiresome and bug prone change.
---
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 237 +++++++++++--------------
 1 file changed, 103 insertions(+), 134 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index c6488d073c..98d31f122f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -243,13 +243,13 @@ static void
 emit_tex( struct lp_build_tgsi_soa_context *bld,
           const struct tgsi_full_instruction *inst,
           boolean apply_lodbias,
-          boolean projected)
+          boolean projected,
+          LLVMValueRef *texel)
 {
    const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
    LLVMValueRef lodbias;
    LLVMValueRef oow;
    LLVMValueRef coords[3];
-   LLVMValueRef texel[4];
    unsigned num_coords;
    unsigned i;
 
@@ -294,10 +294,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
                                   bld->base.type,
                                   unit, num_coords, coords, lodbias,
                                   texel);
-
-   FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
-      emit_store( bld, inst, 0, i, texel[i] );
-   }
 }
 
 
@@ -377,17 +373,26 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
 static int
 emit_instruction(
    struct lp_build_tgsi_soa_context *bld,
-   struct tgsi_full_instruction *inst )
+   const struct tgsi_full_instruction *inst,
+   const struct tgsi_opcode_info *info)
 {
    unsigned chan_index;
    LLVMValueRef src0, src1, src2;
    LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-   LLVMValueRef dst0;
+   LLVMValueRef res;
+   LLVMValueRef dst0[NUM_CHANNELS];
 
    /* we can't handle indirect addressing into temp register file yet */
    if (indirect_temp_reference(inst))
       return FALSE;
 
+   assert(info->num_dst <= 1);
+   if(info->num_dst) {
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         dst0[chan_index] = bld->base.undef;
+      }
+   }
+
    switch (inst->Instruction.Opcode) {
 #if 0
    case TGSI_OPCODE_ARL:
@@ -396,7 +401,7 @@ emit_instruction(
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
          emit_flr(bld, 0, 0);
          emit_f2it( bld, 0 );
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 #endif
@@ -404,19 +409,17 @@ emit_instruction(
    case TGSI_OPCODE_MOV:
    case TGSI_OPCODE_SWZ:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
       }
       break;
 
    case TGSI_OPCODE_LIT:
       if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
-         emit_store( bld, inst, 0, CHAN_X, bld->base.one);
+         dst0[CHAN_X] = bld->base.one;
       }
       if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
          src0 = emit_fetch( bld, inst, 0, CHAN_X );
-         dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
-         emit_store( bld, inst, 0, CHAN_Y, dst0);
+         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
       }
       if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
          /* XMM[1] = SrcReg[0].yyyy */
@@ -428,20 +431,19 @@ emit_instruction(
          tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
          tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
          tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
-         dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
-         emit_store( bld, inst, 0, CHAN_Z, dst0);
+         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
       }
       if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
-         emit_store( bld, inst, 0, CHAN_W, bld->base.one);
+         dst0[CHAN_W] = bld->base.one;
       }
       break;
 
    case TGSI_OPCODE_RCP:
    /* TGSI_OPCODE_RECIP */
       src0 = emit_fetch( bld, inst, 0, CHAN_X );
-      dst0 = lp_build_rcp(&bld->base, src0);
+      res = lp_build_rcp(&bld->base, src0);
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, dst0 );
+         dst0[chan_index] = res;
       }
       break;
 
@@ -449,9 +451,9 @@ emit_instruction(
    /* TGSI_OPCODE_RECIPSQRT */
       src0 = emit_fetch( bld, inst, 0, CHAN_X );
       src0 = lp_build_abs(&bld->base, src0);
-      dst0 = lp_build_rsqrt(&bld->base, src0);
+      res = lp_build_rsqrt(&bld->base, src0);
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, dst0 );
+         dst0[chan_index] = res;
       }
       break;
 
@@ -475,16 +477,15 @@ emit_instruction(
          lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
 
          if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
-            emit_store( bld, inst, 0, CHAN_X, tmp0);
+            dst0[CHAN_X] = tmp0;
          if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
-            emit_store( bld, inst, 0, CHAN_Y, tmp1);
+            dst0[CHAN_Y] = tmp1;
          if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
-            emit_store( bld, inst, 0, CHAN_Z, tmp2);
+            dst0[CHAN_Z] = tmp2;
       }
       /* dst.w = 1.0 */
       if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
-         tmp0 = bld->base.one;
-         emit_store( bld, inst, 0, CHAN_W, tmp0);
+         dst0[CHAN_W] = bld->base.one;
       }
       break;
 
@@ -510,20 +511,18 @@ emit_instruction(
 
          /* dst.x = floor(lg2(abs(src.x))) */
          if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
-            emit_store( bld, inst, 0, CHAN_X, tmp0);
+            dst0[CHAN_X] = tmp0;
          /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
          if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
-            tmp1 = lp_build_div( &bld->base, src0, tmp1);
-            emit_store( bld, inst, 0, CHAN_Y, tmp1);
+            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
          }
          /* dst.z = lg2(abs(src.x)) */
          if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
-            emit_store( bld, inst, 0, CHAN_Z, tmp2);
+            dst0[CHAN_Z] = tmp2;
       }
       /* dst.w = 1.0 */
       if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
-         tmp0 = bld->base.one;
-         emit_store( bld, inst, 0, CHAN_W, tmp0);
+         dst0[CHAN_W] = bld->base.one;
       }
       break;
 
@@ -531,8 +530,7 @@ emit_instruction(
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
-         dst0 = lp_build_mul(&bld->base, src0, src1);
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
       }
       break;
 
@@ -540,8 +538,7 @@ emit_instruction(
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
-         dst0 = lp_build_add(&bld->base, src0, src1);
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
       }
       break;
 
@@ -559,7 +556,7 @@ emit_instruction(
       tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
       tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 
@@ -581,28 +578,24 @@ emit_instruction(
       tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
       tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 
    case TGSI_OPCODE_DST:
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
-         tmp0 = bld->base.one;
-         emit_store( bld, inst, 0, CHAN_X, tmp0);
+         dst0[CHAN_X] = bld->base.one;
       }
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
          tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
          tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
-         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
-         emit_store( bld, inst, 0, CHAN_Y, tmp0);
+         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
       }
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
-         tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
-         emit_store( bld, inst, 0, CHAN_Z, tmp0);
+         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
       }
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
-         tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
-         emit_store( bld, inst, 0, CHAN_W, tmp0);
+         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
       }
       break;
 
@@ -610,8 +603,7 @@ emit_instruction(
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
-         dst0 = lp_build_min( &bld->base, src0, src1 );
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
       }
       break;
 
@@ -619,8 +611,7 @@ emit_instruction(
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
-         dst0 = lp_build_max( &bld->base, src0, src1 );
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
       }
       break;
 
@@ -630,8 +621,7 @@ emit_instruction(
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
          tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
-         dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
       }
       break;
 
@@ -641,8 +631,7 @@ emit_instruction(
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
          tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
-         dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
       }
       break;
 
@@ -654,7 +643,7 @@ emit_instruction(
          tmp2 = emit_fetch( bld, inst, 2, chan_index );
          tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
          tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 
@@ -662,8 +651,7 @@ emit_instruction(
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
          tmp1 = emit_fetch( bld, inst, 1, chan_index );
-         tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
       }
       break;
 
@@ -674,8 +662,7 @@ emit_instruction(
          src2 = emit_fetch( bld, inst, 2, chan_index );
          tmp0 = lp_build_sub( &bld->base, src1, src2 );
          tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
-         dst0 = lp_build_add( &bld->base, tmp0, src2 );
-         emit_store( bld, inst, 0, chan_index, dst0 );
+         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
       }
       break;
 
@@ -686,8 +673,7 @@ emit_instruction(
          src2 = emit_fetch( bld, inst, 2, chan_index );
          tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
          tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
-         dst0 = lp_build_select( &bld->base, tmp0, src0, src1 );
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
       }
       break;
 
@@ -702,7 +688,7 @@ emit_instruction(
       tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
       tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, tmp0);  /* dest[ch] = xmm0 */
+         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
       }
       break;
 
@@ -711,7 +697,7 @@ emit_instruction(
          src0 = emit_fetch( bld, inst, 0, chan_index );
          tmp0 = lp_build_floor(&bld->base, src0);
          tmp0 = lp_build_sub(&bld->base, tmp0, src0);
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 
@@ -722,23 +708,21 @@ emit_instruction(
          src2 = emit_fetch( bld, inst, 2, chan_index );
          tmp0 = lp_build_max(&bld->base, tmp0, src1);
          tmp0 = lp_build_min(&bld->base, tmp0, src2);
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 
    case TGSI_OPCODE_FLR:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp0 = lp_build_floor(&bld->base, tmp0);
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
       }
       break;
 
    case TGSI_OPCODE_ROUND:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp0 = lp_build_round(&bld->base, tmp0);
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
       }
       break;
 
@@ -746,7 +730,7 @@ emit_instruction(
       tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
       tmp0 = lp_build_exp2( &bld->base, tmp0);
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
    }
@@ -755,16 +739,16 @@ emit_instruction(
       tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
       tmp0 = lp_build_log2( &bld->base, tmp0);
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 
    case TGSI_OPCODE_POW:
       src0 = emit_fetch( bld, inst, 0, CHAN_X );
       src1 = emit_fetch( bld, inst, 1, CHAN_X );
-      dst0 = lp_build_pow( &bld->base, src0, src1 );
+      res = lp_build_pow( &bld->base, src0, src1 );
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, dst0 );
+         dst0[chan_index] = res;
       }
       break;
 
@@ -785,7 +769,7 @@ emit_instruction(
          tmp5 = tmp3;
          tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
          tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
-         emit_store( bld, inst, 0, CHAN_X, tmp2);
+         dst0[CHAN_X] = tmp2;
       }
       if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
           IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
@@ -796,25 +780,23 @@ emit_instruction(
          tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
          tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
          tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
-         emit_store( bld, inst, 0, CHAN_Y, tmp3);
+         dst0[CHAN_Y] = tmp3;
       }
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
          tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
          tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
          tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
-         emit_store( bld, inst, 0, CHAN_Z, tmp5);
+         dst0[CHAN_Z] = tmp5;
       }
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
-         tmp0 = bld->base.one;
-         emit_store( bld, inst, 0, CHAN_W, tmp0);
+         dst0[CHAN_W] = bld->base.one;
       }
       break;
 
    case TGSI_OPCODE_ABS:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
       }
       break;
 
@@ -838,7 +820,7 @@ emit_instruction(
       tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
       tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 
@@ -846,7 +828,7 @@ emit_instruction(
       tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
       tmp0 = lp_build_cos( &bld->base, tmp0 );
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 
@@ -896,15 +878,13 @@ emit_instruction(
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
          tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
-         dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
       }
       break;
 
    case TGSI_OPCODE_SFL:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0 = bld->base.zero;
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = bld->base.zero;
       }
       break;
 
@@ -913,8 +893,7 @@ emit_instruction(
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
          tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
-         dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
       }
       break;
 
@@ -922,7 +901,7 @@ emit_instruction(
       tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
       tmp0 = lp_build_sin( &bld->base, tmp0 );
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 
@@ -931,8 +910,7 @@ emit_instruction(
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
          tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
-         dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
       }
       break;
 
@@ -941,20 +919,18 @@ emit_instruction(
          src0 = emit_fetch( bld, inst, 0, chan_index );
          src1 = emit_fetch( bld, inst, 1, chan_index );
          tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
-         dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
       }
       break;
 
    case TGSI_OPCODE_STR:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0 = bld->base.one;
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = bld->base.one;
       }
       break;
 
    case TGSI_OPCODE_TEX:
-      emit_tex( bld, inst, FALSE, FALSE );
+      emit_tex( bld, inst, FALSE, FALSE, dst0 );
       break;
 
    case TGSI_OPCODE_TXD:
@@ -1005,7 +981,7 @@ emit_instruction(
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
          emit_rnd( bld, 0, 0 );
          emit_f2it( bld, 0 );
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = tmp0;
       }
       break;
 #endif
@@ -1033,9 +1009,7 @@ emit_instruction(
    /* TGSI_OPCODE_SGN */
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
-         tmp0 = lp_build_sgn( &bld->base, tmp0 );
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
       }
       break;
 
@@ -1045,34 +1019,29 @@ emit_instruction(
          src1 = emit_fetch( bld, inst, 1, chan_index );
          src2 = emit_fetch( bld, inst, 2, chan_index );
          tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
-         dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
-         emit_store( bld, inst, 0, chan_index, dst0);
+         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
       }
       break;
 
    case TGSI_OPCODE_SCS:
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
          tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
-         tmp0 = lp_build_cos( &bld->base, tmp0 );
-         emit_store( bld, inst, 0, CHAN_X, tmp0);
+         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
       }
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
          tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
-         tmp0 = lp_build_sin( &bld->base, tmp0 );
-         emit_store( bld, inst, 0, CHAN_Y, tmp0);
+         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
       }
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
-         tmp0 = bld->base.zero;
-         emit_store( bld, inst, 0, CHAN_Z, tmp0);
+         dst0[CHAN_Z] = bld->base.zero;
       }
       IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
-         tmp0 = bld->base.one;
-         emit_store( bld, inst, 0, CHAN_W, tmp0);
+         dst0[CHAN_W] = bld->base.one;
       }
       break;
 
    case TGSI_OPCODE_TXB:
-      emit_tex( bld, inst, TRUE, FALSE );
+      emit_tex( bld, inst, TRUE, FALSE, dst0 );
       break;
 
    case TGSI_OPCODE_NRM:
@@ -1131,33 +1100,28 @@ emit_instruction(
 
             /* dst.x = xmm1 * src.x */
             if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
-               tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
-               emit_store(bld, inst, 0, CHAN_X, tmp4);
+               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
             }
 
             /* dst.y = xmm1 * src.y */
             if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
-               tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
-               emit_store(bld, inst, 0, CHAN_Y, tmp5);
+               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
             }
 
             /* dst.z = xmm1 * src.z */
             if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
-               tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
-               emit_store(bld, inst, 0, CHAN_Z, tmp6);
+               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
             }
 
             /* dst.w = xmm1 * src.w */
             if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
-               tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
-               emit_store(bld, inst, 0, CHAN_W, tmp7);
+               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
             }
          }
 
-         /* dst0.w = 1.0 */
+         /* dst.w = 1.0 */
          if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
-            tmp0 = bld->base.one;
-            emit_store(bld, inst, 0, CHAN_W, tmp0);
+            dst0[CHAN_W] = bld->base.one;
          }
       }
       break;
@@ -1177,16 +1141,16 @@ emit_instruction(
       tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
       tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, tmp0);  /* dest[ch] = xmm0 */
+         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
       }
       break;
 
    case TGSI_OPCODE_TXL:
-      emit_tex( bld, inst, TRUE, FALSE );
+      emit_tex( bld, inst, TRUE, FALSE, dst0 );
       break;
 
    case TGSI_OPCODE_TXP:
-      emit_tex( bld, inst, FALSE, TRUE );
+      emit_tex( bld, inst, FALSE, TRUE, dst0 );
       break;
       
    case TGSI_OPCODE_BRK:
@@ -1248,8 +1212,7 @@ emit_instruction(
    case TGSI_OPCODE_CEIL:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp0 = lp_build_ceil(&bld->base, tmp0);
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
       }
       break;
 
@@ -1268,8 +1231,7 @@ emit_instruction(
    case TGSI_OPCODE_TRUNC:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp0 = lp_build_trunc(&bld->base, tmp0);
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
       }
       break;
 
@@ -1346,8 +1308,7 @@ emit_instruction(
    case TGSI_OPCODE_NOISE3:
    case TGSI_OPCODE_NOISE4:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = bld->base.zero;
-         emit_store( bld, inst, 0, chan_index, tmp0);
+         dst0[chan_index] = bld->base.zero;
       }
       break;
 
@@ -1358,6 +1319,12 @@ emit_instruction(
       return 0;
    }
    
+   if(info->num_dst) {
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
+      }
+   }
+
    return 1;
 }
 
@@ -1399,12 +1366,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
-         if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
+         {
             unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
             const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
-	    _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
-	                  info ? info->mnemonic : "<invalid>");
-	 }
+            if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
+               _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+                             info ? info->mnemonic : "<invalid>");
+         }
+
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
-- 
cgit v1.2.3


From 5e13e987da6ce656b08f6c25f8d373c80949e3b0 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 13 Sep 2009 16:12:48 +0100
Subject: llvmpipe: Use const keyword for input array arguments.

---
 src/gallium/drivers/llvmpipe/lp_bld_logic.c   |  2 +-
 src/gallium/drivers/llvmpipe/lp_bld_logic.h   |  2 +-
 src/gallium/drivers/llvmpipe/lp_bld_swizzle.c | 15 ++++++++-------
 src/gallium/drivers/llvmpipe/lp_bld_swizzle.h |  4 ++--
 4 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
index d566780e5d..995a69c0f4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -337,7 +337,7 @@ LLVMValueRef
 lp_build_select_aos(struct lp_build_context *bld,
                     LLVMValueRef a,
                     LLVMValueRef b,
-                    boolean cond[4])
+                    const boolean cond[4])
 {
    const union lp_type type = bld->type;
    const unsigned n = type.length;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
index 29b9e1c45b..9099e0fb5b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -66,7 +66,7 @@ LLVMValueRef
 lp_build_select_aos(struct lp_build_context *bld,
                     LLVMValueRef a,
                     LLVMValueRef b,
-                    boolean cond[4]);
+                    const boolean cond[4]);
 
 
 #endif /* !LP_BLD_LOGIC_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
index ac7eed9379..f35638be44 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
@@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
 LLVMValueRef
 lp_build_swizzle1_aos(struct lp_build_context *bld,
                       LLVMValueRef a,
-                      unsigned char swizzle[4])
+                      const unsigned char swizzle[4])
 {
    const unsigned n = bld->type.length;
    unsigned i, j;
@@ -192,7 +192,7 @@ LLVMValueRef
 lp_build_swizzle2_aos(struct lp_build_context *bld,
                       LLVMValueRef a,
                       LLVMValueRef b,
-                      unsigned char swizzle[4])
+                      const unsigned char swizzle[4])
 {
    const unsigned n = bld->type.length;
    unsigned i, j;
@@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld,
       return lp_build_swizzle1_aos(bld, a, swizzle);
 
    if(a == b) {
-      swizzle[0] %= 4;
-      swizzle[1] %= 4;
-      swizzle[2] %= 4;
-      swizzle[3] %= 4;
-      return lp_build_swizzle1_aos(bld, a, swizzle);
+      unsigned char swizzle1[4];
+      swizzle1[0] = swizzle[0] % 4;
+      swizzle1[1] = swizzle[1] % 4;
+      swizzle1[2] = swizzle[2] % 4;
+      swizzle1[3] = swizzle[3] % 4;
+      return lp_build_swizzle1_aos(bld, a, swizzle1);
    }
 
    if(swizzle[0] % 4 == 0 &&
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index d7dd6a8a60..cb0b6707ec 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
 LLVMValueRef
 lp_build_swizzle1_aos(struct lp_build_context *bld,
                       LLVMValueRef a,
-                      unsigned char swizzle[4]);
+                      const unsigned char swizzle[4]);
 
 
 /**
@@ -85,7 +85,7 @@ LLVMValueRef
 lp_build_swizzle2_aos(struct lp_build_context *bld,
                       LLVMValueRef a,
                       LLVMValueRef b,
-                      unsigned char swizzle[4]);
+                      const unsigned char swizzle[4]);
 
 
 #endif /* !LP_BLD_SWIZZLE_H */
-- 
cgit v1.2.3


From 4b32dd30072b5889ca1efcd5ac8bdbf14b1e2bb5 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 13 Sep 2009 16:13:12 +0100
Subject: llvmpipe: Remove dead references to pipe_winsys.

---
 src/gallium/drivers/llvmpipe/lp_screen.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 125035771e..0ce1a37bd4 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -27,8 +27,6 @@
 
 
 #include "util/u_memory.h"
-#include "util/u_simple_screen.h"
-#include "pipe/internal/p_winsys_screen.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 
@@ -196,8 +194,7 @@ static void
 llvmpipe_destroy_screen( struct pipe_screen *_screen )
 {
    struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
-
-   struct pipe_winsys *winsys = _screen->winsys;
+   struct llvmpipe_winsys *winsys = screen->winsys;
 
    lp_jit_screen_cleanup(screen);
 
-- 
cgit v1.2.3


From 86226d5ea186d3fc6013bc40a341e0c0a891de39 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 13 Sep 2009 16:22:27 +0100
Subject: llvmpipe: Compute derivatives.

---
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 91 ++++++++++++++++++++++++--
 1 file changed, 87 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 98d31f122f..b106ce2317 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -78,6 +78,11 @@
 #define CHAN_Z 2
 #define CHAN_W 3
 
+#define QUAD_TOP_LEFT     0
+#define QUAD_TOP_RIGHT    1
+#define QUAD_BOTTOM_LEFT  2
+#define QUAD_BOTTOM_RIGHT 3
+
 
 struct lp_build_tgsi_soa_context
 {
@@ -97,6 +102,51 @@ struct lp_build_tgsi_soa_context
 };
 
 
+static const unsigned char
+swizzle_left[4] = {
+   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
+   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
+};
+
+static const unsigned char
+swizzle_right[4] = {
+   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
+   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
+};
+
+static const unsigned char
+swizzle_top[4] = {
+   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
+   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
+};
+
+static const unsigned char
+swizzle_bottom[4] = {
+   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
+   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
+};
+
+
+static LLVMValueRef
+emit_ddx(struct lp_build_tgsi_soa_context *bld,
+         LLVMValueRef src)
+{
+   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
+   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
+   return lp_build_sub(&bld->base, src_right, src_left);
+}
+
+
+static LLVMValueRef
+emit_ddy(struct lp_build_tgsi_soa_context *bld,
+         LLVMValueRef src)
+{
+   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
+   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
+   return lp_build_sub(&bld->base, src_top, src_bottom);
+}
+
+
 /**
  * Register fetch.
  */
@@ -184,6 +234,36 @@ emit_fetch(
 }
 
 
+/**
+ * Register fetch with derivatives.
+ */
+static void
+emit_fetch_deriv(
+   struct lp_build_tgsi_soa_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned index,
+   const unsigned chan_index,
+   LLVMValueRef *res,
+   LLVMValueRef *ddx,
+   LLVMValueRef *ddy)
+{
+   LLVMValueRef src;
+
+   src = emit_fetch(bld, inst, index, chan_index);
+
+   if(res)
+      *res = src;
+
+   /* TODO: use interpolation coeffs for inputs */
+
+   if(ddx)
+      *ddx = emit_ddx(bld, src);
+
+   if(ddy)
+      *ddy = emit_ddy(bld, src);
+}
+
+
 /**
  * Register store.
  */
@@ -239,6 +319,7 @@ emit_store(
  * High-level instruction translators.
  */
 
+
 static void
 emit_tex( struct lp_build_tgsi_soa_context *bld,
           const struct tgsi_full_instruction *inst,
@@ -833,13 +914,15 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_DDX:
-      /* FIXME */
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
+      }
       break;
 
    case TGSI_OPCODE_DDY:
-      /* FIXME */
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
+      }
       break;
 
    case TGSI_OPCODE_KILP:
-- 
cgit v1.2.3


From b4835ea03d64261da5a892f9590c9977b06920e8 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 14 Sep 2009 11:05:06 +0100
Subject: llvmpipe: Make lp_type a regular union.

Union not worth the hassle of violating C99 or adding a name to
the structure.
---
 src/gallium/drivers/llvmpipe/lp_bld_alpha.c       |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_alpha.h       |   4 +-
 src/gallium/drivers/llvmpipe/lp_bld_arit.c        |  46 ++++----
 src/gallium/drivers/llvmpipe/lp_bld_arit.h        |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_blend.h       |   6 +-
 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c   |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c   |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_const.c       |  30 +++---
 src/gallium/drivers/llvmpipe/lp_bld_const.h       |  32 +++---
 src/gallium/drivers/llvmpipe/lp_bld_conv.c        |  32 +++---
 src/gallium/drivers/llvmpipe/lp_bld_conv.h        |  14 +--
 src/gallium/drivers/llvmpipe/lp_bld_depth.c       |   8 +-
 src/gallium/drivers/llvmpipe/lp_bld_depth.h       |   6 +-
 src/gallium/drivers/llvmpipe/lp_bld_flow.c        |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_flow.h        |   4 +-
 src/gallium/drivers/llvmpipe/lp_bld_format.h      |   6 +-
 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c  |   6 +-
 src/gallium/drivers/llvmpipe/lp_bld_interp.c      |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_interp.h      |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_logic.c       |   6 +-
 src/gallium/drivers/llvmpipe/lp_bld_logic.h       |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_sample.h      |   4 +-
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c  |   8 +-
 src/gallium/drivers/llvmpipe/lp_bld_swizzle.c     |   6 +-
 src/gallium/drivers/llvmpipe/lp_bld_swizzle.h     |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_tgsi.h        |   6 +-
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c    |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_type.c        |  25 ++---
 src/gallium/drivers/llvmpipe/lp_bld_type.h        | 123 +++++++++++-----------
 src/gallium/drivers/llvmpipe/lp_state_fs.c        |  16 +--
 src/gallium/drivers/llvmpipe/lp_test.h            |  20 ++--
 src/gallium/drivers/llvmpipe/lp_test_blend.c      |  18 ++--
 src/gallium/drivers/llvmpipe/lp_test_conv.c       |  72 ++++++-------
 src/gallium/drivers/llvmpipe/lp_test_main.c       |  20 ++--
 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c    |   2 +-
 src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c |   2 +-
 36 files changed, 270 insertions(+), 272 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
index 49c2f911af..2b4bc5c819 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -45,7 +45,7 @@
 void
 lp_build_alpha_test(LLVMBuilderRef builder,
                     const struct pipe_alpha_state *state,
-                    union lp_type type,
+                    struct lp_type type,
                     struct lp_build_mask_context *mask,
                     LLVMValueRef alpha,
                     LLVMValueRef ref)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
index 9dbcdb4daa..634575670d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -38,14 +38,14 @@
 #include <llvm-c/Core.h>  
 
 struct pipe_alpha_state;
-union lp_type;
+struct lp_type;
 struct lp_build_mask_context;
 
 
 void
 lp_build_alpha_test(LLVMBuilderRef builder,
                     const struct pipe_alpha_state *state,
-                    union lp_type type,
+                    struct lp_type type,
                     struct lp_build_mask_context *mask,
                     LLVMValueRef alpha,
                     LLVMValueRef ref);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index ce3e5f91c0..0b115fc9b0 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -65,7 +65,7 @@ lp_build_min_simple(struct lp_build_context *bld,
                     LLVMValueRef a,
                     LLVMValueRef b)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    const char *intrinsic = NULL;
    LLVMValueRef cond;
 
@@ -113,7 +113,7 @@ lp_build_max_simple(struct lp_build_context *bld,
                     LLVMValueRef a,
                     LLVMValueRef b)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    const char *intrinsic = NULL;
    LLVMValueRef cond;
 
@@ -159,7 +159,7 @@ LLVMValueRef
 lp_build_comp(struct lp_build_context *bld,
               LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
 
    if(a == bld->one)
       return bld->zero;
@@ -188,7 +188,7 @@ lp_build_add(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMValueRef res;
 
    if(a == bld->zero)
@@ -241,7 +241,7 @@ lp_build_sub(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMValueRef res;
 
    if(b == bld->zero)
@@ -405,7 +405,7 @@ lp_build_mul(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
 
    if(a == bld->zero)
       return bld->zero;
@@ -477,7 +477,7 @@ lp_build_div(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
 
    if(a == bld->zero)
       return bld->zero;
@@ -590,7 +590,7 @@ LLVMValueRef
 lp_build_abs(struct lp_build_context *bld,
              LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
 
    if(!type.sign)
@@ -627,7 +627,7 @@ LLVMValueRef
 lp_build_sgn(struct lp_build_context *bld,
              LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    LLVMValueRef cond;
    LLVMValueRef res;
@@ -678,7 +678,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
                      LLVMValueRef a,
                      enum lp_build_round_sse41_mode mode)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    const char *intrinsic;
 
@@ -706,7 +706,7 @@ LLVMValueRef
 lp_build_round(struct lp_build_context *bld,
                LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
 
    assert(type.floating);
 
@@ -724,7 +724,7 @@ LLVMValueRef
 lp_build_floor(struct lp_build_context *bld,
                LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
 
    assert(type.floating);
 
@@ -742,7 +742,7 @@ LLVMValueRef
 lp_build_ceil(struct lp_build_context *bld,
               LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
 
    assert(type.floating);
 
@@ -760,7 +760,7 @@ LLVMValueRef
 lp_build_trunc(struct lp_build_context *bld,
                LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
 
    assert(type.floating);
 
@@ -782,7 +782,7 @@ LLVMValueRef
 lp_build_int(struct lp_build_context *bld,
              LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 
    assert(type.floating);
@@ -805,7 +805,7 @@ LLVMValueRef
 lp_build_sqrt(struct lp_build_context *bld,
               LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    char intrinsic[32];
 
@@ -823,7 +823,7 @@ LLVMValueRef
 lp_build_rcp(struct lp_build_context *bld,
              LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
 
    if(a == bld->zero)
       return bld->undef;
@@ -854,7 +854,7 @@ LLVMValueRef
 lp_build_rsqrt(struct lp_build_context *bld,
                LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
 
    assert(type.floating);
 
@@ -875,7 +875,7 @@ LLVMValueRef
 lp_build_cos(struct lp_build_context *bld,
               LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    char intrinsic[32];
 
@@ -895,7 +895,7 @@ LLVMValueRef
 lp_build_sin(struct lp_build_context *bld,
               LLVMValueRef a)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    char intrinsic[32];
 
@@ -966,7 +966,7 @@ lp_build_polynomial(struct lp_build_context *bld,
                     const double *coeffs,
                     unsigned num_coeffs)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMValueRef res = NULL;
    unsigned i;
 
@@ -1014,7 +1014,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
                      LLVMValueRef *p_frac_part,
                      LLVMValueRef *p_exp2)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
    LLVMValueRef ipart = NULL;
@@ -1107,7 +1107,7 @@ lp_build_log2_approx(struct lp_build_context *bld,
                      LLVMValueRef *p_floor_log2,
                      LLVMValueRef *p_log2)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 5e083b847f..d68a97c4b8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -40,7 +40,7 @@
 #include <llvm-c/Core.h>  
 
 
-union lp_type type;
+struct lp_type type;
 struct lp_build_context;
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
index d19e18846c..da272e549f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -46,7 +46,7 @@
 
 
 struct pipe_blend_state;
-union lp_type;
+struct lp_type;
 struct lp_build_context;
 
 
@@ -74,7 +74,7 @@ lp_build_blend_func(struct lp_build_context *bld,
 LLVMValueRef
 lp_build_blend_aos(LLVMBuilderRef builder,
                    const struct pipe_blend_state *blend,
-                   union lp_type type,
+                   struct lp_type type,
                    LLVMValueRef src,
                    LLVMValueRef dst,
                    LLVMValueRef const_,
@@ -84,7 +84,7 @@ lp_build_blend_aos(LLVMBuilderRef builder,
 void
 lp_build_blend_soa(LLVMBuilderRef builder,
                    const struct pipe_blend_state *blend,
-                   union lp_type type,
+                   struct lp_type type,
                    LLVMValueRef src[4],
                    LLVMValueRef dst[4],
                    LLVMValueRef const_[4],
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index c11a9398f8..d14f468ba9 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -303,7 +303,7 @@ lp_build_blend_func(struct lp_build_context *bld,
 LLVMValueRef
 lp_build_blend_aos(LLVMBuilderRef builder,
                    const struct pipe_blend_state *blend,
-                   union lp_type type,
+                   struct lp_type type,
                    LLVMValueRef src,
                    LLVMValueRef dst,
                    LLVMValueRef const_,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
index b92254a7d6..9511299d55 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -199,7 +199,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
 void
 lp_build_blend_soa(LLVMBuilderRef builder,
                    const struct pipe_blend_state *blend,
-                   union lp_type type,
+                   struct lp_type type,
                    LLVMValueRef src[4],
                    LLVMValueRef dst[4],
                    LLVMValueRef con[4],
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c
index 21487365ea..c8eaa8c394 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c
@@ -42,7 +42,7 @@
 
 
 unsigned
-lp_mantissa(union lp_type type)
+lp_mantissa(struct lp_type type)
 {
    assert(type.floating);
 
@@ -72,7 +72,7 @@ lp_mantissa(union lp_type type)
  * Same as lp_const_scale(), but in terms of shifts.
  */
 unsigned
-lp_const_shift(union lp_type type)
+lp_const_shift(struct lp_type type)
 {
    if(type.floating)
       return 0;
@@ -86,7 +86,7 @@ lp_const_shift(union lp_type type)
 
 
 unsigned
-lp_const_offset(union lp_type type)
+lp_const_offset(struct lp_type type)
 {
    if(type.floating || type.fixed)
       return 0;
@@ -104,7 +104,7 @@ lp_const_offset(union lp_type type)
  * else for the fixed points types and normalized integers.
  */
 double
-lp_const_scale(union lp_type type)
+lp_const_scale(struct lp_type type)
 {
    unsigned long long llscale;
    double dscale;
@@ -122,7 +122,7 @@ lp_const_scale(union lp_type type)
  * Minimum value representable by the type.
  */
 double
-lp_const_min(union lp_type type)
+lp_const_min(struct lp_type type)
 {
    unsigned bits;
 
@@ -158,7 +158,7 @@ lp_const_min(union lp_type type)
  * Maximum value representable by the type.
  */
 double
-lp_const_max(union lp_type type)
+lp_const_max(struct lp_type type)
 {
    unsigned bits;
 
@@ -190,7 +190,7 @@ lp_const_max(union lp_type type)
 
 
 double
-lp_const_eps(union lp_type type)
+lp_const_eps(struct lp_type type)
 {
    if (type.floating) {
       switch(type.width) {
@@ -211,7 +211,7 @@ lp_const_eps(union lp_type type)
 
 
 LLVMValueRef
-lp_build_undef(union lp_type type)
+lp_build_undef(struct lp_type type)
 {
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    return LLVMGetUndef(vec_type);
@@ -219,7 +219,7 @@ lp_build_undef(union lp_type type)
                
 
 LLVMValueRef
-lp_build_zero(union lp_type type)
+lp_build_zero(struct lp_type type)
 {
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    return LLVMConstNull(vec_type);
@@ -227,7 +227,7 @@ lp_build_zero(union lp_type type)
                
 
 LLVMValueRef
-lp_build_one(union lp_type type)
+lp_build_one(struct lp_type type)
 {
    LLVMTypeRef elem_type;
    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
@@ -269,7 +269,7 @@ lp_build_one(union lp_type type)
                
 
 LLVMValueRef
-lp_build_const_scalar(union lp_type type,
+lp_build_const_scalar(struct lp_type type,
                       double val)
 {
    LLVMTypeRef elem_type = lp_build_elem_type(type);
@@ -295,7 +295,7 @@ lp_build_const_scalar(union lp_type type,
 
 
 LLVMValueRef
-lp_build_int_const_scalar(union lp_type type,
+lp_build_int_const_scalar(struct lp_type type,
                           long long val)
 {
    LLVMTypeRef elem_type = lp_build_int_elem_type(type);
@@ -312,7 +312,7 @@ lp_build_int_const_scalar(union lp_type type,
 
 
 LLVMValueRef
-lp_build_const_aos(union lp_type type, 
+lp_build_const_aos(struct lp_type type, 
                    double r, double g, double b, double a, 
                    const unsigned char *swizzle)
 {
@@ -352,8 +352,8 @@ lp_build_const_aos(union lp_type type,
 
 
 LLVMValueRef
-lp_build_const_mask_aos(union lp_type type,
-                        boolean cond[4])
+lp_build_const_mask_aos(struct lp_type type,
+                        const boolean cond[4])
 {
    LLVMTypeRef elem_type = LLVMIntType(type.width);
    LLVMValueRef masks[LP_MAX_VECTOR_LENGTH];
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h
index 1934530ea3..ffb302f736 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h
@@ -42,67 +42,67 @@
 #include <pipe/p_compiler.h>
 
 
-union lp_type type;
+struct lp_type type;
 
 
 unsigned
-lp_mantissa(union lp_type type);
+lp_mantissa(struct lp_type type);
 
 
 unsigned
-lp_const_shift(union lp_type type);
+lp_const_shift(struct lp_type type);
 
 
 unsigned
-lp_const_offset(union lp_type type);
+lp_const_offset(struct lp_type type);
 
 
 double
-lp_const_scale(union lp_type type);
+lp_const_scale(struct lp_type type);
 
 double
-lp_const_min(union lp_type type);
+lp_const_min(struct lp_type type);
 
 
 double
-lp_const_max(union lp_type type);
+lp_const_max(struct lp_type type);
 
 
 double
-lp_const_eps(union lp_type type);
+lp_const_eps(struct lp_type type);
 
 
 LLVMValueRef
-lp_build_undef(union lp_type type);
+lp_build_undef(struct lp_type type);
 
 
 LLVMValueRef
-lp_build_zero(union lp_type type);
+lp_build_zero(struct lp_type type);
 
 
 LLVMValueRef
-lp_build_one(union lp_type type);
+lp_build_one(struct lp_type type);
 
 
 LLVMValueRef
-lp_build_const_scalar(union lp_type type,
+lp_build_const_scalar(struct lp_type type,
                       double val);
 
 
 LLVMValueRef
-lp_build_int_const_scalar(union lp_type type,
+lp_build_int_const_scalar(struct lp_type type,
                           long long val);
 
 
 LLVMValueRef
-lp_build_const_aos(union lp_type type, 
+lp_build_const_aos(struct lp_type type, 
                    double r, double g, double b, double a, 
                    const unsigned char *swizzle);
 
 
 LLVMValueRef
-lp_build_const_mask_aos(union lp_type type,
-                        boolean cond[4]);
+lp_build_const_mask_aos(struct lp_type type,
+                        const boolean cond[4]);
 
 
 #endif /* !LP_BLD_CONST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index c5a71d2c72..186cac70f6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -86,7 +86,7 @@
  */
 LLVMValueRef
 lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
-                                        union lp_type src_type,
+                                        struct lp_type src_type,
                                         unsigned dst_width,
                                         LLVMValueRef src)
 {
@@ -152,7 +152,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
 LLVMValueRef
 lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                 unsigned src_width,
-                                union lp_type dst_type,
+                                struct lp_type dst_type,
                                 LLVMValueRef src)
 {
    LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
@@ -248,8 +248,8 @@ lp_build_const_pack_shuffle(unsigned n)
  */
 static void
 lp_build_expand(LLVMBuilderRef builder,
-               union lp_type src_type,
-               union lp_type dst_type,
+               struct lp_type src_type,
+               struct lp_type dst_type,
                LLVMValueRef src,
                LLVMValueRef *dst, unsigned num_dsts)
 {
@@ -266,7 +266,7 @@ lp_build_expand(LLVMBuilderRef builder,
    dst[0] = src;
 
    while(src_type.width < dst_type.width) {
-      union lp_type new_type = src_type;
+      struct lp_type new_type = src_type;
       LLVMTypeRef new_vec_type;
 
       new_type.width *= 2;
@@ -314,8 +314,8 @@ lp_build_expand(LLVMBuilderRef builder,
  */
 static LLVMValueRef
 lp_build_pack2(LLVMBuilderRef builder,
-               union lp_type src_type,
-               union lp_type dst_type,
+               struct lp_type src_type,
+               struct lp_type dst_type,
                boolean clamped,
                LLVMValueRef lo,
                LLVMValueRef hi)
@@ -392,8 +392,8 @@ lp_build_pack2(LLVMBuilderRef builder,
  */
 static LLVMValueRef
 lp_build_pack(LLVMBuilderRef builder,
-              union lp_type src_type,
-              union lp_type dst_type,
+              struct lp_type src_type,
+              struct lp_type dst_type,
               boolean clamped,
               const LLVMValueRef *src, unsigned num_srcs)
 {
@@ -410,7 +410,7 @@ lp_build_pack(LLVMBuilderRef builder,
       tmp[i] = src[i];
 
    while(src_type.width > dst_type.width) {
-      union lp_type new_type = src_type;
+      struct lp_type new_type = src_type;
 
       new_type.width /= 2;
       new_type.length *= 2;
@@ -442,12 +442,12 @@ lp_build_pack(LLVMBuilderRef builder,
  */
 void
 lp_build_conv(LLVMBuilderRef builder,
-              union lp_type src_type,
-              union lp_type dst_type,
+              struct lp_type src_type,
+              struct lp_type dst_type,
               const LLVMValueRef *src, unsigned num_srcs,
               LLVMValueRef *dst, unsigned num_dsts)
 {
-   union lp_type tmp_type;
+   struct lp_type tmp_type;
    LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
    unsigned num_tmps;
    unsigned i;
@@ -470,7 +470,7 @@ lp_build_conv(LLVMBuilderRef builder,
     * Clamp if necessary
     */
 
-   if(src_type.value != dst_type.value) {
+   if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
       struct lp_build_context bld;
       double src_min = lp_const_min(src_type);
       double dst_min = lp_const_min(dst_type);
@@ -656,8 +656,8 @@ lp_build_conv(LLVMBuilderRef builder,
  */
 void
 lp_build_conv_mask(LLVMBuilderRef builder,
-                   union lp_type src_type,
-                   union lp_type dst_type,
+                   struct lp_type src_type,
+                   struct lp_type dst_type,
                    const LLVMValueRef *src, unsigned num_srcs,
                    LLVMValueRef *dst, unsigned num_dsts)
 {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
index 05c1ef2a10..ca378804d2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
@@ -40,33 +40,33 @@
 #include <llvm-c/Core.h>  
 
 
-union lp_type type;
+struct lp_type type;
 
 
 LLVMValueRef
 lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
-                                        union lp_type src_type,
+                                        struct lp_type src_type,
                                         unsigned dst_width,
                                         LLVMValueRef src);
 
 LLVMValueRef
 lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                 unsigned src_width,
-                                union lp_type dst_type,
+                                struct lp_type dst_type,
                                 LLVMValueRef src);
 
 
 void
 lp_build_conv(LLVMBuilderRef builder,
-              union lp_type src_type,
-              union lp_type dst_type,
+              struct lp_type src_type,
+              struct lp_type dst_type,
               const LLVMValueRef *srcs, unsigned num_srcs,
               LLVMValueRef *dsts, unsigned num_dsts);
 
 void
 lp_build_conv_mask(LLVMBuilderRef builder,
-                   union lp_type src_type,
-                   union lp_type dst_type,
+                   struct lp_type src_type,
+                   struct lp_type dst_type,
                    const LLVMValueRef *src, unsigned num_srcs,
                    LLVMValueRef *dst, unsigned num_dsts);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index e5fe81193f..21c665c4d4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -71,11 +71,11 @@
 /**
  * Return a type appropriate for depth/stencil testing.
  */
-union lp_type
+struct lp_type
 lp_depth_type(const struct util_format_description *format_desc,
               unsigned length)
 {
-   union lp_type type;
+   struct lp_type type;
    unsigned swizzle;
 
    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
@@ -85,7 +85,7 @@ lp_depth_type(const struct util_format_description *format_desc,
    swizzle = format_desc->swizzle[0];
    assert(swizzle < 4);
 
-   type.value = 0;
+   memset(&type, 0, sizeof type);
    type.width = format_desc->block.bits;
 
    if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
@@ -114,7 +114,7 @@ lp_depth_type(const struct util_format_description *format_desc,
 void
 lp_build_depth_test(LLVMBuilderRef builder,
                     const struct pipe_depth_state *state,
-                    union lp_type type,
+                    struct lp_type type,
                     const struct util_format_description *format_desc,
                     struct lp_build_mask_context *mask,
                     LLVMValueRef src,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 5d2e042fcc..79d6981bb5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -41,11 +41,11 @@
  
 struct pipe_depth_state;
 struct util_format_description;
-union lp_type;
+struct lp_type;
 struct lp_build_mask_context;
 
 
-union lp_type
+struct lp_type
 lp_depth_type(const struct util_format_description *format_desc,
               unsigned length);
 
@@ -53,7 +53,7 @@ lp_depth_type(const struct util_format_description *format_desc,
 void
 lp_build_depth_test(LLVMBuilderRef builder,
                     const struct pipe_depth_state *state,
-                    union lp_type type,
+                    struct lp_type type,
                     const struct util_format_description *format_desc,
                     struct lp_build_mask_context *mask,
                     LLVMValueRef src,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index 69ed014ff3..dcc25fbff8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -405,7 +405,7 @@ lp_build_mask_check(struct lp_build_mask_context *mask)
 void
 lp_build_mask_begin(struct lp_build_mask_context *mask,
                     struct lp_build_flow_context *flow,
-                    union lp_type type,
+                    struct lp_type type,
                     LLVMValueRef value)
 {
    memset(mask, 0, sizeof *mask);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
index 9d76e3064d..e61999ff06 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
@@ -38,7 +38,7 @@
 #include <llvm-c/Core.h>  
 
 
-union lp_type;
+struct lp_type;
 
 
 struct lp_build_flow_context;
@@ -84,7 +84,7 @@ struct lp_build_mask_context
 void
 lp_build_mask_begin(struct lp_build_mask_context *mask,
                     struct lp_build_flow_context *flow,
-                    union lp_type type,
+                    struct lp_type type,
                     LLVMValueRef value);
 
 /**
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 5ee0656093..6d3f692619 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -39,7 +39,7 @@
 #include "pipe/p_format.h"
 
 struct util_format_description;
-union lp_type;
+struct lp_type;
 
 
 /**
@@ -103,7 +103,7 @@ lp_build_gather(LLVMBuilderRef builder,
 void
 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                          const struct util_format_description *format_desc,
-                         union lp_type type,
+                         struct lp_type type,
                          LLVMValueRef packed,
                          LLVMValueRef *rgba);
 
@@ -111,7 +111,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
 void
 lp_build_load_rgba_soa(LLVMBuilderRef builder,
                        const struct util_format_description *format_desc,
-                       union lp_type type,
+                       struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offsets,
                        LLVMValueRef *rgba);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
index 569e8d10a3..b5ff434e1a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -84,7 +84,7 @@ lp_build_gather(LLVMBuilderRef builder,
 
 
 static LLVMValueRef
-lp_build_format_swizzle(union lp_type type,
+lp_build_format_swizzle(struct lp_type type,
                         const LLVMValueRef *inputs,
                         enum util_format_swizzle swizzle)
 {
@@ -110,7 +110,7 @@ lp_build_format_swizzle(union lp_type type,
 void
 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                          const struct util_format_description *format_desc,
-                         union lp_type type,
+                         struct lp_type type,
                          LLVMValueRef packed,
                          LLVMValueRef *rgba)
 {
@@ -188,7 +188,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
 void
 lp_build_load_rgba_soa(LLVMBuilderRef builder,
                        const struct util_format_description *format_desc,
-                       union lp_type type,
+                       struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offsets,
                        LLVMValueRef *rgba)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index cfe20a0d75..338dbca6d1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -292,7 +292,7 @@ void
 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                          const struct tgsi_token *tokens,
                          LLVMBuilderRef builder,
-                         union lp_type type,
+                         struct lp_type type,
                          LLVMValueRef a0_ptr,
                          LLVMValueRef dadx_ptr,
                          LLVMValueRef dady_ptr,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 9194f6233a..9c57a10879 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -83,7 +83,7 @@ void
 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                          const struct tgsi_token *tokens,
                          LLVMBuilderRef builder,
-                         union lp_type type,
+                         struct lp_type type,
                          LLVMValueRef a0_ptr,
                          LLVMValueRef dadx_ptr,
                          LLVMValueRef dady_ptr,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
index 995a69c0f4..6b6f820769 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -45,7 +45,7 @@ lp_build_cmp(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
@@ -301,7 +301,7 @@ lp_build_select(struct lp_build_context *bld,
                 LLVMValueRef a,
                 LLVMValueRef b)
 {
-   union lp_type type = bld->type;
+   struct lp_type type = bld->type;
    LLVMValueRef res;
 
    if(a == b)
@@ -339,7 +339,7 @@ lp_build_select_aos(struct lp_build_context *bld,
                     LLVMValueRef b,
                     const boolean cond[4])
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    const unsigned n = type.length;
    unsigned i, j;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
index 9099e0fb5b..a4ee7723b5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -42,7 +42,7 @@
 #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
 
 
-union lp_type type;
+struct lp_type type;
 struct lp_build_context;
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
index 6f565af76d..403d0e4836 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -40,7 +40,7 @@
 
 struct pipe_texture;
 struct pipe_sampler_state;
-union lp_type;
+struct lp_type;
 
 
 /**
@@ -123,7 +123,7 @@ void
 lp_build_sample_soa(LLVMBuilderRef builder,
                     const struct lp_sampler_static_state *static_state,
                     struct lp_sampler_dynamic_state *dynamic_state,
-                    union lp_type fp_type,
+                    struct lp_type fp_type,
                     unsigned unit,
                     unsigned num_coords,
                     const LLVMValueRef *coords,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 3ca25b0e76..08b1dc10f3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -97,15 +97,15 @@ struct lp_build_sample_context
    const struct util_format_description *format_desc;
 
    /** Incoming coordinates type and build context */
-   union lp_type coord_type;
+   struct lp_type coord_type;
    struct lp_build_context coord_bld;
 
    /** Integer coordinates */
-   union lp_type int_coord_type;
+   struct lp_type int_coord_type;
    struct lp_build_context int_coord_bld;
 
    /** Output texels type and build context */
-   union lp_type texel_type;
+   struct lp_type texel_type;
    struct lp_build_context texel_bld;
 };
 
@@ -337,7 +337,7 @@ void
 lp_build_sample_soa(LLVMBuilderRef builder,
                     const struct lp_sampler_static_state *static_state,
                     struct lp_sampler_dynamic_state *dynamic_state,
-                    union lp_type type,
+                    struct lp_type type,
                     unsigned unit,
                     unsigned num_coords,
                     const LLVMValueRef *coords,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
index f35638be44..64e81f7b1f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
@@ -64,7 +64,7 @@ LLVMValueRef
 lp_build_broadcast_scalar(struct lp_build_context *bld,
                           LLVMValueRef scalar)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    LLVMValueRef res;
    unsigned i;
 
@@ -83,7 +83,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
                        LLVMValueRef a,
                        unsigned channel)
 {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
    const unsigned n = type.length;
    unsigned i, j;
 
@@ -115,7 +115,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
        *   YY00 YY00 .... YY00
        *   YYYY YYYY .... YYYY  <= output
        */
-      union lp_type type4 = type;
+      struct lp_type type4 = type;
       const char shifts[4][2] = {
          { 1,  2},
          {-1,  2},
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index cb0b6707ec..1f6da80448 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -40,7 +40,7 @@
 #include <llvm-c/Core.h>  
 
 
-union lp_type type;
+struct lp_type type;
 struct lp_build_context;
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 10c251c416..eddb7a83fa 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -39,7 +39,7 @@
 
 
 struct tgsi_token;
-union lp_type;
+struct lp_type;
 struct lp_build_context;
 struct lp_build_mask_context;
 
@@ -60,7 +60,7 @@ struct lp_build_sampler_soa
    void
    (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler,
                         LLVMBuilderRef builder,
-                        union lp_type type,
+                        struct lp_type type,
                         unsigned unit,
                         unsigned num_coords,
                         const LLVMValueRef *coords,
@@ -72,7 +72,7 @@ struct lp_build_sampler_soa
 void
 lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const struct tgsi_token *tokens,
-                  union lp_type type,
+                  struct lp_type type,
                   struct lp_build_mask_context *mask,
                   LLVMValueRef consts_ptr,
                   const LLVMValueRef *pos,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index b106ce2317..adc81569ed 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -1415,7 +1415,7 @@ emit_instruction(
 void
 lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const struct tgsi_token *tokens,
-                  union lp_type type,
+                  struct lp_type type,
                   struct lp_build_mask_context *mask,
                   LLVMValueRef consts_ptr,
                   const LLVMValueRef *pos,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
index 577644b7ab..606243d6c5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -33,7 +33,7 @@
 
 
 LLVMTypeRef
-lp_build_elem_type(union lp_type type)
+lp_build_elem_type(struct lp_type type)
 {
    if (type.floating) {
       switch(type.width) {
@@ -55,7 +55,7 @@ lp_build_elem_type(union lp_type type)
 
 
 LLVMTypeRef
-lp_build_vec_type(union lp_type type)
+lp_build_vec_type(struct lp_type type)
 {
    LLVMTypeRef elem_type = lp_build_elem_type(type);
    return LLVMVectorType(elem_type, type.length);
@@ -69,7 +69,7 @@ lp_build_vec_type(union lp_type type)
  * type and check for identity.
  */
 boolean
-lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) 
+lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type) 
 {
    LLVMTypeKind elem_kind;
 
@@ -107,7 +107,7 @@ lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
 
 
 boolean
-lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) 
+lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) 
 {
    LLVMTypeRef elem_type;
 
@@ -128,7 +128,7 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
 
 
 boolean
-lp_check_value(union lp_type type, LLVMValueRef val) 
+lp_check_value(struct lp_type type, LLVMValueRef val) 
 {
    LLVMTypeRef vec_type;
 
@@ -143,25 +143,26 @@ lp_check_value(union lp_type type, LLVMValueRef val)
 
 
 LLVMTypeRef
-lp_build_int_elem_type(union lp_type type)
+lp_build_int_elem_type(struct lp_type type)
 {
    return LLVMIntType(type.width);
 }
 
 
 LLVMTypeRef
-lp_build_int_vec_type(union lp_type type)
+lp_build_int_vec_type(struct lp_type type)
 {
    LLVMTypeRef elem_type = lp_build_int_elem_type(type);
    return LLVMVectorType(elem_type, type.length);
 }
 
 
-union lp_type
-lp_int_type(union lp_type type)
+struct lp_type
+lp_int_type(struct lp_type type)
 {
-   union lp_type int_type;
-   int_type.value = 0;
+   struct lp_type int_type;
+
+   memset(&int_type, 0, sizeof int_type);
    int_type.width = type.width;
    int_type.length = type.length;
    return int_type;
@@ -171,7 +172,7 @@ lp_int_type(union lp_type type)
 void
 lp_build_context_init(struct lp_build_context *bld,
                       LLVMBuilderRef builder,
-                      union lp_type type)
+                      struct lp_type type)
 {
    bld->builder = builder;
    bld->type = type;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index 9933e0b45c..ee5ca3483c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -56,58 +56,55 @@
  * on the types used for intermediate computations, such as signed vs unsigned,
  * normalized values, or fixed point.
  */
-union lp_type {
-   struct {
-      /** 
-       * Floating-point. Cannot be used with fixed. Integer numbers are
-       * represented by this zero.
-       */
-      unsigned floating:1;
-
-      /** 
-       * Fixed-point. Cannot be used with floating. Integer numbers are
-       * represented by this zero.
-       */
-      unsigned fixed:1;
-      
-      /** 
-       * Whether it can represent negative values or not.
-       *
-       * If this is not set for floating point, it means that all values are
-       * assumed to be positive.
-       */
-      unsigned sign:1;
-
-      /**
-       * Whether values are normalized to fit [0, 1] interval, or [-1, 1]
-       * interval for signed types.
-       *
-       * For integer types it means the representable integer range should be
-       * interpreted as the interval above.
-       *
-       * For floating and fixed point formats it means the values should be
-       * clamped to the interval above.
-       */
-      unsigned norm:1;
-
-      /**
-       * Element width.
-       *
-       * For fixed point values, the fixed point is assumed to be at half the
-       * width.
-       */
-      unsigned width:14;
-
-      /** 
-       * Vector length.
-       *
-       * width*length should be a power of two greater or equal to eight.
-       *
-       * @sa LP_MAX_VECTOR_LENGTH
-       */
-      unsigned length:14;
-   };
-   uint32_t value;
+struct lp_type {
+   /**
+    * Floating-point. Cannot be used with fixed. Integer numbers are
+    * represented by this zero.
+    */
+   unsigned floating:1;
+
+   /**
+    * Fixed-point. Cannot be used with floating. Integer numbers are
+    * represented by this zero.
+    */
+   unsigned fixed:1;
+
+   /**
+    * Whether it can represent negative values or not.
+    *
+    * If this is not set for floating point, it means that all values are
+    * assumed to be positive.
+    */
+   unsigned sign:1;
+
+   /**
+    * Whether values are normalized to fit [0, 1] interval, or [-1, 1]
+    * interval for signed types.
+    *
+    * For integer types it means the representable integer range should be
+    * interpreted as the interval above.
+    *
+    * For floating and fixed point formats it means the values should be
+    * clamped to the interval above.
+    */
+   unsigned norm:1;
+
+   /**
+    * Element width.
+    *
+    * For fixed point values, the fixed point is assumed to be at half the
+    * width.
+    */
+   unsigned width:14;
+
+   /**
+    * Vector length.
+    *
+    * width*length should be a power of two greater or equal to eight.
+    *
+    * @sa LP_MAX_VECTOR_LENGTH
+    */
+   unsigned length:14;
 };
 
 
@@ -124,7 +121,7 @@ struct lp_build_context
     * This not only describes the input/output LLVM types, but also whether
     * to normalize/clamp the results.
     */
-   union lp_type type;
+   struct lp_type type;
 
    /** Same as lp_build_undef(type) */
    LLVMValueRef undef;
@@ -138,41 +135,41 @@ struct lp_build_context
 
 
 LLVMTypeRef
-lp_build_elem_type(union lp_type type);
+lp_build_elem_type(struct lp_type type);
 
 
 LLVMTypeRef
-lp_build_vec_type(union lp_type type);
+lp_build_vec_type(struct lp_type type);
 
 
 boolean
-lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type);
+lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type);
 
 
 boolean
-lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type);
+lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type);
 
 
 boolean
-lp_check_value(union lp_type type, LLVMValueRef val);
+lp_check_value(struct lp_type type, LLVMValueRef val);
 
 
 LLVMTypeRef
-lp_build_int_elem_type(union lp_type type);
+lp_build_int_elem_type(struct lp_type type);
 
 
 LLVMTypeRef
-lp_build_int_vec_type(union lp_type type);
+lp_build_int_vec_type(struct lp_type type);
 
 
-union lp_type
-lp_int_type(union lp_type type);
+struct lp_type
+lp_int_type(struct lp_type type);
 
 
 void
 lp_build_context_init(struct lp_build_context *bld,
                       LLVMBuilderRef builder,
-                      union lp_type type);
+                      struct lp_type type);
 
 
 #endif /* !LP_BLD_TYPE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 618cf1ffb8..9faed5a0b1 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -133,13 +133,13 @@ generate_pos0(LLVMBuilderRef builder,
 static void
 generate_depth(LLVMBuilderRef builder,
                const struct lp_fragment_shader_variant_key *key,
-               union lp_type src_type,
+               struct lp_type src_type,
                struct lp_build_mask_context *mask,
                LLVMValueRef src,
                LLVMValueRef dst_ptr)
 {
    const struct util_format_description *format_desc;
-   union lp_type dst_type;
+   struct lp_type dst_type;
 
    if(!key->depth.enabled)
       return;
@@ -181,7 +181,7 @@ generate_fs(struct llvmpipe_context *lp,
             struct lp_fragment_shader *shader,
             const struct lp_fragment_shader_variant_key *key,
             LLVMBuilderRef builder,
-            union lp_type type,
+            struct lp_type type,
             LLVMValueRef context_ptr,
             unsigned i,
             const struct lp_build_interp_soa_context *interp,
@@ -299,7 +299,7 @@ generate_fs(struct llvmpipe_context *lp,
 static void
 generate_blend(const struct pipe_blend_state *blend,
                LLVMBuilderRef builder,
-               union lp_type type,
+               struct lp_type type,
                LLVMValueRef context_ptr,
                LLVMValueRef mask,
                LLVMValueRef *src,
@@ -364,8 +364,8 @@ generate_fragment(struct llvmpipe_context *lp,
 {
    struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
    struct lp_fragment_shader_variant *variant;
-   union lp_type fs_type;
-   union lp_type blend_type;
+   struct lp_type fs_type;
+   struct lp_type blend_type;
    LLVMTypeRef fs_elem_type;
    LLVMTypeRef fs_vec_type;
    LLVMTypeRef fs_int_vec_type;
@@ -431,7 +431,7 @@ generate_fragment(struct llvmpipe_context *lp,
    /* TODO: actually pick these based on the fs and color buffer
     * characteristics. */
 
-   fs_type.value = 0;
+   memset(&fs_type, 0, sizeof fs_type);
    fs_type.floating = TRUE; /* floating point values */
    fs_type.sign = TRUE;     /* values are signed */
    fs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
@@ -439,7 +439,7 @@ generate_fragment(struct llvmpipe_context *lp,
    fs_type.length = 4;      /* 4 element per vector */
    num_fs = 4;
 
-   blend_type.value = 0;
+   memset(&blend_type, 0, sizeof blend_type);
    blend_type.floating = FALSE; /* values are integers */
    blend_type.sign = FALSE;     /* values are unsigned */
    blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index 69aaae26e0..a88e110c66 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -86,43 +86,43 @@ random_float(void);
 
 
 void
-dump_type(FILE *fp, union lp_type type);
+dump_type(FILE *fp, struct lp_type type);
 
 
 double
-read_elem(union lp_type type, const void *src, unsigned index);
+read_elem(struct lp_type type, const void *src, unsigned index);
 
 
 void
-write_elem(union lp_type type, void *dst, unsigned index, double src);
+write_elem(struct lp_type type, void *dst, unsigned index, double src);
 
 
 void
-random_elem(union lp_type type, void *dst, unsigned index);
+random_elem(struct lp_type type, void *dst, unsigned index);
 
 
 void
-read_vec(union lp_type type, const void *src, double *dst);
+read_vec(struct lp_type type, const void *src, double *dst);
 
 
 void
-write_vec(union lp_type type, void *dst, const double *src);
+write_vec(struct lp_type type, void *dst, const double *src);
 
 
 void
-random_vec(union lp_type type, void *dst);
+random_vec(struct lp_type type, void *dst);
 
 
 boolean
-compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps);
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps);
 
 
 boolean
-compare_vec(union lp_type type, const void *res, const void *ref);
+compare_vec(struct lp_type type, const void *res, const void *ref);
 
 
 void
-dump_vec(FILE *fp, union lp_type type, const void *src);
+dump_vec(FILE *fp, struct lp_type type, const void *src);
 
 
 #endif /* !LP_TEST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 8dfad468e3..1b57ea2a5c 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -80,7 +80,7 @@ static void
 write_tsv_row(FILE *fp,
               const struct pipe_blend_state *blend,
               enum vector_mode mode,
-              union lp_type type,
+              struct lp_type type,
               double cycles,
               boolean success)
 {
@@ -125,7 +125,7 @@ static void
 dump_blend_type(FILE *fp,
                 const struct pipe_blend_state *blend,
                 enum vector_mode mode,
-                union lp_type type)
+                struct lp_type type)
 {
    fprintf(fp, "%s", mode ? "soa" : "aos");
 
@@ -153,7 +153,7 @@ static LLVMValueRef
 add_blend_test(LLVMModuleRef module,
                const struct pipe_blend_state *blend,
                enum vector_mode mode,
-               union lp_type type)
+               struct lp_type type)
 {
    LLVMTypeRef ret_type;
    LLVMTypeRef vec_type;
@@ -467,7 +467,7 @@ test_one(unsigned verbose,
          FILE *fp,
          const struct pipe_blend_state *blend,
          enum vector_mode mode,
-         union lp_type type)
+         struct lp_type type)
 {
    LLVMModuleRef module = NULL;
    LLVMValueRef func = NULL;
@@ -765,10 +765,10 @@ blend_funcs[] = {
 };
 
 
-const union lp_type blend_types[] = {
+const struct lp_type blend_types[] = {
    /* float, fixed,  sign,  norm, width, len */
-   {{  TRUE, FALSE, FALSE,  TRUE,    32,   4 }}, /* f32 x 4 */
-   {{ FALSE, FALSE, FALSE,  TRUE,     8,  16 }}, /* u8n x 16 */
+   {   TRUE, FALSE, FALSE,  TRUE,    32,   4 }, /* f32 x 4 */
+   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
 };
 
 
@@ -788,7 +788,7 @@ test_all(unsigned verbose, FILE *fp)
    const unsigned *alpha_dst_factor;
    struct pipe_blend_state blend;
    enum vector_mode mode;
-   const union lp_type *type;
+   const struct lp_type *type;
    bool success = TRUE;
 
    for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
@@ -841,7 +841,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
    const unsigned *alpha_dst_factor;
    struct pipe_blend_state blend;
    enum vector_mode mode;
-   const union lp_type *type;
+   const struct lp_type *type;
    unsigned long i;
    bool success = TRUE;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index e6489834af..ae2697143f 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -59,8 +59,8 @@ write_tsv_header(FILE *fp)
 
 static void
 write_tsv_row(FILE *fp,
-              union lp_type src_type,
-              union lp_type dst_type,
+              struct lp_type src_type,
+              struct lp_type dst_type,
               double cycles,
               boolean success)
 {
@@ -80,8 +80,8 @@ write_tsv_row(FILE *fp,
 
 static void
 dump_conv_types(FILE *fp,
-               union lp_type src_type,
-               union lp_type dst_type)
+               struct lp_type src_type,
+               struct lp_type dst_type)
 {
    fprintf(fp, "src_type=");
    dump_type(fp, src_type);
@@ -96,8 +96,8 @@ dump_conv_types(FILE *fp,
 
 static LLVMValueRef
 add_conv_test(LLVMModuleRef module,
-              union lp_type src_type, unsigned num_srcs,
-              union lp_type dst_type, unsigned num_dsts)
+              struct lp_type src_type, unsigned num_srcs,
+              struct lp_type dst_type, unsigned num_dsts)
 {
    LLVMTypeRef args[2];
    LLVMValueRef func;
@@ -145,8 +145,8 @@ add_conv_test(LLVMModuleRef module,
 static boolean
 test_one(unsigned verbose,
          FILE *fp,
-         union lp_type src_type,
-         union lp_type dst_type)
+         struct lp_type src_type,
+         struct lp_type dst_type)
 {
    LLVMModuleRef module = NULL;
    LLVMValueRef func = NULL;
@@ -343,35 +343,35 @@ test_one(unsigned verbose,
 }
 
 
-const union lp_type conv_types[] = {
+const struct lp_type conv_types[] = {
    /* float, fixed,  sign,  norm, width, len */
 
-   {{  TRUE, FALSE,  TRUE,  TRUE,    32,   4 }},
-   {{  TRUE, FALSE,  TRUE, FALSE,    32,   4 }},
-   {{  TRUE, FALSE, FALSE,  TRUE,    32,   4 }},
-   {{  TRUE, FALSE, FALSE, FALSE,    32,   4 }},
+   {   TRUE, FALSE,  TRUE,  TRUE,    32,   4 },
+   {   TRUE, FALSE,  TRUE, FALSE,    32,   4 },
+   {   TRUE, FALSE, FALSE,  TRUE,    32,   4 },
+   {   TRUE, FALSE, FALSE, FALSE,    32,   4 },
 
    /* TODO: test fixed formats too */
 
-   {{ FALSE, FALSE,  TRUE,  TRUE,    16,   8 }},
-   {{ FALSE, FALSE,  TRUE, FALSE,    16,   8 }},
-   {{ FALSE, FALSE, FALSE,  TRUE,    16,   8 }},
-   {{ FALSE, FALSE, FALSE, FALSE,    16,   8 }},
-
-   {{ FALSE, FALSE,  TRUE,  TRUE,    32,   4 }},
-   {{ FALSE, FALSE,  TRUE, FALSE,    32,   4 }},
-   {{ FALSE, FALSE, FALSE,  TRUE,    32,   4 }},
-   {{ FALSE, FALSE, FALSE, FALSE,    32,   4 }},
-
-   {{ FALSE, FALSE,  TRUE,  TRUE,    16,   8 }},
-   {{ FALSE, FALSE,  TRUE, FALSE,    16,   8 }},
-   {{ FALSE, FALSE, FALSE,  TRUE,    16,   8 }},
-   {{ FALSE, FALSE, FALSE, FALSE,    16,   8 }},
-
-   {{ FALSE, FALSE,  TRUE,  TRUE,     8,  16 }},
-   {{ FALSE, FALSE,  TRUE, FALSE,     8,  16 }},
-   {{ FALSE, FALSE, FALSE,  TRUE,     8,  16 }},
-   {{ FALSE, FALSE, FALSE, FALSE,     8,  16 }},
+   {  FALSE, FALSE,  TRUE,  TRUE,    16,   8 },
+   {  FALSE, FALSE,  TRUE, FALSE,    16,   8 },
+   {  FALSE, FALSE, FALSE,  TRUE,    16,   8 },
+   {  FALSE, FALSE, FALSE, FALSE,    16,   8 },
+
+   {  FALSE, FALSE,  TRUE,  TRUE,    32,   4 },
+   {  FALSE, FALSE,  TRUE, FALSE,    32,   4 },
+   {  FALSE, FALSE, FALSE,  TRUE,    32,   4 },
+   {  FALSE, FALSE, FALSE, FALSE,    32,   4 },
+
+   {  FALSE, FALSE,  TRUE,  TRUE,    16,   8 },
+   {  FALSE, FALSE,  TRUE, FALSE,    16,   8 },
+   {  FALSE, FALSE, FALSE,  TRUE,    16,   8 },
+   {  FALSE, FALSE, FALSE, FALSE,    16,   8 },
+
+   {  FALSE, FALSE,  TRUE,  TRUE,     8,  16 },
+   {  FALSE, FALSE,  TRUE, FALSE,     8,  16 },
+   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 },
+   {  FALSE, FALSE, FALSE, FALSE,     8,  16 },
 };
 
 
@@ -381,8 +381,8 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);
 boolean
 test_all(unsigned verbose, FILE *fp)
 {
-   const union lp_type *src_type;
-   const union lp_type *dst_type;
+   const struct lp_type *src_type;
+   const struct lp_type *dst_type;
    bool success = TRUE;
 
    for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) {
@@ -407,8 +407,8 @@ test_all(unsigned verbose, FILE *fp)
 boolean
 test_some(unsigned verbose, FILE *fp, unsigned long n)
 {
-   const union lp_type *src_type;
-   const union lp_type *dst_type;
+   const struct lp_type *src_type;
+   const struct lp_type *dst_type;
    unsigned long i;
    bool success = TRUE;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index 49213fb4f0..c3bb8fadf7 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -40,7 +40,7 @@
 
 void
 dump_type(FILE *fp,
-          union lp_type type)
+          struct lp_type type)
 {
    fprintf(fp, "%s%s%u%sx%u",
            type.sign ? (type.floating || type.fixed ? "" : "s") : "u",
@@ -52,7 +52,7 @@ dump_type(FILE *fp,
 
 
 double
-read_elem(union lp_type type, const void *src, unsigned index)
+read_elem(struct lp_type type, const void *src, unsigned index)
 {
    double scale = lp_const_scale(type);
    double value;
@@ -115,7 +115,7 @@ read_elem(union lp_type type, const void *src, unsigned index)
 
 
 void
-write_elem(union lp_type type, void *dst, unsigned index, double value)
+write_elem(struct lp_type type, void *dst, unsigned index, double value)
 {
    assert(index < type.length);
    if(!type.sign && value < 0.0)
@@ -184,7 +184,7 @@ write_elem(union lp_type type, void *dst, unsigned index, double value)
 
 
 void
-random_elem(union lp_type type, void *dst, unsigned index)
+random_elem(struct lp_type type, void *dst, unsigned index)
 {
    double value;
    assert(index < type.length);
@@ -209,7 +209,7 @@ random_elem(union lp_type type, void *dst, unsigned index)
 
 
 void
-read_vec(union lp_type type, const void *src, double *dst)
+read_vec(struct lp_type type, const void *src, double *dst)
 {
    unsigned i;
    for (i = 0; i < type.length; ++i)
@@ -218,7 +218,7 @@ read_vec(union lp_type type, const void *src, double *dst)
 
 
 void
-write_vec(union lp_type type, void *dst, const double *src)
+write_vec(struct lp_type type, void *dst, const double *src)
 {
    unsigned i;
    for (i = 0; i < type.length; ++i)
@@ -234,7 +234,7 @@ random_float(void)
 
 
 void
-random_vec(union lp_type type, void *dst)
+random_vec(struct lp_type type, void *dst)
 {
    unsigned i;
    for (i = 0; i < type.length; ++i)
@@ -243,7 +243,7 @@ random_vec(union lp_type type, void *dst)
 
 
 boolean
-compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps)
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps)
 {
    unsigned i;
    for (i = 0; i < type.length; ++i) {
@@ -259,7 +259,7 @@ compare_vec_with_eps(union lp_type type, const void *res, const void *ref, doubl
 
 
 boolean
-compare_vec(union lp_type type, const void *res, const void *ref)
+compare_vec(struct lp_type type, const void *res, const void *ref)
 {
    double eps = lp_const_eps(type);
    return compare_vec_with_eps(type, res, ref, eps);
@@ -267,7 +267,7 @@ compare_vec(union lp_type type, const void *res, const void *ref)
 
 
 void
-dump_vec(FILE *fp, union lp_type type, const void *src)
+dump_vec(FILE *fp, struct lp_type type, const void *src)
 {
    unsigned i;
    for (i = 0; i < type.length; ++i) {
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index 9a876f404d..a1365a045f 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -1654,7 +1654,7 @@ lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
 static void
 lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
                                   LLVMBuilderRef builder,
-                                  union lp_type type,
+                                  struct lp_type type,
                                   unsigned unit,
                                   unsigned num_coords,
                                   const LLVMValueRef *coords,
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
index 7d31705d01..d2a6ae21f5 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
@@ -149,7 +149,7 @@ lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
 static void
 lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base,
                                      LLVMBuilderRef builder,
-                                     union lp_type type,
+                                     struct lp_type type,
                                      unsigned unit,
                                      unsigned num_coords,
                                      const LLVMValueRef *coords,
-- 
cgit v1.2.3


From 459ea0095c31eff835b25dd3eef48a4c073d05f9 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 16 Sep 2009 10:39:06 +0100
Subject: llvmpipe: Make the code portable for MinGW.

---
 scons/llvm.py                                |  6 +++++-
 src/gallium/drivers/llvmpipe/SConscript      |  2 +-
 src/gallium/drivers/llvmpipe/lp_jit.c        |  2 +-
 src/gallium/drivers/llvmpipe/lp_test_blend.c | 16 ++++++++--------
 src/gallium/drivers/llvmpipe/lp_test_conv.c  |  4 ++--
 src/gallium/drivers/llvmpipe/lp_test_main.c  |  8 ++++----
 6 files changed, 21 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/scons/llvm.py b/scons/llvm.py
index 46a8d829ca..d3293bb404 100644
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -51,7 +51,9 @@ def generate(env):
 
         llvm_bin_dir = os.path.join(llvm_dir, llvm_subdir, 'bin')
         if not os.path.isdir(llvm_bin_dir):
-            raise SCons.Errors.InternalError, "LLVM build directory not found"
+            llvm_bin_dir = os.path.join(llvm_dir, 'bin')
+            if not os.path.isdir(llvm_bin_dir):
+                raise SCons.Errors.InternalError, "LLVM binary directory not found"
 
         env.PrependENVPath('PATH', llvm_bin_dir)
 
@@ -65,6 +67,8 @@ def generate(env):
         except OSError:
             print 'llvm-config version %s failed' % version
         else:
+            if env['platform'] == 'windows':
+                env.Append(LIBS = ['imagehlp', 'psapi'])
             env['LINK'] = env['CXX']
             env['LLVM_VERSION'] = version
 
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index dea4b703c4..f4a9a3b22e 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -3,7 +3,7 @@ Import('*')
 env = env.Clone()
 
 env.Tool('llvm')
-if env.has_key('LLVM_VERSION') is False:
+if not env.has_key('LLVM_VERSION'):
     print 'warning: LLVM not found: not building llvmpipe'
     Return()
 
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 9465f763d5..b4a22ff4a9 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -152,7 +152,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
    screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module);
 
    if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
-      fprintf(stderr, "%s\n", error);
+      _debug_printf("%s\n", error);
       LLVMDisposeMessage(error);
       abort();
    }
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 1b57ea2a5c..94b661dcba 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -846,22 +846,22 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
    bool success = TRUE;
 
    for(i = 0; i < n; ++i) {
-      rgb_func = &blend_funcs[random() % num_funcs];
-      alpha_func = &blend_funcs[random() % num_funcs];
-      rgb_src_factor = &blend_factors[random() % num_factors];
-      alpha_src_factor = &blend_factors[random() % num_factors];
+      rgb_func = &blend_funcs[rand() % num_funcs];
+      alpha_func = &blend_funcs[rand() % num_funcs];
+      rgb_src_factor = &blend_factors[rand() % num_factors];
+      alpha_src_factor = &blend_factors[rand() % num_factors];
       
       do {
-         rgb_dst_factor = &blend_factors[random() % num_factors];
+         rgb_dst_factor = &blend_factors[rand() % num_factors];
       } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
 
       do {
-         alpha_dst_factor = &blend_factors[random() % num_factors];
+         alpha_dst_factor = &blend_factors[rand() % num_factors];
       } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
 
-      mode = random() & 1;
+      mode = rand() & 1;
 
-      type = &blend_types[random() % num_types];
+      type = &blend_types[rand() % num_types];
 
       memset(&blend, 0, sizeof blend);
       blend.blend_enable      = 1;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index ae2697143f..9dcf58e5dc 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -413,10 +413,10 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
    bool success = TRUE;
 
    for(i = 0; i < n; ++i) {
-      src_type = &conv_types[random() % num_types];
+      src_type = &conv_types[rand() % num_types];
       
       do {
-         dst_type = &conv_types[random() % num_types];
+         dst_type = &conv_types[rand() % num_types];
       } while (src_type == dst_type || src_type->norm != dst_type->norm);
 
       if(!test_one(verbose, fp, *src_type, *dst_type))
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index c3bb8fadf7..4592dc0b2d 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -188,7 +188,7 @@ random_elem(struct lp_type type, void *dst, unsigned index)
 {
    double value;
    assert(index < type.length);
-   value = (double)random()/(double)RAND_MAX;
+   value = (double)rand()/(double)RAND_MAX;
    if(!type.norm) {
       unsigned long long mask;
       if (type.floating)
@@ -199,10 +199,10 @@ random_elem(struct lp_type type, void *dst, unsigned index)
          mask = ((unsigned long long)1 << (type.width - 1)) - 1;
       else
          mask = ((unsigned long long)1 << type.width) - 1;
-      value += (double)(mask & random());
+      value += (double)(mask & rand());
    }
    if(!type.sign)
-      if(random() & 1)
+      if(rand() & 1)
          value = -value;
    write_elem(type, dst, index, value);
 }
@@ -229,7 +229,7 @@ write_vec(struct lp_type type, void *dst, const double *src)
 float
 random_float(void)
 {
-    return (float)((double)random()/(double)RAND_MAX);
+    return (float)((double)rand()/(double)RAND_MAX);
 }
 
 
-- 
cgit v1.2.3


From c29905aa318cf9ed782935552fa983b048646984 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 16 Sep 2009 14:31:14 +0100
Subject: gallium: Deprecate PIPE_CAP_S3TC.

No longer used. S3TC support is queried via
pipe_screen::is_format_supported.
---
 src/gallium/drivers/cell/ppu/cell_screen.c   | 2 --
 src/gallium/drivers/i915simple/i915_screen.c | 2 --
 src/gallium/drivers/i965simple/brw_screen.c  | 2 --
 src/gallium/drivers/llvmpipe/lp_screen.c     | 2 --
 src/gallium/drivers/nv04/nv04_screen.c       | 2 --
 src/gallium/drivers/nv10/nv10_screen.c       | 2 --
 src/gallium/drivers/nv20/nv20_screen.c       | 2 --
 src/gallium/drivers/nv30/nv30_screen.c       | 2 --
 src/gallium/drivers/nv40/nv40_screen.c       | 2 --
 src/gallium/drivers/nv50/nv50_screen.c       | 2 --
 src/gallium/drivers/r300/r300_screen.c       | 2 --
 src/gallium/drivers/softpipe/sp_screen.c     | 2 --
 src/gallium/include/pipe/p_defines.h         | 2 +-
 13 files changed, 1 insertion(+), 25 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
index bd48ce7005..9161747fdb 100644
--- a/src/gallium/drivers/cell/ppu/cell_screen.c
+++ b/src/gallium/drivers/cell/ppu/cell_screen.c
@@ -64,8 +64,6 @@ cell_get_param(struct pipe_screen *screen, int param)
       return 1;
    case PIPE_CAP_GLSL:
       return 1;
-   case PIPE_CAP_S3TC:
-      return 0;
    case PIPE_CAP_ANISOTROPIC_FILTER:
       return 0;
    case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c
index 9f017a14cc..a1dd43c1bc 100644
--- a/src/gallium/drivers/i915simple/i915_screen.c
+++ b/src/gallium/drivers/i915simple/i915_screen.c
@@ -101,8 +101,6 @@ i915_get_param(struct pipe_screen *screen, int param)
       return 1;
    case PIPE_CAP_GLSL:
       return 0;
-   case PIPE_CAP_S3TC:
-      return 0;
    case PIPE_CAP_ANISOTROPIC_FILTER:
       return 0;
    case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c
index b22e105f10..fb68fd624b 100644
--- a/src/gallium/drivers/i965simple/brw_screen.c
+++ b/src/gallium/drivers/i965simple/brw_screen.c
@@ -85,8 +85,6 @@ brw_get_param(struct pipe_screen *screen, int param)
       return 1;
    case PIPE_CAP_GLSL:
       return 0;
-   case PIPE_CAP_S3TC:
-      return 0;
    case PIPE_CAP_ANISOTROPIC_FILTER:
       return 0;
    case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 0ce1a37bd4..ff7ef8658a 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -65,8 +65,6 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)
       return 1;
    case PIPE_CAP_GLSL:
       return 1;
-   case PIPE_CAP_S3TC:
-      return 0;
    case PIPE_CAP_ANISOTROPIC_FILTER:
       return 0;
    case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
index ff2febb668..170ce3eb7e 100644
--- a/src/gallium/drivers/nv04/nv04_screen.c
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -16,8 +16,6 @@ nv04_screen_get_param(struct pipe_screen *screen, int param)
 		return 0;
 	case PIPE_CAP_GLSL:
 		return 0;
-	case PIPE_CAP_S3TC:
-		return 0;
 	case PIPE_CAP_ANISOTROPIC_FILTER:
 		return 0;
 	case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
index 4469b22d91..ee5901e743 100644
--- a/src/gallium/drivers/nv10/nv10_screen.c
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -15,8 +15,6 @@ nv10_screen_get_param(struct pipe_screen *screen, int param)
 		return 0;
 	case PIPE_CAP_GLSL:
 		return 0;
-	case PIPE_CAP_S3TC:
-		return 0;
 	case PIPE_CAP_ANISOTROPIC_FILTER:
 		return 1;
 	case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
index e6924ad71e..4eeacd1afd 100644
--- a/src/gallium/drivers/nv20/nv20_screen.c
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -15,8 +15,6 @@ nv20_screen_get_param(struct pipe_screen *screen, int param)
 		return 0;
 	case PIPE_CAP_GLSL:
 		return 0;
-	case PIPE_CAP_S3TC:
-		return 0;
 	case PIPE_CAP_ANISOTROPIC_FILTER:
 		return 1;
 	case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index f8285e4455..41af38450b 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -22,8 +22,6 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param)
 		return 1;
 	case PIPE_CAP_GLSL:
 		return 0;
-	case PIPE_CAP_S3TC:
-		return 0;
 	case PIPE_CAP_ANISOTROPIC_FILTER:
 		return 1;
 	case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
index 5d2a4216c5..bd13dfddd1 100644
--- a/src/gallium/drivers/nv40/nv40_screen.c
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -21,8 +21,6 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param)
 		return 1;
 	case PIPE_CAP_GLSL:
 		return 0;
-	case PIPE_CAP_S3TC:
-		return 1;
 	case PIPE_CAP_ANISOTROPIC_FILTER:
 		return 1;
 	case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 7adaaaa135..3b08e1b89f 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -87,8 +87,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 		return 1;
 	case PIPE_CAP_GLSL:
 		return 0;
-	case PIPE_CAP_S3TC:
-		return 1;
 	case PIPE_CAP_ANISOTROPIC_FILTER:
 		return 1;
 	case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 593178c50b..3b5b1bbd37 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -93,8 +93,6 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
             } else {
                 return 0;
             }
-        case PIPE_CAP_S3TC:
-            return 1;
         case PIPE_CAP_ANISOTROPIC_FILTER:
             return 1;
         case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index da97e7ac04..ce77018415 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -65,8 +65,6 @@ softpipe_get_param(struct pipe_screen *screen, int param)
       return 1;
    case PIPE_CAP_GLSL:
       return 1;
-   case PIPE_CAP_S3TC:
-      return 0;
    case PIPE_CAP_ANISOTROPIC_FILTER:
       return 0;
    case PIPE_CAP_POINT_SPRITE:
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index b01ab6d137..f252d6df00 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -281,7 +281,7 @@ enum pipe_transfer_usage {
 #define PIPE_CAP_NPOT_TEXTURES           2
 #define PIPE_CAP_TWO_SIDED_STENCIL       3
 #define PIPE_CAP_GLSL                    4  /* XXX need something better */
-#define PIPE_CAP_S3TC                    5
+#define PIPE_CAP_S3TC                    5  /* XXX: deprecated; cap determined via supported sampler formats */
 #define PIPE_CAP_ANISOTROPIC_FILTER      6
 #define PIPE_CAP_POINT_SPRITE            7
 #define PIPE_CAP_MAX_RENDER_TARGETS      8
-- 
cgit v1.2.3


From f911d196cf7bdf2d922e11de8ab35649eb6a748c Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 16 Sep 2009 16:12:22 +0100
Subject: llvmpipe: Don't assert due to unsupported texture wrap modes.

Issue a warning and fallback to clamping.
---
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 08b1dc10f3..8ca1be6f1b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -208,6 +208,11 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
       /* FIXME */
+      _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
+      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+      break;
+
    default:
       assert(0);
    }
-- 
cgit v1.2.3


From 18d0f9a7a38674367eca25e87f67ddf423d8c4f7 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 17 Sep 2009 16:05:08 +0100
Subject: llvmpipe: Respect input interpolators for the shader.

Cherry-picked from fb2c7b6743ba6e89f24843890fb7fcd6a09c3dbb
---
 src/gallium/drivers/llvmpipe/lp_state_derived.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index e87976b9f3..30fb41ea65 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -93,6 +93,23 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
       vinfo->num_attribs = 0;
       for (i = 0; i < lpfs->info.num_inputs; i++) {
          int src;
+         enum interp_mode interp;
+
+         switch (lpfs->info.input_interpolate[i]) {
+         case TGSI_INTERPOLATE_CONSTANT:
+            interp = INTERP_CONSTANT;
+            break;
+         case TGSI_INTERPOLATE_LINEAR:
+            interp = INTERP_LINEAR;
+            break;
+         case TGSI_INTERPOLATE_PERSPECTIVE:
+            interp = INTERP_PERSPECTIVE;
+            break;
+         default:
+            assert(0);
+            interp = INTERP_LINEAR;
+         }
+
          switch (lpfs->info.input_semantic_name[i]) {
          case TGSI_SEMANTIC_POSITION:
             src = draw_find_vs_output(llvmpipe->draw,
@@ -108,7 +125,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
 
          case TGSI_SEMANTIC_FOG:
             src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+            draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
             break;
 
          case TGSI_SEMANTIC_GENERIC:
@@ -116,7 +133,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
             /* this includes texcoords and varying vars */
             src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC,
                                       lpfs->info.input_semantic_index[i]);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+            draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
             break;
 
          default:
-- 
cgit v1.2.3


From 76c2e34b22836c3a71a096be5620ded97a2ae636 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 20 Sep 2009 12:28:07 +0100
Subject: llvmpipe: Update tile status on flush.

---
 src/gallium/drivers/llvmpipe/lp_tile_cache.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index 143afec3d3..ddda5650a9 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -225,11 +225,14 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc)
                            tc->clear_val);
 
             screen->transfer_unmap(screen, pt);
+
+            tile->status = LP_TILE_STATUS_UNDEFINED;
             break;
          }
 
          case LP_TILE_STATUS_DEFINED:
             lp_put_tile_rgba_soa(pt, x, y, tile->color);
+            tile->status = LP_TILE_STATUS_UNDEFINED;
             break;
          }
       }
-- 
cgit v1.2.3


From 911a7a82cd44e89dd7c24a256a0a172f01eadde3 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 20 Sep 2009 18:04:00 +0100
Subject: llvmpipe: Fix lp_get_cached_tile.

Align coordinates to tile boundaries.
---
 src/gallium/drivers/llvmpipe/lp_tile_cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index ddda5650a9..2e576e6039 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -260,7 +260,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
 
    case LP_TILE_STATUS_UNDEFINED:
       /* get new tile data from transfer */
-      lp_get_tile_rgba_soa(pt, x, y, tile->color);
+      lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color);
       tile->status = LP_TILE_STATUS_DEFINED;
       break;
 
-- 
cgit v1.2.3