From 946f432a08112148d743eb9faf6b27bb8cc7fa76 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 26 Jul 2009 23:44:38 +0100
Subject: llvmpipe: Fork softpipe for experimentation with llvm.

---
 src/gallium/drivers/llvmpipe/SConscript | 46 +++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/SConscript

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
new file mode 100644
index 0000000000..f021549c44
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -0,0 +1,46 @@
+Import('*')
+
+env = env.Clone()
+
+llvmpipe = env.ConvenienceLibrary(
+	target = 'llvmpipe',
+	source = [
+		'lp_fs_exec.c',
+		'lp_fs_sse.c',
+		'lp_fs_llvm.c',
+		'lp_clear.c',
+		'lp_context.c',
+		'lp_draw_arrays.c',
+		'lp_flush.c',
+		'lp_prim_setup.c',
+		'lp_prim_vbuf.c',
+		'lp_setup.c',
+		'lp_quad_alpha_test.c',
+		'lp_quad_blend.c',
+		'lp_quad_pipe.c',
+		'lp_quad_colormask.c',
+		'lp_quad_coverage.c',
+		'lp_quad_depth_test.c',
+		'lp_quad_earlyz.c',
+		'lp_quad_fs.c',
+		'lp_quad_occlusion.c',
+		'lp_quad_output.c',
+		'lp_quad_stencil.c',
+		'lp_quad_stipple.c',
+		'lp_query.c',
+		'lp_screen.c',
+		'lp_state_blend.c',
+		'lp_state_clip.c',
+		'lp_state_derived.c',
+		'lp_state_fs.c',
+		'lp_state_rasterizer.c',
+		'lp_state_sampler.c',
+		'lp_state_surface.c',
+		'lp_state_vertex.c',
+		'lp_surface.c',
+		'lp_tex_sample.c',
+		'lp_texture.c',
+		'lp_tile_cache.c',
+	])
+
+Export('softpipe')
\ No newline at end of file
-- 
cgit v1.2.3


From 8783732c4d2b9162d996f678eb41e3eae3ac86c7 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 27 Jul 2009 01:23:15 +0100
Subject: llvmpipe: Pixel packing/unpacking and loop code generators.

Just a small proof of concept plus a standalone test app. Not integrated
with the rest of the driver yet.
---
 src/gallium/drivers/llvmpipe/Makefile        |   3 +
 src/gallium/drivers/llvmpipe/SConscript      |  15 +-
 src/gallium/drivers/llvmpipe/lp_bld.h        |  94 +++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_loop.c   |  91 ++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_pack.c   | 132 ++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_test.c   | 199 +++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_unpack.c | 156 +++++++++++++++++++++
 7 files changed, 689 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld.h
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_loop.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_pack.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_test.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_unpack.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index c0c430628d..a6556c5782 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -7,6 +7,9 @@ C_SOURCES = \
 	lp_fs_exec.c \
 	lp_fs_sse.c \
 	lp_fs_llvm.c \
+	lp_bld_pack.c \
+	lp_bld_unpack.c \
+	lp_bld_loop.c \
 	lp_clear.c \
 	lp_flush.c \
 	lp_query.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index f021549c44..fa3047f165 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -2,12 +2,17 @@ Import('*')
 
 env = env.Clone()
 
+env.ParseConfig('llvm-config --cflags --libs jit interpreter nativecodegen')
+
 llvmpipe = env.ConvenienceLibrary(
 	target = 'llvmpipe',
 	source = [
 		'lp_fs_exec.c',
 		'lp_fs_sse.c',
 		'lp_fs_llvm.c',
+		'lp_bld_pack.c',
+		'lp_bld_unpack.c',
+		'lp_bld_loop.c',
 		'lp_clear.c',
 		'lp_context.c',
 		'lp_draw_arrays.c',
@@ -43,4 +48,12 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_tile_cache.c',
 	])
 
-Export('softpipe')
\ No newline at end of file
+env['LINK'] = env['CXX']
+
+env.Program(
+    target = 'lp_bld_test',
+    source = ['lp_bld_test.c'],
+    LIBS = [llvmpipe] + auxiliaries + env['LIBS'],
+)
+
+Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_bld.h b/src/gallium/drivers/llvmpipe/lp_bld.h
new file mode 100644
index 0000000000..88b70d7478
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld.h
@@ -0,0 +1,94 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_H
+#define LP_BLD_H
+
+
+/**
+ * @file
+ * LLVM IR building helpers interfaces.
+ *
+ * We use LLVM-C bindings for now. They are not documented, but follow the C++
+ * interfaces very closely, and appear to be complete enough for code
+ * generation. See
+ * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
+ * for a standalone example.
+ */
+
+#include <llvm-c/Core.h>  
+ 
+#include "pipe/p_format.h"
+
+
+/**
+ * Unpack a pixel into its RGBA components.
+ *
+ * @param ptr value with the pointer to the packed pixel. Pointer type is
+ * irrelevant.
+ *
+ * @return RGBA in a 4 floats vector.
+ */
+LLVMValueRef
+lp_build_unpack_rgba(LLVMBuilderRef builder,
+                     enum pipe_format format, 
+                     LLVMValueRef ptr);
+
+
+/**
+ * Pack a pixel.
+ *
+ * @param rgba 4 float vector with the unpacked components.
+ */
+void 
+lp_build_pack_rgba(LLVMBuilderRef builder,
+                   enum pipe_format format,
+                   LLVMValueRef ptr,
+                   LLVMValueRef rgba);
+
+
+struct lp_build_loop_state
+{
+  LLVMBasicBlockRef block;
+  LLVMValueRef counter;
+};
+
+
+void
+lp_build_loop_begin(LLVMBuilderRef builder,
+                    LLVMValueRef start,
+                    struct lp_build_loop_state *state);
+
+
+void
+lp_build_loop_end(LLVMBuilderRef builder,
+                  LLVMValueRef end,
+                  LLVMValueRef step,
+                  struct lp_build_loop_state *state);
+
+
+#endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_loop.c b/src/gallium/drivers/llvmpipe/lp_bld_loop.c
new file mode 100644
index 0000000000..3c0a9a1ede
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_loop.c
@@ -0,0 +1,91 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "lp_bld.h"
+
+
+/**
+ * @file
+ * Auxiliaries to build loops.
+ *
+ * LLVM's IR doesn't represent for-loops directly. Furthermore,
+ * it requires creating code blocks, branches, and phi variables, so
+ * building a loop takes a fair amount of code.
+ *
+ * @sa http://www.llvm.org/docs/tutorial/LangImpl5.html#for
+ */
+
+
+void
+lp_build_loop_begin(LLVMBuilderRef builder,
+                    LLVMValueRef start,
+                    struct lp_build_loop_state *state)
+{
+   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+   LLVMValueRef function = LLVMGetBasicBlockParent(block);
+   /* Create the block that holds the loop body. */
+   state->block = LLVMAppendBasicBlock(function, "loop");
+   /* Terminate the current block with a jump into the loop. */
+   LLVMBuildBr(builder, state->block);
+   /* From here on the caller emits the loop body. */
+   LLVMPositionBuilderAtEnd(builder, state->block);
+   /* The loop counter is a phi node with the same type as start. */
+   state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), "");
+   /* On entry from the preceding block the counter equals start. */
+   LLVMAddIncoming(state->counter, &start, &block, 1);
+
+}
+
+
+void
+lp_build_loop_end(LLVMBuilderRef builder,
+                  LLVMValueRef end,
+                  LLVMValueRef step,
+                  struct lp_build_loop_state *state)
+{
+   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+   LLVMValueRef function = LLVMGetBasicBlockParent(block);
+   LLVMValueRef next;
+   LLVMValueRef cond;
+   LLVMBasicBlockRef after_block;
+   /* Close the loop opened by lp_build_loop_begin(); NULL step means 1. */
+   if (!step)
+      step = LLVMConstInt(LLVMTypeOf(end), 1, 0);
+   /* Advance the counter for the next iteration. */
+   next = LLVMBuildAdd(builder, state->counter, step, "");
+   /* True while iterations remain, i.e. the counter has not reached end. */
+   cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, "");
+
+   after_block = LLVMAppendBasicBlock(function, "");
+   /* Jump back to the loop head while cond holds, otherwise leave the loop. */
+   LLVMBuildCondBr(builder, cond, state->block, after_block);
+   /* Feed the incremented counter into the phi made by lp_build_loop_begin. */
+   LLVMAddIncoming(state->counter, &next, &block, 1);
+   /* Instructions emitted after this call are placed after the loop. */
+   LLVMPositionBuilderAtEnd(builder, after_block);
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
new file mode 100644
index 0000000000..7c2c7a7c76
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
@@ -0,0 +1,132 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld.h"
+
+
+void
+lp_build_pack_rgba(LLVMBuilderRef builder,
+                   enum pipe_format format,
+                   LLVMValueRef ptr,
+                   LLVMValueRef rgba)
+{
+   const struct util_format_description *desc;
+   LLVMTypeRef type;
+   LLVMValueRef packed = NULL;
+   unsigned shift = 0;
+   unsigned i, j;
+
+   desc = util_format_description(format);
+
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_RGBA);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+
+   type = LLVMIntType(desc->block.bits);
+
+   LLVMValueRef swizzles[4];
+   LLVMValueRef shifted, casted, scaled, unswizzled;
+
+
+   /* Unswizzle the color components into the source vector. */
+   for (i = 0; i < 4; ++i) {
+      for (j = 0; j < 4; ++j) {
+         if (desc->swizzle[j] == i)
+            break;
+      }
+      if (j < 4)
+         swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
+      else
+         swizzles[i] = LLVMGetUndef(LLVMInt32Type());
+   }
+
+   unswizzled = LLVMBuildShuffleVector(builder, rgba,
+                                       LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
+                                       LLVMConstVector(swizzles, 4), "");
+
+   LLVMValueRef shifts[4];
+   LLVMValueRef scales[4];
+   bool normalized = FALSE;
+
+   for (i = 0; i < 4; ++i) {
+      unsigned bits = desc->channel[i].size;
+
+      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
+         shifts[i] = LLVMGetUndef(LLVMInt32Type());
+         scales[i] =  LLVMGetUndef(LLVMFloatType());
+      }
+      else {
+         unsigned mask = (1 << bits) - 1;
+
+         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
+         assert(bits < 32);
+
+         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
+
+         if (desc->channel[i].normalized) {
+            scales[i] = LLVMConstReal(LLVMFloatType(), mask);
+            normalized = TRUE;
+         }
+         else
+            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
+      }
+
+      shift += bits;
+   }
+
+   if (normalized)
+      scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
+   else
+      scaled = unswizzled;
+
+   casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
+
+   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
+   
+   /* Bitwise or all components */
+   for (i = 0; i < 4; ++i) {
+      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
+         if (packed)
+            packed = LLVMBuildOr(builder, packed, component, "");
+         else
+            packed = component;
+      }
+   }
+
+   if (packed) {
+
+      if (desc->block.bits < 32)
+         packed = LLVMBuildTrunc(builder, packed, type, "");
+
+      LLVMBuildStore(builder, packed, LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""));
+   }
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_test.c b/src/gallium/drivers/llvmpipe/lp_bld_test.c
new file mode 100644
index 0000000000..5325b7d333
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_test.c
@@ -0,0 +1,199 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <llvm-c/Core.h>
+#include <llvm-c/Analysis.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "lp_bld.h"
+
+
+static LLVMValueRef
+add_unpack_rgba_test(LLVMModuleRef module,
+                     enum pipe_format format)
+{
+   LLVMTypeRef args[] = {
+      LLVMPointerType(LLVMInt8Type(), 0),
+      LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0)
+   };
+   LLVMValueRef func = LLVMAddFunction(module, "unpack", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+   LLVMValueRef ptr = LLVMGetParam(func, 0);
+   LLVMValueRef rgba_ptr = LLVMGetParam(func, 1);
+
+   LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
+   LLVMBuilderRef builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   LLVMValueRef rgba;
+
+   struct lp_build_loop_state loop;
+
+   lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
+
+   rgba = lp_build_unpack_rgba(builder, format, ptr);
+   LLVMBuildStore(builder, rgba, rgba_ptr);
+
+   lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder);
+   return func;
+}
+
+
+static LLVMValueRef
+add_pack_rgba_test(LLVMModuleRef module,
+                   enum pipe_format format)
+{
+   LLVMTypeRef args[] = {
+      LLVMPointerType(LLVMInt8Type(), 0),
+      LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0)
+   };
+   LLVMValueRef func = LLVMAddFunction(module, "pack", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+   LLVMValueRef ptr = LLVMGetParam(func, 0);
+   LLVMValueRef rgba_ptr = LLVMGetParam(func, 1);
+
+   LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
+   LLVMBuilderRef builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   LLVMValueRef rgba;
+
+   rgba = LLVMBuildLoad(builder, rgba_ptr, "");
+
+   lp_build_pack_rgba(builder, format, ptr, rgba);
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder);
+   return func;
+}
+
+
+int main(int argc, char **argv)
+{
+   char *error = NULL;
+   int n;
+
+   if (argc > 1)
+      sscanf(argv[1], "%x", &n);
+   else
+      n = 0x0000f0f0;
+
+   LLVMModuleRef module = LLVMModuleCreateWithName("test");
+
+   enum pipe_format format;
+   format = PIPE_FORMAT_R5G6B5_UNORM;
+   LLVMValueRef unpack = add_unpack_rgba_test(module, format);
+   LLVMValueRef pack = add_pack_rgba_test(module, format);
+
+   LLVMVerifyModule(module, LLVMAbortProcessAction, &error);
+   LLVMDisposeMessage(error);
+
+   LLVMExecutionEngineRef engine;
+   LLVMModuleProviderRef provider = LLVMCreateModuleProviderForExistingModule(module);
+   error = NULL;
+   LLVMCreateJITCompiler(&engine, provider, 1, &error);
+   if (error) {
+      fprintf(stderr, "%s\n", error);
+      LLVMDisposeMessage(error);
+      abort();
+   }
+
+   LLVMPassManagerRef pass = LLVMCreatePassManager();
+#if 0
+   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+    * but there are more on SVN. */
+   LLVMAddConstantPropagationPass(pass);
+   LLVMAddInstructionCombiningPass(pass);
+   LLVMAddPromoteMemoryToRegisterPass(pass);
+   LLVMAddDemoteMemoryToRegisterPass(pass);
+   LLVMAddGVNPass(pass);
+   LLVMAddCFGSimplificationPass(pass);
+   LLVMRunPassManager(pass, module);
+#endif
+   LLVMDumpModule(module);
+
+   printf("Packed: %08x\n", n);
+
+   float rgba[4] = {0, 0, 0, 0};
+
+   {
+#if 1
+      typedef void (*unpack_ptr_t)(void *, float *);
+      unpack_ptr_t unpack_ptr = (unpack_ptr_t)LLVMGetPointerToGlobal(engine, unpack);
+
+      unpack_ptr(&n, rgba);
+#else
+      LLVMGenericValueRef exec_args[] = {
+         LLVMCreateGenericValueOfPointer(n),
+         LLVMCreateGenericValueOfPointer(rgba)
+      };
+      LLVMGenericValueRef exec_res = LLVMRunFunction(engine, unpack, 2, exec_args);
+#endif
+
+      printf("Unpacked: %f %f %f %f\n",
+             rgba[0],
+             rgba[1],
+             rgba[2],
+             rgba[3]);
+   }
+
+   n = 0;
+
+   {
+#if 1
+      typedef void (*pack_ptr_t)(void *, float *);
+      pack_ptr_t pack_ptr = (pack_ptr_t)LLVMGetPointerToGlobal(engine, pack);
+
+      pack_ptr(&n, rgba);
+#else
+      LLVMGenericValueRef exec_args[] = {
+         LLVMCreateGenericValueOfPointer(n),
+         LLVMCreateGenericValueOfPointer(rgba)
+      };
+      LLVMGenericValueRef exec_res = LLVMRunFunction(engine, pack, 2, exec_args);
+#endif
+
+      printf("Packed: %08x\n", n);
+   }
+
+   LLVMDisposePassManager(pass);
+   LLVMDisposeExecutionEngine(engine);
+
+   return 0;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_unpack.c b/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
new file mode 100644
index 0000000000..79022c68a6
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
@@ -0,0 +1,156 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld.h"
+
+
+LLVMValueRef
+lp_build_unpack_rgba(LLVMBuilderRef builder,
+                     enum pipe_format format,
+                     LLVMValueRef ptr)
+{
+   const struct util_format_description *desc;
+   LLVMTypeRef type;
+   LLVMValueRef deferred;
+   unsigned shift = 0;
+   unsigned i;
+
+   desc = util_format_description(format);
+
+   /* FIXME: Support more formats */
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_RGBA);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+   assert(desc->block.bits <= 32);
+
+   type = LLVMIntType(desc->block.bits);
+
+   deferred = LLVMBuildLoad(builder, LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""), "");
+
+   /* Do the intermediate integer computations with 32bit integers since it
+    * matches floating point size */
+   if (desc->block.bits < 32)
+      deferred = LLVMBuildZExt(builder, deferred, LLVMInt32Type(), "");
+
+   /* Broadcast the packed value to all four channels */
+   deferred = LLVMBuildInsertElement(builder,
+                                     LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
+                                     deferred,
+                                     LLVMConstNull(LLVMInt32Type()),
+                                     "");
+   deferred = LLVMBuildShuffleVector(builder,
+                                     deferred,
+                                     LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
+                                     LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
+                                     "");
+
+   LLVMValueRef shifted, casted, scaled, masked, swizzled;
+   LLVMValueRef shifts[4];
+   LLVMValueRef masks[4];
+   LLVMValueRef scales[4];
+   bool normalized = FALSE;
+   int empty_channel = -1;
+
+   /* Initialize vector constants */
+   for (i = 0; i < 4; ++i) {
+      unsigned bits = desc->channel[i].size;
+
+      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
+         shifts[i] = LLVMGetUndef(LLVMInt32Type());
+         masks[i] = LLVMConstNull(LLVMInt32Type());
+         scales[i] =  LLVMConstNull(LLVMFloatType());
+         empty_channel = i;
+      }
+      else {
+         unsigned mask = (1 << bits) - 1;
+
+         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
+         assert(bits < 32);
+
+         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
+         masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
+
+         if (desc->channel[i].normalized) {
+            scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
+            normalized = TRUE;
+         }
+         else
+            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
+      }
+
+      shift += bits;
+   }
+
+   shifted = LLVMBuildLShr(builder, deferred, LLVMConstVector(shifts, 4), "");
+   masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
+   // UIToFP can't be expressed in SSE2
+   casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
+
+   if (normalized)
+      scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
+   else
+      scaled = casted;
+
+   LLVMValueRef swizzles[4];
+   LLVMValueRef aux[4];
+
+   for (i = 0; i < 4; ++i)
+      aux[i] = LLVMGetUndef(LLVMFloatType());
+
+   for (i = 0; i < 4; ++i) {
+      enum util_format_swizzle swizzle = desc->swizzle[i];
+
+      switch (swizzle) {
+      case UTIL_FORMAT_SWIZZLE_X:
+      case UTIL_FORMAT_SWIZZLE_Y:
+      case UTIL_FORMAT_SWIZZLE_Z:
+      case UTIL_FORMAT_SWIZZLE_W:
+         swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
+         break;
+      case UTIL_FORMAT_SWIZZLE_0:
+         assert(empty_channel >= 0);
+         swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
+         break;
+      case UTIL_FORMAT_SWIZZLE_1:
+         swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
+         aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
+         break;
+      case UTIL_FORMAT_SWIZZLE_NONE:
+         swizzles[i] = LLVMGetUndef(LLVMFloatType());
+         assert(0);
+         break;
+      }
+   }
+
+   swizzled = LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
+
+   return swizzled;
+}
+
-- 
cgit v1.2.3


From 90caf1a3f39204a1a4b6207e0746ba694668c144 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 24 Jul 2009 20:19:18 +0100
Subject: llvmpipe: move all color-combine code into lp_quad_blend.c

Consolidate the read-modify-write color combining code from
the blend, colormask and output stages.
---
 src/gallium/drivers/llvmpipe/Makefile            |   14 +-
 src/gallium/drivers/llvmpipe/SConscript          |    2 -
 src/gallium/drivers/llvmpipe/lp_context.c        |    4 -
 src/gallium/drivers/llvmpipe/lp_context.h        |    2 -
 src/gallium/drivers/llvmpipe/lp_quad_blend.c     | 1352 ++++++++++++----------
 src/gallium/drivers/llvmpipe/lp_quad_bufloop.c   |   74 --
 src/gallium/drivers/llvmpipe/lp_quad_colormask.c |  126 --
 src/gallium/drivers/llvmpipe/lp_quad_coverage.c  |    1 -
 src/gallium/drivers/llvmpipe/lp_quad_output.c    |  109 --
 src/gallium/drivers/llvmpipe/lp_quad_pipe.c      |   15 +-
 10 files changed, 727 insertions(+), 972 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad_bufloop.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad_colormask.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad_output.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index a6556c5782..64541ec655 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -18,17 +18,15 @@ C_SOURCES = \
 	lp_prim_setup.c \
 	lp_prim_vbuf.c \
 	lp_quad_pipe.c \
-	lp_quad_alpha_test.c \
-	lp_quad_blend.c \
-	lp_quad_colormask.c \
-	lp_quad_coverage.c \
-	lp_quad_depth_test.c \
+	lp_quad_stipple.c \
 	lp_quad_earlyz.c \
+	lp_quad_depth_test.c \
+	lp_quad_stencil.c \
 	lp_quad_fs.c \
+	lp_quad_alpha_test.c \
 	lp_quad_occlusion.c \
-	lp_quad_output.c \
-	lp_quad_stencil.c \
-	lp_quad_stipple.c \
+	lp_quad_coverage.c \
+	lp_quad_blend.c \
 	lp_screen.c \
         lp_setup.c \
 	lp_state_blend.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index fa3047f165..e395ee4268 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -23,13 +23,11 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_quad_alpha_test.c',
 		'lp_quad_blend.c',
 		'lp_quad_pipe.c',
-		'lp_quad_colormask.c',
 		'lp_quad_coverage.c',
 		'lp_quad_depth_test.c',
 		'lp_quad_earlyz.c',
 		'lp_quad_fs.c',
 		'lp_quad_occlusion.c',
-		'lp_quad_output.c',
 		'lp_quad_stencil.c',
 		'lp_quad_stipple.c',
 		'lp_query.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index edd409dc94..4bb63adfec 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -97,8 +97,6 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
       llvmpipe->quad.occlusion->destroy( llvmpipe->quad.occlusion );
       llvmpipe->quad.coverage->destroy( llvmpipe->quad.coverage );
       llvmpipe->quad.blend->destroy( llvmpipe->quad.blend );
-      llvmpipe->quad.colormask->destroy( llvmpipe->quad.colormask );
-      llvmpipe->quad.output->destroy( llvmpipe->quad.output );
 
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
       lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]);
@@ -241,8 +239,6 @@ llvmpipe_create( struct pipe_screen *screen )
       llvmpipe->quad.occlusion = lp_quad_occlusion_stage(llvmpipe);
       llvmpipe->quad.coverage = lp_quad_coverage_stage(llvmpipe);
       llvmpipe->quad.blend = lp_quad_blend_stage(llvmpipe);
-      llvmpipe->quad.colormask = lp_quad_colormask_stage(llvmpipe);
-      llvmpipe->quad.output = lp_quad_output_stage(llvmpipe);
 
    /* vertex shader samplers */
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 38ad1ca50f..e341cd01af 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -123,8 +123,6 @@ struct llvmpipe_context {
       struct quad_stage *occlusion;
       struct quad_stage *coverage;
       struct quad_stage *blend;
-      struct quad_stage *colormask;
-      struct quad_stage *output;
 
       struct quad_stage *first; /**< points to one of the above stages */
    } quad;
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_blend.c b/src/gallium/drivers/llvmpipe/lp_quad_blend.c
index b48d41b947..0ebe0e6c51 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_blend.c
@@ -117,135 +117,677 @@ do { \
 
 
 static void
-logicop_quads(struct quad_stage *qs, 
-              struct quad_header *quads[],
-              unsigned nr)
+logicop_quad(struct quad_stage *qs, 
+             float (*quadColor)[4],
+             float (*dest)[4])
 {
    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   uint cbuf;
+   ubyte src[4][4], dst[4][4], res[4][4];
+   uint *src4 = (uint *) src;
+   uint *dst4 = (uint *) dst;
+   uint *res4 = (uint *) res;
+   uint j;
+   /* Logic-op path: combine quadColor (source) with dest using blend->logicop_func. */
+   /* src4/dst4/res4 alias each channel's four ubyte pixel values as one uint (assumes 32-bit uint). */
+   /* convert to ubyte */
+   for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
+      dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
+      dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
+      dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
+      dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
+
+      src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
+      src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
+      src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
+      src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
+   }
+
+   switch (llvmpipe->blend->logicop_func) {
+   case PIPE_LOGICOP_CLEAR:
+      for (j = 0; j < 4; j++)
+         res4[j] = 0;
+      break;
+   case PIPE_LOGICOP_NOR:
+      for (j = 0; j < 4; j++)
+         res4[j] = ~(src4[j] | dst4[j]);
+      break;
+   case PIPE_LOGICOP_AND_INVERTED:
+      for (j = 0; j < 4; j++)
+         res4[j] = ~src4[j] & dst4[j];
+      break;
+   case PIPE_LOGICOP_COPY_INVERTED:
+      for (j = 0; j < 4; j++)
+         res4[j] = ~src4[j];
+      break;
+   case PIPE_LOGICOP_AND_REVERSE:
+      for (j = 0; j < 4; j++)
+         res4[j] = src4[j] & ~dst4[j];
+      break;
+   case PIPE_LOGICOP_INVERT:
+      for (j = 0; j < 4; j++)
+         res4[j] = ~dst4[j];
+      break;
+   case PIPE_LOGICOP_XOR:
+      for (j = 0; j < 4; j++)
+         res4[j] = dst4[j] ^ src4[j];
+      break;
+   case PIPE_LOGICOP_NAND:
+      for (j = 0; j < 4; j++)
+         res4[j] = ~(src4[j] & dst4[j]);
+      break;
+   case PIPE_LOGICOP_AND:
+      for (j = 0; j < 4; j++)
+         res4[j] = src4[j] & dst4[j];
+      break;
+   case PIPE_LOGICOP_EQUIV:
+      for (j = 0; j < 4; j++)
+         res4[j] = ~(src4[j] ^ dst4[j]);
+      break;
+   case PIPE_LOGICOP_NOOP:
+      for (j = 0; j < 4; j++)
+         res4[j] = dst4[j];
+      break;
+   case PIPE_LOGICOP_OR_INVERTED:
+      for (j = 0; j < 4; j++)
+         res4[j] = ~src4[j] | dst4[j];
+      break;
+   case PIPE_LOGICOP_COPY:
+      for (j = 0; j < 4; j++)
+         res4[j] = src4[j];
+      break;
+   case PIPE_LOGICOP_OR_REVERSE:
+      for (j = 0; j < 4; j++)
+         res4[j] = src4[j] | ~dst4[j];
+      break;
+   case PIPE_LOGICOP_OR:
+      for (j = 0; j < 4; j++)
+         res4[j] = src4[j] | dst4[j];
+      break;
+   case PIPE_LOGICOP_SET:
+      for (j = 0; j < 4; j++)
+         res4[j] = ~0;
+      break;
+   default:
+      assert(0);
+   }
+   /* convert the result back to float; it is returned in quadColor (dest is only read) */
+   for (j = 0; j < 4; j++) {
+      quadColor[j][0] = ubyte_to_float(res[j][0]);
+      quadColor[j][1] = ubyte_to_float(res[j][1]);
+      quadColor[j][2] = ubyte_to_float(res[j][2]);
+      quadColor[j][3] = ubyte_to_float(res[j][3]);
+   }
+}
+
+/* Blend one quad: quadColor = func(quadColor * src_factor, dest * dst_factor), RGB and A separately. */
+/* NOTE: dest is scaled in place by the dst factor, so on return it no longer holds the input colors. */
+static void
+blend_quad(struct quad_stage *qs, 
+           float (*quadColor)[4],
+           float (*dest)[4])
+{
+   static const float zero[4] = { 0, 0, 0, 0 };
+   static const float one[4] = { 1, 1, 1, 1 };
+   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+   float source[4][QUAD_SIZE];
+
+   /*
+    * Compute src/first term RGB
+    */
+   switch (llvmpipe->blend->rgb_src_factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      VEC4_COPY(source[0], quadColor[0]); /* R */
+      VEC4_COPY(source[1], quadColor[1]); /* G */
+      VEC4_COPY(source[2], quadColor[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
+      VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
+      VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+   {
+      const float *alpha = quadColor[3];
+      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
+      VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
+      VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+   {
+      const float *alpha = dest[3];
+      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+   {
+      const float *alpha = quadColor[3];
+      float diff[4], temp[4];
+      VEC4_SUB(diff, one, dest[3]);
+      VEC4_MIN(temp, alpha, diff);
+      VEC4_MUL(source[0], quadColor[0], temp); /* R */
+      VEC4_MUL(source[1], quadColor[1], temp); /* G */
+      VEC4_MUL(source[2], quadColor[2], temp); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   {
+      float comp[4];
+      VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */
+      VEC4_MUL(source[0], quadColor[0], comp); /* R */
+      VEC4_SCALAR(comp, llvmpipe->blend_color.color[1]); /* G */
+      VEC4_MUL(source[1], quadColor[1], comp); /* G */
+      VEC4_SCALAR(comp, llvmpipe->blend_color.color[2]); /* B */
+      VEC4_MUL(source[2], quadColor[2], comp); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+   {
+      float alpha[4];
+      VEC4_SCALAR(alpha, llvmpipe->blend_color.color[3]);
+      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      assert(0); /* to do */
+      break;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      assert(0); /* to do */
+      break;
+   case PIPE_BLENDFACTOR_ZERO:
+      VEC4_COPY(source[0], zero); /* R */
+      VEC4_COPY(source[1], zero); /* G */
+      VEC4_COPY(source[2], zero); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   {
+      float inv_comp[4];
+      VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+      VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+      VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+      VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+      VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+      VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+   {
+      float inv_alpha[4];
+      VEC4_SUB(inv_alpha, one, quadColor[3]);
+      VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+      VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+      VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+   {
+      float inv_alpha[4];
+      VEC4_SUB(inv_alpha, one, dest[3]);
+      VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+      VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+      VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   {
+      float inv_comp[4];
+      VEC4_SUB(inv_comp, one, dest[0]); /* R */
+      VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+      VEC4_SUB(inv_comp, one, dest[1]); /* G */
+      VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+      VEC4_SUB(inv_comp, one, dest[2]); /* B */
+      VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   {
+      float inv_comp[4];
+      /* R */
+      VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[0]);
+      VEC4_MUL(source[0], quadColor[0], inv_comp);
+      /* G */
+      VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[1]);
+      VEC4_MUL(source[1], quadColor[1], inv_comp);
+      /* B */
+      VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[2]);
+      VEC4_MUL(source[2], quadColor[2], inv_comp);
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+   {
+      float inv_alpha[4];
+      VEC4_SCALAR(inv_alpha, 1.0f - llvmpipe->blend_color.color[3]);
+      VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+      VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+      VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      assert(0); /* to do */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      assert(0); /* to do */
+      break;
+   default:
+      assert(0);
+   }
+
+   /*
+    * Compute src/first term A
+    */
+   switch (llvmpipe->blend->alpha_src_factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      VEC4_COPY(source[3], quadColor[3]); /* A */
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+   {
+      const float *alpha = quadColor[3];
+      VEC4_MUL(source[3], quadColor[3], alpha); /* A */
+   }
+   break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      /* multiply alpha by 1.0 */
+      VEC4_COPY(source[3], quadColor[3]); /* A */
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+   {
+      float comp[4];
+      VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
+      VEC4_MUL(source[3], quadColor[3], comp); /* A */
+   }
+   break;
+   case PIPE_BLENDFACTOR_ZERO:
+      VEC4_COPY(source[3], zero); /* A */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+   {
+      float inv_alpha[4];
+      VEC4_SUB(inv_alpha, one, quadColor[3]);
+      VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+   {
+      float inv_alpha[4];
+      VEC4_SUB(inv_alpha, one, dest[3]);
+      VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+   {
+      float inv_comp[4];
+      /* A */
+      VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
+      VEC4_MUL(source[3], quadColor[3], inv_comp);
+   }
+   break;
+   default:
+      assert(0);
+   }
+
 
-   /* loop over colorbuffer outputs */
-   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
+   /*
+    * Compute dest/second term RGB
+    */
+   switch (llvmpipe->blend->rgb_dst_factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      /* dest = dest * 1   NO-OP, leave dest as-is */
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
+      VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
+      VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
+      VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
+      VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
+      break;
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
+      VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
+      VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
+      break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
+      VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
+      VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      assert(0); /* illegal */
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   {
+      float comp[4];
+      VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */
+      VEC4_MUL(dest[0], dest[0], comp); /* R */
+      VEC4_SCALAR(comp, llvmpipe->blend_color.color[1]); /* G */
+      VEC4_MUL(dest[1], dest[1], comp); /* G */
+      VEC4_SCALAR(comp, llvmpipe->blend_color.color[2]); /* B */
+      VEC4_MUL(dest[2], dest[2], comp); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+   {
+      float comp[4];
+      VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
+      VEC4_MUL(dest[0], dest[0], comp); /* R */
+      VEC4_MUL(dest[1], dest[1], comp); /* G */
+      VEC4_MUL(dest[2], dest[2], comp); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_ZERO:
+      VEC4_COPY(dest[0], zero); /* R */
+      VEC4_COPY(dest[1], zero); /* G */
+      VEC4_COPY(dest[2], zero); /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      /* XXX SRC1 factors (presumably dual-source blending) are not handled */
+      assert(0);
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   {
+      float inv_comp[4];
+      VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+      VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+      VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+      VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+      VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+      VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+   {
+      float one_minus_alpha[QUAD_SIZE];
+      VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+      VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
+      VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
+      VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+   {
+      float inv_comp[4];
+      VEC4_SUB(inv_comp, one, dest[3]); /* A */
+      VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+      VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+      VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   {
+      float inv_comp[4];
+      VEC4_SUB(inv_comp, one, dest[0]); /* R */
+      VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
+      VEC4_SUB(inv_comp, one, dest[1]); /* G */
+      VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
+      VEC4_SUB(inv_comp, one, dest[2]); /* B */
+      VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   {
+      float inv_comp[4];
+      /* R */
+      VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[0]);
+      VEC4_MUL(dest[0], dest[0], inv_comp);
+      /* G */
+      VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[1]);
+      VEC4_MUL(dest[1], dest[1], inv_comp);
+      /* B */
+      VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[2]);
+      VEC4_MUL(dest[2], dest[2], inv_comp);
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+   {
+      float inv_comp[4];
+      VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
+      VEC4_MUL(dest[0], dest[0], inv_comp);
+      VEC4_MUL(dest[1], dest[1], inv_comp);
+      VEC4_MUL(dest[2], dest[2], inv_comp);
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      /* XXX SRC1 factors (presumably dual-source blending) are not handled */
+      assert(0);
+      break;
+   default:
+      assert(0);
+   }
+
+   /*
+    * Compute dest/second term A
+    */
+   switch (llvmpipe->blend->alpha_dst_factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      /* dest = dest * 1   NO-OP, leave dest as-is */
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
+      break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      assert(0); /* illegal */
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+   {
+      float comp[4];
+      VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
+      VEC4_MUL(dest[3], dest[3], comp); /* A */
+   }
+   break;
+   case PIPE_BLENDFACTOR_ZERO:
+      VEC4_COPY(dest[3], zero); /* A */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+   {
+      float one_minus_alpha[QUAD_SIZE];
+      VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+      VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+   {
+      float inv_comp[4];
+      VEC4_SUB(inv_comp, one, dest[3]); /* A */
+      VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
+   }
+   break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+   {
+      float inv_comp[4];
+      VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
+      VEC4_MUL(dest[3], dest[3], inv_comp);
+   }
+   break;
+   default:
+      assert(0);
+   }
+
+   /*
+    * Combine RGB terms
+    */
+   switch (llvmpipe->blend->rgb_func) {
+   case PIPE_BLEND_ADD:
+      VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
+      VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
+      VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
+      VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
+      VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
+      VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
+      VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
+      break;
+   case PIPE_BLEND_MIN:
+      VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
+      VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
+      VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
+      break;
+   case PIPE_BLEND_MAX:
+      VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
+      VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
+      VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
+      break;
+   default:
+      assert(0);
+   }
+
+   /*
+    * Combine A terms
+    */
+   switch (llvmpipe->blend->alpha_func) {
+   case PIPE_BLEND_ADD:
+      VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
+      break;
+   case PIPE_BLEND_MIN:
+      VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
+      break;
+   case PIPE_BLEND_MAX:
+      VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
+      break;
+   default:
+      assert(0);
+   }
+}
+
+static void
+colormask_quad(struct quad_stage *qs,
+               float (*quadColor)[4],
+               float (*dest)[4])
+{
+   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+   /* For each channel whose bit is clear in blend->colormask, overwrite quadColor with dest. */
+   /* R */
+   if (!(llvmpipe->blend->colormask & PIPE_MASK_R))
+      COPY_4V(quadColor[0], dest[0]);
+
+   /* G */
+   if (!(llvmpipe->blend->colormask & PIPE_MASK_G))
+      COPY_4V(quadColor[1], dest[1]);
+
+   /* B */
+   if (!(llvmpipe->blend->colormask & PIPE_MASK_B))
+      COPY_4V(quadColor[2], dest[2]);
+
+   /* A */
+   if (!(llvmpipe->blend->colormask & PIPE_MASK_A))
+      COPY_4V(quadColor[3], dest[3]);
+}
+
+/* Generic path: per color buffer and quad, read dest from the tile, logicop/blend, colormask, store. */
+static void
+blend_fallback(struct quad_stage *qs, 
+               struct quad_header *quads[],
+               unsigned nr)
+{
+   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+   const struct pipe_blend_state *blend = llvmpipe->blend;
+   unsigned cbuf;
+
+   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) 
+   {
       float dest[4][QUAD_SIZE];
-      ubyte src[4][4], dst[4][4], res[4][4];
-      uint *src4 = (uint *) src;
-      uint *dst4 = (uint *) dst;
-      uint *res4 = (uint *) res;
-      uint i, j;
-
-      struct llvmpipe_cached_tile *
-         tile = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
-                                   quads[0]->input.x0, 
-                                   quads[0]->input.y0);
-
-      for (i = 0; i < nr; i++) {
-         struct quad_header *quad = quads[i];
+      struct llvmpipe_cached_tile *tile
+         = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
+                              quads[0]->input.x0, 
+                              quads[0]->input.y0);
+      uint q, i, j;
+      float blend_dest[4][QUAD_SIZE]; /* scratch copy of dest that blend_quad() may scale */
+      for (q = 0; q < nr; q++) {
+         struct quad_header *quad = quads[q];
          float (*quadColor)[4] = quad->output.color[cbuf];
+         const int itx = (quad->input.x0 & (TILE_SIZE-1));
+         const int ity = (quad->input.y0 & (TILE_SIZE-1));
 
-         /* get/swizzle dest colors */
+         /* get/swizzle dest colors 
+          */
          for (j = 0; j < QUAD_SIZE; j++) {
-            int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
-            int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+            int x = itx + (j & 1);
+            int y = ity + (j >> 1);
             for (i = 0; i < 4; i++) {
                dest[i][j] = tile->data.color[y][x][i];
             }
          }
 
-         /* convert to ubyte */
-         for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
-            dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
-            dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
-            dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
-            dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
-
-            src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
-            src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
-            src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
-            src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
-         }
 
-         switch (llvmpipe->blend->logicop_func) {
-         case PIPE_LOGICOP_CLEAR:
-            for (j = 0; j < 4; j++)
-               res4[j] = 0;
-            break;
-         case PIPE_LOGICOP_NOR:
-            for (j = 0; j < 4; j++)
-               res4[j] = ~(src4[j] | dst4[j]);
-            break;
-         case PIPE_LOGICOP_AND_INVERTED:
-            for (j = 0; j < 4; j++)
-               res4[j] = ~src4[j] & dst4[j];
-            break;
-         case PIPE_LOGICOP_COPY_INVERTED:
-            for (j = 0; j < 4; j++)
-               res4[j] = ~src4[j];
-            break;
-         case PIPE_LOGICOP_AND_REVERSE:
-            for (j = 0; j < 4; j++)
-               res4[j] = src4[j] & ~dst4[j];
-            break;
-         case PIPE_LOGICOP_INVERT:
-            for (j = 0; j < 4; j++)
-               res4[j] = ~dst4[j];
-            break;
-         case PIPE_LOGICOP_XOR:
-            for (j = 0; j < 4; j++)
-               res4[j] = dst4[j] ^ src4[j];
-            break;
-         case PIPE_LOGICOP_NAND:
-            for (j = 0; j < 4; j++)
-               res4[j] = ~(src4[j] & dst4[j]);
-            break;
-         case PIPE_LOGICOP_AND:
-            for (j = 0; j < 4; j++)
-               res4[j] = src4[j] & dst4[j];
-            break;
-         case PIPE_LOGICOP_EQUIV:
-            for (j = 0; j < 4; j++)
-               res4[j] = ~(src4[j] ^ dst4[j]);
-            break;
-         case PIPE_LOGICOP_NOOP:
-            for (j = 0; j < 4; j++)
-               res4[j] = dst4[j];
-            break;
-         case PIPE_LOGICOP_OR_INVERTED:
-            for (j = 0; j < 4; j++)
-               res4[j] = ~src4[j] | dst4[j];
-            break;
-         case PIPE_LOGICOP_COPY:
-            for (j = 0; j < 4; j++)
-               res4[j] = src4[j];
-            break;
-         case PIPE_LOGICOP_OR_REVERSE:
-            for (j = 0; j < 4; j++)
-               res4[j] = src4[j] | ~dst4[j];
-            break;
-         case PIPE_LOGICOP_OR:
-            for (j = 0; j < 4; j++)
-               res4[j] = src4[j] | dst4[j];
-            break;
-         case PIPE_LOGICOP_SET:
-            for (j = 0; j < 4; j++)
-               res4[j] = ~0;
-            break;
-         default:
-            assert(0);
+         if (blend->logicop_enable)
+            logicop_quad( qs, quadColor, dest );
+         else if (blend->blend_enable) {
+            for (i = 0; i < 4; i++) { VEC4_COPY(blend_dest[i], dest[i]); }
+            blend_quad( qs, quadColor, blend_dest );
          }
 
-         for (j = 0; j < 4; j++) {
-            quadColor[j][0] = ubyte_to_float(res[j][0]);
-            quadColor[j][1] = ubyte_to_float(res[j][1]);
-            quadColor[j][2] = ubyte_to_float(res[j][2]);
-            quadColor[j][3] = ubyte_to_float(res[j][3]);
+         /* dest still holds the unblended tile colors, so masked-off channels stay unchanged */
+         if (blend->colormask != 0xf)
+            colormask_quad( qs, quadColor, dest );
+
+         /* Output color values, only for pixels covered by the quad mask */
+         for (j = 0; j < QUAD_SIZE; j++) {
+            if (quad->inout.mask & (1 << j)) {
+               int x = itx + (j & 1);
+               int y = ity + (j >> 1);
+               for (i = 0; i < 4; i++) { /* loop over color chans */
+                  tile->data.color[y][x][i] = quadColor[i][j];
+               }
+            }
          }
       }
    }
-
-   /* pass blended quad to next stage */
-   qs->next->run(qs->next, quads, nr);
 }
 
+
 static void
 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, 
                                          struct quad_header *quads[],
@@ -266,11 +808,13 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
       struct quad_header *quad = quads[q];
       float (*quadColor)[4] = quad->output.color[0];
       const float *alpha = quadColor[3];
+      const int itx = (quad->input.x0 & (TILE_SIZE-1));
+      const int ity = (quad->input.y0 & (TILE_SIZE-1));
       
       /* get/swizzle dest colors */
       for (j = 0; j < QUAD_SIZE; j++) {
-         int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
-         int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+         int x = itx + (j & 1);
+         int y = ity + (j >> 1);
          for (i = 0; i < 4; i++) {
             dest[i][j] = tile->data.color[y][x][i];
          }
@@ -291,10 +835,17 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
       VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
       VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
       VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
-   }
 
-   /* pass blended quad to next stage */
-   qs->next->run(qs->next, quads, nr);
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (quad->inout.mask & (1 << j)) {
+            int x = itx + (j & 1);
+            int y = ity + (j >> 1);
+            for (i = 0; i < 4; i++) { /* loop over color chans */
+               tile->data.color[y][x][i] = quadColor[i][j];
+            }
+         }
+      }
+   }
 }
 
 static void
@@ -313,11 +864,13 @@ blend_single_add_one_one(struct quad_stage *qs,
    for (q = 0; q < nr; q++) {
       struct quad_header *quad = quads[q];
       float (*quadColor)[4] = quad->output.color[0];
+      const int itx = (quad->input.x0 & (TILE_SIZE-1));
+      const int ity = (quad->input.y0 & (TILE_SIZE-1));
       
       /* get/swizzle dest colors */
       for (j = 0; j < QUAD_SIZE; j++) {
-         int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
-         int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+         int x = itx + (j & 1);
+         int y = ity + (j >> 1);
          for (i = 0; i < 4; i++) {
             dest[i][j] = tile->data.color[y][x][i];
          }
@@ -327,539 +880,71 @@ blend_single_add_one_one(struct quad_stage *qs,
       VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */
       VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */
       VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */
-   }
 
-   /* pass blended quad to next stage */
-   qs->next->run(qs->next, quads, nr);
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (quad->inout.mask & (1 << j)) {
+            int x = itx + (j & 1);
+            int y = ity + (j >> 1);
+            for (i = 0; i < 4; i++) { /* loop over color chans */
+               tile->data.color[y][x][i] = quadColor[i][j];
+            }
+         }
+      }
+   }
 }
 
+
 static void
-blend_quads_fallback(struct quad_stage *qs, 
-                     struct quad_header *quads[],
-                     unsigned nr)
+single_output_color(struct quad_stage *qs, 
+                    struct quad_header *quads[],
+                    unsigned nr)
 {
-   static const float zero[4] = { 0, 0, 0, 0 };
-   static const float one[4] = { 1, 1, 1, 1 };
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   uint cbuf;
-
-   /* loop over colorbuffer outputs */
-   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
-      float source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
-      struct llvmpipe_cached_tile *tile
-         = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
-                              quads[0]->input.x0, 
-                              quads[0]->input.y0);
-      uint q, i, j;
+   uint i, j, q;
 
-      for (q = 0; q < nr; q++) {
-         struct quad_header *quad = quads[q];
-         float (*quadColor)[4] = quad->output.color[cbuf];
+   struct llvmpipe_cached_tile *tile
+      = lp_get_cached_tile(qs->llvmpipe->cbuf_cache[0],
+                           quads[0]->input.x0, 
+                           quads[0]->input.y0);
 
-         /* get/swizzle dest colors */
-         for (j = 0; j < QUAD_SIZE; j++) {
-            int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
-            int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
-            for (i = 0; i < 4; i++) {
-               dest[i][j] = tile->data.color[y][x][i];
+   for (q = 0; q < nr; q++) {
+      struct quad_header *quad = quads[q];
+      float (*quadColor)[4] = quad->output.color[0];
+      const int itx = (quad->input.x0 & (TILE_SIZE-1));
+      const int ity = (quad->input.y0 & (TILE_SIZE-1));
+      
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (quad->inout.mask & (1 << j)) {
+            int x = itx + (j & 1);
+            int y = ity + (j >> 1);
+            for (i = 0; i < 4; i++) { /* loop over color chans */
+               tile->data.color[y][x][i] = quadColor[i][j];
             }
          }
-
-         /*
-          * Compute src/first term RGB
-          */
-         switch (llvmpipe->blend->rgb_src_factor) {
-         case PIPE_BLENDFACTOR_ONE:
-            VEC4_COPY(source[0], quadColor[0]); /* R */
-            VEC4_COPY(source[1], quadColor[1]); /* G */
-            VEC4_COPY(source[2], quadColor[2]); /* B */
-            break;
-         case PIPE_BLENDFACTOR_SRC_COLOR:
-            VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
-            VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
-            VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
-            break;
-         case PIPE_BLENDFACTOR_SRC_ALPHA:
-         {
-            const float *alpha = quadColor[3];
-            VEC4_MUL(source[0], quadColor[0], alpha); /* R */
-            VEC4_MUL(source[1], quadColor[1], alpha); /* G */
-            VEC4_MUL(source[2], quadColor[2], alpha); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_DST_COLOR:
-            VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
-            VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
-            VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
-            break;
-         case PIPE_BLENDFACTOR_DST_ALPHA:
-         {
-            const float *alpha = dest[3];
-            VEC4_MUL(source[0], quadColor[0], alpha); /* R */
-            VEC4_MUL(source[1], quadColor[1], alpha); /* G */
-            VEC4_MUL(source[2], quadColor[2], alpha); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-         {
-            const float *alpha = quadColor[3];
-            float diff[4], temp[4];
-            VEC4_SUB(diff, one, dest[3]);
-            VEC4_MIN(temp, alpha, diff);
-            VEC4_MUL(source[0], quadColor[0], temp); /* R */
-            VEC4_MUL(source[1], quadColor[1], temp); /* G */
-            VEC4_MUL(source[2], quadColor[2], temp); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_CONST_COLOR:
-         {
-            float comp[4];
-            VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */
-            VEC4_MUL(source[0], quadColor[0], comp); /* R */
-            VEC4_SCALAR(comp, llvmpipe->blend_color.color[1]); /* G */
-            VEC4_MUL(source[1], quadColor[1], comp); /* G */
-            VEC4_SCALAR(comp, llvmpipe->blend_color.color[2]); /* B */
-            VEC4_MUL(source[2], quadColor[2], comp); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_CONST_ALPHA:
-         {
-            float alpha[4];
-            VEC4_SCALAR(alpha, llvmpipe->blend_color.color[3]);
-            VEC4_MUL(source[0], quadColor[0], alpha); /* R */
-            VEC4_MUL(source[1], quadColor[1], alpha); /* G */
-            VEC4_MUL(source[2], quadColor[2], alpha); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_SRC1_COLOR:
-            assert(0); /* to do */
-            break;
-         case PIPE_BLENDFACTOR_SRC1_ALPHA:
-            assert(0); /* to do */
-            break;
-         case PIPE_BLENDFACTOR_ZERO:
-            VEC4_COPY(source[0], zero); /* R */
-            VEC4_COPY(source[1], zero); /* G */
-            VEC4_COPY(source[2], zero); /* B */
-            break;
-         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-         {
-            float inv_comp[4];
-            VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
-            VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
-            VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
-            VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
-            VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
-            VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-         {
-            float inv_alpha[4];
-            VEC4_SUB(inv_alpha, one, quadColor[3]);
-            VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
-            VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
-            VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-         {
-            float inv_alpha[4];
-            VEC4_SUB(inv_alpha, one, dest[3]);
-            VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
-            VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
-            VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_DST_COLOR:
-         {
-            float inv_comp[4];
-            VEC4_SUB(inv_comp, one, dest[0]); /* R */
-            VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
-            VEC4_SUB(inv_comp, one, dest[1]); /* G */
-            VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
-            VEC4_SUB(inv_comp, one, dest[2]); /* B */
-            VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-         {
-            float inv_comp[4];
-            /* R */
-            VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[0]);
-            VEC4_MUL(source[0], quadColor[0], inv_comp);
-            /* G */
-            VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[1]);
-            VEC4_MUL(source[1], quadColor[1], inv_comp);
-            /* B */
-            VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[2]);
-            VEC4_MUL(source[2], quadColor[2], inv_comp);
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-         {
-            float inv_alpha[4];
-            VEC4_SCALAR(inv_alpha, 1.0f - llvmpipe->blend_color.color[3]);
-            VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
-            VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
-            VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-            assert(0); /* to do */
-            break;
-         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-            assert(0); /* to do */
-            break;
-         default:
-            assert(0);
-         }
-
-         /*
-          * Compute src/first term A
-          */
-         switch (llvmpipe->blend->alpha_src_factor) {
-         case PIPE_BLENDFACTOR_ONE:
-            VEC4_COPY(source[3], quadColor[3]); /* A */
-            break;
-         case PIPE_BLENDFACTOR_SRC_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_SRC_ALPHA:
-         {
-            const float *alpha = quadColor[3];
-            VEC4_MUL(source[3], quadColor[3], alpha); /* A */
-         }
-         break;
-         case PIPE_BLENDFACTOR_DST_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_DST_ALPHA:
-            VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
-            break;
-         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-            /* multiply alpha by 1.0 */
-            VEC4_COPY(source[3], quadColor[3]); /* A */
-            break;
-         case PIPE_BLENDFACTOR_CONST_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_CONST_ALPHA:
-         {
-            float comp[4];
-            VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
-            VEC4_MUL(source[3], quadColor[3], comp); /* A */
-         }
-         break;
-         case PIPE_BLENDFACTOR_ZERO:
-            VEC4_COPY(source[3], zero); /* A */
-            break;
-         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-         {
-            float inv_alpha[4];
-            VEC4_SUB(inv_alpha, one, quadColor[3]);
-            VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_DST_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-         {
-            float inv_alpha[4];
-            VEC4_SUB(inv_alpha, one, dest[3]);
-            VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-         {
-            float inv_comp[4];
-            /* A */
-            VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
-            VEC4_MUL(source[3], quadColor[3], inv_comp);
-         }
-         break;
-         default:
-            assert(0);
-         }
-
-
-         /*
-          * Compute dest/second term RGB
-          */
-         switch (llvmpipe->blend->rgb_dst_factor) {
-         case PIPE_BLENDFACTOR_ONE:
-            /* dest = dest * 1   NO-OP, leave dest as-is */
-            break;
-         case PIPE_BLENDFACTOR_SRC_COLOR:
-            VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
-            VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
-            VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
-            break;
-         case PIPE_BLENDFACTOR_SRC_ALPHA:
-            VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
-            VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
-            VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
-            break;
-         case PIPE_BLENDFACTOR_DST_ALPHA:
-            VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
-            VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
-            VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
-            break;
-         case PIPE_BLENDFACTOR_DST_COLOR:
-            VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
-            VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
-            VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
-            break;
-         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-            assert(0); /* illegal */
-            break;
-         case PIPE_BLENDFACTOR_CONST_COLOR:
-         {
-            float comp[4];
-            VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */
-            VEC4_MUL(dest[0], dest[0], comp); /* R */
-            VEC4_SCALAR(comp, llvmpipe->blend_color.color[1]); /* G */
-            VEC4_MUL(dest[1], dest[1], comp); /* G */
-            VEC4_SCALAR(comp, llvmpipe->blend_color.color[2]); /* B */
-            VEC4_MUL(dest[2], dest[2], comp); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_CONST_ALPHA:
-         {
-            float comp[4];
-            VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
-            VEC4_MUL(dest[0], dest[0], comp); /* R */
-            VEC4_MUL(dest[1], dest[1], comp); /* G */
-            VEC4_MUL(dest[2], dest[2], comp); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_ZERO:
-            VEC4_COPY(dest[0], zero); /* R */
-            VEC4_COPY(dest[1], zero); /* G */
-            VEC4_COPY(dest[2], zero); /* B */
-            break;
-         case PIPE_BLENDFACTOR_SRC1_COLOR:
-         case PIPE_BLENDFACTOR_SRC1_ALPHA:
-            /* XXX what are these? */
-            assert(0);
-            break;
-         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-         {
-            float inv_comp[4];
-            VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
-            VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
-            VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
-            VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
-            VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
-            VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-         {
-            float one_minus_alpha[QUAD_SIZE];
-            VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-            VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
-            VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
-            VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-         {
-            float inv_comp[4];
-            VEC4_SUB(inv_comp, one, dest[3]); /* A */
-            VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
-            VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
-            VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_DST_COLOR:
-         {
-            float inv_comp[4];
-            VEC4_SUB(inv_comp, one, dest[0]); /* R */
-            VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
-            VEC4_SUB(inv_comp, one, dest[1]); /* G */
-            VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
-            VEC4_SUB(inv_comp, one, dest[2]); /* B */
-            VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-         {
-            float inv_comp[4];
-            /* R */
-            VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[0]);
-            VEC4_MUL(dest[0], dest[0], inv_comp);
-            /* G */
-            VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[1]);
-            VEC4_MUL(dest[1], dest[1], inv_comp);
-            /* B */
-            VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[2]);
-            VEC4_MUL(dest[2], dest[2], inv_comp);
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-         {
-            float inv_comp[4];
-            VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
-            VEC4_MUL(dest[0], dest[0], inv_comp);
-            VEC4_MUL(dest[1], dest[1], inv_comp);
-            VEC4_MUL(dest[2], dest[2], inv_comp);
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-            /* XXX what are these? */
-            assert(0);
-            break;
-         default:
-            assert(0);
-         }
-
-         /*
-          * Compute dest/second term A
-          */
-         switch (llvmpipe->blend->alpha_dst_factor) {
-         case PIPE_BLENDFACTOR_ONE:
-            /* dest = dest * 1   NO-OP, leave dest as-is */
-            break;
-         case PIPE_BLENDFACTOR_SRC_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_SRC_ALPHA:
-            VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
-            break;
-         case PIPE_BLENDFACTOR_DST_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_DST_ALPHA:
-            VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
-            break;
-         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-            assert(0); /* illegal */
-            break;
-         case PIPE_BLENDFACTOR_CONST_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_CONST_ALPHA:
-         {
-            float comp[4];
-            VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
-            VEC4_MUL(dest[3], dest[3], comp); /* A */
-         }
-         break;
-         case PIPE_BLENDFACTOR_ZERO:
-            VEC4_COPY(dest[3], zero); /* A */
-            break;
-         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-         {
-            float one_minus_alpha[QUAD_SIZE];
-            VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-            VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_DST_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-         {
-            float inv_comp[4];
-            VEC4_SUB(inv_comp, one, dest[3]); /* A */
-            VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
-         }
-         break;
-         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-            /* fall-through */
-         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-         {
-            float inv_comp[4];
-            VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
-            VEC4_MUL(dest[3], dest[3], inv_comp);
-         }
-         break;
-         default:
-            assert(0);
-         }
-
-         /*
-          * Combine RGB terms
-          */
-         switch (llvmpipe->blend->rgb_func) {
-         case PIPE_BLEND_ADD:
-            VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
-            VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
-            VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
-            break;
-         case PIPE_BLEND_SUBTRACT:
-            VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
-            VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
-            VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
-            break;
-         case PIPE_BLEND_REVERSE_SUBTRACT:
-            VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
-            VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
-            VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
-            break;
-         case PIPE_BLEND_MIN:
-            VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
-            VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
-            VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
-            break;
-         case PIPE_BLEND_MAX:
-            VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
-            VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
-            VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
-            break;
-         default:
-            assert(0);
-         }
-
-         /*
-          * Combine A terms
-          */
-         switch (llvmpipe->blend->alpha_func) {
-         case PIPE_BLEND_ADD:
-            VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
-            break;
-         case PIPE_BLEND_SUBTRACT:
-            VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
-            break;
-         case PIPE_BLEND_REVERSE_SUBTRACT:
-            VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
-            break;
-         case PIPE_BLEND_MIN:
-            VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
-            break;
-         case PIPE_BLEND_MAX:
-            VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
-            break;
-         default:
-            assert(0);
-         }
       }
-   } /* cbuf loop */
-
-   /* pass blended quad to next stage */
-   qs->next->run(qs->next, quads, nr);
+   }
 }
 
 
 static void
-blend_quad(struct quad_stage *qs, 
-           struct quad_header *quads[],
-           unsigned nr)
+choose_blend_quad(struct quad_stage *qs, 
+                  struct quad_header *quads[],
+                  unsigned nr)
 {
    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
    const struct pipe_blend_state *blend = llvmpipe->blend;
 
-   if (llvmpipe->blend->logicop_enable) {
-      qs->run = logicop_quads;
-   }
-   else {
-      qs->run = blend_quads_fallback;
+   qs->run = blend_fallback;
 
-      if (blend->rgb_src_factor == blend->alpha_src_factor &&
-          blend->rgb_dst_factor == blend->alpha_dst_factor &&
-          blend->rgb_func == blend->alpha_func &&
-          llvmpipe->framebuffer.nr_cbufs == 1)
+   if (!llvmpipe->blend->logicop_enable &&
+       llvmpipe->blend->colormask == 0xf) 
+   {
+      if (!blend->blend_enable) {
+         qs->run = single_output_color;
+      }
+      else if (blend->rgb_src_factor == blend->alpha_src_factor &&
+               blend->rgb_dst_factor == blend->alpha_dst_factor &&
+               blend->rgb_func == blend->alpha_func &&
+               llvmpipe->framebuffer.nr_cbufs == 1)
       {
          if (blend->alpha_func == PIPE_BLEND_ADD) {
             if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
@@ -871,7 +956,7 @@ blend_quad(struct quad_stage *qs,
                qs->run = blend_single_add_src_alpha_inv_src_alpha;
 
          }
-      } 
+      }
    }
 
    qs->run(qs, quads, nr);
@@ -880,8 +965,7 @@ blend_quad(struct quad_stage *qs,
 
 static void blend_begin(struct quad_stage *qs)
 {
-   qs->run = blend_quad;
-   qs->next->begin(qs->next);
+   qs->run = choose_blend_quad;
 }
 
 
@@ -897,7 +981,7 @@ struct quad_stage *lp_quad_blend_stage( struct llvmpipe_context *llvmpipe )
 
    stage->llvmpipe = llvmpipe;
    stage->begin = blend_begin;
-   stage->run = blend_quad;
+   stage->run = choose_blend_quad;
    stage->destroy = blend_destroy;
 
    return stage;
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_bufloop.c b/src/gallium/drivers/llvmpipe/lp_quad_bufloop.c
deleted file mode 100644
index 2522c3c423..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_quad_bufloop.c
+++ /dev/null
@@ -1,74 +0,0 @@
-
-#include "util/u_memory.h"
-#include "lp_context.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_quad_pipe.h"
-
-
-/**
- * Loop over colorbuffers, passing quad to next stage each time.
- */
-static void
-cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad)
-{
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE];
-   unsigned i;
-
-   assert(sizeof(quad->outputs.color) == sizeof(tmp));
-   assert(llvmpipe->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
-
-   /* make copy of original colors since they can get modified
-    * by blending and masking.
-    * XXX we won't have to do this if the fragment program actually emits
-    * N separate colors and we're drawing to N color buffers (MRT).
-    * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is
-    * in effect, we need to save/restore colors like this.
-    */
-   memcpy(tmp, quad->outputs.color, sizeof(tmp));
-
-   for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
-      /* set current cbuffer */
-#if 0 /* obsolete & going away */
-      llvmpipe->current_cbuf = i;
-#endif
-
-      /* pass blended quad to next stage */
-      qs->next->run(qs->next, quad);
-
-      /* restore quad's colors for next buffer */
-      memcpy(quad->outputs.color, tmp, sizeof(tmp));
-   }
-}
-
-
-static void cbuf_loop_begin(struct quad_stage *qs)
-{
-   qs->next->begin(qs->next);
-}
-
-
-static void cbuf_loop_destroy(struct quad_stage *qs)
-{
-   FREE( qs );
-}
-
-
-/**
- * Create the colorbuffer loop stage.
- * This is used to implement multiple render targets and GL_FRONT_AND_BACK
- * rendering.
- */
-struct quad_stage *lp_quad_bufloop_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
-   stage->llvmpipe = llvmpipe;
-   stage->begin = cbuf_loop_begin;
-   stage->run = cbuf_loop_quad;
-   stage->destroy = cbuf_loop_destroy;
-
-   return stage;
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_colormask.c b/src/gallium/drivers/llvmpipe/lp_quad_colormask.c
deleted file mode 100644
index df811a72d7..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_quad_colormask.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * \brief  quad colormask stage
- * \author Brian Paul
- */
-
-#include "pipe/p_defines.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "lp_context.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_quad_pipe.h"
-#include "lp_tile_cache.h"
-
-
-
-/**
- * XXX colormask could be rolled into blending...
- */
-static void
-colormask_quad(struct quad_stage *qs, struct quad_header *quad)
-{
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   uint cbuf;
-
-   /* loop over colorbuffer outputs */
-   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
-      float dest[4][QUAD_SIZE];
-      struct llvmpipe_cached_tile *tile
-         = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
-                              quad->input.x0, quad->input.y0);
-      float (*quadColor)[4] = quad->output.color[cbuf];
-      uint i, j;
-
-      /* get/swizzle dest colors */
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
-         int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
-         for (i = 0; i < 4; i++) {
-            dest[i][j] = tile->data.color[y][x][i];
-         }
-      }
-
-      /* R */
-      if (!(llvmpipe->blend->colormask & PIPE_MASK_R))
-          COPY_4V(quadColor[0], dest[0]);
-
-      /* G */
-      if (!(llvmpipe->blend->colormask & PIPE_MASK_G))
-          COPY_4V(quadColor[1], dest[1]);
-
-      /* B */
-      if (!(llvmpipe->blend->colormask & PIPE_MASK_B))
-          COPY_4V(quadColor[2], dest[2]);
-
-      /* A */
-      if (!(llvmpipe->blend->colormask & PIPE_MASK_A))
-          COPY_4V(quadColor[3], dest[3]);
-   }
-}
-
-static void
-colormask_quads(struct quad_stage *qs, struct quad_header *quads[],
-                unsigned nr)
-{
-   unsigned i;
-
-   for (i = 0; i < nr; i++)
-      colormask_quad(qs, quads[i]);
-
-   /* pass quad to next stage */
-   qs->next->run(qs->next, quads, nr);
-}
-
-
-
-static void colormask_begin(struct quad_stage *qs)
-{
-   qs->next->begin(qs->next);
-}
-
-
-static void colormask_destroy(struct quad_stage *qs)
-{
-   FREE( qs );
-}
-
-
-struct quad_stage *lp_quad_colormask_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
-   stage->llvmpipe = llvmpipe;
-   stage->begin = colormask_begin;
-   stage->run = colormask_quads;
-   stage->destroy = colormask_destroy;
-
-   return stage;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_coverage.c b/src/gallium/drivers/llvmpipe/lp_quad_coverage.c
index 7d70ab135a..9bc31dfe9d 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_coverage.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_coverage.c
@@ -68,7 +68,6 @@ coverage_run(struct quad_stage *qs,
                struct quad_header *quads[],
                unsigned nr)
 {
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
    unsigned i;
 
    for (i = 0; i < nr; i++)
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_output.c b/src/gallium/drivers/llvmpipe/lp_quad_output.c
deleted file mode 100644
index 07cc840848..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_quad_output.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "util/u_memory.h"
-#include "lp_context.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_quad_pipe.h"
-#include "lp_tile_cache.h"
-
-
-/**
- * Last step of quad processing: write quad colors to the framebuffer,
- * taking mask into account.
- */
-static void
-output_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
-{
-
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   uint cbuf;
-
-   /* loop over colorbuffer outputs */
-   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
-      struct llvmpipe_cached_tile *tile
-         = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
-                              quads[0]->input.x0, 
-                              quads[0]->input.y0);
-      int i, j, q;
-
-      /* get/swizzle dest colors */
-      for (q = 0; q < nr; q++) {
-         struct quad_header *quad = quads[q];
-         float (*quadColor)[4] = quad->output.color[cbuf];
-
-         /* in-tile pos: */
-         const int itx = quad->input.x0 % TILE_SIZE;
-         const int ity = quad->input.y0 % TILE_SIZE;
-
-         
-         for (j = 0; j < QUAD_SIZE; j++) {
-            if (quad->inout.mask & (1 << j)) {
-               int x = itx + (j & 1);
-               int y = ity + (j >> 1);
-               for (i = 0; i < 4; i++) { /* loop over color chans */
-                  tile->data.color[y][x][i] = quadColor[i][j];
-               }
-               if (0) {
-                  debug_printf("lp write pixel %d,%d: %g, %g, %g\n",
-                               quad->input.x0 + x,
-                               quad->input.y0 + y,
-                               quadColor[0][j],
-                               quadColor[1][j],
-                               quadColor[2][j]);
-               }
-            }
-         }
-      }
-   }
-}
-
-
-static void output_begin(struct quad_stage *qs)
-{
-   assert(qs->next == NULL);
-}
-
-
-static void output_destroy(struct quad_stage *qs)
-{
-   FREE( qs );
-}
-
-
-struct quad_stage *lp_quad_output_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
-   stage->llvmpipe = llvmpipe;
-   stage->begin = output_begin;
-   stage->run = output_quad;
-   stage->destroy = output_destroy;
-
-   return stage;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c b/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
index 0b81dda967..e912f76b34 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
@@ -65,25 +65,16 @@ lp_build_quad_pipeline(struct llvmpipe_context *lp)
 
    /* Color combine
     */
-   lp->quad.first = lp->quad.output;
-
-   if (lp->blend->colormask != 0xf) {
-      lp_push_quad_first( lp, lp->quad.colormask );
-   }
-
-   if (lp->blend->blend_enable ||
-       lp->blend->logicop_enable) {
-      lp_push_quad_first( lp, lp->quad.blend );
-   }
+   lp->quad.first = lp->quad.blend;
 
+   /* Shade/Depth/Stencil/Alpha
+    */
    if ((lp->rasterizer->poly_smooth && lp->reduced_prim == PIPE_PRIM_TRIANGLES) ||
        (lp->rasterizer->line_smooth && lp->reduced_prim == PIPE_PRIM_LINES) ||
        (lp->rasterizer->point_smooth && lp->reduced_prim == PIPE_PRIM_POINTS)) {
       lp_push_quad_first( lp, lp->quad.coverage );
    }
 
-   /* Shade/Depth/Stencil/Alpha
-    */
    if (lp->active_query_count) {
       lp_push_quad_first( lp, lp->quad.occlusion );
    }
-- 
cgit v1.2.3


From 3ace63c00fe85079599fe088e90ee23e006bada4 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 27 Jul 2009 11:45:50 +0100
Subject: llvmpipe: Update SConscript.

---
 src/gallium/drivers/llvmpipe/SConscript | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index e395ee4268..c6c54d2f00 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -20,15 +20,10 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_prim_setup.c',
 		'lp_prim_vbuf.c',
 		'lp_setup.c',
-		'lp_quad_alpha_test.c',
 		'lp_quad_blend.c',
 		'lp_quad_pipe.c',
-		'lp_quad_coverage.c',
 		'lp_quad_depth_test.c',
-		'lp_quad_earlyz.c',
 		'lp_quad_fs.c',
-		'lp_quad_occlusion.c',
-		'lp_quad_stencil.c',
 		'lp_quad_stipple.c',
 		'lp_query.c',
 		'lp_screen.c',
-- 
cgit v1.2.3


From 833323b8d02a08b4f53847a521e14c707efd8beb Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 29 Jul 2009 07:58:27 +0100
Subject: llvmpipe: Separate pixel packing/unpacking from loading/storing.

---
 src/gallium/drivers/llvmpipe/Makefile        |  2 +
 src/gallium/drivers/llvmpipe/SConscript      |  2 +
 src/gallium/drivers/llvmpipe/lp_bld.h        | 34 +++++++++++++---
 src/gallium/drivers/llvmpipe/lp_bld_load.c   | 59 ++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_pack.c   | 13 +++---
 src/gallium/drivers/llvmpipe/lp_bld_store.c  | 58 +++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_test.c   | 32 +++++++--------
 src/gallium/drivers/llvmpipe/lp_bld_unpack.c | 29 ++++++--------
 8 files changed, 185 insertions(+), 44 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_load.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_store.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 20a8c44a24..e06bf6668f 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -9,6 +9,8 @@ C_SOURCES = \
 	lp_fs_llvm.c \
 	lp_bld_pack.c \
 	lp_bld_unpack.c \
+	lp_bld_load.c \
+	lp_bld_store.c \
 	lp_bld_loop.c \
 	lp_clear.c \
 	lp_flush.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index c6c54d2f00..bba283f296 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -12,6 +12,8 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_fs_llvm.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
+		'lp_bld_load.c',
+		'lp_bld_store.c',
 		'lp_bld_loop.c',
 		'lp_clear.c',
 		'lp_context.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld.h b/src/gallium/drivers/llvmpipe/lp_bld.h
index 368182d638..44343f644b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld.h
@@ -48,15 +48,14 @@
 /**
  * Unpack a pixel into its RGBA components.
  *
- * @param ptr value with the pointer to the packed pixel. Pointer type is
- * irrelevant.
+ * @param packed integer value holding the packed pixel.
  *
  * @return RGBA in a 4 floats vector.
  */
 LLVMValueRef
 lp_build_unpack_rgba(LLVMBuilderRef builder,
                      enum pipe_format format, 
-                     LLVMValueRef ptr);
+                     LLVMValueRef packed);
 
 
 /**
@@ -64,13 +63,38 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
  *
  * @param rgba 4 float vector with the unpacked components.
  */
-void 
+LLVMValueRef
 lp_build_pack_rgba(LLVMBuilderRef builder,
                    enum pipe_format format,
-                   LLVMValueRef ptr,
                    LLVMValueRef rgba);
 
 
+/**
+ * Load a pixel into its RGBA components.
+ *
+ * @param ptr value with the pointer to the packed pixel. Pointer type is
+ * irrelevant.
+ *
+ * @return RGBA in a 4 floats vector.
+ */
+LLVMValueRef
+lp_build_load_rgba(LLVMBuilderRef builder,
+                   enum pipe_format format, 
+                   LLVMValueRef ptr);
+
+
+/**
+ * Store a pixel.
+ *
+ * @param rgba 4 float vector with the unpacked components.
+ */
+void 
+lp_build_store_rgba(LLVMBuilderRef builder,
+                    enum pipe_format format,
+                    LLVMValueRef ptr,
+                    LLVMValueRef rgba);
+
+
 struct lp_build_loop_state
 {
   LLVMBasicBlockRef block;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_load.c b/src/gallium/drivers/llvmpipe/lp_bld_load.c
new file mode 100644
index 0000000000..b9734bdbed
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_load.c
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld.h"
+
+
+LLVMValueRef
+lp_build_load_rgba(LLVMBuilderRef builder,
+                   enum pipe_format format,
+                   LLVMValueRef ptr)
+{
+   const struct util_format_description *desc;
+   LLVMTypeRef type;
+   LLVMValueRef packed;
+
+   desc = util_format_description(format);
+
+   /* FIXME: Support more formats */
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+   assert(desc->block.bits <= 32);
+
+   type = LLVMIntType(desc->block.bits);
+
+   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
+
+   packed = LLVMBuildLoad(builder, ptr, "");
+
+   return lp_build_unpack_rgba(builder, format, packed);
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
index 2383a07d72..823d67e12d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
@@ -31,10 +31,9 @@
 #include "lp_bld.h"
 
 
-void
+LLVMValueRef
 lp_build_pack_rgba(LLVMBuilderRef builder,
                    enum pipe_format format,
-                   LLVMValueRef ptr,
                    LLVMValueRef rgba)
 {
    const struct util_format_description *desc;
@@ -121,12 +120,12 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
       }
    }
 
-   if (packed) {
+   if (!packed)
+      packed = LLVMGetUndef(LLVMInt32Type());
 
-      if (desc->block.bits < 32)
-         packed = LLVMBuildTrunc(builder, packed, type, "");
+   if (desc->block.bits < 32)
+      packed = LLVMBuildTrunc(builder, packed, type, "");
 
-      LLVMBuildStore(builder, packed, LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""));
-   }
+   return packed;
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_store.c b/src/gallium/drivers/llvmpipe/lp_bld_store.c
new file mode 100644
index 0000000000..6273c9ee62
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_store.c
@@ -0,0 +1,58 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld.h"
+
+
+void
+lp_build_store_rgba(LLVMBuilderRef builder,
+                    enum pipe_format format,
+                    LLVMValueRef ptr,
+                    LLVMValueRef rgba)
+{
+   const struct util_format_description *desc;
+   LLVMTypeRef type;
+   LLVMValueRef packed;
+
+   desc = util_format_description(format);
+
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+
+   type = LLVMIntType(desc->block.bits);
+
+   packed = lp_build_pack_rgba(builder, format, rgba);
+
+   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
+
+   LLVMBuildStore(builder, packed, ptr);
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_test.c b/src/gallium/drivers/llvmpipe/lp_bld_test.c
index 09947dd6bb..1f09310267 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_test.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_test.c
@@ -89,14 +89,14 @@ struct pixel_test_case test_cases[] =
 
 
 static LLVMValueRef
-add_unpack_rgba_test(LLVMModuleRef module,
-                     enum pipe_format format)
+add_load_rgba_test(LLVMModuleRef module,
+                   enum pipe_format format)
 {
    LLVMTypeRef args[] = {
       LLVMPointerType(LLVMInt8Type(), 0),
       LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0)
    };
-   LLVMValueRef func = LLVMAddFunction(module, "unpack", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
+   LLVMValueRef func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
    LLVMSetFunctionCallConv(func, LLVMCCallConv);
    LLVMValueRef ptr = LLVMGetParam(func, 0);
    LLVMValueRef rgba_ptr = LLVMGetParam(func, 1);
@@ -111,7 +111,7 @@ add_unpack_rgba_test(LLVMModuleRef module,
 
    lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
 
-   rgba = lp_build_unpack_rgba(builder, format, ptr);
+   rgba = lp_build_load_rgba(builder, format, ptr);
    LLVMBuildStore(builder, rgba, rgba_ptr);
 
    lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
@@ -124,14 +124,14 @@ add_unpack_rgba_test(LLVMModuleRef module,
 
 
 static LLVMValueRef
-add_pack_rgba_test(LLVMModuleRef module,
-                   enum pipe_format format)
+add_store_rgba_test(LLVMModuleRef module,
+                    enum pipe_format format)
 {
    LLVMTypeRef args[] = {
       LLVMPointerType(LLVMInt8Type(), 0),
       LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0)
    };
-   LLVMValueRef func = LLVMAddFunction(module, "pack", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
+   LLVMValueRef func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
    LLVMSetFunctionCallConv(func, LLVMCCallConv);
    LLVMValueRef ptr = LLVMGetParam(func, 0);
    LLVMValueRef rgba_ptr = LLVMGetParam(func, 1);
@@ -144,7 +144,7 @@ add_pack_rgba_test(LLVMModuleRef module,
 
    rgba = LLVMBuildLoad(builder, rgba_ptr, "");
 
-   lp_build_pack_rgba(builder, format, ptr, rgba);
+   lp_build_store_rgba(builder, format, ptr, rgba);
 
    LLVMBuildRetVoid(builder);
 
@@ -164,8 +164,8 @@ test_format(const struct pixel_test_case *test)
 
    LLVMModuleRef module = LLVMModuleCreateWithName("test");
 
-   LLVMValueRef unpack = add_unpack_rgba_test(module, test->format);
-   LLVMValueRef pack = add_pack_rgba_test(module, test->format);
+   LLVMValueRef load = add_load_rgba_test(module, test->format);
+   LLVMValueRef store = add_store_rgba_test(module, test->format);
 
    LLVMVerifyModule(module, LLVMAbortProcessAction, &error);
    LLVMDisposeMessage(error);
@@ -200,19 +200,19 @@ test_format(const struct pixel_test_case *test)
    unsigned packed = 0;
 
    {
-      typedef void (*unpack_ptr_t)(const void *, float *);
-      unpack_ptr_t unpack_ptr = (unpack_ptr_t)LLVMGetPointerToGlobal(engine, unpack);
+      typedef void (*load_ptr_t)(const void *, float *);
+      load_ptr_t load_ptr = (load_ptr_t)LLVMGetPointerToGlobal(engine, load);
 
-      unpack_ptr(&test->packed, unpacked);
+      load_ptr(&test->packed, unpacked);
 
    }
 
 
    {
-      typedef void (*pack_ptr_t)(void *, const float *);
-      pack_ptr_t pack_ptr = (pack_ptr_t)LLVMGetPointerToGlobal(engine, pack);
+      typedef void (*store_ptr_t)(void *, const float *);
+      store_ptr_t store_ptr = (store_ptr_t)LLVMGetPointerToGlobal(engine, store);
 
-      pack_ptr(&packed, unpacked);
+      store_ptr(&packed, unpacked);
 
    }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_unpack.c b/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
index cf6f831328..f1ffe3ecd8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
@@ -34,11 +34,10 @@
 LLVMValueRef
 lp_build_unpack_rgba(LLVMBuilderRef builder,
                      enum pipe_format format,
-                     LLVMValueRef ptr)
+                     LLVMValueRef packed)
 {
    const struct util_format_description *desc;
    LLVMTypeRef type;
-   LLVMValueRef deferred;
    unsigned shift = 0;
    unsigned i;
 
@@ -52,24 +51,22 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
 
    type = LLVMIntType(desc->block.bits);
 
-   deferred = LLVMBuildLoad(builder, LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""), "");
-
    /* Do the intermediate integer computations with 32bit integers since it
     * matches floating point size */
    if (desc->block.bits < 32)
-      deferred = LLVMBuildZExt(builder, deferred, LLVMInt32Type(), "");
+      packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
 
    /* Broadcast the packed value to all four channels */
-   deferred = LLVMBuildInsertElement(builder,
-                                     LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
-                                     deferred,
-                                     LLVMConstNull(LLVMInt32Type()),
-                                     "");
-   deferred = LLVMBuildShuffleVector(builder,
-                                     deferred,
-                                     LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
-                                     LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
-                                     "");
+   packed = LLVMBuildInsertElement(builder,
+                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
+                                   packed,
+                                   LLVMConstNull(LLVMInt32Type()),
+                                   "");
+   packed = LLVMBuildShuffleVector(builder,
+                                   packed,
+                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
+                                   LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
+                                   "");
 
    LLVMValueRef shifted, casted, scaled, masked, swizzled;
    LLVMValueRef shifts[4];
@@ -108,7 +105,7 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
       shift += bits;
    }
 
-   shifted = LLVMBuildLShr(builder, deferred, LLVMConstVector(shifts, 4), "");
+   shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
    // UIToFP can't be expressed in SSE2
    casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
-- 
cgit v1.2.3


From 4639f8aad5db08cf9386a910c02a4dc205740d8b Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 29 Jul 2009 08:29:13 +0100
Subject: llvmpipe: Translate logicops.

---
 src/gallium/drivers/llvmpipe/Makefile         |   1 +
 src/gallium/drivers/llvmpipe/SConscript       |   1 +
 src/gallium/drivers/llvmpipe/lp_bld.h         |  13 ++++
 src/gallium/drivers/llvmpipe/lp_bld_logicop.c | 100 ++++++++++++++++++++++++++
 4 files changed, 115 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_logicop.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index e06bf6668f..c7af3bf1dc 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -12,6 +12,7 @@ C_SOURCES = \
 	lp_bld_load.c \
 	lp_bld_store.c \
 	lp_bld_loop.c \
+	lp_bld_logicop.c \
 	lp_clear.c \
 	lp_flush.c \
 	lp_query.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index bba283f296..128facee02 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -15,6 +15,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_load.c',
 		'lp_bld_store.c',
 		'lp_bld_loop.c',
+		'lp_bld_logicop.c',
 		'lp_clear.c',
 		'lp_context.c',
 		'lp_draw_arrays.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld.h b/src/gallium/drivers/llvmpipe/lp_bld.h
index 44343f644b..e722e0b7a1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld.h
@@ -115,4 +115,17 @@ lp_build_loop_end(LLVMBuilderRef builder,
                   struct lp_build_loop_state *state);
 
 
+/**
+ * Apply a logic op.
+ *
+ * src/dst parameters are packed values. It should work regardless of whether
+ * the inputs are scalars or vectors.
+ */
+LLVMValueRef
+lp_build_logicop(LLVMBuilderRef builder,
+                 unsigned logicop_func,
+                 LLVMValueRef src,
+                 LLVMValueRef dst);
+
+
 #endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logicop.c b/src/gallium/drivers/llvmpipe/lp_bld_logicop.c
new file mode 100644
index 0000000000..a04544d365
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logicop.c
@@ -0,0 +1,100 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_state.h"
+
+#include "lp_bld.h"
+
+
+LLVMValueRef
+lp_build_logicop(LLVMBuilderRef builder,
+                 unsigned logicop_func,
+                 LLVMValueRef src,
+                 LLVMValueRef dst)
+{
+   LLVMTypeRef type;
+   LLVMValueRef res;
+
+   type = LLVMTypeOf(src);
+
+   switch (logicop_func) {
+   case PIPE_LOGICOP_CLEAR:
+      res = LLVMConstNull(type);
+      break;
+   case PIPE_LOGICOP_NOR:
+      res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_AND_INVERTED:
+      res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, "");
+      break;
+   case PIPE_LOGICOP_COPY_INVERTED:
+      res = LLVMBuildNot(builder, src, "");
+      break;
+   case PIPE_LOGICOP_AND_REVERSE:
+      res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_INVERT:
+      res = LLVMBuildNot(builder, dst, "");
+      break;
+   case PIPE_LOGICOP_XOR:
+      res = LLVMBuildXor(builder, src, dst, "");
+      break;
+   case PIPE_LOGICOP_NAND:
+      res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_AND:
+      res = LLVMBuildAnd(builder, src, dst, "");
+      break;
+   case PIPE_LOGICOP_EQUIV:
+      res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_NOOP:
+      res = dst;
+      break;
+   case PIPE_LOGICOP_OR_INVERTED:
+      res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, "");
+      break;
+   case PIPE_LOGICOP_COPY:
+      res = src;
+      break;
+   case PIPE_LOGICOP_OR_REVERSE:
+      res = LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_OR:
+      res = LLVMBuildOr(builder, src, dst, "");
+      break;
+   case PIPE_LOGICOP_SET:
+      res = LLVMConstAllOnes(type);
+      break;
+   default:
+      assert(0);
+      res = src;
+   }
+
+   return res;
+}
-- 
cgit v1.2.3


From d2cf3e8dfd37d6225d5f6911fc53a07706cd07f7 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 1 Aug 2009 17:27:05 +0100
Subject: llvmpipe: Arithmetic helpers.

---
 src/gallium/drivers/llvmpipe/Makefile      |   1 +
 src/gallium/drivers/llvmpipe/SConscript    |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 305 +++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_arit.h | 113 +++++++++++
 4 files changed, 420 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_arit.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_arit.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index c7af3bf1dc..8cbc54ccb7 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -7,6 +7,7 @@ C_SOURCES = \
 	lp_fs_exec.c \
 	lp_fs_sse.c \
 	lp_fs_llvm.c \
+	lp_bld_arit.c \
 	lp_bld_pack.c \
 	lp_bld_unpack.c \
 	lp_bld_load.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 128facee02..e09608ed64 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -10,6 +10,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_fs_exec.c',
 		'lp_fs_sse.c',
 		'lp_fs_llvm.c',
+		'lp_bld_arit.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
 		'lp_bld_load.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
new file mode 100644
index 0000000000..cfffe3b12a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -0,0 +1,305 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper arithmetic functions.
+ *
+ * LLVM IR doesn't support all basic arithmetic operations we care about (most
+ * notably min/max and saturated operations), and it is often necessary to
+ * resort to machine-specific intrinsics directly. The functions here hide all
+ * these implementation details from the other modules.
+ *
+ * We also do simple expression simplification here. Reasons are:
+ * - it is very easy given we have all necessary information readily available
+ * - LLVM optimization passes fail to simplify several vector expressions
+ * - We often know value constraints which the optimization passes have no way
+ *   of knowing, such as when source arguments are known to be in [0, 1] range.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+
+#include "lp_bld_arit.h"
+
+
+/** Build a constant vector of `type` in which every group of 4 elements holds r, g, b, a at the positions given by `swizzle` (NULL = 0,1,2,3). */
+LLVMValueRef
+lp_build_const_aos(LLVMTypeRef type, double r, double g, double b, double a,
+                   const unsigned char *swizzle)
+{
+   const unsigned char default_swizzle[4] = {0, 1, 2, 3};
+   LLVMTypeRef elem_type;
+   unsigned num_elems;
+   unsigned elem_width;
+   LLVMValueRef elems[LP_MAX_VECTOR_SIZE];
+   double scale;
+   unsigned i;
+
+   num_elems = LLVMGetVectorSize(type);
+   assert(num_elems % 4 == 0);
+   assert(num_elems <= LP_MAX_VECTOR_SIZE); /* elems[] has exactly LP_MAX_VECTOR_SIZE slots, so a full-size vector is fine */
+
+   elem_type = LLVMGetElementType(type);
+   /* A NULL swizzle selects the identity channel order. */
+   if(swizzle == NULL)
+      swizzle = default_swizzle;
+
+   switch(LLVMGetTypeKind(elem_type)) {
+   case LLVMFloatTypeKind:
+      for(i = 0; i < num_elems; i += 4) {
+         elems[i + swizzle[0]] = LLVMConstReal(elem_type, r);
+         elems[i + swizzle[1]] = LLVMConstReal(elem_type, g);
+         elems[i + swizzle[2]] = LLVMConstReal(elem_type, b);
+         elems[i + swizzle[3]] = LLVMConstReal(elem_type, a);
+      }
+      break;
+   /* Integer elements: 1.0 maps to 2^width - 1, rounded to nearest. */
+   case LLVMIntegerTypeKind:
+      elem_width = LLVMGetIntTypeWidth(elem_type);
+      assert(elem_width <= 32);
+      scale = (double)((1ULL << elem_width) - 1); /* 64-bit shift: a plain int shift overflows for widths 31 and 32 */
+      for(i = 0; i < num_elems; i += 4) {
+         elems[i + swizzle[0]] = LLVMConstInt(elem_type, r*scale + 0.5, 0);
+         elems[i + swizzle[1]] = LLVMConstInt(elem_type, g*scale + 0.5, 0);
+         elems[i + swizzle[2]] = LLVMConstInt(elem_type, b*scale + 0.5, 0);
+         elems[i + swizzle[3]] = LLVMConstInt(elem_type, a*scale + 0.5, 0);
+      }
+      break;
+
+   default:
+      assert(0);
+      return LLVMGetUndef(type);
+   }
+
+   return LLVMConstVector(elems, num_elems);
+}
+               
+
+/** Emit a + b; an operand equal to `zero` is skipped and two constants are folded. */
+LLVMValueRef
+lp_build_add(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b,
+             LLVMValueRef zero)
+{
+   /* x + 0 == x: nothing to emit. */
+   if(zero == a)
+      return b;
+   if(zero == b)
+      return a;
+
+   if(!LLVMIsConstant(a) || !LLVMIsConstant(b))
+      return LLVMBuildAdd(builder, a, b, "");
+   return LLVMConstAdd(a, b);
+}
+
+
+/** Emit a - b; b equal to `zero` or to a is resolved directly and two constants are folded. */
+LLVMValueRef
+lp_build_sub(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b,
+             LLVMValueRef zero)
+{
+   if(zero == b)
+      return a;
+   /* The very same value on both sides subtracts to zero. */
+   if(b == a)
+      return zero;
+
+   if(!LLVMIsConstant(a) || !LLVMIsConstant(b))
+      return LLVMBuildSub(builder, a, b, "");
+   return LLVMConstSub(a, b);
+}
+
+
+/** Emit a * b; operands equal to `zero` or `one` are resolved directly and two constants are folded. */
+LLVMValueRef
+lp_build_mul(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b,
+             LLVMValueRef zero, LLVMValueRef one)
+{
+   /* Anything times zero is zero. */
+   if(zero == a || zero == b)
+      return zero;
+
+   /* One is the multiplicative identity: hand back the other operand. */
+   if(one == a)
+      return b;
+   if(one == b)
+      return a;
+
+   /* Fold at build time when both operands are constants. */
+   if(!LLVMIsConstant(a) || !LLVMIsConstant(b))
+      return LLVMBuildMul(builder, a, b, "");
+   return LLVMConstMul(a, b);
+}
+
+
+/** Emit min(a, b): the SSE min.ps intrinsic on x86/x86-64, a compare + select everywhere else. */
+LLVMValueRef
+lp_build_min(LLVMBuilderRef builder,
+             LLVMValueRef a, LLVMValueRef b)
+{
+   /* TODO: optimize the constant case */
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   /* The intrinsic is declared on first use, with <4 x float> operands and result. */
+   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+   LLVMValueRef function;
+   LLVMValueRef args[2];
+
+   function = LLVMGetNamedFunction(module, "llvm.x86.sse.min.ps");
+   if(!function) {
+      LLVMTypeRef type = LLVMVectorType(LLVMFloatType(), 4);
+      LLVMTypeRef arg_types[2];
+      arg_types[0] = type;
+      arg_types[1] = type;
+      function = LLVMAddFunction(module, "llvm.x86.sse.min.ps", LLVMFunctionType(type, arg_types, 2, 0));
+      LLVMSetFunctionCallConv(function, LLVMCCallConv);
+      LLVMSetLinkage(function, LLVMExternalLinkage);
+   }
+   assert(LLVMIsDeclaration(function));
+
+   args[0] = a;
+   args[1] = b;
+
+   return LLVMBuildCall(builder, function, args, 2, "");
+
+#else
+   /* Generic fallback: pick a where a < b (unordered-or-less-than compare), else b. */
+   LLVMValueRef cond = LLVMBuildFCmp(builder, LLVMRealULT, a, b, "");
+   return LLVMBuildSelect(builder, cond, a, b, "");
+
+#endif
+}
+
+
+/** Emit max(a, b): the SSE max.ps intrinsic on x86/x86-64, a compare + select everywhere else. */
+LLVMValueRef
+lp_build_max(LLVMBuilderRef builder,
+             LLVMValueRef a, LLVMValueRef b)
+{
+   /* TODO: optimize the constant case */
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   /* The intrinsic is declared on first use, with <4 x float> operands and result. */
+   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+   LLVMValueRef function;
+   LLVMValueRef args[2];
+
+   function = LLVMGetNamedFunction(module, "llvm.x86.sse.max.ps");
+   if(!function) {
+      LLVMTypeRef type = LLVMVectorType(LLVMFloatType(), 4);
+      LLVMTypeRef arg_types[2];
+      arg_types[0] = type;
+      arg_types[1] = type;
+      function = LLVMAddFunction(module, "llvm.x86.sse.max.ps", LLVMFunctionType(type, arg_types, 2, 0));
+      LLVMSetFunctionCallConv(function, LLVMCCallConv);
+      LLVMSetLinkage(function, LLVMExternalLinkage);
+   }
+   assert(LLVMIsDeclaration(function));
+
+   args[0] = a;
+   args[1] = b;
+
+   return LLVMBuildCall(builder, function, args, 2, "");
+
+#else
+   /* Generic fallback: pick b where a < b (unordered-or-less-than compare), else a. */
+   LLVMValueRef cond = LLVMBuildFCmp(builder, LLVMRealULT, a, b, "");
+   return LLVMBuildSelect(builder, cond, b, a, "");
+
+#endif
+}
+
+
+/** Emit min(a + b, one); operands equal to `zero` or `one` are resolved without emitting code. */
+LLVMValueRef
+lp_build_add_sat(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b,
+                 LLVMValueRef zero, LLVMValueRef one)
+{
+   LLVMValueRef sum;
+
+   if(zero == a)
+      return b;
+   if(zero == b)
+      return a;
+   if(one == a || one == b)
+      return one;
+   sum = lp_build_add(builder, a, b, zero);
+   return lp_build_min(builder, sum, one);
+}
+
+/** Emit max(a - b, zero); returns a when b is `zero` and `zero` when b is `one`. */
+LLVMValueRef
+lp_build_sub_sat(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b,
+                 LLVMValueRef zero, LLVMValueRef one)
+{
+   LLVMValueRef diff;
+
+   if(zero == b)
+      return a;
+   if(one == b)
+      return zero;
+   diff = lp_build_sub(builder, a, b, zero);
+   return lp_build_max(builder, diff, zero);
+}
+
+/** Emit min(a, b), resolving operands equal to `zero` or `one` without emitting code. */
+LLVMValueRef
+lp_build_min_sat(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b,
+                 LLVMValueRef zero, LLVMValueRef one)
+{
+   /* `zero` is treated as the lower bound: it always wins. */
+   if(zero == a || zero == b)
+      return zero;
+
+   /* `one` is treated as the upper bound: the other operand wins. */
+   if(one == a)
+      return b;
+   if(one == b)
+      return a;
+   return lp_build_min(builder, a, b);
+}
+
+
+/** Emit max(a, b), resolving operands equal to `zero` or `one` without emitting code. */
+LLVMValueRef
+lp_build_max_sat(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b,
+                 LLVMValueRef zero, LLVMValueRef one)
+{
+   /* `zero` is treated as the lower bound: the other operand wins. */
+   if(zero == a)
+      return b;
+   if(zero == b)
+      return a;
+   /* `one` is treated as the upper bound: it always wins. */
+   if(one == a || one == b)
+      return one;
+
+   return lp_build_max(builder, a, b);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
new file mode 100644
index 0000000000..0d2636aed9
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -0,0 +1,113 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_ARIT_H
+#define LP_BLD_ARIT_H
+
+
+#include <llvm-c/Core.h>  
+
+ 
+#define LP_MAX_VECTOR_SIZE 16
+
+
+/*
+ * Constants
+ */
+
+LLVMValueRef
+lp_build_const_aos(LLVMTypeRef type, 
+                   double r, double g, double b, double a, 
+                   const unsigned char *swizzle);
+
+/*
+ * Basic arithmetic
+ */
+
+LLVMValueRef
+lp_build_add(LLVMBuilderRef builder,
+             LLVMValueRef a,
+             LLVMValueRef b,
+             LLVMValueRef zero);
+
+LLVMValueRef
+lp_build_sub(LLVMBuilderRef builder,
+             LLVMValueRef a,
+             LLVMValueRef b,
+             LLVMValueRef zero);
+
+LLVMValueRef
+lp_build_mul(LLVMBuilderRef builder,
+             LLVMValueRef a,
+             LLVMValueRef b,
+             LLVMValueRef zero,
+             LLVMValueRef one);
+
+LLVMValueRef
+lp_build_min(LLVMBuilderRef builder,
+             LLVMValueRef a,
+             LLVMValueRef b);
+
+LLVMValueRef
+lp_build_max(LLVMBuilderRef builder,
+             LLVMValueRef a,
+             LLVMValueRef b);
+
+/*
+ * Saturated arithmetic
+ */
+
+LLVMValueRef
+lp_build_add_sat(LLVMBuilderRef builder,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 LLVMValueRef zero,
+                 LLVMValueRef one);
+
+LLVMValueRef
+lp_build_sub_sat(LLVMBuilderRef builder,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 LLVMValueRef zero,
+                 LLVMValueRef one);
+
+LLVMValueRef
+lp_build_min_sat(LLVMBuilderRef builder,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 LLVMValueRef zero,
+                 LLVMValueRef one);
+
+LLVMValueRef
+lp_build_max_sat(LLVMBuilderRef builder,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 LLVMValueRef zero,
+                 LLVMValueRef one);
+
+
+#endif /* !LP_BLD_ARIT_H */
-- 
cgit v1.2.3


From 7d043162c5d9150947d9341cfa22192bd4c70fde Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 1 Aug 2009 17:59:19 +0100
Subject: llvmpipe: Blending.

The code
---
 src/gallium/drivers/llvmpipe/Makefile        |   1 +
 src/gallium/drivers/llvmpipe/SConscript      |  14 +-
 src/gallium/drivers/llvmpipe/lp_bld.h        |  12 +
 src/gallium/drivers/llvmpipe/lp_bld_blend.c  | 319 ++++++++++++++
 src/gallium/drivers/llvmpipe/lp_test_blend.c | 621 +++++++++++++++++++++++++++
 5 files changed, 965 insertions(+), 2 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_test_blend.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 8cbc54ccb7..1b6cd5ed85 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -14,6 +14,7 @@ C_SOURCES = \
 	lp_bld_store.c \
 	lp_bld_loop.c \
 	lp_bld_logicop.c \
+	lp_bld_blend.c \
 	lp_clear.c \
 	lp_flush.c \
 	lp_query.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index e09608ed64..4a365e0cb3 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -2,7 +2,7 @@ Import('*')
 
 env = env.Clone()
 
-env.ParseConfig('llvm-config --cflags --libs jit interpreter nativecodegen')
+env.ParseConfig('llvm-config --cflags')
 
 llvmpipe = env.ConvenienceLibrary(
 	target = 'llvmpipe',
@@ -17,6 +17,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_store.c',
 		'lp_bld_loop.c',
 		'lp_bld_logicop.c',
+		'lp_bld_blend.c',
 		'lp_clear.c',
 		'lp_context.c',
 		'lp_draw_arrays.c',
@@ -45,12 +46,21 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_tile_cache.c',
 	])
 
+
+env = env.Clone()
+
 env['LINK'] = env['CXX']
+env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
+env.Prepend(LIBS = [llvmpipe] + auxiliaries)
 
 env.Program(
     target = 'lp_bld_test',
     source = ['lp_bld_test.c'],
-    LIBS = [llvmpipe] + auxiliaries + env['LIBS'],
+)
+
+env.Program(
+    target = 'lp_test_blend',
+    source = ['lp_test_blend.c'],
 )
 
 Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_bld.h b/src/gallium/drivers/llvmpipe/lp_bld.h
index e722e0b7a1..86571374b6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld.h
@@ -45,6 +45,9 @@
 #include "pipe/p_format.h"
 
 
+struct pipe_blend_state;
+
+
 /**
  * Unpack a pixel into its RGBA components.
  *
@@ -128,4 +131,13 @@ lp_build_logicop(LLVMBuilderRef builder,
                  LLVMValueRef dst);
 
 
+LLVMValueRef
+lp_build_blend(LLVMBuilderRef builder,
+               const struct pipe_blend_state *blend,
+               LLVMValueRef src,
+               LLVMValueRef dst,
+               LLVMValueRef const_,
+               unsigned alpha_swizzle);
+
+
 #endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
new file mode 100644
index 0000000000..d708047202
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
@@ -0,0 +1,319 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation.
+ *
+ * This code is generic -- it should be able to cope both with floating point
+ * and integer inputs in AOS form.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+
+#include "lp_bld.h"
+#include "lp_bld_arit.h"
+
+
+/**
+ * We may use the same values several times, so we keep them here to avoid
+ * recomputing them. Also reusing the values allows us to do simplifications
+ * that LLVM optimization passes wouldn't normally be able to do.
+ */
+struct lp_build_blend_values
+{
+   LLVMBuilderRef builder;         /* builder passed to every arithmetic helper */
+   
+   LLVMValueRef undef;             /* LLVMGetUndef() of the working vector type */
+   LLVMValueRef zero;              /* LLVMConstNull() of the working vector type */
+   LLVMValueRef one;               /* lp_build_const_aos(type, 1.0, 1.0, 1.0, 1.0, NULL) */
+
+   LLVMValueRef src;               /* blend inputs, stored as given by the caller */
+   LLVMValueRef dst;
+   LLVMValueRef const_;
+
+   LLVMValueRef inv_src;           /* one - src;    NULL until first requested */
+   LLVMValueRef inv_dst;           /* one - dst;    NULL until first requested */
+   LLVMValueRef inv_const;         /* one - const_; NULL until first requested */
+   LLVMValueRef saturate;          /* min(src, one - dst); NULL until first requested */
+
+   LLVMValueRef rgb_src_factor;    /* NOTE(review): these four are only zeroed by memset in this file -- confirm they are needed */
+   LLVMValueRef alpha_src_factor;
+   LLVMValueRef rgb_dst_factor;
+   LLVMValueRef alpha_dst_factor;
+};
+
+
+/** Return the whole-vector value a blend factor is based on; derived values are built on first use and cached in `values`. */
+static LLVMValueRef
+lp_build_blend_factor_unswizzled(struct lp_build_blend_values *values,
+                                 unsigned factor,
+                                 boolean alpha)
+{
+   LLVMBuilderRef builder = values->builder;
+
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO:
+      return values->zero;
+   case PIPE_BLENDFACTOR_ONE:
+      return values->one;
+   /* The _COLOR and _ALPHA variants share one base value; channel selection happens later in the swizzle step. */
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      return values->src;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      return values->dst;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      return values->const_;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      if(!values->inv_src)
+         values->inv_src = lp_build_sub(builder, values->one, values->src, values->zero);
+      return values->inv_src;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      if(!values->inv_dst)
+         values->inv_dst = lp_build_sub(builder, values->one, values->dst, values->zero);
+      return values->inv_dst;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      if(!values->inv_const)
+         values->inv_const = lp_build_sub(builder, values->one, values->const_, values->zero);
+      return values->inv_const;
+
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      if(alpha)
+         return values->one;
+      /* For the RGB factor the base value is min(src, one - dst). */
+      if(!values->inv_dst)
+         values->inv_dst = lp_build_sub(builder, values->one, values->dst, values->zero);
+      if(!values->saturate)
+         values->saturate = lp_build_min_sat(builder, values->src, values->inv_dst, values->zero, values->one);
+      return values->saturate;
+
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      /* TODO */
+   default:
+      assert(0);
+      return values->zero;
+   }
+}
+
+
+enum lp_build_blend_swizzle {
+   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,   /* every channel stays in its own position */
+   LP_BUILD_BLEND_SWIZZLE_AAAA = 1,   /* the alpha channel is replicated into every position */
+};
+
+
+/**
+ * How should we shuffle the base factor.
+ */
+static enum lp_build_blend_swizzle
+lp_build_blend_factor_swizzle(unsigned factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      return LP_BUILD_BLEND_SWIZZLE_AAAA;   /* alpha-based factors */
+   case PIPE_BLENDFACTOR_ZERO:
+   case PIPE_BLENDFACTOR_ONE:
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      return LP_BUILD_BLEND_SWIZZLE_RGBA;   /* per-channel (or uniform) factors */
+   default:
+      assert(0);
+      return LP_BUILD_BLEND_SWIZZLE_RGBA;
+   }
+}
+
+
+static LLVMValueRef
+lp_build_blend_swizzle(struct lp_build_blend_values *values,
+                       LLVMValueRef rgb, 
+                       LLVMValueRef alpha, 
+                       enum lp_build_blend_swizzle rgb_swizzle,
+                       unsigned alpha_swizzle,
+                       unsigned n)
+{
+   LLVMValueRef swizzles[LP_MAX_VECTOR_SIZE];   /* one shuffle index per element; n is expected not to exceed this size */
+   unsigned i, j;
+   /* Merge `rgb` and `alpha` into one n-element vector: in each group of 4, position alpha_swizzle is read from `alpha`, the rest from `rgb`. */
+   if(rgb == alpha) {
+      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
+         return rgb;
+      /* Same value on both sides with an AAAA swizzle: read everything from `rgb` and leave the second operand undefined. */
+      alpha = values->undef;
+   }
+   /* Shuffle indices below n address the first operand (`rgb`); indices n and above address the second (`alpha`). */
+   for(j = 0; j < n; j += 4) {
+      for(i = 0; i < 4; ++i) {
+         unsigned swizzle;
+
+         if(i == alpha_swizzle && alpha != values->undef) {
+            /* Take the alpha from the second shuffle argument */
+            swizzle = n + j + alpha_swizzle;
+         }
+         else if (rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
+            /* Take the alpha from the first shuffle argument */
+            swizzle = j + alpha_swizzle;
+         }
+         else {
+            swizzle = j + i;   /* keep the channel of `rgb` in place */
+         }
+
+         swizzles[j + i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
+      }
+   }
+
+   return LLVMBuildShuffleVector(values->builder, rgb, alpha, LLVMConstVector(swizzles, n), "");
+}
+
+
+/** Emit factor1 multiplied by the blend factor selected by rgb_factor (color channels) and alpha_factor (alpha channel). */
+static LLVMValueRef
+lp_build_blend_factor(struct lp_build_blend_values *values,
+                      LLVMValueRef factor1,
+                      unsigned rgb_factor, unsigned alpha_factor,
+                      unsigned alpha_swizzle, unsigned n)
+{
+   LLVMValueRef color_base, alpha_base;
+   LLVMValueRef combined;
+   enum lp_build_blend_swizzle color_swizzle;
+
+   /* Base values; a derived value is emitted here on first use, RGB before alpha. */
+   color_base = lp_build_blend_factor_unswizzled(values, rgb_factor,   FALSE);
+   alpha_base = lp_build_blend_factor_unswizzled(values, alpha_factor, TRUE);
+   color_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
+
+   /* Merge both into a single per-channel factor vector. */
+   combined = lp_build_blend_swizzle(values, color_base, alpha_base,
+                                     color_swizzle, alpha_swizzle, n);
+
+   return lp_build_mul(values->builder, factor1, combined, values->zero, values->one);
+}
+
+
+/** Combine the two blend terms with the given PIPE_BLEND_* function, using the saturated arithmetic helpers. */
+static LLVMValueRef
+lp_build_blend_func(struct lp_build_blend_values *values, unsigned func,
+                    LLVMValueRef term1, LLVMValueRef term2)
+{
+   LLVMBuilderRef builder = values->builder;
+
+   switch (func) {
+   case PIPE_BLEND_ADD:
+      return lp_build_add_sat(builder, term1, term2, values->zero, values->one);
+   case PIPE_BLEND_SUBTRACT:
+      return lp_build_sub_sat(builder, term1, term2, values->zero, values->one);
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      return lp_build_sub_sat(builder, term2, term1, values->zero, values->one);
+   case PIPE_BLEND_MIN:
+      return lp_build_min_sat(builder, term1, term2, values->zero, values->one);
+   case PIPE_BLEND_MAX:
+      return lp_build_max_sat(builder, term1, term2, values->zero, values->one);
+   default:
+      assert(0);
+      return values->zero;
+   }
+}
+
+
+/**
+ * Emit the blend operation described by `blend` for the vectors src and dst,
+ * with const_ as the constant color; alpha_swizzle is the alpha position within each group of 4.
+ */
+LLVMValueRef
+lp_build_blend(LLVMBuilderRef builder,
+               const struct pipe_blend_state *blend,
+               LLVMValueRef src,
+               LLVMValueRef dst,
+               LLVMValueRef const_,
+               unsigned alpha_swizzle)
+{
+   const LLVMTypeRef vec_type = LLVMTypeOf(src);
+   const unsigned length = LLVMGetVectorSize(vec_type);
+   struct lp_build_blend_values values;
+   LLVMValueRef src_term;
+   LLVMValueRef dst_term;
+   LLVMValueRef rgb;
+   LLVMValueRef alpha;
+
+   /* Zero everything so the lazily built values start out unset. */
+   memset(&values, 0, sizeof values);
+   values.builder = builder;
+   values.src = src;
+   values.dst = dst;
+   values.const_ = const_;
+
+   /* Constants shared by all the helpers. */
+   values.undef = LLVMGetUndef(vec_type);
+   values.zero = LLVMConstNull(vec_type);
+   values.one = lp_build_const_aos(vec_type, 1.0, 1.0, 1.0, 1.0, NULL);
+
+   /* TODO: There are still a few optimization opportunities here. For certain
+    * combinations it is possible to reorder the operations and therefore save
+    * some instructions. */
+
+   src_term = lp_build_blend_factor(&values, src, blend->rgb_src_factor,
+                                    blend->alpha_src_factor, alpha_swizzle, length);
+   dst_term = lp_build_blend_factor(&values, dst, blend->rgb_dst_factor,
+                                    blend->alpha_dst_factor, alpha_swizzle, length);
+
+   /* Same function for color and alpha: one operation covers all channels. */
+   if(blend->rgb_func == blend->alpha_func)
+      return lp_build_blend_func(&values, blend->rgb_func, src_term, dst_term);
+
+   /* Separate RGB / A functions: evaluate both, then pick per channel. */
+   rgb   = lp_build_blend_func(&values, blend->rgb_func,   src_term, dst_term);
+   alpha = lp_build_blend_func(&values, blend->alpha_func, src_term, dst_term);
+
+   return lp_build_blend_swizzle(&values, rgb, alpha,
+                                 LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle, length);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
new file mode 100644
index 0000000000..1a1313a884
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -0,0 +1,621 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Unit tests for blend LLVM IR generation
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ *
+ * Blend computation code derived from code written by
+ * @author Brian Paul <brian@vmware.com>
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <llvm-c/Core.h>
+#include <llvm-c/Analysis.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/BitWriter.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+
+#include "lp_bld.h"
+
+
+unsigned verbose = 0; /* incremented per "-v"; >= 1 prints each tested state, >= 2 also dumps the module */
+
+
+typedef void (*blend_test_ptr_t)(const float *src, const float *dst, const float *const_, float *res); /* C-side signature used to call the JIT-compiled "test" function */
+
+
+static LLVMValueRef /* returns the "test" function added to module */
+add_blend_test(LLVMModuleRef module,                 /* module that receives the generated function */
+               const struct pipe_blend_state *blend) /* blend state the generated code implements */
+{
+   LLVMTypeRef args[4];
+   LLVMValueRef func;
+   LLVMValueRef src_ptr;
+   LLVMValueRef dst_ptr;
+   LLVMValueRef const_ptr;
+   LLVMValueRef res_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   LLVMValueRef src;
+   LLVMValueRef dst;
+   LLVMValueRef const_;
+   LLVMValueRef res;
+
+   args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* src:   pointer to <4 x float> */
+   args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* dst:   pointer to <4 x float> */
+   args[2] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* const: pointer to <4 x float> */
+   args[3] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* res:   pointer to <4 x float> */
+   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); /* void test(src, dst, const, res) */
+   LLVMSetFunctionCallConv(func, LLVMCCallConv); /* C calling convention, so it can be called through blend_test_ptr_t */
+   src_ptr = LLVMGetParam(func, 0);
+   dst_ptr = LLVMGetParam(func, 1);
+   const_ptr = LLVMGetParam(func, 2);
+   res_ptr = LLVMGetParam(func, 3);
+
+   block = LLVMAppendBasicBlock(func, "entry"); /* the whole body lives in this single basic block */
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   src = LLVMBuildLoad(builder, src_ptr, "src");
+   dst = LLVMBuildLoad(builder, dst_ptr, "dst");
+   const_ = LLVMBuildLoad(builder, const_ptr, "const");
+
+   res = lp_build_blend(builder, blend, src, dst, const_, 3); /* alpha_swizzle = 3 */
+
+   LLVMSetValueName(res, "res");
+
+   LLVMBuildStore(builder, res, res_ptr);
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder); /* the builder is only needed while emitting */
+   return func;
+}
+
+
+static void
+random_color(float *color) /* out: color[0..3] each receive a pseudo-random value in [0, 1] */
+{
+    color[0] = (float)((double)random()/2147483647.0); /* random() spans [0, 2^31-1]; RAND_MAX only bounds rand() */
+    color[1] = (float)((double)random()/2147483647.0);
+    color[2] = (float)((double)random()/2147483647.0);
+    color[3] = (float)((double)random()/2147483647.0);
+}
+
+
+/** R = (A) + (B), clamped to at most 1.0f. R is expanded several times, so pass a side-effect-free lvalue. */
+#define ADD_SAT(R, A, B) \
+do { \
+   R = (A) + (B);  if (R > 1.0f) R = 1.0f; \
+} while (0)
+
+/** R = (A) - (B), clamped to at least 0.0f. R is expanded several times, so pass a side-effect-free lvalue. */
+#define SUB_SAT(R, A, B) \
+do { \
+   R = (A) - (B);  if (R < 0.0f) R = 0.0f; \
+} while (0)
+
+
+static void
+compute_blend_ref_term(unsigned rgb_factor,   /* PIPE_BLENDFACTOR_x applied to R, G, B */
+                       unsigned alpha_factor, /* PIPE_BLENDFACTOR_x applied to A */
+                       const float *factor,   /* RGBA color being scaled (callers pass src or dst) */
+                       const float *src,      /* RGBA source color */
+                       const float *dst,      /* RGBA destination color */
+                       const float *const_,   /* RGBA blend constant color */
+                       float *term)           /* out: RGBA product of the color and its blend factor */
+{
+   float temp;
+
+   switch (rgb_factor) { /* R, G, B components of the term */
+   case PIPE_BLENDFACTOR_ONE:
+      term[0] = factor[0]; /* R */
+      term[1] = factor[1]; /* G */
+      term[2] = factor[2]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      term[0] = factor[0] * src[0]; /* R */
+      term[1] = factor[1] * src[1]; /* G */
+      term[2] = factor[2] * src[2]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      term[0] = factor[0] * src[3]; /* R */
+      term[1] = factor[1] * src[3]; /* G */
+      term[2] = factor[2] * src[3]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      term[0] = factor[0] * dst[0]; /* R */
+      term[1] = factor[1] * dst[1]; /* G */
+      term[2] = factor[2] * dst[2]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      term[0] = factor[0] * dst[3]; /* R */
+      term[1] = factor[1] * dst[3]; /* G */
+      term[2] = factor[2] * dst[3]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      temp = MIN2(src[3], 1.0f - dst[3]);
+      term[0] = factor[0] * temp; /* R */
+      term[1] = factor[1] * temp; /* G */
+      term[2] = factor[2] * temp; /* B */
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      term[0] = factor[0] * const_[0]; /* R */
+      term[1] = factor[1] * const_[1]; /* G */
+      term[2] = factor[2] * const_[2]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      term[0] = factor[0] * const_[3]; /* R */
+      term[1] = factor[1] * const_[3]; /* G */
+      term[2] = factor[2] * const_[3]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      assert(0); /* to do */
+      break;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      assert(0); /* to do */
+      break;
+   case PIPE_BLENDFACTOR_ZERO:
+      term[0] = 0.0f; /* R */
+      term[1] = 0.0f; /* G */
+      term[2] = 0.0f; /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      term[0] = factor[0] * (1.0f - src[0]); /* R */
+      term[1] = factor[1] * (1.0f - src[1]); /* G */
+      term[2] = factor[2] * (1.0f - src[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      term[0] = factor[0] * (1.0f - src[3]); /* R */
+      term[1] = factor[1] * (1.0f - src[3]); /* G */
+      term[2] = factor[2] * (1.0f - src[3]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      term[0] = factor[0] * (1.0f - dst[3]); /* R */
+      term[1] = factor[1] * (1.0f - dst[3]); /* G */
+      term[2] = factor[2] * (1.0f - dst[3]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      term[0] = factor[0] * (1.0f - dst[0]); /* R */
+      term[1] = factor[1] * (1.0f - dst[1]); /* G */
+      term[2] = factor[2] * (1.0f - dst[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      term[0] = factor[0] * (1.0f - const_[0]); /* R */
+      term[1] = factor[1] * (1.0f - const_[1]); /* G */
+      term[2] = factor[2] * (1.0f - const_[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      term[0] = factor[0] * (1.0f - const_[3]); /* R */
+      term[1] = factor[1] * (1.0f - const_[3]); /* G */
+      term[2] = factor[2] * (1.0f - const_[3]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      assert(0); /* to do */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      assert(0); /* to do */
+      break;
+   default:
+      assert(0);
+   }
+
+   /*
+    * Compute the A component of the term (the *_COLOR factors use alpha here)
+    */
+   switch (alpha_factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      term[3] = factor[3]; /* A */
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      term[3] = factor[3] * src[3]; /* A */
+      break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      term[3] = factor[3] * dst[3]; /* A */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      term[3] = factor[3]; /* A: the saturate factor is 1 for alpha, so scale factor[], not always src[] */
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      term[3] = factor[3] * const_[3]; /* A */
+      break;
+   case PIPE_BLENDFACTOR_ZERO:
+      term[3] = 0.0f; /* A */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      term[3] = factor[3] * (1.0f - src[3]); /* A */
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      term[3] = factor[3] * (1.0f - dst[3]); /* A */
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      term[3] = factor[3] * (1.0f - const_[3]);
+      break;
+   default: /* also reached by the SRC1 factors, which have no alpha case */
+      assert(0);
+   }
+}
+
+
+static void
+compute_blend_ref(const struct pipe_blend_state *blend, /* factors and functions to apply */
+                  const float *src,                     /* RGBA source color */
+                  const float *dst,                     /* RGBA destination color */
+                  const float *const_,                  /* RGBA blend constant color */
+                  float *res)                           /* out: RGBA reference result */
+{
+   float src_term[4];
+   float dst_term[4];
+
+   compute_blend_ref_term(blend->rgb_src_factor, blend->alpha_src_factor, src, src, dst, const_, src_term); /* src scaled by the src factors */
+   compute_blend_ref_term(blend->rgb_dst_factor, blend->alpha_dst_factor, dst, src, dst, const_, dst_term); /* dst scaled by the dst factors */
+
+   /*
+    * Combine RGB terms
+    */
+   switch (blend->rgb_func) {
+   case PIPE_BLEND_ADD:
+      ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
+      ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
+      ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
+      SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
+      SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT: /* operands swapped: dst - src */
+      SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
+      SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
+      SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
+      break;
+   case PIPE_BLEND_MIN:
+      res[0] = MIN2(src_term[0], dst_term[0]); /* R */
+      res[1] = MIN2(src_term[1], dst_term[1]); /* G */
+      res[2] = MIN2(src_term[2], dst_term[2]); /* B */
+      break;
+   case PIPE_BLEND_MAX:
+      res[0] = MAX2(src_term[0], dst_term[0]); /* R */
+      res[1] = MAX2(src_term[1], dst_term[1]); /* G */
+      res[2] = MAX2(src_term[2], dst_term[2]); /* B */
+      break;
+   default:
+      assert(0);
+   }
+
+   /*
+    * Combine A terms
+    */
+   switch (blend->alpha_func) {
+   case PIPE_BLEND_ADD:
+      ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT: /* operands swapped: dst - src */
+      SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
+      break;
+   case PIPE_BLEND_MIN:
+      res[3] = MIN2(src_term[3], dst_term[3]); /* A */
+      break;
+   case PIPE_BLEND_MAX:
+      res[3] = MAX2(src_term[3], dst_term[3]); /* A */
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+static boolean /* TRUE on success; a mismatch aborts the process before returning */
+test_one(const struct pipe_blend_state *blend) /* blend state to generate code for and check */
+{
+   LLVMModuleRef module = NULL;
+   LLVMValueRef func = NULL;
+   LLVMExecutionEngineRef engine = NULL;
+   LLVMModuleProviderRef provider = NULL;
+   LLVMPassManagerRef pass = NULL;
+   char *error = NULL;
+   blend_test_ptr_t blend_test_ptr;
+   boolean success;
+   unsigned i, j;
+
+   module = LLVMModuleCreateWithName("test");
+
+   func = add_blend_test(module, blend); /* emit the IR of the blend function under test */
+
+   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+      LLVMDumpModule(module);
+      LLVMDisposeMessage(error);
+      abort();
+   }
+
+   provider = LLVMCreateModuleProviderForExistingModule(module);
+   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+      fprintf(stderr, "%s\n", error);
+      LLVMDisposeMessage(error);
+      abort();
+   }
+
+#if 0 /* optimization passes are currently compiled out */
+   pass = LLVMCreatePassManager();
+   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+    * but there are more on SVN. */
+   LLVMAddConstantPropagationPass(pass);
+   LLVMAddInstructionCombiningPass(pass);
+   LLVMAddPromoteMemoryToRegisterPass(pass);
+   LLVMAddGVNPass(pass);
+   LLVMAddCFGSimplificationPass(pass);
+   LLVMRunPassManager(pass, module);
+#else
+   (void)pass; /* keeps the otherwise unused variable from triggering a warning */
+#endif
+
+   blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func); /* native entry point of "test" */
+
+   if(verbose >= 2)
+      LLVMDumpModule(module);
+
+   success = TRUE;
+   for(i = 0; i < 10; ++i) { /* ten random src/dst/const triples per blend state */
+      float src[4];
+      float dst[4];
+      float const_[4];
+      float ref[4];
+      float res[4];
+
+      random_color(src);
+      random_color(dst);
+      random_color(const_);
+
+      compute_blend_ref(blend, src, dst, const_, ref); /* expected result from the C reference */
+
+      blend_test_ptr(src, dst, const_, res); /* actual result from the generated code */
+
+      for(j = 0; j < 4; ++j)
+         if(res[j] != ref[j]) /* exact float equality, no tolerance */
+            success = FALSE;
+
+      if (!success) {
+         fprintf(stderr, "FAILED\n");
+         fprintf(stderr, "  Result: %f %f %f %f\n", res[0], res[1], res[2], res[3]);
+         fprintf(stderr, "          %f %f %f %f\n", ref[0], ref[1], ref[2], ref[3]);
+         LLVMDumpModule(module);
+         LLVMWriteBitcodeToFile(module, "blend.bc"); /* saved so the failing module can be inspected offline */
+         fprintf(stderr, "blend.bc written\n");
+         abort();
+         break;
+      }
+   }
+
+   LLVMDisposeExecutionEngine(engine);
+   //LLVMDisposeModule(module);
+
+   return success;
+}
+
+
+struct value_name_pair /* pairs a PIPE_BLEND* constant with the name printed in verbose output */
+{
+   unsigned value;   /* the PIPE_BLENDFACTOR_x or PIPE_BLEND_x constant */
+   const char *name; /* short label used in the verbose messages */
+};
+
+
+const struct value_name_pair
+blend_factors[] = { /* blend factors the tests choose from */
+   {PIPE_BLENDFACTOR_ZERO                , "zero"},
+   {PIPE_BLENDFACTOR_ONE                 , "one"},
+   {PIPE_BLENDFACTOR_SRC_COLOR           , "src_color"},
+   {PIPE_BLENDFACTOR_SRC_ALPHA           , "src_alpha"},
+   {PIPE_BLENDFACTOR_DST_COLOR           , "dst_color"},
+   {PIPE_BLENDFACTOR_DST_ALPHA           , "dst_alpha"},
+   {PIPE_BLENDFACTOR_CONST_COLOR         , "const_color"},
+   {PIPE_BLENDFACTOR_CONST_ALPHA         , "const_alpha"},
+#if 0 /* disabled: compute_blend_ref_term() only has assert(0) for the SRC1 factors */
+   {PIPE_BLENDFACTOR_SRC1_COLOR          , "src1_color"},
+   {PIPE_BLENDFACTOR_SRC1_ALPHA          , "src1_alpha"},
+#endif
+   {PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE  , "src_alpha_saturate"},
+   {PIPE_BLENDFACTOR_INV_SRC_COLOR       , "inv_src_color"},
+   {PIPE_BLENDFACTOR_INV_SRC_ALPHA       , "inv_src_alpha"},
+   {PIPE_BLENDFACTOR_INV_DST_COLOR       , "inv_dst_color"},
+   {PIPE_BLENDFACTOR_INV_DST_ALPHA       , "inv_dst_alpha"},
+   {PIPE_BLENDFACTOR_INV_CONST_COLOR     , "inv_const_color"},
+   {PIPE_BLENDFACTOR_INV_CONST_ALPHA     , "inv_const_alpha"},
+#if 0 /* disabled: compute_blend_ref_term() only has assert(0) for the INV_SRC1 factors */
+   {PIPE_BLENDFACTOR_INV_SRC1_COLOR      , "inv_src1_color"},
+   {PIPE_BLENDFACTOR_INV_SRC1_ALPHA      , "inv_src1_alpha"}
+#endif
+};
+
+
+const struct value_name_pair
+blend_funcs[] = { /* blend functions the tests choose from */
+   {PIPE_BLEND_ADD               , "add"},
+   {PIPE_BLEND_SUBTRACT          , "sub"},
+   {PIPE_BLEND_REVERSE_SUBTRACT  , "rev_sub"},
+   {PIPE_BLEND_MIN               , "min"},
+   {PIPE_BLEND_MAX               , "max"}
+};
+
+
+const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]); /* number of entries in blend_funcs[] */
+const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]); /* number of entries in blend_factors[] */
+
+
+static boolean /* TRUE if every visited combination passed */
+test_all(void)
+{
+   const struct value_name_pair *rgb_func;
+   const struct value_name_pair *rgb_src_factor;
+   const struct value_name_pair *rgb_dst_factor;
+   const struct value_name_pair *alpha_func;
+   const struct value_name_pair *alpha_src_factor;
+   const struct value_name_pair *alpha_dst_factor;
+   struct pipe_blend_state blend;
+   boolean success = TRUE; /* boolean, matching the return type and test_one() */
+
+   for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
+      for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
+         for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
+            for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) { /* only dst entries at or before the src entry */
+               for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
+                  for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) { /* same restriction for alpha */
+
+                     if(rgb_dst_factor->value == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || /* saturate is never tested as a dst factor */
+                        alpha_dst_factor->value == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+                        continue;
+
+                     if(verbose >= 1)
+                        fprintf(stderr, 
+                                "%s=%s %s=%s %s=%s %s=%s %s=%s %s=%s ...\n",
+                                "rgb_func",         rgb_func->name,
+                                "rgb_src_factor",   rgb_src_factor->name,
+                                "rgb_dst_factor",   rgb_dst_factor->name,
+                                "alpha_func",       alpha_func->name,
+                                "alpha_src_factor", alpha_src_factor->name,
+                                "alpha_dst_factor", alpha_dst_factor->name);
+
+                     memset(&blend, 0, sizeof blend); /* start from an all-zero state for each combination */
+                     blend.blend_enable      = 1;
+                     blend.rgb_func          = rgb_func->value;
+                     blend.rgb_src_factor    = rgb_src_factor->value;
+                     blend.rgb_dst_factor    = rgb_dst_factor->value;
+                     blend.alpha_func        = alpha_func->value;
+                     blend.alpha_src_factor  = alpha_src_factor->value;
+                     blend.alpha_dst_factor  = alpha_dst_factor->value;
+
+                     if(!test_one(&blend))
+                       success = FALSE;
+
+                  }
+               }
+            }
+         }
+      }
+   }
+
+   return success;
+}
+
+
+static boolean /* TRUE if every tested combination passed */
+test_some(unsigned long n) /* number of randomly chosen blend states to test */
+{
+   const struct value_name_pair *rgb_func;
+   const struct value_name_pair *rgb_src_factor;
+   const struct value_name_pair *rgb_dst_factor;
+   const struct value_name_pair *alpha_func;
+   const struct value_name_pair *alpha_src_factor;
+   const struct value_name_pair *alpha_dst_factor;
+   struct pipe_blend_state blend;
+   unsigned long i;
+   boolean success = TRUE; /* boolean, matching the return type and test_one() */
+
+   for(i = 0; i < n; ++i) {
+      rgb_func = &blend_funcs[random() % num_funcs];
+      alpha_func = &blend_funcs[random() % num_funcs];
+      rgb_src_factor = &blend_factors[random() % num_factors];
+      alpha_src_factor = &blend_factors[random() % num_factors];
+      
+      do { /* redraw until the dst factor is not SRC_ALPHA_SATURATE */
+         rgb_dst_factor = &blend_factors[random() % num_factors];
+      } while(rgb_dst_factor->value == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
+
+      do { /* same for the alpha dst factor */
+         alpha_dst_factor = &blend_factors[random() % num_factors];
+      } while(alpha_dst_factor->value == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
+
+      if(verbose >= 1)
+         fprintf(stderr, 
+                 "%s=%s %s=%s %s=%s %s=%s %s=%s %s=%s ...\n",
+                 "rgb_func",         rgb_func->name,
+                 "rgb_src_factor",   rgb_src_factor->name,
+                 "rgb_dst_factor",   rgb_dst_factor->name,
+                 "alpha_func",       alpha_func->name,
+                 "alpha_src_factor", alpha_src_factor->name,
+                 "alpha_dst_factor", alpha_dst_factor->name);
+
+      memset(&blend, 0, sizeof blend); /* start from an all-zero state for each combination */
+      blend.blend_enable      = 1;
+      blend.rgb_func          = rgb_func->value;
+      blend.rgb_src_factor    = rgb_src_factor->value;
+      blend.rgb_dst_factor    = rgb_dst_factor->value;
+      blend.alpha_func        = alpha_func->value;
+      blend.alpha_src_factor  = alpha_src_factor->value;
+      blend.alpha_dst_factor  = alpha_dst_factor->value;
+
+      if(!test_one(&blend))
+        success = FALSE;
+
+   }
+
+   return success;
+}
+
+
+int main(int argc, char **argv) /* usage: [-v]... [n]; exit status is 0 on success, 1 on failure */
+{
+   unsigned long n = 1000; /* default number of random tests */
+   int i; /* signed, so the comparison with argc does not mix signedness */
+   boolean success;
+
+   for(i = 1; i < argc; ++i) {
+      if(strcmp(argv[i], "-v") == 0)
+         ++verbose;
+      else
+         n = atoi(argv[i]); /* any other argument is taken as the test count */
+   }
+      
+   if(n)
+      success = test_some(n);
+   else /* a count of 0 selects the test_all() sweep */
+      success = test_all();
+
+   return success ? 0 : 1;
+}
-- 
cgit v1.2.3


From 7ace0b1f4ac48caaa8c477d0641cfc7c4d9518c0 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 1 Aug 2009 18:35:04 +0100
Subject: llvmpipe: Cleanup test programs.

---
 src/gallium/drivers/llvmpipe/SConscript       |   4 +-
 src/gallium/drivers/llvmpipe/lp_bld_test.c    | 254 ------------------------
 src/gallium/drivers/llvmpipe/lp_test_blend.c  |   7 +-
 src/gallium/drivers/llvmpipe/lp_test_format.c | 271 ++++++++++++++++++++++++++
 4 files changed, 278 insertions(+), 258 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_test.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_test_format.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 4a365e0cb3..8d4d6736fc 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -54,8 +54,8 @@ env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
 env.Prepend(LIBS = [llvmpipe] + auxiliaries)
 
 env.Program(
-    target = 'lp_bld_test',
-    source = ['lp_bld_test.c'],
+    target = 'lp_test_format',
+    source = ['lp_test_format.c'],
 )
 
 env.Program(
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_test.c b/src/gallium/drivers/llvmpipe/lp_bld_test.c
deleted file mode 100644
index 1f09310267..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_bld_test.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include <stdlib.h>
-#include <stdio.h>
-
-#include <llvm-c/Core.h>
-#include <llvm-c/Analysis.h>
-#include <llvm-c/ExecutionEngine.h>
-#include <llvm-c/Target.h>
-#include <llvm-c/Transforms/Scalar.h>
-
-#include "util/u_format.h"
-
-#include "lp_bld.h"
-
-
-struct pixel_test_case
-{
-   enum pipe_format format;
-   uint32_t packed;
-   double unpacked[4];
-};
-
-
-struct pixel_test_case test_cases[] =
-{
-   {PIPE_FORMAT_R5G6B5_UNORM,   0x0000, {0.0, 0.0, 0.0, 1.0}},
-   {PIPE_FORMAT_R5G6B5_UNORM,   0x001f, {0.0, 0.0, 1.0, 1.0}},
-   {PIPE_FORMAT_R5G6B5_UNORM,   0x07e0, {0.0, 1.0, 0.0, 1.0}},
-   {PIPE_FORMAT_R5G6B5_UNORM,   0xf800, {1.0, 0.0, 0.0, 1.0}},
-   {PIPE_FORMAT_R5G6B5_UNORM,   0xffff, {1.0, 1.0, 1.0, 1.0}},
-
-   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x0000, {0.0, 0.0, 0.0, 0.0}},
-   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x001f, {0.0, 0.0, 1.0, 0.0}},
-   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x03e0, {0.0, 1.0, 0.0, 0.0}},
-   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x7c00, {1.0, 0.0, 0.0, 0.0}},
-   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x8000, {0.0, 0.0, 0.0, 1.0}},
-   {PIPE_FORMAT_A1R5G5B5_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}},
-
-   {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}},
-   {PIPE_FORMAT_A8R8G8B8_UNORM, 0x000000ff, {0.0, 0.0, 1.0, 0.0}},
-   {PIPE_FORMAT_A8R8G8B8_UNORM, 0x0000ff00, {0.0, 1.0, 0.0, 0.0}},
-   {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00ff0000, {1.0, 0.0, 0.0, 0.0}},
-   {PIPE_FORMAT_A8R8G8B8_UNORM, 0xff000000, {0.0, 0.0, 0.0, 1.0}},
-   {PIPE_FORMAT_A8R8G8B8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}},
-
-#if 0
-   {PIPE_FORMAT_R8G8B8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}},
-   {PIPE_FORMAT_R8G8B8A8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}},
-   {PIPE_FORMAT_R8G8B8A8_UNORM, 0x0000ff00, {0.0, 0.0, 1.0, 0.0}},
-   {PIPE_FORMAT_R8G8B8A8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}},
-   {PIPE_FORMAT_R8G8B8A8_UNORM, 0xff000000, {1.0, 0.0, 0.0, 0.0}},
-   {PIPE_FORMAT_R8G8B8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}},
-#endif
-
-   {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}},
-   {PIPE_FORMAT_B8G8R8A8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}},
-   {PIPE_FORMAT_B8G8R8A8_UNORM, 0x0000ff00, {1.0, 0.0, 0.0, 0.0}},
-   {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}},
-   {PIPE_FORMAT_B8G8R8A8_UNORM, 0xff000000, {0.0, 0.0, 1.0, 0.0}},
-   {PIPE_FORMAT_B8G8R8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}},
-};
-
-
-static LLVMValueRef
-add_load_rgba_test(LLVMModuleRef module,
-                   enum pipe_format format)
-{
-   LLVMTypeRef args[] = {
-      LLVMPointerType(LLVMInt8Type(), 0),
-      LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0)
-   };
-   LLVMValueRef func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
-   LLVMSetFunctionCallConv(func, LLVMCCallConv);
-   LLVMValueRef ptr = LLVMGetParam(func, 0);
-   LLVMValueRef rgba_ptr = LLVMGetParam(func, 1);
-
-   LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
-   LLVMBuilderRef builder = LLVMCreateBuilder();
-   LLVMPositionBuilderAtEnd(builder, block);
-
-   LLVMValueRef rgba;
-
-   struct lp_build_loop_state loop;
-
-   lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
-
-   rgba = lp_build_load_rgba(builder, format, ptr);
-   LLVMBuildStore(builder, rgba, rgba_ptr);
-
-   lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
-
-   LLVMBuildRetVoid(builder);
-
-   LLVMDisposeBuilder(builder);
-   return func;
-}
-
-
-static LLVMValueRef
-add_store_rgba_test(LLVMModuleRef module,
-                    enum pipe_format format)
-{
-   LLVMTypeRef args[] = {
-      LLVMPointerType(LLVMInt8Type(), 0),
-      LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0)
-   };
-   LLVMValueRef func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
-   LLVMSetFunctionCallConv(func, LLVMCCallConv);
-   LLVMValueRef ptr = LLVMGetParam(func, 0);
-   LLVMValueRef rgba_ptr = LLVMGetParam(func, 1);
-
-   LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
-   LLVMBuilderRef builder = LLVMCreateBuilder();
-   LLVMPositionBuilderAtEnd(builder, block);
-
-   LLVMValueRef rgba;
-
-   rgba = LLVMBuildLoad(builder, rgba_ptr, "");
-
-   lp_build_store_rgba(builder, format, ptr, rgba);
-
-   LLVMBuildRetVoid(builder);
-
-   LLVMDisposeBuilder(builder);
-   return func;
-}
-
-
-static boolean
-test_format(const struct pixel_test_case *test)
-{
-   char *error = NULL;
-   const struct util_format_description *desc;
-   
-   desc = util_format_description(test->format);
-   fprintf(stderr, "%s\n", desc->name);
-
-   LLVMModuleRef module = LLVMModuleCreateWithName("test");
-
-   LLVMValueRef load = add_load_rgba_test(module, test->format);
-   LLVMValueRef store = add_store_rgba_test(module, test->format);
-
-   LLVMVerifyModule(module, LLVMAbortProcessAction, &error);
-   LLVMDisposeMessage(error);
-
-   LLVMExecutionEngineRef engine;
-   LLVMModuleProviderRef provider = LLVMCreateModuleProviderForExistingModule(module);
-   error = NULL;
-   LLVMCreateJITCompiler(&engine, provider, 1, &error);
-   if (error) {
-      fprintf(stderr, "%s\n", error);
-      LLVMDisposeMessage(error);
-      abort();
-   }
-
-   LLVMPassManagerRef pass = LLVMCreatePassManager();
-#if 0
-   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
-   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
-    * but there are more on SVN. */
-   LLVMAddConstantPropagationPass(pass);
-   LLVMAddInstructionCombiningPass(pass);
-   LLVMAddPromoteMemoryToRegisterPass(pass);
-   LLVMAddDemoteMemoryToRegisterPass(pass);
-   LLVMAddGVNPass(pass);
-   LLVMAddCFGSimplificationPass(pass);
-   LLVMRunPassManager(pass, module);
-   LLVMDumpModule(module);
-#endif
-
-
-   float unpacked[4] = {0, 0, 0, 0};
-   unsigned packed = 0;
-
-   {
-      typedef void (*load_ptr_t)(const void *, float *);
-      load_ptr_t load_ptr = (load_ptr_t)LLVMGetPointerToGlobal(engine, load);
-
-      load_ptr(&test->packed, unpacked);
-
-   }
-
-
-   {
-      typedef void (*store_ptr_t)(void *, const float *);
-      store_ptr_t store_ptr = (store_ptr_t)LLVMGetPointerToGlobal(engine, store);
-
-      store_ptr(&packed, unpacked);
-
-   }
-
-   boolean success = TRUE;
-   unsigned i;
-   if(test->packed != packed)
-      success = FALSE;
-   for(i = 0; i < 4; ++i)
-      if(test->unpacked[i] != unpacked[i])
-         success = FALSE;
-
-   if (!success) {
-      printf("FAILED\n");
-      printf("  Packed: %08x\n", test->packed);
-      printf("          %08x\n", packed);
-      printf("  Unpacked: %f %f %f %f\n", unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
-      printf("            %f %f %f %f\n", test->unpacked[0], test->unpacked[1], test->unpacked[2], test->unpacked[3]);
-      LLVMDumpModule(module);
-   }
-
-   LLVMDisposePassManager(pass);
-   LLVMDisposeExecutionEngine(engine);
-   //LLVMDisposeModule(module);
-
-   return success;
-}
-
-
-int main(int argc, char **argv)
-{
-   unsigned i;
-   int ret;
-
-   for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
-      if(!test_format(&test_cases[i]))
-        ret = 1;
-
-   return ret;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 1a1313a884..1621fa79ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -369,9 +369,9 @@ test_one(const struct pipe_blend_state *blend)
 
    if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
       LLVMDumpModule(module);
-      LLVMDisposeMessage(error);
       abort();
    }
+   LLVMDisposeMessage(error);
 
    provider = LLVMCreateModuleProviderForExistingModule(module);
    if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
@@ -432,8 +432,11 @@ test_one(const struct pipe_blend_state *blend)
       }
    }
 
+   LLVMFreeMachineCodeForFunction(engine, func);
+
    LLVMDisposeExecutionEngine(engine);
-   //LLVMDisposeModule(module);
+   if(pass)
+      LLVMDisposePassManager(pass);
 
    return success;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
new file mode 100644
index 0000000000..3086bf871b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -0,0 +1,271 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <llvm-c/Core.h>
+#include <llvm-c/Analysis.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "util/u_format.h"
+
+#include "lp_bld.h"
+
+
+struct pixel_test_case   /* one packed pixel paired with its expected unpacked value */
+{
+   enum pipe_format format;   /* pixel format under test */
+   uint32_t packed;           /* packed pixel bits handed to the generated "load" function */
+   double unpacked[4];        /* expected channel values; test_format() compares them element-wise with the loaded floats */
+};
+
+
+struct pixel_test_case test_cases[] =   /* table walked by main(): {format, packed bits, expected unpacked channels} */
+{
+   {PIPE_FORMAT_R5G6B5_UNORM,   0x0000, {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R5G6B5_UNORM,   0x001f, {0.0, 0.0, 1.0, 1.0}},
+   {PIPE_FORMAT_R5G6B5_UNORM,   0x07e0, {0.0, 1.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R5G6B5_UNORM,   0xf800, {1.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R5G6B5_UNORM,   0xffff, {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x0000, {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x001f, {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x03e0, {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x7c00, {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A1R5G5B5_UNORM, 0x8000, {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_A1R5G5B5_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}},
+
+   {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, 0x000000ff, {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, 0x0000ff00, {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00ff0000, {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, 0xff000000, {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_A8R8G8B8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}},
+
+#if 0   /* the R8G8B8A8 cases below are compiled out */
+   {PIPE_FORMAT_R8G8B8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, 0x0000ff00, {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, 0xff000000, {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}},
+#endif
+
+   {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, 0x0000ff00, {1.0, 0.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, 0xff000000, {0.0, 0.0, 1.0, 0.0}},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}},
+};
+
+
+typedef void (*load_ptr_t)(const void *, float *);
+
+
+static LLVMValueRef   /* returns the LLVM function "load" that this call adds to the module */
+add_load_rgba_test(LLVMModuleRef module,   /* module that receives the new function */
+                   enum pipe_format format)   /* pixel format forwarded to lp_build_load_rgba() */
+{
+   LLVMTypeRef args[2];
+   LLVMValueRef func;
+   LLVMValueRef ptr;
+   LLVMValueRef rgba_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   LLVMValueRef rgba;
+   struct lp_build_loop_state loop;
+
+   args[0] = LLVMPointerType(LLVMInt8Type(), 0);   /* param 0: i8 pointer to the packed pixel */
+   args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);   /* param 1: pointer to the 4 x float vector that receives the result */
+
+   func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0));   /* void load(i8 *, <4 x float> *), not variadic */
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);   /* C calling convention so the host can call it through load_ptr_t */
+   ptr = LLVMGetParam(func, 0);
+   rgba_ptr = LLVMGetParam(func, 1);
+
+   block = LLVMAppendBasicBlock(func, "entry");
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);   /* body below is emitted inside a generated loop; presumably 1 is the start value -- confirm against lp_build_loop_begin() */
+
+   rgba = lp_build_load_rgba(builder, format, ptr);   /* unpack the pixel at ptr */
+   LLVMBuildStore(builder, rgba, rgba_ptr);   /* write the unpacked vector through param 1 */
+
+   lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);   /* presumably 4 is the end value -- confirm against lp_build_loop_end() */
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder);   /* the builder is only needed while emitting; the function stays in the module */
+   return func;
+}
+
+
+typedef void (*store_ptr_t)(void *, const float *);
+
+
+static LLVMValueRef   /* returns the LLVM function "store" that this call adds to the module */
+add_store_rgba_test(LLVMModuleRef module,   /* module that receives the new function */
+                    enum pipe_format format)   /* pixel format forwarded to lp_build_store_rgba() */
+{
+   LLVMTypeRef args[2];
+   LLVMValueRef func;
+   LLVMValueRef ptr;
+   LLVMValueRef rgba_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   LLVMValueRef rgba;
+
+   args[0] = LLVMPointerType(LLVMInt8Type(), 0);   /* param 0: i8 pointer to the packed destination */
+   args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);   /* param 1: pointer to the 4 x float vector to pack */
+
+   func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0));   /* void store(i8 *, <4 x float> *), not variadic */
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);   /* C calling convention so the host can call it through store_ptr_t */
+   ptr = LLVMGetParam(func, 0);
+   rgba_ptr = LLVMGetParam(func, 1);
+
+   block = LLVMAppendBasicBlock(func, "entry");
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   rgba = LLVMBuildLoad(builder, rgba_ptr, "");   /* read the unpacked vector from param 1 */
+
+   lp_build_store_rgba(builder, format, ptr, rgba);   /* pack it and write it through param 0 */
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder);   /* the builder is only needed while emitting; the function stays in the module */
+   return func;
+}
+
+
+static boolean   /* TRUE when the generated load/store round trip reproduces the test case */
+test_format(const struct pixel_test_case *test)
+{
+   LLVMModuleRef module = NULL;
+   LLVMValueRef load = NULL;
+   LLVMValueRef store = NULL;
+   LLVMExecutionEngineRef engine = NULL;
+   LLVMModuleProviderRef provider = NULL;
+   LLVMPassManagerRef pass = NULL;
+   char *error = NULL;
+   const struct util_format_description *desc;
+   load_ptr_t load_ptr;
+   store_ptr_t store_ptr;
+   float unpacked[4];   /* NOTE(review): the generated code accesses this as a <4 x float>; confirm the alignment is sufficient */
+   unsigned packed;
+   boolean success;
+   unsigned i;
+
+   desc = util_format_description(test->format);
+   fprintf(stderr, "%s\n", desc->name);   /* announce the format being tested */
+
+   module = LLVMModuleCreateWithName("test");
+
+   load = add_load_rgba_test(module, test->format);   /* emit the "load" and "store" functions for this format */
+   store = add_store_rgba_test(module, test->format);
+
+   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {   /* non-zero result: dump the IR and give up */
+      LLVMDumpModule(module);
+      abort();
+   }
+   LLVMDisposeMessage(error);   /* release the verifier message on the success path */
+
+   provider = LLVMCreateModuleProviderForExistingModule(module);
+   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {   /* non-zero result: report the JIT creation error and give up */
+      fprintf(stderr, "%s\n", error);
+      LLVMDisposeMessage(error);
+      abort();
+   }
+
+#if 0   /* optimization passes are currently compiled out */
+   pass = LLVMCreatePassManager();
+   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+    * but there are more on SVN. */
+   LLVMAddConstantPropagationPass(pass);
+   LLVMAddInstructionCombiningPass(pass);
+   LLVMAddPromoteMemoryToRegisterPass(pass);
+   LLVMAddGVNPass(pass);
+   LLVMAddCFGSimplificationPass(pass);
+   LLVMRunPassManager(pass, module);
+#else
+   (void)pass;   /* pass stays NULL in this branch, so the disposal at the end is skipped */
+#endif
+
+   load_ptr  = (load_ptr_t) LLVMGetPointerToGlobal(engine, load);   /* host-callable entry points of the two generated functions */
+   store_ptr = (store_ptr_t)LLVMGetPointerToGlobal(engine, store);
+
+   memset(unpacked, 0, sizeof unpacked);   /* NOTE(review): memset needs <string.h>, which this file does not include directly -- confirm it arrives via another header */
+   packed = 0;
+
+   load_ptr(&test->packed, unpacked);   /* unpack the reference pixel ... */
+   store_ptr(&packed, unpacked);        /* ... then pack the result again */
+
+   success = TRUE;
+   if(test->packed != packed)   /* the round trip must reproduce the packed bits */
+      success = FALSE;
+   for(i = 0; i < 4; ++i)
+      if(test->unpacked[i] != unpacked[i])   /* exact comparison of expected double against loaded float */
+         success = FALSE;
+
+   if (!success) {
+      printf("FAILED\n");
+      printf("  Packed: %08x\n", test->packed);   /* expected packed value, then the one actually produced */
+      printf("          %08x\n", packed);
+      printf("  Unpacked: %f %f %f %f\n", unpacked[0], unpacked[1], unpacked[2], unpacked[3]);   /* produced channels, then the expected ones */
+      printf("            %f %f %f %f\n", test->unpacked[0], test->unpacked[1], test->unpacked[2], test->unpacked[3]);
+      LLVMDumpModule(module);
+   }
+
+   LLVMFreeMachineCodeForFunction(engine, store);   /* drop the JIT-compiled code before tearing down the engine */
+   LLVMFreeMachineCodeForFunction(engine, load);
+
+   LLVMDisposeExecutionEngine(engine);   /* NOTE(review): module and provider are not disposed separately -- presumably owned by the engine; confirm */
+   if(pass)
+      LLVMDisposePassManager(pass);
+
+   return success;
+}
+
+
+int main(int argc, char **argv)   /* runs every entry of test_cases; exit status is 1 if any case fails, 0 otherwise */
+{
+   unsigned i;
+   int ret = 0;   /* must start at 0: it is only assigned when a case fails, so an all-pass run would otherwise return garbage */
+
+   for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
+      if(!test_format(&test_cases[i]))
+        ret = 1;   /* remember the failure but keep running the remaining cases */
+
+   return ret;
+}
-- 
cgit v1.2.3


From c87fab0008453567b45dd5e5eb7dd5d026990071 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 3 Aug 2009 22:24:01 +0100
Subject: llvmpipe: Move type support functions into a separate file.

---
 src/gallium/drivers/llvmpipe/SConscript      |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_arit.c   | 112 +--------------------
 src/gallium/drivers/llvmpipe/lp_bld_arit.h   |  94 +-----------------
 src/gallium/drivers/llvmpipe/lp_bld_blend.c  |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_type.c   | 142 +++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_type.h   | 131 ++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_test_blend.c |   1 +
 7 files changed, 278 insertions(+), 204 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_type.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_type.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 8d4d6736fc..615a885cc5 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -18,6 +18,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_loop.c',
 		'lp_bld_logicop.c',
 		'lp_bld_blend.c',
+		'lp_bld_type.c',
 		'lp_clear.c',
 		'lp_context.c',
 		'lp_draw_arrays.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index db0db02c15..36b266a45a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -47,120 +47,10 @@
 
 #include "pipe/p_state.h"
 
+#include "lp_bld_type.h"
 #include "lp_bld_arit.h"
 
 
-LLVMTypeRef
-lp_build_elem_type(union lp_type type)
-{
-   if (type.floating) {
-      assert(type.sign);
-      switch(type.width) {
-         case 32:
-         return LLVMFloatType();
-         break;
-      case 64:
-         return LLVMDoubleType();
-         break;
-      default:
-         assert(0);
-         return LLVMFloatType();
-      }
-   }
-   else {
-      return LLVMIntType(type.width);
-   }
-}
-
-
-LLVMTypeRef
-lp_build_vec_type(union lp_type type)
-{
-   LLVMTypeRef elem_type = lp_build_elem_type(type);
-   return LLVMVectorType(elem_type, type.length);
-}
-
-
-/**
- * This function is a mirrot of lp_build_elem_type() above.
- *
- * XXX: I'm not sure if it wouldn't be easier/efficient to just recreate the
- * type and check for identity.
- */
-boolean
-lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) 
-{
-   LLVMTypeKind elem_kind;
-
-   assert(elem_type);
-   if(!elem_type)
-      return FALSE;
-
-   elem_kind = LLVMGetTypeKind(elem_type);
-
-   if (type.floating) {
-      switch(type.width) {
-      case 32:
-         if(elem_kind != LLVMFloatTypeKind)
-            return FALSE;
-         break;
-      case 64:
-         if(elem_kind != LLVMDoubleTypeKind)
-            return FALSE;
-         break;
-      default:
-         assert(0);
-         return FALSE;
-      }
-   }
-   else {
-      if(elem_kind != LLVMIntegerTypeKind)
-         return FALSE;
-
-      if(LLVMGetIntTypeWidth(elem_type) != type.width)
-         return FALSE;
-   }
-
-   return TRUE; 
-}
-
-
-boolean
-lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) 
-{
-   LLVMTypeRef elem_type;
-
-   assert(vec_type);
-   if(!vec_type)
-      return FALSE;
-
-   if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind)
-      return FALSE;
-
-   if(LLVMGetVectorSize(vec_type) != type.length)
-      return FALSE;
-
-   elem_type = LLVMGetElementType(vec_type);
-
-   return lp_check_elem_type(type, elem_type);
-}
-
-
-boolean
-lp_check_value(union lp_type type, LLVMValueRef val) 
-{
-   LLVMTypeRef vec_type;
-
-   assert(val);
-   if(!val)
-      return FALSE;
-
-   vec_type = LLVMTypeOf(val);
-
-   return lp_check_vec_type(type, vec_type);
-}
-
-
 LLVMValueRef
 lp_build_undef(union lp_type type)
 {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 795b816507..c437d2bcd0 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -39,100 +39,8 @@
 
 #include <llvm-c/Core.h>  
 
- 
-#define LP_MAX_VECTOR_LENGTH 16
 
-
-/*
- * Types
- */
-
-
-enum lp_type_kind {
-   LP_TYPE_INTEGER = 0,
-   LP_TYPE_FLOAT = 1,
-   LP_TYPE_FIXED = 2
-};
-
-
-/**
- * The LLVM type system can't conveniently express all the things we care about
- * on the types used for intermediate computations, such as signed vs unsigned,
- * normalized values, or fixed point.
- */
-union lp_type {
-   struct {
-      /** 
-       * Integer. floating-point, or fixed point as established by the
-       * lp_build_type_kind enum above.
-       */
-      unsigned floating:1;
-
-      /** 
-       * Integer. floating-point, or fixed point as established by the
-       * lp_build_type_kind enum above.
-       */
-      unsigned fixed:1;
-      
-      /** 
-       * Whether it can represent negative values or not.
-       *
-       * Floating point values 
-       */
-      unsigned sign:1;
-
-      /**
-       * Whether values are normalized to fit [0, 1] interval, or [-1, 1] interval for
-       * signed types.
-       *
-       * For integer types it means the representable integer range should be
-       * interpreted as the interval above.
-       *
-       * For floating and fixed point formats it means the values should be
-       * clamped to the interval above.
-       */
-      unsigned norm:1;
-
-      /**
-       * Element width.
-       *
-       * For fixed point values, the fixed point is assumed to be at half the width.
-       */
-      unsigned width:14;
-
-      /** 
-       * Vector length.
-       *
-       * width*length should be a power of two greater or equal to height.
-       *
-       * Several functions can only cope with vectors of length up to
-       * LP_MAX_VECTOR_LENGTH, so you may need to increase that value if you
-       * want to represent bigger vectors.
-       */
-      unsigned length:14;
-   };
-   uint32_t value;
-};
-
-
-LLVMTypeRef
-lp_build_elem_type(union lp_type type);
-
-
-LLVMTypeRef
-lp_build_vec_type(union lp_type type);
-
-
-boolean
-lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type);
-
-
-boolean
-lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type);
-
-
-boolean
-lp_check_value(union lp_type type, LLVMValueRef val);
+union lp_type type;
 
 
 /*
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
index 31dbee7d6e..2c5e67418f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
@@ -40,6 +40,7 @@
 #include "pipe/p_state.h"
 
 #include "lp_bld.h"
+#include "lp_bld_type.h"
 #include "lp_bld_arit.h"
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
new file mode 100644
index 0000000000..e2abd04f60
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -0,0 +1,142 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_type.h"
+
+
+LLVMTypeRef   /* LLVM type of a single element described by `type` */
+lp_build_elem_type(union lp_type type)
+{
+   if (type.floating) {
+      assert(type.sign);   /* floating-point types are expected to have the sign bit set */
+      switch(type.width) {
+         case 32:
+         return LLVMFloatType();
+         break;
+      case 64:
+         return LLVMDoubleType();
+         break;
+      default:
+         assert(0);   /* only 32- and 64-bit floating-point widths are handled */
+         return LLVMFloatType();   /* reached only if the assert above does not abort */
+      }
+   }
+   else {
+      return LLVMIntType(type.width);   /* every non-floating type maps to an integer of the same width */
+   }
+}
+
+
+LLVMTypeRef   /* LLVM vector type with type.length elements of lp_build_elem_type(type) */
+lp_build_vec_type(union lp_type type)
+{
+   LLVMTypeRef elem_type = lp_build_elem_type(type);
+   return LLVMVectorType(elem_type, type.length);
+}
+
+
+/**
+ * This function is a mirror of lp_build_elem_type() above.
+ *
+ * XXX: I'm not sure if it wouldn't be easier/efficient to just recreate the
+ * type and check for identity.
+ */
+boolean   /* TRUE when elem_type is the LLVM element type that `type` describes */
+lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) 
+{
+   LLVMTypeKind elem_kind;
+
+   assert(elem_type);
+   if(!elem_type)   /* still reject NULL when the assert does not abort */
+      return FALSE;
+
+   elem_kind = LLVMGetTypeKind(elem_type);
+
+   if (type.floating) {
+      switch(type.width) {
+      case 32:
+         if(elem_kind != LLVMFloatTypeKind)
+            return FALSE;
+         break;
+      case 64:
+         if(elem_kind != LLVMDoubleTypeKind)
+            return FALSE;
+         break;
+      default:
+         assert(0);   /* only 32- and 64-bit floating-point widths are handled */
+         return FALSE;
+      }
+   }
+   else {
+      if(elem_kind != LLVMIntegerTypeKind)   /* every non-floating type must be an LLVM integer ... */
+         return FALSE;
+
+      if(LLVMGetIntTypeWidth(elem_type) != type.width)   /* ... of exactly the described width */
+         return FALSE;
+   }
+
+   return TRUE; 
+}
+
+
+boolean   /* TRUE when vec_type is an LLVM vector of type.length elements whose element type matches `type` */
+lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) 
+{
+   LLVMTypeRef elem_type;
+
+   assert(vec_type);
+   if(!vec_type)   /* still reject NULL when the assert does not abort */
+      return FALSE;
+
+   if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind)
+      return FALSE;
+
+   if(LLVMGetVectorSize(vec_type) != type.length)
+      return FALSE;
+
+   elem_type = LLVMGetElementType(vec_type);
+
+   return lp_check_elem_type(type, elem_type);   /* finally validate the element type itself */
+}
+
+
+boolean   /* TRUE when the LLVM type of val passes lp_check_vec_type() for `type` */
+lp_check_value(union lp_type type, LLVMValueRef val) 
+{
+   LLVMTypeRef vec_type;
+
+   assert(val);
+   if(!val)   /* still reject NULL when the assert does not abort */
+      return FALSE;
+
+   vec_type = LLVMTypeOf(val);
+
+   return lp_check_vec_type(type, vec_type);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
new file mode 100644
index 0000000000..4623183223
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -0,0 +1,131 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Convenient representation of SIMD types.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_TYPE_H
+#define LP_BLD_TYPE_H
+
+
+#include <llvm-c/Core.h>  
+
+#include <pipe/p_compiler.h>
+
+
+/**
+ * Several functions can only cope with vectors of length up to this value.
+ * You may need to increase that value if you want to represent bigger vectors.
+ */
+#define LP_MAX_VECTOR_LENGTH 16
+
+
+/**
+ * The LLVM type system can't conveniently express all the things we care about
+ * on the types used for intermediate computations, such as signed vs unsigned,
+ * normalized values, or fixed point.
+ */
+union lp_type {
+   struct {
+      /** 
+       * Floating-point. Cannot be used with fixed. Integer types are
+       * represented by leaving this bit zero.
+       */
+      unsigned floating:1;
+
+      /** 
+       * Fixed-point. Cannot be used with floating. Integer types are
+       * represented by leaving this bit zero.
+       */
+      unsigned fixed:1;
+      
+      /** 
+       * Whether it can represent negative values or not.
+       *
+       * Floating point values should always have this bit set.
+       */
+      unsigned sign:1;
+
+      /**
+       * Whether values are normalized to fit [0, 1] interval, or [-1, 1]
+       * interval for signed types.
+       *
+       * For integer types it means the representable integer range should be
+       * interpreted as the interval above.
+       *
+       * For floating and fixed point formats it means the values should be
+       * clamped to the interval above.
+       */
+      unsigned norm:1;
+
+      /**
+       * Element width.
+       *
+       * For fixed point values, the fixed point is assumed to be at half the
+       * width.
+       */
+      unsigned width:14;
+
+      /** 
+       * Vector length.
+       *
+       * width*length should be a power of two greater or equal to eight.
+       *
+       * @sa LP_MAX_VECTOR_LENGTH
+       */
+      unsigned length:14;
+   };
+   uint32_t value;   /* the 32 bits of bit-fields above (1+1+1+1+14+14) viewed as a single word */
+};
+
+
+LLVMTypeRef
+lp_build_elem_type(union lp_type type);
+
+
+LLVMTypeRef
+lp_build_vec_type(union lp_type type);
+
+
+boolean
+lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type);
+
+
+boolean
+lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type);
+
+
+boolean
+lp_check_value(union lp_type type, LLVMValueRef val);
+
+
+#endif /* !LP_BLD_TYPE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index cf641c1b67..60ba8d839c 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -53,6 +53,7 @@
 #include "util/u_math.h"
 
 #include "lp_bld.h"
+#include "lp_bld_type.h"
 #include "lp_bld_arit.h"
 
 
-- 
cgit v1.2.3


From 64611e086dbefa2003773ab541c0381b5713e18d Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 3 Aug 2009 22:31:08 +0100
Subject: llvmpipe: Separate constant building.

---
 src/gallium/drivers/llvmpipe/SConscript     |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_arit.c  | 107 +-------------------
 src/gallium/drivers/llvmpipe/lp_bld_arit.h  |  39 +++-----
 src/gallium/drivers/llvmpipe/lp_bld_blend.c |   5 +-
 src/gallium/drivers/llvmpipe/lp_bld_const.c | 145 ++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_const.h |  64 ++++++++++++
 6 files changed, 226 insertions(+), 135 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_const.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_const.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 615a885cc5..71c55a93ab 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -11,6 +11,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_fs_sse.c',
 		'lp_fs_llvm.c',
 		'lp_bld_arit.c',
+		'lp_bld_const.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
 		'lp_bld_load.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 36b266a45a..5dc1b7c968 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -45,117 +45,12 @@
  */
 
 
-#include "pipe/p_state.h"
+#include "util/u_debug.h"
 
 #include "lp_bld_type.h"
 #include "lp_bld_arit.h"
 
 
-LLVMValueRef
-lp_build_undef(union lp_type type)
-{
-   LLVMTypeRef vec_type = lp_build_vec_type(type);
-   return LLVMGetUndef(vec_type);
-}
-               
-
-LLVMValueRef
-lp_build_zero(union lp_type type)
-{
-   LLVMTypeRef vec_type = lp_build_vec_type(type);
-   return LLVMConstNull(vec_type);
-}
-               
-
-LLVMValueRef
-lp_build_one(union lp_type type)
-{
-   LLVMTypeRef elem_type;
-   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
-   unsigned i;
-
-   assert(type.length <= LP_MAX_VECTOR_LENGTH);
-
-   elem_type = lp_build_elem_type(type);
-
-   if(type.floating)
-      elems[0] = LLVMConstReal(elem_type, 1.0);
-   else if(type.fixed)
-      elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0);
-   else if(!type.norm)
-      elems[0] = LLVMConstInt(elem_type, 1, 0);
-   else {
-      /* special case' -- 1.0 for normalized types is more easily attained if
-       * we start with a vector consisting of all bits set */
-      LLVMTypeRef vec_type = LLVMVectorType(elem_type, type.length);
-      LLVMValueRef vec = LLVMConstAllOnes(vec_type);
-
-      if(type.sign)
-         vec = LLVMConstLShr(vec, LLVMConstInt(LLVMInt32Type(), 1, 0));
-
-      return vec;
-   }
-
-   for(i = 1; i < type.length; ++i)
-      elems[i] = elems[0];
-
-   return LLVMConstVector(elems, type.length);
-}
-               
-
-LLVMValueRef
-lp_build_const_aos(union lp_type type, 
-                   double r, double g, double b, double a, 
-                   const unsigned char *swizzle)
-{
-   const unsigned char default_swizzle[4] = {0, 1, 2, 3};
-   LLVMTypeRef elem_type;
-   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
-   unsigned i;
-
-   assert(type.length % 4 == 0);
-   assert(type.length <= LP_MAX_VECTOR_LENGTH);
-
-   elem_type = lp_build_elem_type(type);
-
-   if(swizzle == NULL)
-      swizzle = default_swizzle;
-
-   if(type.floating) {
-      elems[swizzle[0]] = LLVMConstReal(elem_type, r);
-      elems[swizzle[1]] = LLVMConstReal(elem_type, g);
-      elems[swizzle[2]] = LLVMConstReal(elem_type, b);
-      elems[swizzle[3]] = LLVMConstReal(elem_type, a);
-   }
-   else {
-      unsigned shift;
-      long long llscale;
-      double dscale;
-
-      if(type.fixed)
-         shift = type.width/2;
-      else if(type.norm)
-         shift = type.sign ? type.width - 1 : type.width;
-      else
-         shift = 0;
-
-      llscale = (long long)1 << shift;
-      dscale = (double)llscale;
-      assert((long long)dscale == llscale);
-
-      elems[swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0);
-      elems[swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0);
-      elems[swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0);
-      elems[swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0);
-   }
-
-   for(i = 4; i < type.length; ++i)
-      elems[i] = elems[i % 4];
-
-   return LLVMConstVector(elems, type.length);
-}
-               
-
 static LLVMValueRef
 lp_build_intrinsic_binary(LLVMBuilderRef builder,
                           const char *name,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index c437d2bcd0..cec54a257f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -43,43 +43,28 @@
 union lp_type type;
 
 
-/*
- * Constants
- */
-
-
-LLVMValueRef
-lp_build_undef(union lp_type type);
-
-
-LLVMValueRef
-lp_build_zero(union lp_type type);
-
-
-LLVMValueRef
-lp_build_one(union lp_type type);
-
-
-LLVMValueRef
-lp_build_const_aos(union lp_type type, 
-                   double r, double g, double b, double a, 
-                   const unsigned char *swizzle);
-
-/*
- * Basic arithmetic
- */
-
-
 /**
+ * We need most of the information here in order to correctly and efficiently
+ * translate an arithmetic operation into LLVM IR. Putting it here avoids the
+ * trouble of passing it as parameters.
  */
 struct lp_build_context
 {
    LLVMBuilderRef builder;
    
+   /**
+    * This not only describes the input/output LLVM types, but also whether
+    * to normalize/clamp the results.
+    */
    union lp_type type;
 
+   /** Same as lp_build_undef(type) */
    LLVMValueRef undef;
+
+   /** Same as lp_build_zero(type) */
    LLVMValueRef zero;
+
+   /** Same as lp_build_one(type) */
    LLVMValueRef one;
 };
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
index 2c5e67418f..90afe2e6b6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
@@ -41,12 +41,13 @@
 
 #include "lp_bld.h"
 #include "lp_bld_type.h"
+#include "lp_bld_const.h"
 #include "lp_bld_arit.h"
 
 
 /**
- * We may the same bld several times, so we keep them here to avoid
- * recomputing them. Also reusing the bld allows us to do simplifications
+ * We may use the same values several times, so we keep them here to avoid
+ * recomputing them. Also reusing the values allows us to do simplifications
  * that LLVM optimization passes wouldn't normally be able to do.
  */
 struct lp_build_blend_context
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c
new file mode 100644
index 0000000000..44fcc467f4
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c
@@ -0,0 +1,145 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for constant building.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+
+
+LLVMValueRef
+lp_build_undef(union lp_type type)
+{
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
+   return LLVMGetUndef(vec_type);
+}
+               
+
+LLVMValueRef
+lp_build_zero(union lp_type type)
+{
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
+   return LLVMConstNull(vec_type);
+}
+               
+
+LLVMValueRef
+lp_build_one(union lp_type type)
+{
+   LLVMTypeRef elem_type;
+   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   assert(type.length <= LP_MAX_VECTOR_LENGTH);
+
+   elem_type = lp_build_elem_type(type);
+
+   if(type.floating)
+      elems[0] = LLVMConstReal(elem_type, 1.0);
+   else if(type.fixed)
+      elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0);
+   else if(!type.norm)
+      elems[0] = LLVMConstInt(elem_type, 1, 0);
+   else {
+      /* Special case -- 1.0 for normalized types is all bits set (minus the
+       * sign bit when signed); the shift count must have the vector's type. */
+      LLVMValueRef vec = LLVMConstAllOnes(LLVMVectorType(elem_type, type.length));
+      if(type.sign) {
+         for(i = 0; i < type.length; ++i)
+            elems[i] = LLVMConstInt(elem_type, 1, 0);
+         vec = LLVMConstLShr(vec, LLVMConstVector(elems, type.length));
+      }
+      return vec;
+   }
+
+   for(i = 1; i < type.length; ++i)
+      elems[i] = elems[0];
+
+   return LLVMConstVector(elems, type.length);
+}
+               
+
+LLVMValueRef
+lp_build_const_aos(union lp_type type, 
+                   double r, double g, double b, double a, 
+                   const unsigned char *swizzle)
+{
+   const unsigned char default_swizzle[4] = {0, 1, 2, 3};
+   LLVMTypeRef elem_type;
+   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   assert(type.length % 4 == 0);
+   assert(type.length <= LP_MAX_VECTOR_LENGTH);
+
+   elem_type = lp_build_elem_type(type);
+
+   if(swizzle == NULL)
+      swizzle = default_swizzle;
+
+   if(type.floating) {
+      elems[swizzle[0]] = LLVMConstReal(elem_type, r);
+      elems[swizzle[1]] = LLVMConstReal(elem_type, g);
+      elems[swizzle[2]] = LLVMConstReal(elem_type, b);
+      elems[swizzle[3]] = LLVMConstReal(elem_type, a);
+   }
+   else {
+      unsigned shift;
+      long long llscale;
+      double dscale;
+
+      if(type.fixed)
+         shift = type.width/2;
+      else if(type.norm)
+         shift = type.sign ? type.width - 1 : type.width;
+      else
+         shift = 0;
+      /* Normalized 1.0 is all value bits set, i.e. (1 << shift) - 1. */
+      llscale = ((long long)1 << shift) - ((type.norm && !type.fixed) ? 1 : 0);
+      dscale = (double)llscale;
+      assert((long long)dscale == llscale);
+
+      elems[swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0);
+      elems[swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0);
+      elems[swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0);
+      elems[swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0);
+   }
+
+   for(i = 4; i < type.length; ++i)
+      elems[i] = elems[i % 4];
+
+   return LLVMConstVector(elems, type.length);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h
new file mode 100644
index 0000000000..f2e5deca94
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h
@@ -0,0 +1,64 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for constant building.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_CONST_H
+#define LP_BLD_CONST_H
+
+
+#include <llvm-c/Core.h>  
+
+
+union lp_type;
+
+
+LLVMValueRef
+lp_build_undef(union lp_type type);
+
+
+LLVMValueRef
+lp_build_zero(union lp_type type);
+
+
+LLVMValueRef
+lp_build_one(union lp_type type);
+
+
+LLVMValueRef
+lp_build_const_aos(union lp_type type, 
+                   double r, double g, double b, double a, 
+                   const unsigned char *swizzle);
+
+
+#endif /* !LP_BLD_CONST_H */
-- 
cgit v1.2.3


From 1dd7bb17c7331f9ecd0bc830b61ada235a56fe6d Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 4 Aug 2009 12:09:52 +0100
Subject: llvmpipe: Optimize blend swizzles by using bitmasks instead of
 shuffles for ubytes.

---
 src/gallium/drivers/llvmpipe/SConscript       |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_blend.c   |  74 ++------
 src/gallium/drivers/llvmpipe/lp_bld_const.c   |  35 ++++
 src/gallium/drivers/llvmpipe/lp_bld_const.h   |  10 +
 src/gallium/drivers/llvmpipe/lp_bld_swizzle.c | 264 ++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_swizzle.h |  87 +++++++++
 6 files changed, 416 insertions(+), 55 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_swizzle.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 71c55a93ab..85d0a737fa 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -19,6 +19,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_loop.c',
 		'lp_bld_logicop.c',
 		'lp_bld_blend.c',
+		'lp_bld_swizzle.c',
 		'lp_bld_type.c',
 		'lp_clear.c',
 		'lp_context.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
index 90afe2e6b6..e070aac378 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
@@ -43,6 +43,7 @@
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
 
 
 /**
@@ -179,67 +180,30 @@ lp_build_blend_swizzle(struct lp_build_blend_context *bld,
                        enum lp_build_blend_swizzle rgb_swizzle,
                        unsigned alpha_swizzle)
 {
-   const unsigned n = bld->base.type.length;
-   LLVMValueRef swizzles[LP_MAX_VECTOR_LENGTH];
-   unsigned i, j;
-
    if(rgb == alpha) {
       if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
          return rgb;
-
-      alpha = bld->base.undef;
-   }
-
-   if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA &&
-      !bld->base.type.floating) {
-#if 0
-      /* Use a select */
-      /* FIXME: Unfortunetaly select of vectors do not work */
-
-      for(j = 0; j < n; j += 4)
-         for(i = 0; i < 4; ++i)
-            swizzles[j + i] = LLVMConstInt(LLVMInt1Type(), i == alpha_swizzle ? 0 : 1, 0);
-
-      return LLVMBuildSelect(bld->base.builder, LLVMConstVector(swizzles, n), rgb, alpha, "");
-#else
-      /* XXX: Use a bitmask, as byte shuffles often end up being translated
-       * into many PEXTRB. Ideally LLVM X86 code generation should pick this
-       * automatically for us. */
-
-      for(j = 0; j < n; j += 4)
-         for(i = 0; i < 4; ++i)
-            swizzles[j + i] = LLVMConstInt(LLVMIntType(bld->base.type.width), i == alpha_swizzle ? 0 : ~0, 0);
-
-      /* TODO: Unfortunately constant propagation prevents from using PANDN. And
-       * on SSE4 we have even better -- PBLENDVB */
-      return LLVMBuildOr(bld->base.builder,
-                         LLVMBuildAnd(bld->base.builder, rgb,   LLVMConstVector(swizzles, n), ""),
-                         LLVMBuildAnd(bld->base.builder, alpha, LLVMBuildNot(bld->base.builder, LLVMConstVector(swizzles, n), ""), ""),
-                         "");
-#endif
+      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
+         return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
    }
-
-   for(j = 0; j < n; j += 4) {
-      for(i = 0; i < 4; ++i) {
-         unsigned swizzle;
-
-         if(i == alpha_swizzle && alpha != bld->base.undef) {
-            /* Take the alpha from the second shuffle argument */
-            swizzle = n + j + alpha_swizzle;
-         }
-         else if (rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
-            /* Take the alpha from the first shuffle argument */
-            swizzle = j + alpha_swizzle;
-         }
-         else {
-            swizzle = j + i;
-         }
-
-         swizzles[j + i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
+   else {
+      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
+         boolean cond[4] = {0, 0, 0, 0};
+         cond[alpha_swizzle] = 1;
+         return lp_build_select_aos(&bld->base, alpha, rgb, cond);
+      }
+      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
+         unsigned char swizzle[4];
+         swizzle[0] = alpha_swizzle;
+         swizzle[1] = alpha_swizzle;
+         swizzle[2] = alpha_swizzle;
+         swizzle[3] = alpha_swizzle;
+         swizzle[alpha_swizzle] += 4;
+         return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
       }
    }
-
-   return LLVMBuildShuffleVector(bld->base.builder, rgb, alpha, LLVMConstVector(swizzles, n), "");
+   assert(0);
+   return bld->base.undef;
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c
index 44fcc467f4..fe1c627eee 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c
@@ -143,3 +143,38 @@ lp_build_const_aos(union lp_type type,
 
    return LLVMConstVector(elems, type.length);
 }
+
+
+LLVMValueRef
+lp_build_const_shift(union lp_type type,
+                     int c)
+{
+   LLVMTypeRef elem_type = LLVMIntType(type.width);
+   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   assert(type.length <= LP_MAX_VECTOR_LENGTH);
+
+   for(i = 0; i < type.length; ++i)
+      elems[i] = LLVMConstInt(elem_type, c, 0);
+
+   return LLVMConstVector(elems, type.length);
+}
+
+
+LLVMValueRef
+lp_build_const_mask_aos(union lp_type type,
+                        boolean cond[4])
+{
+   LLVMTypeRef elem_type = LLVMIntType(type.width);
+   LLVMValueRef masks[LP_MAX_VECTOR_LENGTH];
+   unsigned i, j;
+
+   assert(type.length <= LP_MAX_VECTOR_LENGTH);
+
+   for(j = 0; j < type.length; j += 4)
+      for(i = 0; i < 4; ++i)
+         masks[j + i] = LLVMConstInt(elem_type, cond[i] ? ~0 : 0, 0);
+
+   return LLVMConstVector(masks, type.length);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h
index f2e5deca94..98ed8911a5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h
@@ -61,4 +61,14 @@ lp_build_const_aos(union lp_type type,
                    const unsigned char *swizzle);
 
 
+LLVMValueRef
+lp_build_const_shift(union lp_type type,
+                     int c);
+
+
+LLVMValueRef
+lp_build_const_mask_aos(union lp_type type,
+                        boolean cond[4]);
+
+
 #endif /* !LP_BLD_CONST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
new file mode 100644
index 0000000000..0205d17ff1
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
@@ -0,0 +1,264 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_swizzle.h"
+
+
+LLVMValueRef
+lp_build_broadcast_aos(struct lp_build_context *bld,
+                       LLVMValueRef a,
+                       unsigned channel)
+{
+   const union lp_type type = bld->type;
+   const unsigned n = type.length;
+   unsigned i, j;
+
+   if(a == bld->undef || a == bld->zero || a == bld->one)
+      return a;
+
+   if (n <= 4) {
+      /*
+       * Shuffle.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
+   }
+   else {
+      /*
+       * Bit mask and recursive shifts
+       *
+       *   XYZW XYZW .... XYZW
+       *   _Y__ _Y__ .... _Y__
+       *   YY_  YY__ .... YY__
+       *   YYYY YYYY .... YYYY
+       */
+      union lp_type type4 = type;
+      const char shifts[4][2] = {
+         { 1,  2},
+         {-1,  2},
+         { 1, -2},
+         {-1, -2}
+      };
+      boolean cond[4];
+      unsigned i;
+
+      memset(cond, 0, sizeof cond);
+      cond[channel] = 1;
+
+      a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
+
+      type4.width *= 4;
+      type4.length /= 4;
+
+      a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
+
+      for(i = 0; i < 2; ++i) {
+         LLVMValueRef tmp = NULL;
+         int shift = shifts[channel][i];
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+         shift = -shift;
+#endif
+
+         if(shift > 0)
+            tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_shift(type4, shift*type.width), "");
+         if(shift < 0)
+            tmp = LLVMBuildShl(bld->builder, a, lp_build_const_shift(type4, -shift*type.width), "");
+
+         assert(tmp);
+         if(tmp)
+            a = LLVMBuildOr(bld->builder, a, tmp, "");
+      }
+
+      return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), "");
+   }
+}
+
+
+LLVMValueRef
+lp_build_select_aos(struct lp_build_context *bld,
+                    LLVMValueRef a,
+                    LLVMValueRef b,
+                    boolean cond[4])
+{
+   const union lp_type type = bld->type;
+   const unsigned n = type.length;
+   unsigned i, j;
+
+   if(a == b)
+      return a;
+   if(cond[0] && cond[1] && cond[2] && cond[3])
+      return a;
+   if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
+      return b;
+   if(a == bld->undef || b == bld->undef)
+      return bld->undef;
+
+   /*
+    * There are three major ways of accomplishing this:
+    * - with a shuffle,
+    * - with a select,
+    * - or with a bit mask.
+    *
+    * Select isn't supported for vector types yet.
+    * The flip between these is empirical and might need to be adjusted.
+    */
+   if (n <= 4) {
+      /*
+       * Shuffle.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
+   }
+#if 0
+   else if(0) {
+      /* FIXME: Unfortunately select of vectors do not work */
+      /* Use a select */
+      LLVMTypeRef elem_type = LLVMInt1Type();
+      LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
+
+      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
+   }
+#endif
+   else {
+      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
+
+      /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
+
+      a = LLVMBuildAnd(bld->builder, a, mask, "");
+
+      /* This often gets translated to PANDN, but sometimes the NOT is
+       * pre-computed and stored in another constant. The best strategy depends
+       * on available registers, so it is not a big deal -- hopefully LLVM makes
+       * the right decision given the rest of the program.
+       */
+      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+
+      return LLVMBuildOr(bld->builder, a, b, "");
+   }
+}
+
+
+LLVMValueRef
+lp_build_swizzle1_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      unsigned char swizzle[4])
+{
+   const unsigned n = bld->type.length;
+   unsigned i, j;
+
+   if(a == bld->undef || a == bld->zero || a == bld->one)
+      return a;
+
+   if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
+      return lp_build_broadcast_aos(bld, a, swizzle[0]);
+
+   {
+      /*
+       * Shuffle.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
+   }
+}
+
+
+LLVMValueRef
+lp_build_swizzle2_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      LLVMValueRef b,
+                      unsigned char swizzle[4])
+{
+   const unsigned n = bld->type.length;
+   unsigned i, j;
+
+   if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4)
+      return lp_build_swizzle1_aos(bld, a, swizzle);
+
+   if(a == b) {
+      swizzle[0] %= 4;
+      swizzle[1] %= 4;
+      swizzle[2] %= 4;
+      swizzle[3] %= 4;
+      return lp_build_swizzle1_aos(bld, a, swizzle);
+   }
+
+   if(swizzle[0] % 4 == 0 &&
+      swizzle[1] % 4 == 1 &&
+      swizzle[2] % 4 == 2 &&
+      swizzle[3] % 4 == 3) {
+      /* select_aos() picks its first operand where cond is set, and a
+       * set cond here means "take b" -- so b must be passed first. */
+      boolean cond[4];
+      for(i = 0; i < 4; ++i)
+         cond[i] = swizzle[i] / 4;
+      return lp_build_select_aos(bld, b, a, cond);
+   }
+
+   {
+      /*
+       * Shuffle.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
+   }
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
new file mode 100644
index 0000000000..aeb4f42fa8
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -0,0 +1,87 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for swizzling and selecting vector channels.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_SWIZZLE_H
+#define LP_BLD_SWIZZLE_H
+
+
+#include <llvm-c/Core.h>  
+
+
+union lp_type;
+struct lp_build_context;
+
+
+/**
+ * Broadcast one channel of a vector composed of arrays of XYZW structures into
+ * all four channels.
+ */
+LLVMValueRef
+lp_build_broadcast_aos(struct lp_build_context *bld,
+                       LLVMValueRef a,
+                       unsigned channel);
+
+
+LLVMValueRef
+lp_build_select_aos(struct lp_build_context *bld,
+                    LLVMValueRef a,
+                    LLVMValueRef b,
+                    boolean cond[4]);
+
+
+/**
+ * Swizzle a vector consisting of an array of XYZW structs.
+ *
+ * @param swizzle is in the [0,4[ range.
+ */
+LLVMValueRef
+lp_build_swizzle1_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      unsigned char swizzle[4]);
+
+
+/**
+ * Swizzle two vectors, each consisting of an array of XYZW structs.
+ *
+ * @param swizzle is in the [0,8[ range. Values in [4,8[ range refer to b.
+ */
+LLVMValueRef
+lp_build_swizzle2_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      LLVMValueRef b,
+                      unsigned char swizzle[4]);
+
+
+#endif /* !LP_BLD_SWIZZLE_H */
-- 
cgit v1.2.3


From 627d6a6b044b3916996cb9f50ce7f911f2196565 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 7 Aug 2009 01:16:59 +0100
Subject: llvmpipe: Move intrinsic helpers to a separate module.

---
 src/gallium/drivers/llvmpipe/SConscript    |  1 +
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 48 +++--------------
 src/gallium/drivers/llvmpipe/lp_bld_intr.c | 87 ++++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_intr.h | 51 ++++++++++++++++++
 4 files changed, 145 insertions(+), 42 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_intr.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_intr.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 85d0a737fa..58e6a888e8 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -12,6 +12,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_fs_llvm.c',
 		'lp_bld_arit.c',
 		'lp_bld_const.c',
+		'lp_bld_intr.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
 		'lp_bld_load.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 5dc1b7c968..461c01310c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -48,46 +48,10 @@
 #include "util/u_debug.h"
 
 #include "lp_bld_type.h"
+#include "lp_bld_intr.h"
 #include "lp_bld_arit.h"
 
 
-static LLVMValueRef
-lp_build_intrinsic_binary(LLVMBuilderRef builder,
-                          const char *name,
-                          LLVMValueRef a,
-                          LLVMValueRef b)
-{
-   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
-   LLVMValueRef function;
-   LLVMValueRef args[2];
-
-   function = LLVMGetNamedFunction(module, name);
-   if(!function) {
-      LLVMTypeRef type = LLVMTypeOf(a);
-      LLVMTypeRef arg_types[2];
-      arg_types[0] = type;
-      arg_types[1] = type;
-      function = LLVMAddFunction(module, name, LLVMFunctionType(type, arg_types, 2, 0));
-      LLVMSetFunctionCallConv(function, LLVMCCallConv);
-      LLVMSetLinkage(function, LLVMExternalLinkage);
-   }
-   assert(LLVMIsDeclaration(function));
-
-#ifdef DEBUG
-   /* We shouldn't use only constants with intrinsics, as they won't be
-    * propagated by LLVM optimization passes.
-    */
-   if(LLVMIsConstant(a) && LLVMIsConstant(b))
-      debug_printf("warning: invoking intrinsic \"%s\" with constants\n");
-#endif
-
-   args[0] = a;
-   args[1] = b;
-
-   return LLVMBuildCall(builder, function, args, 2, "");
-}
-
-
 static LLVMValueRef
 lp_build_min_simple(struct lp_build_context *bld,
                     LLVMValueRef a,
@@ -116,7 +80,7 @@ lp_build_min_simple(struct lp_build_context *bld,
 #endif
    
    if(intrinsic)
-      return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
+      return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
 
    if(type.floating)
       cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
@@ -154,7 +118,7 @@ lp_build_max_simple(struct lp_build_context *bld,
 #endif
 
    if(intrinsic)
-      return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
+      return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
 
    if(type.floating)
       cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
@@ -221,7 +185,7 @@ lp_build_add(struct lp_build_context *bld,
 #endif
    
       if(intrinsic)
-         return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
+         return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
    }
 
    if(LLVMIsConstant(a) && LLVMIsConstant(b))
@@ -268,7 +232,7 @@ lp_build_sub(struct lp_build_context *bld,
 #endif
    
       if(intrinsic)
-         return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
+         return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
    }
 
    if(LLVMIsConstant(a) && LLVMIsConstant(b))
@@ -443,7 +407,7 @@ lp_build_mul(struct lp_build_context *bld,
          abh = lp_build_mul_u8n(bld->builder, ah, bh);
 
          /* PACKUSWB */
-         ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , abl, abh);
+         ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh);
 
          /* NOP */
          ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_intr.c b/src/gallium/drivers/llvmpipe/lp_bld_intr.c
new file mode 100644
index 0000000000..c055f8f38c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_intr.c
@@ -0,0 +1,87 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for calling intrinsics.
+ *
+ * LLVM IR doesn't support all basic arithmetic operations we care about (most
+ * notably min/max and saturated operations), and it is often necessary to
+ * resort machine-specific intrinsics directly. The functions here hide all
+ * these implementation details from the other modules.
+ *
+ * We also do simple expressions simplification here. Reasons are:
+ * - it is very easy given we have all necessary information readily available
+ * - LLVM optimization passes fail to simplify several vector expressions
+ * - We often know value constraints which the optimization passes have no way
+ *   of knowing, such as when source arguments are known to be in [0, 1] range.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_intr.h"
+
+
+LLVMValueRef
+lp_build_intrinsic_binary(LLVMBuilderRef builder,
+                          const char *name,
+                          LLVMTypeRef ret_type,
+                          LLVMValueRef a,
+                          LLVMValueRef b)
+{
+   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+   LLVMValueRef function;
+   LLVMValueRef args[2];
+
+   function = LLVMGetNamedFunction(module, name);
+   if(!function) {
+      LLVMTypeRef arg_types[2];
+      arg_types[0] = LLVMTypeOf(a);
+      arg_types[1] = LLVMTypeOf(b);
+      function = LLVMAddFunction(module, name, LLVMFunctionType(ret_type, arg_types, 2, 0));
+      LLVMSetFunctionCallConv(function, LLVMCCallConv);
+      LLVMSetLinkage(function, LLVMExternalLinkage);
+   }
+   assert(LLVMIsDeclaration(function));
+
+#ifdef DEBUG
+   /* We shouldn't use only constants with intrinsics, as they won't be
+    * propagated by LLVM optimization passes.
+    */
+   if(LLVMIsConstant(a) && LLVMIsConstant(b))
+      debug_printf("warning: invoking intrinsic \"%s\" with constants\n", name);
+#endif
+
+   args[0] = a;
+   args[1] = b;
+
+   return LLVMBuildCall(builder, function, args, 2, "");
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_intr.h b/src/gallium/drivers/llvmpipe/lp_bld_intr.h
new file mode 100644
index 0000000000..67f596c2b5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_intr.h
@@ -0,0 +1,51 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for calling intrinsics.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_INTR_H
+#define LP_BLD_INTR_H
+
+
+#include <llvm-c/Core.h>  
+
+
+LLVMValueRef
+lp_build_intrinsic_binary(LLVMBuilderRef builder,
+                          const char *name,
+                          LLVMTypeRef ret_type,
+                          LLVMValueRef a,
+                          LLVMValueRef b);
+
+
+#endif /* !LP_BLD_INTR_H */
-- 
cgit v1.2.3


From e6ebebc4853c98aa387b2c39a886a0c1173e93fb Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 7 Aug 2009 01:20:01 +0100
Subject: llvmpipe: Factor out shared test code into a separate module.

---
 src/gallium/drivers/llvmpipe/SConscript      |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_type.h   |   2 +
 src/gallium/drivers/llvmpipe/lp_test.h       | 124 ++++++++
 src/gallium/drivers/llvmpipe/lp_test_blend.c | 250 +++++------------
 src/gallium/drivers/llvmpipe/lp_test_main.c  | 404 +++++++++++++++++++++++++++
 5 files changed, 598 insertions(+), 184 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_test.h
 create mode 100644 src/gallium/drivers/llvmpipe/lp_test_main.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 58e6a888e8..7982e4219a 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -64,7 +64,7 @@ env.Program(
 
 env.Program(
     target = 'lp_test_blend',
-    source = ['lp_test_blend.c'],
+    source = ['lp_test_blend.c', 'lp_test_main.c'],
 )
 
 Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index 566a86ed06..37d6885049 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -48,6 +48,8 @@
  */
 #define LP_MAX_VECTOR_LENGTH 16
 
+#define LP_MAX_TYPE_WIDTH 64
+
 
 /**
  * The LLVM type system can't conveniently express all the things we care about
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
new file mode 100644
index 0000000000..dd173fe4c5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -0,0 +1,124 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Shared testing code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_TEST_H
+#define LP_TEST_H
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <float.h>
+
+#include <llvm-c/Core.h>
+#include <llvm-c/Analysis.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/BitWriter.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_debug_dump.h"
+
+#include "lp_bld_type.h"
+
+
+void
+write_tsv_header(FILE *fp);
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n);
+
+
+boolean
+test_all(unsigned verbose, FILE *fp);
+
+/** Read the x86 time-stamp counter (RDTSC) as a 64-bit value; returns 0 when neither x86 arch macro is defined. */
+static INLINE uint64_t
+rdtsc(void)
+{
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   uint32_t hi, lo;
+   __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+   return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+#else
+   return 0;
+#endif
+}
+
+
+float
+random_float(void);
+
+
+void
+dump_type(FILE *fp, union lp_type type);
+
+
+double
+read_elem(union lp_type type, const void *src, unsigned index);
+
+
+void
+write_elem(union lp_type type, void *dst, unsigned index, double src);
+
+
+void
+random_elem(union lp_type type, void *dst, unsigned index);
+
+
+void
+read_vec(union lp_type type, const void *src, double *dst);
+
+
+void
+write_vec(union lp_type type, void *dst, const double *src);
+
+
+void
+random_vec(union lp_type type, void *dst);
+
+
+boolean
+compare_vec(union lp_type type, const void *res, const double *ref);
+
+
+void
+dump_vec(FILE *fp, union lp_type type, const void *src);
+
+
+#endif /* !LP_TEST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 56cd800b37..f42a9a9e42 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -37,34 +37,16 @@
  */
 
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <float.h>
-
-#include <llvm-c/Core.h>
-#include <llvm-c/Analysis.h>
-#include <llvm-c/ExecutionEngine.h>
-#include <llvm-c/Target.h>
-#include <llvm-c/BitWriter.h>
-#include <llvm-c/Transforms/Scalar.h>
-
-#include "pipe/p_state.h"
-#include "util/u_format.h"
-#include "util/u_math.h"
-#include "util/u_debug_dump.h"
-
 #include "lp_bld.h"
 #include "lp_bld_type.h"
 #include "lp_bld_arit.h"
-
-
-unsigned verbose = 0;
+#include "lp_test.h"
 
 
 typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
 
 
-static void
+void
 write_tsv_header(FILE *fp)
 {
    fprintf(fp,
@@ -145,19 +127,6 @@ dump_blend_type(FILE *fp,
 }
 
 
-static INLINE uint64_t
-rdtsc(void)
-{
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-   uint32_t hi, lo;
-   __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
-   return ((uint64_t)lo) | (((uint64_t)hi) << 32);
-#else
-   return 0;
-#endif
-}
-
-
 static LLVMValueRef
 add_blend_test(LLVMModuleRef module,
                const struct pipe_blend_state *blend,
@@ -210,13 +179,6 @@ add_blend_test(LLVMModuleRef module,
 }
 
 
-static float
-random_float(void)
-{
-    return (float)((double)random()/(double)RAND_MAX);
-}
-
-
 /** Add and limit result to ceiling of 1.0 */
 #define ADD_SAT(R, A, B) \
 do { \
@@ -233,13 +195,13 @@ do { \
 static void
 compute_blend_ref_term(unsigned rgb_factor,
                        unsigned alpha_factor,
-                       const float *factor,
-                       const float *src, 
-                       const float *dst, 
-                       const float *con, 
-                       float *term)
+                       const double *factor,
+                       const double *src,
+                       const double *dst,
+                       const double *con,
+                       double *term)
 {
-   float temp;
+   double temp;
 
    switch (rgb_factor) {
    case PIPE_BLENDFACTOR_ONE:
@@ -379,13 +341,13 @@ compute_blend_ref_term(unsigned rgb_factor,
 
 static void
 compute_blend_ref(const struct pipe_blend_state *blend,
-                  const float *src, 
-                  const float *dst, 
-                  const float *con, 
-                  float *res)
+                  const double *src,
+                  const double *dst,
+                  const double *con,
+                  double *res)
 {
-   float src_term[4];
-   float dst_term[4];
+   double src_term[4];
+   double dst_term[4];
 
    compute_blend_ref_term(blend->rgb_src_factor, blend->alpha_src_factor, src, src, dst, con, src_term);
    compute_blend_ref_term(blend->rgb_dst_factor, blend->alpha_dst_factor, dst, src, dst, con, dst_term);
@@ -449,7 +411,8 @@ compute_blend_ref(const struct pipe_blend_state *blend,
 
 
 static boolean
-test_one(FILE *fp,
+test_one(unsigned verbose,
+         FILE *fp,
          const struct pipe_blend_state *blend,
          union lp_type type)
 {
@@ -464,7 +427,7 @@ test_one(FILE *fp,
    const unsigned n = 32;
    int64_t cycles[n];
    double cycles_avg = 0.0;
-   unsigned i, j, k;
+   unsigned i, j;
 
    if(verbose >= 1)
       dump_blend_type(stdout, blend, type);
@@ -510,108 +473,64 @@ test_one(FILE *fp,
 
    success = TRUE;
    for(i = 0; i < n && success; ++i) {
+      uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+      uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+      uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+      uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+      double ref[LP_MAX_VECTOR_LENGTH];
       int64_t start_counter = 0;
       int64_t end_counter = 0;
 
-      if(type.floating && type.width == 32) {
-         float src[LP_MAX_VECTOR_LENGTH];
-         float dst[LP_MAX_VECTOR_LENGTH];
-         float con[LP_MAX_VECTOR_LENGTH];
-         float ref[LP_MAX_VECTOR_LENGTH];
-         float res[LP_MAX_VECTOR_LENGTH];
-
-         for(j = 0; j < type.length; ++j) {
-            src[j] = random_float();
-            dst[j] = random_float();
-            con[j] = random_float();
-         }
+      random_vec(type, src);
+      random_vec(type, dst);
+      random_vec(type, con);
+
+      {
+         double fsrc[LP_MAX_VECTOR_LENGTH];
+         double fdst[LP_MAX_VECTOR_LENGTH];
+         double fcon[LP_MAX_VECTOR_LENGTH];
+
+         read_vec(type, src, fsrc);
+         read_vec(type, dst, fdst);
+         read_vec(type, con, fcon);
 
          for(j = 0; j < type.length; j += 4)
-            compute_blend_ref(blend, src + j, dst + j, con + j, ref + j);
-
-         start_counter = rdtsc();
-         blend_test_ptr(src, dst, con, res);
-         end_counter = rdtsc();
-
-         for(j = 0; j < type.length; ++j)
-            if(fabs(res[j] - ref[j]) > FLT_EPSILON)
-               success = FALSE;
-
-         if (!success) {
-            dump_blend_type(stderr, blend, type);
-            fprintf(stderr, "\n");
-            fprintf(stderr, "MISMATCH\n");
-            fprintf(stderr, "  Result:   ");
-            for(j = 0; j < type.length; ++j)
-               fprintf(stderr, " %f", res[j]);
-            fprintf(stderr, "\n");
-            fprintf(stderr, "  Expected: ");
-            for(j = 0; j < type.length; ++j)
-               fprintf(stderr, " %f", ref[j]);
-            fprintf(stderr, "\n");
-         }
+            compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, ref + j);
       }
-      else if(!type.floating && !type.fixed && !type.sign && type.norm && type.width == 8) {
-         uint8_t src[LP_MAX_VECTOR_LENGTH];
-         uint8_t dst[LP_MAX_VECTOR_LENGTH];
-         uint8_t con[LP_MAX_VECTOR_LENGTH];
-         uint8_t ref[LP_MAX_VECTOR_LENGTH];
-         uint8_t res[LP_MAX_VECTOR_LENGTH];
-
-         for(j = 0; j < type.length; ++j) {
-            src[j] = random() & 0xff;
-            dst[j] = random() & 0xff;
-            con[j] = random() & 0xff;
-         }
 
-         for(j = 0; j < type.length; j += 4) {
-            float srcf[4];
-            float dstf[4];
-            float conf[4];
-            float reff[4];
+      start_counter = rdtsc();
+      blend_test_ptr(src, dst, con, res);
+      end_counter = rdtsc();
 
-            for(k = 0; k < 4; ++k) {
-               srcf[k] = (1.0f/255.0f)*src[j + k];
-               dstf[k] = (1.0f/255.0f)*dst[j + k];
-               conf[k] = (1.0f/255.0f)*con[j + k];
-            }
+      cycles[i] = end_counter - start_counter;
 
-            compute_blend_ref(blend, srcf, dstf, conf, reff);
+      success = compare_vec(type, res, ref);
 
-            for(k = 0; k < 4; ++k)
-               ref[j + k] = (uint8_t)(reff[k]*255.0f + 0.5f);
-         }
+      if (!success) {
+         dump_blend_type(stderr, blend, type);
+         fprintf(stderr, "\n");
+         fprintf(stderr, "MISMATCH\n");
 
-         start_counter = rdtsc();
-         blend_test_ptr(src, dst, con, res);
-         end_counter = rdtsc();
+         fprintf(stderr, "  Src: ");
+         dump_vec(stderr, type, src);
+         fprintf(stderr, "\n");
 
-         for(j = 0; j < type.length; ++j) {
-            int delta = (int)res[j] - (int)ref[j];
-            if (delta < 0)
-               delta = -delta;
-            if(delta > 1)
-               success = FALSE;
-         }
+         fprintf(stderr, "  Dst: ");
+         dump_vec(stderr, type, dst);
+         fprintf(stderr, "\n");
 
-         if (!success) {
-            dump_blend_type(stderr, blend, type);
-            fprintf(stderr, "\n");
-            fprintf(stderr, "MISMATCH\n");
-            fprintf(stderr, "  Result:   ");
-            for(j = 0; j < type.length; ++j)
-               fprintf(stderr, " %3u", res[j]);
-            fprintf(stderr, "\n");
-            fprintf(stderr, "  Expected: ");
-            for(j = 0; j < type.length; ++j)
-               fprintf(stderr, " %3u", ref[j]);
-            fprintf(stderr, "\n");
-         }
-      }
-      else
-         assert(0);
+         fprintf(stderr, "  Con: ");
+         dump_vec(stderr, type, con);
+         fprintf(stderr, "\n");
 
-      cycles[i] = end_counter - start_counter;
+         fprintf(stderr, "  Res: ");
+         dump_vec(stderr, type, res);
+         fprintf(stderr, "\n");
+
+         fprintf(stderr, "  Ref: ");
+         dump_vec(stderr, type, ref);
+         fprintf(stderr, "\n");
+      }
    }
 
    /*
@@ -725,8 +644,8 @@ const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
 
 
-static boolean 
-test_all(FILE *fp)
+boolean
+test_all(unsigned verbose, FILE *fp)
 {
    const unsigned *rgb_func;
    const unsigned *rgb_src_factor;
@@ -759,7 +678,7 @@ test_all(FILE *fp)
                         blend.alpha_src_factor  = *alpha_src_factor;
                         blend.alpha_dst_factor  = *alpha_dst_factor;
 
-                        if(!test_one(fp, &blend, *type))
+                        if(!test_one(verbose, fp, &blend, *type))
                           success = FALSE;
 
                      }
@@ -774,8 +693,8 @@ test_all(FILE *fp)
 }
 
 
-static boolean 
-test_some(FILE *fp, unsigned long n)
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
 {
    const unsigned *rgb_func;
    const unsigned *rgb_src_factor;
@@ -813,45 +732,10 @@ test_some(FILE *fp, unsigned long n)
          blend.alpha_src_factor  = *alpha_src_factor;
          blend.alpha_dst_factor  = *alpha_dst_factor;
 
-         if(!test_one(fp, &blend, *type))
+         if(!test_one(verbose, fp, &blend, *type))
            success = FALSE;
       }
    }
 
    return success;
 }
-
-
-int main(int argc, char **argv)
-{
-   unsigned long n = 1000;
-   FILE *fp = NULL;
-   unsigned i;
-   boolean success;
-
-   for(i = 1; i < argc; ++i) {
-      if(strcmp(argv[i], "-v") == 0)
-         ++verbose;
-      else if(strcmp(argv[i], "-o") == 0)
-         fp = fopen(argv[++i], "wt");
-      else
-         n = atoi(argv[i]);
-   }
-
-   if(fp) {
-      /* Warm up the caches */
-      test_some(NULL, 100);
-
-      write_tsv_header(fp);
-   }
-      
-   if(n)
-      success = test_some(fp, n);
-   else
-      success = test_all(fp);
-
-   if(fp)
-      fclose(fp);
-
-   return success ? 0 : 1;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
new file mode 100644
index 0000000000..2ce3fa1c0b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -0,0 +1,404 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Shared testing code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "lp_bld_const.h"
+#include "lp_test.h"
+
+/** Print a compact tag for `type` to fp: f/h/s/u (floating/fixed/signed/unsigned), width, "n" if norm, "x" + length. */
+void
+dump_type(FILE *fp,
+          union lp_type type)
+{
+   fprintf(fp, "%s%u%sx%u",
+           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
+           type.width,
+           type.norm ? "n" : "",
+           type.length);
+}
+
+/** Return element `index` of the vector at src, decoded according to `type`, divided by lp_const_scale(type). */
+double
+read_elem(union lp_type type, const void *src, unsigned index)
+{
+   double scale = lp_const_scale(type);
+   double value;
+   assert(index < type.length);
+   if (type.floating) {
+      switch(type.width) {
+      case 32:
+         value = *((const float *)src + index);
+         break;
+      case 64:
+         value =  *((const double *)src + index);
+         break;
+      default:
+         assert(0); /* only 32 and 64 bit floats are handled */
+         return 0.0;
+      }
+   }
+   else {
+      if(type.sign) {
+         switch(type.width) {
+         case 8:
+            value = *((const int8_t *)src + index);
+            break;
+         case 16:
+            value = *((const int16_t *)src + index);
+            break;
+         case 32:
+            value = *((const int32_t *)src + index);
+            break;
+         case 64:
+            value = *((const int64_t *)src + index);
+            break;
+         default:
+            assert(0); /* unsupported integer width */
+            return 0.0;
+         }
+      }
+      else {
+         switch(type.width) {
+         case 8:
+            value = *((const uint8_t *)src + index);
+            break;
+         case 16:
+            value = *((const uint16_t *)src + index);
+            break;
+         case 32:
+            value = *((const uint32_t *)src + index);
+            break;
+         case 64:
+            value = *((const uint64_t *)src + index);
+            break;
+         default:
+            assert(0); /* unsupported integer width */
+            return 0.0;
+         }
+      }
+   }
+   return value/scale; /* inverse of the scale*src multiplication done by write_elem() */
+}
+
+/** Store `src`, multiplied by lp_const_scale(type) and rounded for integer types, into element `index` of dst. */
+void
+write_elem(union lp_type type, void *dst, unsigned index, double src)
+{
+   double scale = lp_const_scale(type);
+   double value = scale*src;
+   assert(index < type.length);
+   if (type.floating) {
+      switch(type.width) {
+      case 32:
+         *((float *)dst + index) = (float)(value);
+         break;
+      case 64:
+         *((double *)dst + index) = value;
+         break;
+      default:
+         assert(0); /* only 32 and 64 bit floats are handled */
+      }
+   }
+   else {
+      if(type.sign) {
+         switch(type.width) {
+         case 8:
+            *((int8_t *)dst + index) = (int8_t)round(value);
+            break;
+         case 16:
+            *((int16_t *)dst + index) = (int16_t)round(value);
+            break;
+         case 32:
+            *((int32_t *)dst + index) = (int32_t)round(value);
+            break;
+         case 64:
+            *((int64_t *)dst + index) = (int64_t)round(value); /* cast must be 64 bits wide, an int32_t cast truncates */
+            break;
+         default:
+            assert(0); /* unsupported integer width */
+         }
+      }
+      else {
+         switch(type.width) {
+         case 8:
+            *((uint8_t *)dst + index) = (uint8_t)round(value);
+            break;
+         case 16:
+            *((uint16_t *)dst + index) = (uint16_t)round(value);
+            break;
+         case 32:
+            *((uint32_t *)dst + index) = (uint32_t)round(value);
+            break;
+         case 64:
+            *((uint64_t *)dst + index) = (uint64_t)round(value);
+            break;
+         default:
+            assert(0); /* unsupported integer width */
+         }
+      }
+   }
+}
+
+/** Fill element `index` of dst with a random value of the given type (floats by value, integers by raw bits). */
+void
+random_elem(union lp_type type, void *dst, unsigned index)
+{
+   assert(index < type.length);
+   if (type.floating) {
+      double value = (double)random()/(double)RAND_MAX; /* fractional part */
+      if(!type.norm)
+         value += (double)random(); /* non-normalized: add a random integer part */
+      if(type.sign)
+         if(random() & 1) /* signed: negate on a coin flip */
+            value = -value;
+      switch(type.width) {
+      case 32:
+         *((float *)dst + index) = (float)value;
+         break;
+      case 64:
+          *((double *)dst + index) = value;
+         break;
+      default:
+         assert(0);
+      }
+   }
+   else {
+      /* Integers: store the low bits of one random() result; type.sign is not consulted here. */
+      switch(type.width) {
+      case 8:
+         *((uint8_t *)dst + index) = (uint8_t)random();
+         break;
+      case 16:
+         *((uint16_t *)dst + index) = (uint16_t)random();
+         break;
+      case 32:
+         *((uint32_t *)dst + index) = (uint32_t)random();
+         break;
+      case 64:
+         *((uint64_t *)dst + index) = (uint64_t)random(); /* NOTE(review): one random() call may not cover all 64 bits -- confirm */
+         break;
+      default:
+         assert(0);
+      }
+   }
+}
+
+/** Decode all type.length elements of src into dst[] using read_elem(). */
+void
+read_vec(union lp_type type, const void *src, double *dst)
+{
+   unsigned i;
+   for (i = 0; i < type.length; ++i)
+      dst[i] = read_elem(type, src, i);
+}
+
+/** Encode the type.length doubles in src[] into dst using write_elem(). */
+void
+write_vec(union lp_type type, void *dst, const double *src)
+{
+   unsigned i;
+   for (i = 0; i < type.length; ++i)
+      write_elem(type, dst, i, src[i]);
+}
+
+/** Return random() divided by RAND_MAX, computed in double precision and narrowed to float. */
+float
+random_float(void)
+{
+    return (float)((double)random()/(double)RAND_MAX);
+}
+
+/** Fill all type.length elements of dst with random values using random_elem(). */
+void
+random_vec(union lp_type type, void *dst)
+{
+   unsigned i;
+   for (i = 0; i < type.length; ++i)
+      random_elem(type, dst, i);
+}
+
+/** Return TRUE if every element of res (decoded with read_elem()) differs from ref[] by less than 2*eps. */
+boolean
+compare_vec(union lp_type type, const void *res, const double *ref)
+{
+   double eps;
+   unsigned i;
+
+   if (type.floating) {
+      /* Floats: eps is the machine epsilon of the element width. */
+      switch(type.width) {
+      case 32:
+         eps = FLT_EPSILON;
+         break;
+      case 64:
+         eps = DBL_EPSILON;
+         break;
+      default:
+         assert(0);
+         eps = 0.0;
+         break;
+      }
+   }
+   else {
+      /* Non-float types: eps is one step of the stored value after dividing by the scale. */
+      double scale = lp_const_scale(type);
+      eps = 1.0/scale;
+   }
+
+   for (i = 0; i < type.length; ++i) {
+      double res_elem = read_elem(type, res, i);
+      double ref_elem = ref[i];
+      double delta = fabs(res_elem - ref_elem);
+      if(delta >= 2.0*eps) /* fail on the first element that is off by 2*eps or more */
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/** Print the type.length elements at src to fp, space separated, as raw stored values (no lp_const_scale division). */
+void
+dump_vec(FILE *fp, union lp_type type, const void *src)
+{
+   unsigned i;
+   for (i = 0; i < type.length; ++i) {
+      if(i)
+         fprintf(fp, " ");
+      if (type.floating) {
+         double value;
+         switch(type.width) {
+         case 32:
+            value = *((const float *)src + i);
+            break;
+         case 64:
+            value = *((const double *)src + i);
+            break;
+         default:
+            assert(0);
+            value = 0.0;
+         }
+         fprintf(fp, "%f", value);
+      }
+      else {
+         if(type.sign) {
+            /* Signed integers: widen to long long and pick a field width per element size. */
+            long long value;
+            const char *format;
+            switch(type.width) {
+            case 8:
+               value = *((const int8_t *)src + i);
+               format = "%3lli";
+               break;
+            case 16:
+               value = *((const int16_t *)src + i);
+               format = "%5lli";
+               break;
+            case 32:
+               value = *((const int32_t *)src + i);
+               format = "%10lli";
+               break;
+            case 64:
+               value = *((const int64_t *)src + i);
+               format = "%20lli";
+               break;
+            default:
+               assert(0);
+               value = 0.0;
+               format = "?";
+            }
+            fprintf(fp, format, value);
+         }
+         else {
+            /* Unsigned integers: widen to unsigned long long and pick a field width per element size. */
+            unsigned long long value;
+            const char *format;
+            switch(type.width) {
+            case 8:
+               value = *((const uint8_t *)src + i);
+               format = "%4llu";
+               break;
+            case 16:
+               value = *((const uint16_t *)src + i);
+               format = "%6llu";
+               break;
+            case 32:
+               value = *((const uint32_t *)src + i);
+               format = "%11llu";
+               break;
+            case 64:
+               value = *((const uint64_t *)src + i);
+               format = "%21llu";
+               break;
+            default:
+               assert(0);
+               value = 0.0;
+               format = "?";
+            }
+            fprintf(fp, format, value);
+         }
+      }
+   }
+}
+
+/** Test driver: "-v" raises verbosity, "-o FILE" writes TSV results to FILE, any other argument sets n (0 runs test_all). */
+int main(int argc, char **argv)
+{
+   unsigned verbose = 0;
+   FILE *fp = NULL;
+   unsigned long n = 1000;
+   int i; /* signed, to match argc */
+   boolean success;
+
+   for(i = 1; i < argc; ++i) {
+      if(strcmp(argv[i], "-v") == 0)
+         ++verbose;
+      else if(strcmp(argv[i], "-o") == 0) /* argv[argc] is NULL: only consume a file name if one follows */
+         fp = (i + 1 < argc) ? fopen(argv[++i], "wt") : fp;
+      else
+         n = atoi(argv[i]);
+   }
+
+   if(fp) {
+      /* Warm up the caches */
+      test_some(0, NULL, 100);
+
+      write_tsv_header(fp);
+   }
+
+   if(n)
+      success = test_some(verbose, fp, n);
+   else
+      success = test_all(verbose, fp);
+
+   if(fp)
+      fclose(fp);
+
+   return success ? 0 : 1; /* process exit status: 0 on success, 1 on any failure */
+}
-- 
cgit v1.2.3


From 8988424ee8de7948e55279fe622ffdacdb6e5f8a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 7 Aug 2009 09:51:48 +0100
Subject: llvmpipe: Bootstrap type conversions.

---
 src/gallium/drivers/llvmpipe/SConscript     |   6 +
 src/gallium/drivers/llvmpipe/lp_bld_conv.c  | 190 ++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_conv.h  |  54 ++++
 src/gallium/drivers/llvmpipe/lp_test_conv.c | 392 ++++++++++++++++++++++++++++
 4 files changed, 642 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_conv.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_conv.h
 create mode 100644 src/gallium/drivers/llvmpipe/lp_test_conv.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 7982e4219a..71e4e6a03f 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -12,6 +12,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_fs_llvm.c',
 		'lp_bld_arit.c',
 		'lp_bld_const.c',
+		'lp_bld_conv.c',
 		'lp_bld_intr.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
@@ -67,4 +68,9 @@ env.Program(
     source = ['lp_test_blend.c', 'lp_test_main.c'],
 )
 
+env.Program(
+    target = 'lp_test_conv',
+    source = ['lp_test_conv.c', 'lp_test_main.c'],
+)
+
 Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
new file mode 100644
index 0000000000..aea1ac2526
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -0,0 +1,190 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for type conversions.
+ *
+ * LLVM IR doesn't support all basic arithmetic operations we care about (most
+ * notably min/max and saturated operations), and it is often necessary to
+ * resort to machine-specific intrinsics directly. The functions here hide all
+ * these implementation details from the other modules.
+ *
+ * We also do simple expressions simplification here. Reasons are:
+ * - it is very easy given we have all necessary information readily available
+ * - LLVM optimization passes fail to simplify several vector expressions
+ * - We often know value constraints which the optimization passes have no way
+ *   of knowing, such as when source arguments are known to be in [0, 1] range.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_conv.h"
+
+/* Pack num_srcs vectors of src_type into one narrower dst_type vector (x86 SSE2 only). */
+static LLVMValueRef
+lp_build_trunc(LLVMBuilderRef builder,
+               union lp_type src_type,
+               union lp_type dst_type,
+               LLVMValueRef *src, unsigned num_srcs)
+{
+   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; /* working copy; src[] is not modified */
+   unsigned i;
+
+   /* Register width must remain constant */
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length);
+
+   for(i = 0; i < num_srcs; ++i)
+      tmp[i] = src[i];
+
+   while(src_type.width > dst_type.width) { /* one pass per halving of the element width */
+      LLVMTypeRef tmp_vec_type = lp_build_vec_type(src_type);
+      union lp_type new_type = src_type;
+      LLVMTypeRef new_vec_type;
+
+      new_type.width /= 2;
+      new_type.length *= 2;
+      new_vec_type = lp_build_vec_type(new_type);
+
+      for(i = 0; i < num_srcs/2; ++i) { /* merge vectors 2*i and 2*i+1 into slot i */
+         LLVMValueRef lo = tmp[2*i + 0];
+         LLVMValueRef hi = tmp[2*i + 1];
+         LLVMValueRef packed = NULL;
+
+         if(src_type.width == 32) {
+            /* FIXME: we only have a packed signed intrinsic */
+            packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", tmp_vec_type, lo, hi);
+         }
+         else if(src_type.width == 16) {
+            if(dst_type.sign)
+               packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", tmp_vec_type, lo, hi);
+            else
+               packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", tmp_vec_type, lo, hi);
+         }
+         else
+            assert(0); /* other widths are not handled; packed stays NULL */
+
+         tmp[i] = LLVMBuildBitCast(builder, packed, new_vec_type, "");
+      }
+
+      src_type = new_type;
+
+      num_srcs /= 2;
+   }
+
+   assert(num_srcs == 1);
+
+   return tmp[0];
+}
+
+
+/**
+ * Convert between two SIMD types (only narrowing is implemented so far).
+ *
+ * Converting between SIMD types of different element width poses a problem:
+ * SIMD registers have a fixed number of bits, so different element widths
+ * imply different vector lengths. Therefore we must multiplex the multiple
+ * incoming sources into a single destination vector, or demux a single incoming
+ * vector into multiple vectors.
+ */
+void
+lp_build_conv(LLVMBuilderRef builder,
+              union lp_type src_type,
+              union lp_type dst_type,
+              LLVMValueRef *src, unsigned num_srcs,
+              LLVMValueRef *dst, unsigned num_dsts)
+{
+   unsigned i;
+
+   /* Register width must remain constant */
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+   if(!src_type.norm && dst_type.norm) {
+      /* FIXME: clamp */
+   }
+
+   if(src_type.floating && !dst_type.floating) {
+      double dscale;
+      LLVMTypeRef tmp;
+
+      /* Rescale */
+      dscale = lp_const_scale(dst_type);
+      if (dscale != 1.0) {
+         LLVMValueRef scale = lp_build_const_uni(src_type, dscale);
+         for(i = 0; i < num_srcs; ++i)
+            src[i] = LLVMBuildMul(builder, src[i], scale, ""); /* NOTE: overwrites the caller's src[] */
+      }
+
+      /* Use an equally sized integer for intermediate computations */
+      src_type.floating = FALSE;
+      tmp = lp_build_vec_type(src_type);
+      for(i = 0; i < num_srcs; ++i) {
+#if 0
+         if(dst_type.sign)
+            src[i] = LLVMBuildFPToSI(builder, src[i], tmp, "");
+         else
+            src[i] = LLVMBuildFPToUI(builder, src[i], tmp, "");
+#else
+         /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
+         src[i] = LLVMBuildFPToSI(builder, src[i], tmp, "");
+#endif
+      }
+   }
+   else {
+      unsigned src_shift = lp_const_shift(src_type);
+      unsigned dst_shift = lp_const_shift(dst_type);
+
+      if(src_shift > dst_shift) { /* no adjustment is made when src_shift <= dst_shift */
+         LLVMValueRef shift = lp_build_int_const_uni(src_type, src_shift - dst_shift);
+         for(i = 0; i < num_srcs; ++i)
+            if(dst_type.sign)
+               src[i] = LLVMBuildAShr(builder, src[i], shift, ""); /* arithmetic shift for signed dst */
+            else
+               src[i] = LLVMBuildLShr(builder, src[i], shift, ""); /* logical shift for unsigned dst */
+      }
+   }
+
+   if(src_type.width > dst_type.width) {
+      assert(num_dsts == 1);
+      dst[0] = lp_build_trunc(builder, src_type, dst_type, src, num_srcs);
+   }
+   else
+      assert(0); /* same-width and widening conversions are not implemented */
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
new file mode 100644
index 0000000000..03be8f28ca
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
@@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for type conversions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_CONV_H
+#define LP_BLD_CONV_H
+
+
+#include <llvm-c/Core.h>  
+
+
+union lp_type; /* forward declaration only; must not define an object in a header */
+
+
+void
+lp_build_conv(LLVMBuilderRef builder,
+              union lp_type src_type,
+              union lp_type dst_type,
+              LLVMValueRef *srcs, unsigned num_srcs,
+              LLVMValueRef *dsts, unsigned num_dsts);
+
+
+#endif /* !LP_BLD_CONV_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
new file mode 100644
index 0000000000..6b43279ae5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -0,0 +1,392 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Unit tests for type conversion.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_conv.h"
+#include "lp_test.h"
+
+
+typedef void (*conv_test_ptr_t)(const void *src, const void *dst);
+
+
+/* Write the TSV column names; one name per field emitted by write_tsv_row(). */
+void
+write_tsv_header(FILE *fp)
+{
+   fprintf(fp,
+           "result\t"
+           "cycles\t"
+           "src_type\t"
+           "dst_type\n");
+
+   fflush(fp);
+}
+
+/* Write one TSV record (result, cycles, src_type, dst_type) terminated by a newline. */
+static void
+write_tsv_row(FILE *fp,
+              union lp_type src_type,
+              union lp_type dst_type,
+              double cycles,
+              boolean success)
+{
+   fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+   fprintf(fp, "%.1f\t", cycles + 0.5);
+
+   dump_type(fp, src_type);
+   fprintf(fp, "\t");
+
+   dump_type(fp, dst_type);
+   fprintf(fp, "\n"); /* last field: end the record instead of adding another tab */
+
+   fflush(fp);
+}
+
+/* Print "src_type=" and " dst_type=" each followed by dump_type() output; no newline. */
+static void
+dump_conv_types(FILE *fp,
+               union lp_type src_type,
+               union lp_type dst_type)
+{
+   fprintf(fp, "src_type=");
+   dump_type(fp, src_type);
+
+   fprintf(fp, " dst_type=");
+   dump_type(fp, dst_type);
+
+   fflush(fp);
+}
+
+/* Add function "test" to module: load num_srcs vectors, convert them, store num_dsts vectors. */
+static LLVMValueRef
+add_conv_test(LLVMModuleRef module,
+              union lp_type src_type, unsigned num_srcs,
+              union lp_type dst_type, unsigned num_dsts)
+{
+   LLVMTypeRef args[2];
+   LLVMValueRef func;
+   LLVMValueRef src_ptr;
+   LLVMValueRef dst_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   LLVMValueRef src[LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef dst[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   /* Signature: void test(src vector pointer, dst vector pointer) */
+   args[0] = LLVMPointerType(lp_build_vec_type(src_type), 0);
+   args[1] = LLVMPointerType(lp_build_vec_type(dst_type), 0);
+   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+   src_ptr = LLVMGetParam(func, 0);
+   dst_ptr = LLVMGetParam(func, 1);
+
+   block = LLVMAppendBasicBlock(func, "entry");
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   for(i = 0; i < num_srcs; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef ptr = LLVMBuildGEP(builder, src_ptr, &index, 1, "");
+      src[i] = LLVMBuildLoad(builder, ptr, "");
+   }
+
+   lp_build_conv(builder, src_type, dst_type, src, num_srcs, dst, num_dsts);
+
+   for(i = 0; i < num_dsts; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, "");
+      LLVMBuildStore(builder, dst[i], ptr);
+   }
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder);
+   return func;
+}
+
+/* JIT-compile one src_type -> dst_type conversion and check it on random vectors. */
+static boolean
+test_one(unsigned verbose,
+         FILE *fp,
+         union lp_type src_type,
+         union lp_type dst_type)
+{
+   LLVMModuleRef module = NULL;
+   LLVMValueRef func = NULL;
+   LLVMExecutionEngineRef engine = NULL;
+   LLVMModuleProviderRef provider = NULL;
+   LLVMPassManagerRef pass = NULL;
+   char *error = NULL;
+   conv_test_ptr_t conv_test_ptr;
+   boolean success;
+   const unsigned n = 32;
+   int64_t cycles[n];
+   double cycles_avg = 0.0;
+   unsigned num_srcs;
+   unsigned num_dsts;
+   unsigned i, j;
+
+   if(verbose >= 1)
+      dump_conv_types(stdout, src_type, dst_type);
+
+   if(src_type.length > dst_type.length) {
+      num_srcs = 1;
+      num_dsts = src_type.length/dst_type.length;
+   }
+   else  {
+      num_dsts = 1;
+      num_srcs = dst_type.length/src_type.length;
+   }
+
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+
+   module = LLVMModuleCreateWithName("test");
+
+   func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts);
+
+   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+      LLVMDumpModule(module);
+      abort();
+   }
+   LLVMDisposeMessage(error);
+
+   provider = LLVMCreateModuleProviderForExistingModule(module);
+   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+      dump_conv_types(stderr, src_type, dst_type);
+      fprintf(stderr, "\n");
+      fprintf(stderr, "%s\n", error);
+      LLVMDisposeMessage(error);
+      abort();
+   }
+
+#if 0
+   pass = LLVMCreatePassManager();
+   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+    * but there are more on SVN. */
+   LLVMAddConstantPropagationPass(pass);
+   LLVMAddInstructionCombiningPass(pass);
+   LLVMAddPromoteMemoryToRegisterPass(pass);
+   LLVMAddGVNPass(pass);
+   LLVMAddCFGSimplificationPass(pass);
+   LLVMRunPassManager(pass, module);
+#else
+   (void)pass;
+#endif
+
+   if(verbose >= 2)
+      LLVMDumpModule(module);
+
+   conv_test_ptr = (conv_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
+
+   success = TRUE;
+   for(i = 0; i < n && success; ++i) {
+      unsigned src_stride = src_type.length*src_type.width/8;
+      unsigned dst_stride = dst_type.length*dst_type.width/8;
+      uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; /* NOTE(review): not explicitly aligned for vector loads -- confirm */
+      uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      int64_t start_counter = 0;
+      int64_t end_counter = 0;
+
+      for(j = 0; j < num_srcs; ++j) {
+         random_vec(src_type, src + j*src_stride);
+         read_vec(src_type, src + j*src_stride, fref + j*src_type.length);
+      }
+
+      for(j = 0; j < num_dsts; ++j) {
+         write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length);
+      }
+
+      start_counter = rdtsc();
+      conv_test_ptr(src, dst);
+      end_counter = rdtsc();
+
+      cycles[i] = end_counter - start_counter;
+
+      for(j = 0; j < num_dsts; ++j) {
+         if(!compare_vec(dst_type, dst + j*dst_stride, ref + j*dst_stride))
+            success = FALSE;
+      }
+
+      if (!success) {
+         dump_conv_types(stderr, src_type, dst_type);
+         fprintf(stderr, "\n");
+         fprintf(stderr, "MISMATCH\n");
+
+         for(j = 0; j < num_srcs; ++j) {
+            fprintf(stderr, "  Src%u: ", j);
+            dump_vec(stderr, src_type, src + j*src_stride);
+            fprintf(stderr, "\n");
+         }
+
+         for(j = 0; j < src_type.length*num_srcs; ++j)
+            fprintf(stderr, " %f", fref[j]);
+         fprintf(stderr, "\n");
+
+         for(j = 0; j < num_dsts; ++j) {
+            fprintf(stderr, "  Dst%u: ", j);
+            dump_vec(stderr, dst_type, dst + j*dst_stride);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, "  Ref%u: ", j);
+            dump_vec(stderr, dst_type, ref + j*dst_stride);
+            fprintf(stderr, "\n");
+         }
+      }
+   }
+
+   /*
+    * Unfortunately the raw output of the cycle counter is not very reliable --
+    * sometimes we get outliers (due to IRQs perhaps?) which are better removed
+    * to avoid random or biased data.
+    */
+   {
+      double sum = 0.0, sum2 = 0.0;
+      double avg, std;
+      const unsigned num_runs = i; /* iterations executed above; later cycles[] entries are unset */
+      unsigned m;
+
+      for(i = 0; i < num_runs; ++i) {
+         sum += cycles[i];
+         sum2 += cycles[i]*cycles[i];
+      }
+
+      avg = sum/num_runs;
+      std = sqrt((sum2 - num_runs*avg*avg)/num_runs); /* operands are double, so use sqrt not sqrtf */
+
+      m = 0;
+      sum = 0.0;
+      for(i = 0; i < num_runs; ++i) {
+         if(fabs(cycles[i] - avg) <= 4.0*std) {
+            sum += cycles[i];
+            ++m;
+         }
+      }
+
+      cycles_avg = m ? sum/m : avg; /* avoid 0/0 if every sample was rejected */
+   }
+
+   if(verbose >= 1) {
+      fprintf(stdout, " cycles=%.1f", cycles_avg);
+   }
+
+   if(verbose >= 1) {
+      fprintf(stdout, " result=%s\n", success ? "pass" : "fail");
+      fflush(stdout);
+   }
+
+   if(fp)
+      write_tsv_row(fp, src_type, dst_type, cycles_avg, success);
+
+   if (!success) {
+      LLVMDumpModule(module);
+      LLVMWriteBitcodeToFile(module, "conv.bc");
+      fprintf(stderr, "conv.bc written\n");
+      abort();
+   }
+
+   LLVMFreeMachineCodeForFunction(engine, func);
+
+   LLVMDisposeExecutionEngine(engine);
+   if(pass)
+      LLVMDisposePassManager(pass);
+
+   return success;
+}
+
+
+const union lp_type conv_types[] = {
+   /* float, fixed,  sign,  norm, width, len */
+   {{  TRUE, FALSE,  TRUE,  TRUE,    32,   4 }}, /* f32 x 4 */
+   {{ FALSE, FALSE, FALSE,  TRUE,     8,  16 }}, /* u8n x 16 */
+};
+
+
+const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);
+
+/* Test conversions from the first conv_types entry to every other entry. */
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+   const union lp_type *src_type;
+   const union lp_type *dst_type;
+   boolean success = TRUE;
+
+   for(src_type = conv_types; src_type < &conv_types[1 /* num_types */]; ++src_type) {
+      for(dst_type = conv_types; dst_type < &conv_types[num_types]; ++dst_type) {
+
+         if(src_type == dst_type)
+            continue;
+
+         if(!test_one(verbose, fp, *src_type, *dst_type))
+           success = FALSE;
+
+      }
+   }
+
+   return success;
+}
+
+/* Run n conversions from conv_types[0] to a randomly chosen different entry. */
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+   const union lp_type *src_type;
+   const union lp_type *dst_type;
+   unsigned long i;
+   boolean success = TRUE;
+
+   for(i = 0; i < n; ++i) {
+      src_type = &conv_types[0 /* random() % num_types */];
+
+      do {
+         dst_type = &conv_types[random() % num_types];
+      } while (src_type == dst_type);
+
+      if(!test_one(verbose, fp, *src_type, *dst_type))
+        success = FALSE;
+   }
+
+   return success;
+}
-- 
cgit v1.2.3


From 684a47f0f6a16f9a76844bcd60d8f5fb7e3d1be5 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 7 Aug 2009 14:20:57 +0100
Subject: llvmpipe: Only get the preprocessor flags from llvm-config.

Otherwise we get -O2 optimization flag, preventing proper debugging.
---
 src/gallium/drivers/llvmpipe/SConscript | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 71e4e6a03f..aca4f21b9e 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -2,7 +2,7 @@ Import('*')
 
 env = env.Clone()
 
-env.ParseConfig('llvm-config --cflags')
+env.ParseConfig('llvm-config --cppflags')
 
 llvmpipe = env.ConvenienceLibrary(
 	target = 'llvmpipe',
-- 
cgit v1.2.3


From 2529ed5616b1b152766a3355444260b88184cd6e Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 9 Aug 2009 12:39:38 +0100
Subject: llvmpipe: SoA blending.

Throughput seems to be 4x higher.
---
 src/gallium/drivers/llvmpipe/SConscript         |   3 +-
 src/gallium/drivers/llvmpipe/lp_bld.h           |  11 -
 src/gallium/drivers/llvmpipe/lp_bld_blend.c     | 313 ---------------------
 src/gallium/drivers/llvmpipe/lp_bld_blend.h     |  94 +++++++
 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 341 +++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c | 237 ++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_test_blend.c    | 347 +++++++++++++++++-------
 7 files changed, 918 insertions(+), 428 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend.h
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index aca4f21b9e..0a8e6e8fad 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -11,6 +11,8 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_fs_sse.c',
 		'lp_fs_llvm.c',
 		'lp_bld_arit.c',
+		'lp_bld_blend_aos.c',
+		'lp_bld_blend_soa.c',
 		'lp_bld_const.c',
 		'lp_bld_conv.c',
 		'lp_bld_intr.c',
@@ -20,7 +22,6 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_store.c',
 		'lp_bld_loop.c',
 		'lp_bld_logicop.c',
-		'lp_bld_blend.c',
 		'lp_bld_swizzle.c',
 		'lp_bld_type.c',
 		'lp_clear.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld.h b/src/gallium/drivers/llvmpipe/lp_bld.h
index e9d9c25a80..a725cbb474 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld.h
@@ -45,7 +45,6 @@
 #include "pipe/p_format.h"
 
 
-struct pipe_blend_state;
 union lp_type;
 
 
@@ -132,14 +131,4 @@ lp_build_logicop(LLVMBuilderRef builder,
                  LLVMValueRef dst);
 
 
-LLVMValueRef
-lp_build_blend(LLVMBuilderRef builder,
-               const struct pipe_blend_state *blend,
-               union lp_type type,
-               LLVMValueRef src,
-               LLVMValueRef dst,
-               LLVMValueRef const_,
-               unsigned alpha_swizzle);
-
-
 #endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
deleted file mode 100644
index a144469b35..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * @file
- * Blend LLVM IR generation.
- *
- * This code is generic -- it should be able to cope both with floating point
- * and integer inputs in AOS form.
- *
- * @author Jose Fonseca <jfonseca@vmware.com>
- */
-
-
-#include "pipe/p_state.h"
-
-#include "lp_bld.h"
-#include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_arit.h"
-#include "lp_bld_swizzle.h"
-
-
-/**
- * We may the same values several times, so we keep them here to avoid
- * recomputing them. Also reusing the values allows us to do simplifications
- * that LLVM optimization passes wouldn't normally be able to do.
- */
-struct lp_build_blend_context
-{
-   struct lp_build_context base;
-   
-   LLVMValueRef src;
-   LLVMValueRef dst;
-   LLVMValueRef const_;
-
-   LLVMValueRef inv_src;
-   LLVMValueRef inv_dst;
-   LLVMValueRef inv_const;
-   LLVMValueRef saturate;
-
-   LLVMValueRef rgb_src_factor;
-   LLVMValueRef alpha_src_factor;
-   LLVMValueRef rgb_dst_factor;
-   LLVMValueRef alpha_dst_factor;
-};
-
-
-static LLVMValueRef
-lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld,
-                                 unsigned factor,
-                                 boolean alpha)
-{
-   switch (factor) {
-   case PIPE_BLENDFACTOR_ZERO:
-      return bld->base.zero;
-   case PIPE_BLENDFACTOR_ONE:
-      return bld->base.one;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      return bld->src;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      return bld->dst;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      if(alpha)
-         return bld->base.one;
-      else {
-         if(!bld->inv_dst)
-            bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
-         if(!bld->saturate)
-            bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
-         return bld->saturate;
-      }
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      return bld->const_;
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-      /* TODO */
-      assert(0);
-      return bld->base.zero;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      if(!bld->inv_src)
-         bld->inv_src = lp_build_comp(&bld->base, bld->src);
-      return bld->inv_src;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      if(!bld->inv_dst)
-         bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
-      return bld->inv_dst;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      if(!bld->inv_const)
-         bld->inv_const = lp_build_comp(&bld->base, bld->const_);
-      return bld->inv_const;
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      /* TODO */
-      assert(0);
-      return bld->base.zero;
-   default:
-      assert(0);
-      return bld->base.zero;
-   }
-}
-
-
-enum lp_build_blend_swizzle {
-   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
-   LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
-};
-
-
-/**
- * How should we shuffle the base factor.
- */
-static enum lp_build_blend_swizzle
-lp_build_blend_factor_swizzle(unsigned factor)
-{
-   switch (factor) {
-   case PIPE_BLENDFACTOR_ONE:
-   case PIPE_BLENDFACTOR_ZERO:
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-   case PIPE_BLENDFACTOR_DST_COLOR:
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-      return LP_BUILD_BLEND_SWIZZLE_RGBA;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      return LP_BUILD_BLEND_SWIZZLE_AAAA;
-   default:
-      assert(0);
-      return LP_BUILD_BLEND_SWIZZLE_RGBA;
-   }
-}
-
-
-static LLVMValueRef
-lp_build_blend_swizzle(struct lp_build_blend_context *bld,
-                       LLVMValueRef rgb, 
-                       LLVMValueRef alpha, 
-                       enum lp_build_blend_swizzle rgb_swizzle,
-                       unsigned alpha_swizzle)
-{
-   if(rgb == alpha) {
-      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
-         return rgb;
-      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
-         return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
-   }
-   else {
-      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
-         boolean cond[4] = {0, 0, 0, 0};
-         cond[alpha_swizzle] = 1;
-         return lp_build_select_aos(&bld->base, alpha, rgb, cond);
-      }
-      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
-         unsigned char swizzle[4];
-         swizzle[0] = alpha_swizzle;
-         swizzle[1] = alpha_swizzle;
-         swizzle[2] = alpha_swizzle;
-         swizzle[3] = alpha_swizzle;
-         swizzle[alpha_swizzle] += 4;
-         return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
-      }
-   }
-   assert(0);
-   return bld->base.undef;
-}
-
-
-/**
- * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
- */
-static LLVMValueRef
-lp_build_blend_factor(struct lp_build_blend_context *bld,
-                      LLVMValueRef factor1,
-                      unsigned rgb_factor,
-                      unsigned alpha_factor,
-                      unsigned alpha_swizzle)
-{
-   LLVMValueRef rgb_factor_;
-   LLVMValueRef alpha_factor_;
-   LLVMValueRef factor2;
-   enum lp_build_blend_swizzle rgb_swizzle;
-
-   rgb_factor_   = lp_build_blend_factor_unswizzled(bld, rgb_factor,   FALSE);
-   alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
-
-   rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
-
-   factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
-
-   return lp_build_mul(&bld->base, factor1, factor2);
-}
-
-
-/**
- * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
- */
-static LLVMValueRef
-lp_build_blend_func(struct lp_build_blend_context *bld,
-                    unsigned func,
-                    LLVMValueRef term1, 
-                    LLVMValueRef term2)
-{
-   switch (func) {
-   case PIPE_BLEND_ADD:
-      return lp_build_add(&bld->base, term1, term2);
-      break;
-   case PIPE_BLEND_SUBTRACT:
-      return lp_build_sub(&bld->base, term1, term2);
-   case PIPE_BLEND_REVERSE_SUBTRACT:
-      return lp_build_sub(&bld->base, term2, term1);
-   case PIPE_BLEND_MIN:
-      return lp_build_min(&bld->base, term1, term2);
-   case PIPE_BLEND_MAX:
-      return lp_build_max(&bld->base, term1, term2);
-   default:
-      assert(0);
-      return bld->base.zero;
-   }
-}
-
-
-LLVMValueRef
-lp_build_blend(LLVMBuilderRef builder,
-               const struct pipe_blend_state *blend,
-               union lp_type type,
-               LLVMValueRef src,
-               LLVMValueRef dst,
-               LLVMValueRef const_,
-               unsigned alpha_swizzle)
-{
-   struct lp_build_blend_context bld;
-   LLVMValueRef src_term;
-   LLVMValueRef dst_term;
-
-   /* It makes no sense to blend unless values are normalized */
-   assert(type.norm);
-
-   /* Setup build context */
-   memset(&bld, 0, sizeof bld);
-   lp_build_context_init(&bld.base, builder, type);
-   bld.src = src;
-   bld.dst = dst;
-   bld.const_ = const_;
-
-   /* TODO: There are still a few optimization oportunities here. For certain
-    * combinations it is possible to reorder the operations and therefor saving
-    * some instructions. */
-
-   src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle);
-   dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle);
-
-#ifdef DEBUG
-   LLVMSetValueName(src_term, "src_term");
-   LLVMSetValueName(dst_term, "dst_term");
-#endif
-
-   if(blend->rgb_func == blend->alpha_func) {
-      return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
-   }
-   else {
-      /* Seperate RGB / A functions */
-
-      LLVMValueRef rgb;
-      LLVMValueRef alpha;
-
-      rgb   = lp_build_blend_func(&bld, blend->rgb_func,   src_term, dst_term);
-      alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term);
-
-      return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
-   }
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
new file mode 100644
index 0000000000..36f53dae93
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -0,0 +1,94 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_BLEND_H
+#define LP_BLD_BLEND_H
+
+
+/**
+ * @file
+ * LLVM IR building helpers interfaces.
+ *
+ * We use LLVM-C bindings for now. They are not documented, but follow the C++
+ * interfaces very closely, and appear to be complete enough for code
+ * generation. See
+ * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
+ * for a standalone example.
+ */
+
+#include <llvm-c/Core.h>  
+ 
+#include "pipe/p_format.h"
+
+
+struct pipe_blend_state;
+union lp_type;
+struct lp_build_context;
+
+
+/**
+ * Whether the blending function is commutative or not.
+ */
+boolean
+lp_build_blend_func_commutative(unsigned func);
+
+
+/**
+ * Whether the blending functions are the reverse of each other.
+ */
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func);
+
+
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
+                    unsigned func,
+                    LLVMValueRef term1,
+                    LLVMValueRef term2);
+
+
+LLVMValueRef
+lp_build_blend_aos(LLVMBuilderRef builder,
+                   const struct pipe_blend_state *blend,
+                   union lp_type type,
+                   LLVMValueRef src,
+                   LLVMValueRef dst,
+                   LLVMValueRef const_,
+                   unsigned alpha_swizzle);
+
+
+void
+lp_build_blend_soa(LLVMBuilderRef builder,
+                   const struct pipe_blend_state *blend,
+                   union lp_type type,
+                   LLVMValueRef src[4],
+                   LLVMValueRef dst[4],
+                   LLVMValueRef const_[4],
+                   LLVMValueRef res[4]);
+
+
+#endif /* !LP_BLD_BLEND_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
new file mode 100644
index 0000000000..e4a57af94c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -0,0 +1,341 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- AOS form.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_blend.h"
+
+
+/**
+ * State shared by the AoS blend helpers. We may use the same values several
+ * times, so we keep them here to avoid recomputing them. Also reusing the
+ * values allows simplifications that LLVM passes wouldn't normally do.
+ */
+struct lp_build_blend_aos_context
+{
+   struct lp_build_context base;
+   /* Blend inputs, stored by lp_build_blend_aos() from its arguments. */
+   LLVMValueRef src;
+   LLVMValueRef dst;
+   LLVMValueRef const_;
+
+   LLVMValueRef inv_src;    /* lp_build_comp() of src, built on first use */
+   LLVMValueRef inv_dst;    /* lp_build_comp() of dst, built on first use */
+   LLVMValueRef inv_const;  /* lp_build_comp() of const_, built on first use */
+   LLVMValueRef saturate;   /* lp_build_min() of src and inv_dst, built on first use */
+   /* NOTE(review): the four factor slots below are not referenced in this file. */
+   LLVMValueRef rgb_src_factor;
+   LLVMValueRef alpha_src_factor;
+   LLVMValueRef rgb_dst_factor;
+   LLVMValueRef alpha_dst_factor;
+};
+
+
+/**
+ * Pick the vector a blend factor derives from, before channel swizzling;
+ * alpha is TRUE for the alpha channel. Complements are built once and cached.
+ */
+static LLVMValueRef
+lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
+                                 unsigned factor,
+                                 boolean alpha)
+{
+   struct lp_build_context *base = &bld->base;
+
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO:
+      return base->zero;
+   case PIPE_BLENDFACTOR_ONE:
+      return base->one;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      return bld->src;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      return bld->dst;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      return bld->const_;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      /* Alpha uses one; the other channels use min(src, inv_dst). */
+      if(alpha)
+         return base->one;
+      if(!bld->inv_dst)
+         bld->inv_dst = lp_build_comp(base, bld->dst);
+      if(!bld->saturate)
+         bld->saturate = lp_build_min(base, bld->src, bld->inv_dst);
+      return bld->saturate;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      if(!bld->inv_src)
+         bld->inv_src = lp_build_comp(base, bld->src);
+      return bld->inv_src;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      if(!bld->inv_dst)
+         bld->inv_dst = lp_build_comp(base, bld->dst);
+      return bld->inv_dst;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      if(!bld->inv_const)
+         bld->inv_const = lp_build_comp(base, bld->const_);
+      return bld->inv_const;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      /* TODO: SRC1 factors are not implemented yet */
+   default:
+      assert(0);
+      return base->zero;
+   }
+}
+
+
+enum lp_build_blend_swizzle {
+   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,   /* use the vector's channels as they are */
+   LP_BUILD_BLEND_SWIZZLE_AAAA = 1,   /* use the channel at alpha_swizzle for all (see lp_build_blend_swizzle) */
+};
+
+
+/**
+ * Shuffle for a factor's vector: AAAA for alpha/saturate factors, else RGBA.
+ */
+static enum lp_build_blend_swizzle
+lp_build_blend_factor_swizzle(unsigned factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      return LP_BUILD_BLEND_SWIZZLE_AAAA;
+   case PIPE_BLENDFACTOR_ZERO:
+   case PIPE_BLENDFACTOR_ONE:
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      return LP_BUILD_BLEND_SWIZZLE_RGBA;
+   default:
+      assert(0);
+      return LP_BUILD_BLEND_SWIZZLE_RGBA;
+   }
+}
+
+
+/**
+ * Combine the rgb and alpha vectors into a single AoS vector. alpha_swizzle
+ * is the channel index handed to the select/broadcast/swizzle helpers, and
+ * rgb_swizzle tells whether rgb is used per channel (RGBA) or not (AAAA).
+ */
+static LLVMValueRef
+lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
+                       LLVMValueRef rgb,
+                       LLVMValueRef alpha,
+                       enum lp_build_blend_swizzle rgb_swizzle,
+                       unsigned alpha_swizzle)
+{
+   boolean cond[4] = {0, 0, 0, 0};
+   unsigned char swizzle[4];
+
+   switch (rgb_swizzle) {
+   case LP_BUILD_BLEND_SWIZZLE_RGBA:
+      if(rgb == alpha)
+         return rgb;
+      cond[alpha_swizzle] = 1;
+      return lp_build_select_aos(&bld->base, alpha, rgb, cond);
+   case LP_BUILD_BLEND_SWIZZLE_AAAA:
+      if(rgb == alpha)
+         return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
+      swizzle[0] = swizzle[1] = swizzle[2] = swizzle[3] = alpha_swizzle;
+      swizzle[alpha_swizzle] += 4;   /* index >= 4: presumably selects from alpha */
+      return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
+   default:
+      assert(0);
+      return bld->base.undef;
+   }
+}
+
+
+/**
+ * Build one blend term: factor1 times the factor selected by rgb_factor and
+ * alpha_factor (PIPE_BLENDFACTOR_x values).
+ *
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
+ */
+static LLVMValueRef
+lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
+                      LLVMValueRef factor1,
+                      unsigned rgb_factor,
+                      unsigned alpha_factor,
+                      unsigned alpha_swizzle)
+{
+   /* Resolve the RGB vector before the alpha one so IR is emitted in that order. */
+   LLVMValueRef rgb = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
+   LLVMValueRef alpha = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
+   LLVMValueRef factor2;
+
+   factor2 = lp_build_blend_swizzle(bld, rgb, alpha,
+                                    lp_build_blend_factor_swizzle(rgb_factor),
+                                    alpha_swizzle);
+
+   return lp_build_mul(&bld->base, factor1, factor2);
+}
+
+
+boolean
+lp_build_blend_func_commutative(unsigned func)
+{
+   /* ADD, MIN and MAX give the same result with their operands exchanged. */
+   if(func == PIPE_BLEND_ADD || func == PIPE_BLEND_MIN || func == PIPE_BLEND_MAX)
+      return TRUE;
+
+   /* The two subtractions depend on operand order. */
+   if(func == PIPE_BLEND_SUBTRACT || func == PIPE_BLEND_REVERSE_SUBTRACT)
+      return FALSE;
+
+   /* Unknown blend function: trip the assertion, then fall back to TRUE
+    * so callers still get a value. */
+   assert(0);
+   return TRUE;
+}
+
+
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
+{
+   /*
+    * Only SUBTRACT and REVERSE_SUBTRACT mirror each other: one computes
+    * term1 - term2 and the other term2 - term1. Two equal functions can
+    * never satisfy either pairing below, so they yield FALSE.
+    */
+   return (rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) ||
+          (rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT);
+}
+
+
+/**
+ * Emit the blend equation func (a PIPE_BLEND_x value) applied to two terms.
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
+ */
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
+                    unsigned func,
+                    LLVMValueRef term1,
+                    LLVMValueRef term2)
+{
+   if(func == PIPE_BLEND_ADD)
+      return lp_build_add(bld, term1, term2);
+   if(func == PIPE_BLEND_SUBTRACT)
+      return lp_build_sub(bld, term1, term2);
+   /* Reverse subtract is a plain subtract with the operands exchanged. */
+   if(func == PIPE_BLEND_REVERSE_SUBTRACT)
+      return lp_build_sub(bld, term2, term1);
+   if(func == PIPE_BLEND_MIN)
+      return lp_build_min(bld, term1, term2);
+   if(func == PIPE_BLEND_MAX)
+      return lp_build_max(bld, term1, term2);
+
+   /* Not a valid blend function. */
+   assert(0);
+   return bld->zero;
+}
+
+
+/**
+ * Generate the IR that blends src with dst in AoS form: the blend function
+ * is applied to src * src_factor and dst * dst_factor. const_ is the blend
+ * constant and alpha_swizzle the channel index treated as alpha.
+ */
+LLVMValueRef
+lp_build_blend_aos(LLVMBuilderRef builder,
+                   const struct pipe_blend_state *blend,
+                   union lp_type type,
+                   LLVMValueRef src,
+                   LLVMValueRef dst,
+                   LLVMValueRef const_,
+                   unsigned alpha_swizzle)
+{
+   struct lp_build_blend_aos_context ctx;
+   LLVMValueRef terms[2];
+   LLVMValueRef rgb_res;
+   LLVMValueRef alpha_res;
+
+   /* It makes no sense to blend unless values are normalized */
+   assert(type.norm);
+
+   /* Start from a zeroed context so the lazily built values read as unset */
+   memset(&ctx, 0, sizeof ctx);
+   lp_build_context_init(&ctx.base, builder, type);
+   ctx.src = src;
+   ctx.dst = dst;
+   ctx.const_ = const_;
+
+   /* TODO: There are still a few optimization opportunities here. For certain
+    * combinations it is possible to reorder the operations and therefore save
+    * some instructions. */
+   terms[0] = lp_build_blend_factor(&ctx, src, blend->rgb_src_factor,
+                                    blend->alpha_src_factor, alpha_swizzle);
+   terms[1] = lp_build_blend_factor(&ctx, dst, blend->rgb_dst_factor,
+                                    blend->alpha_dst_factor, alpha_swizzle);
+
+#ifdef DEBUG
+   LLVMSetValueName(terms[0], "src_term");
+   LLVMSetValueName(terms[1], "dst_term");
+#endif
+
+   rgb_res = lp_build_blend_func(&ctx.base, blend->rgb_func, terms[0], terms[1]);
+   if(blend->rgb_func == blend->alpha_func)
+      return rgb_res;
+
+   /* Separate RGB / A functions: compute alpha apart, then merge it back in */
+   alpha_res = lp_build_blend_func(&ctx.base, blend->alpha_func, terms[0], terms[1]);
+   return lp_build_blend_swizzle(&ctx, rgb_res, alpha_res, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
new file mode 100644
index 0000000000..1ef1718cfd
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -0,0 +1,237 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- SoA.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_blend.h"
+
+
+/**
+ * State shared by the SoA blend code. We may use the same values several
+ * times, so we keep them here to avoid recomputing them. Also reusing the
+ * values allows simplifications that LLVM passes wouldn't normally do.
+ */
+struct lp_build_blend_soa_context
+{
+   struct lp_build_context base;
+
+   LLVMValueRef src[4];   /* source channels; index 3 is alpha */
+   LLVMValueRef dst[4];   /* destination channels */
+   LLVMValueRef con[4];   /* blend constant channels */
+
+   LLVMValueRef inv_src[4];   /* lp_build_comp() of src[i], built on first use */
+   LLVMValueRef inv_dst[4];   /* lp_build_comp() of dst[i], built on first use */
+   LLVMValueRef inv_con[4];   /* lp_build_comp() of con[i], built on first use */
+
+   LLVMValueRef src_alpha_saturate;   /* lp_build_min() of src[3] and inv_dst[3] */
+
+   /**
+    * Operands of every multiplication, kept in a table in order to eliminate
+    * redundant multiplications later: [0][c] and [1][c] are multiplied.
+    */
+   LLVMValueRef factor[2][8];
+
+   /**
+    * Table with all terms: [0..3] for src, [4..7] for dst.
+    */
+   LLVMValueRef term[8];
+};
+
+
+/**
+ * Return the value of a blend factor for one channel.
+ *
+ * COLOR factors read channel i, ALPHA factors always read channel 3;
+ * complemented values are created on first use and cached in the context.
+ * @param factor  one of the PIPE_BLENDFACTOR_x values
+ * @param i       channel index; 3 is the alpha channel
+ */
+static LLVMValueRef
+lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
+                          unsigned factor, unsigned i)
+{
+   struct lp_build_context *base = &bld->base;
+   unsigned chan = 3;   /* channel read by the *_ALPHA factors */
+
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO:
+      return base->zero;
+   case PIPE_BLENDFACTOR_ONE:
+      return base->one;
+
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      return bld->src[i];
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      return bld->src[3];
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      return bld->dst[i];
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      return bld->dst[3];
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      return bld->con[i];
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      return bld->con[3];
+
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      /* Alpha gets one; RGB share min(src alpha, complement of dst alpha). */
+      if(i == 3)
+         return base->one;
+      if(!bld->inv_dst[3])
+         bld->inv_dst[3] = lp_build_comp(base, bld->dst[3]);
+      if(!bld->src_alpha_saturate)
+         bld->src_alpha_saturate = lp_build_min(base, bld->src[3], bld->inv_dst[3]);
+      return bld->src_alpha_saturate;
+
+   /* The COLOR variants complement channel i, the ALPHA ones channel 3. */
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      chan = i;
+      /* fall through */
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      if(!bld->inv_src[chan])
+         bld->inv_src[chan] = lp_build_comp(base, bld->src[chan]);
+      return bld->inv_src[chan];
+
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      chan = i;
+      /* fall through */
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      if(!bld->inv_dst[chan])
+         bld->inv_dst[chan] = lp_build_comp(base, bld->dst[chan]);
+      return bld->inv_dst[chan];
+
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      chan = i;
+      /* fall through */
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      if(!bld->inv_con[chan])
+         bld->inv_con[chan] = lp_build_comp(base, bld->con[chan]);
+      return bld->inv_con[chan];
+
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      /* TODO: SRC1 factors are not implemented yet */
+   default:
+      assert(0);
+      return base->zero;
+   }
+}
+
+
+/**
+ * Generate IR that blends four SoA channels (R, G, B, A in src/dst/con[0..3])
+ * according to the blend state, writing the blended channels to res[0..3].
+ * Identical multiplications and blend operations are emitted only once.
+ */
+void
+lp_build_blend_soa(LLVMBuilderRef builder,
+                   const struct pipe_blend_state *blend,
+                   union lp_type type,
+                   LLVMValueRef src[4],
+                   LLVMValueRef dst[4],
+                   LLVMValueRef con[4],
+                   LLVMValueRef res[4])
+{
+   struct lp_build_blend_soa_context bld;
+   unsigned i, j;
+
+   /* Setup build context */
+   memset(&bld, 0, sizeof bld);
+   lp_build_context_init(&bld.base, builder, type);
+   for (i = 0; i < 4; ++i) {
+      bld.src[i] = src[i];
+      bld.dst[i] = dst[i];
+      bld.con[i] = con[i];
+   }
+
+   /* Compute src/dst factors: columns 0..3 are src, columns 4..7 are dst. */
+   for (i = 0; i < 4; ++i) {
+      unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor;
+      unsigned dst_factor = i < 3 ? blend->rgb_dst_factor : blend->alpha_dst_factor;
+      bld.factor[0][0 + i] = src[i];
+      bld.factor[1][0 + i] = lp_build_blend_soa_factor(&bld, src_factor, i);
+      bld.factor[0][4 + i] = dst[i];
+      bld.factor[1][4 + i] = lp_build_blend_soa_factor(&bld, dst_factor, i);
+   }
+
+   /* Compute src/dst terms */
+   for (i = 0; i < 8; ++i) {
+      /* See if this multiplication (in either operand order) was already emitted */
+      for(j = 0; j < i; ++j) {
+         if((bld.factor[0][j] == bld.factor[0][i] &&
+             bld.factor[1][j] == bld.factor[1][i]) ||
+            (bld.factor[0][j] == bld.factor[1][i] &&
+             bld.factor[1][j] == bld.factor[0][i]))
+            break;
+      }
+
+      if(j < i)
+         bld.term[i] = bld.term[j];
+      else
+         bld.term[i] = lp_build_mul(&bld.base, bld.factor[0][i], bld.factor[1][i]);
+   }
+
+   /* Combine terms */
+   for (i = 0; i < 4; ++i) {
+      unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func;
+      boolean func_commutative = lp_build_blend_func_commutative(func);
+
+      /* See if an earlier channel already produced this exact result */
+      for(j = 0; j < i; ++j) {
+         unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func;
+         boolean func_reverse = lp_build_blend_func_reverse(func, prev_func);
+         boolean func_same = func == prev_func;
+         /* Compare the terms, not the factor rows; reuse is only valid for the
+          * same function (operands swapped if commutative) or its reverse. */
+         if((func_same &&
+             bld.term[0 + j] == bld.term[0 + i] &&
+             bld.term[4 + j] == bld.term[4 + i]) ||
+            (((func_same && func_commutative) || func_reverse) &&
+             bld.term[0 + j] == bld.term[4 + i] &&
+             bld.term[4 + j] == bld.term[0 + i]))
+            break;
+      }
+
+      if(j < i)
+         res[i] = res[j];
+      else
+         res[i] = lp_build_blend_func(&bld.base, func, bld.term[i + 0], bld.term[i + 4]);
+   }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index c8901fea98..8bf5508bd4 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -37,12 +37,19 @@
  */
 
 
-#include "lp_bld.h"
 #include "lp_bld_type.h"
 #include "lp_bld_arit.h"
+#include "lp_bld_blend.h"
 #include "lp_test.h"
 
 
+enum vector_mode
+{
+   AoS = 0,
+   SoA = 1
+};
+
+
 typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
 
 
@@ -52,6 +59,7 @@ write_tsv_header(FILE *fp)
    fprintf(fp,
            "result\t"
            "cycles_per_channel\t"
+           "mode\t"
            "type\t"
            "sep_func\t"
            "sep_src_factor\t"
@@ -70,13 +78,22 @@ write_tsv_header(FILE *fp)
 static void
 write_tsv_row(FILE *fp,
               const struct pipe_blend_state *blend,
+              enum vector_mode mode,
               union lp_type type,
               double cycles,
               boolean success)
 {
    fprintf(fp, "%s\t", success ? "pass" : "fail");
 
-   fprintf(fp, "%.1f\t", cycles / type.length);
+   if (mode == AoS) {
+      fprintf(fp, "%.1f\t", cycles / type.length);
+      fprintf(fp, "aos\t");
+   }
+
+   if (mode == SoA) {
+      fprintf(fp, "%.1f\t", cycles / (4 * type.length));
+      fprintf(fp, "soa\t");
+   }
 
    fprintf(fp, "%s%u%sx%u\t",
            type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
@@ -106,10 +123,19 @@ write_tsv_row(FILE *fp,
 static void
 dump_blend_type(FILE *fp,
                 const struct pipe_blend_state *blend,
+                enum vector_mode mode,
                 union lp_type type)
 {
+   fprintf(fp, "%s", mode ? "soa" : "aos");
+
+   fprintf(fp, " type=%s%u%sx%u",
+           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
+           type.width,
+           type.norm ? "n" : "",
+           type.length);
+
    fprintf(fp,
-           "%s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
+           " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
            "rgb_func",         debug_dump_blend_func(blend->rgb_func, TRUE),
            "rgb_src_factor",   debug_dump_blend_factor(blend->rgb_src_factor, TRUE),
            "rgb_dst_factor",   debug_dump_blend_factor(blend->rgb_dst_factor, TRUE),
@@ -117,12 +143,6 @@ dump_blend_type(FILE *fp,
            "alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE),
            "alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE));
 
-   fprintf(fp, " type=%s%u%sx%u",
-           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
-           type.width,
-           type.norm ? "n" : "",
-           type.length);
-
    fprintf(fp, " ...\n");
    fflush(fp);
 }
@@ -131,6 +151,7 @@ dump_blend_type(FILE *fp,
 static LLVMValueRef
 add_blend_test(LLVMModuleRef module,
                const struct pipe_blend_state *blend,
+               enum vector_mode mode,
                union lp_type type)
 {
    LLVMTypeRef ret_type;
@@ -143,10 +164,6 @@ add_blend_test(LLVMModuleRef module,
    LLVMValueRef res_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
-   LLVMValueRef src;
-   LLVMValueRef dst;
-   LLVMValueRef con;
-   LLVMValueRef res;
 
    ret_type = LLVMInt64Type();
    vec_type = lp_build_vec_type(type);
@@ -163,15 +180,51 @@ add_blend_test(LLVMModuleRef module,
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   src = LLVMBuildLoad(builder, src_ptr, "src");
-   dst = LLVMBuildLoad(builder, dst_ptr, "dst");
-   con = LLVMBuildLoad(builder, const_ptr, "const");
+   if (mode == AoS) {
+      LLVMValueRef src;
+      LLVMValueRef dst;
+      LLVMValueRef con;
+      LLVMValueRef res;
+
+      src = LLVMBuildLoad(builder, src_ptr, "src");
+      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
+      con = LLVMBuildLoad(builder, const_ptr, "const");
 
-   res = lp_build_blend(builder, blend, type, src, dst, con, 3);
+      res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);
 
-   LLVMSetValueName(res, "res");
+      LLVMSetValueName(res, "res");
 
-   LLVMBuildStore(builder, res, res_ptr);
+      LLVMBuildStore(builder, res, res_ptr);
+   }
+
+   if (mode == SoA) {
+      LLVMValueRef src[4];
+      LLVMValueRef dst[4];
+      LLVMValueRef con[4];
+      LLVMValueRef res[4];
+      char src_name[5] = "src?";
+      char dst_name[5] = "dst?";
+      char con_name[5] = "con?";
+      char res_name[5] = "res?";
+      unsigned i;
+
+      for(i = 0; i < 4; ++i) {
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+         con_name[3] = dst_name[3] = src_name[3] = "rgba"[i];
+         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), src_name);
+         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), dst_name);
+         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), con_name);
+      }
+
+      lp_build_blend_soa(builder, blend, type, src, dst, con, res);
+
+      for(i = 0; i < 4; ++i) {
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+         res_name[3] = "rgba"[i];
+         LLVMSetValueName(res[i], res_name);
+         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
+      }
+   }
 
    LLVMBuildRetVoid(builder);;
 
@@ -415,6 +468,7 @@ static boolean
 test_one(unsigned verbose,
          FILE *fp,
          const struct pipe_blend_state *blend,
+         enum vector_mode mode,
          union lp_type type)
 {
    LLVMModuleRef module = NULL;
@@ -431,11 +485,11 @@ test_one(unsigned verbose,
    unsigned i, j;
 
    if(verbose >= 1)
-      dump_blend_type(stdout, blend, type);
+      dump_blend_type(stdout, blend, mode, type);
 
    module = LLVMModuleCreateWithName("test");
 
-   func = add_blend_test(module, blend, type);
+   func = add_blend_test(module, blend, mode, type);
 
    if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
       LLVMDumpModule(module);
@@ -446,7 +500,7 @@ test_one(unsigned verbose,
    provider = LLVMCreateModuleProviderForExistingModule(module);
    if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
       if(verbose < 1)
-         dump_blend_type(stderr, blend, type);
+         dump_blend_type(stderr, blend, mode, type);
       fprintf(stderr, "%s\n", error);
       LLVMDisposeMessage(error);
       abort();
@@ -474,66 +528,148 @@ test_one(unsigned verbose,
 
    success = TRUE;
    for(i = 0; i < n && success; ++i) {
-      uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-      uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-      uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-      uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-      uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-      int64_t start_counter = 0;
-      int64_t end_counter = 0;
-
-      random_vec(type, src);
-      random_vec(type, dst);
-      random_vec(type, con);
-
-      {
-         double fsrc[LP_MAX_VECTOR_LENGTH];
-         double fdst[LP_MAX_VECTOR_LENGTH];
-         double fcon[LP_MAX_VECTOR_LENGTH];
-         double fref[LP_MAX_VECTOR_LENGTH];
-
-         read_vec(type, src, fsrc);
-         read_vec(type, dst, fdst);
-         read_vec(type, con, fcon);
-
-         for(j = 0; j < type.length; j += 4)
-            compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
-
-         write_vec(type, ref, fref);
+      if(mode == AoS) {
+         uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         int64_t start_counter = 0;
+         int64_t end_counter = 0;
+
+         random_vec(type, src);
+         random_vec(type, dst);
+         random_vec(type, con);
+
+         {
+            double fsrc[LP_MAX_VECTOR_LENGTH];
+            double fdst[LP_MAX_VECTOR_LENGTH];
+            double fcon[LP_MAX_VECTOR_LENGTH];
+            double fref[LP_MAX_VECTOR_LENGTH];
+
+            read_vec(type, src, fsrc);
+            read_vec(type, dst, fdst);
+            read_vec(type, con, fcon);
+
+            for(j = 0; j < type.length; j += 4)
+               compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
+
+            write_vec(type, ref, fref);
+         }
+
+         start_counter = rdtsc();
+         blend_test_ptr(src, dst, con, res);
+         end_counter = rdtsc();
+
+         cycles[i] = end_counter - start_counter;
+
+         if(!compare_vec(type, res, ref)) {
+            success = FALSE;
+
+            if(verbose < 1)
+               dump_blend_type(stderr, blend, mode, type);
+            fprintf(stderr, "MISMATCH\n");
+
+            fprintf(stderr, "  Src: ");
+            dump_vec(stderr, type, src);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, "  Dst: ");
+            dump_vec(stderr, type, dst);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, "  Con: ");
+            dump_vec(stderr, type, con);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, "  Res: ");
+            dump_vec(stderr, type, res);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, "  Ref: ");
+            dump_vec(stderr, type, ref);
+            fprintf(stderr, "\n");
+         }
       }
 
-      start_counter = rdtsc();
-      blend_test_ptr(src, dst, con, res);
-      end_counter = rdtsc();
+      if(mode == SoA) {
+         const unsigned stride = type.length*type.width/8;
+         uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         int64_t start_counter = 0;
+         int64_t end_counter = 0;
+         boolean mismatch;
+
+         for(j = 0; j < 4; ++j) {
+            random_vec(type, src + j*stride);
+            random_vec(type, dst + j*stride);
+            random_vec(type, con + j*stride);
+         }
 
-      cycles[i] = end_counter - start_counter;
+         {
+            double fsrc[4];
+            double fdst[4];
+            double fcon[4];
+            double fref[4];
+            unsigned k;
+
+            for(k = 0; k < type.length; ++k) {
+               for(j = 0; j < 4; ++j) {
+                  fsrc[j] = read_elem(type, src + j*stride, k);
+                  fdst[j] = read_elem(type, dst + j*stride, k);
+                  fcon[j] = read_elem(type, con + j*stride, k);
+               }
 
-      success = compare_vec(type, res, ref);
+               compute_blend_ref(blend, fsrc, fdst, fcon, fref);
 
-      if (!success) {
-         if(verbose < 1)
-            dump_blend_type(stderr, blend, type);
-         fprintf(stderr, "MISMATCH\n");
+               for(j = 0; j < 4; ++j)
+                  write_elem(type, ref + j*stride, k, fref[j]);
+            }
+         }
+
+         start_counter = rdtsc();
+         blend_test_ptr(src, dst, con, res);
+         end_counter = rdtsc();
+
+         cycles[i] = end_counter - start_counter;
+
+         mismatch = FALSE;
+         for (j = 0; j < 4; ++j)
+            if(!compare_vec(type, res + j*stride, ref + j*stride))
+               mismatch = TRUE;
 
-         fprintf(stderr, "  Src: ");
-         dump_vec(stderr, type, src);
-         fprintf(stderr, "\n");
+         if (mismatch) {
+            success = FALSE;
 
-         fprintf(stderr, "  Dst: ");
-         dump_vec(stderr, type, dst);
-         fprintf(stderr, "\n");
+            if(verbose < 1)
+               dump_blend_type(stderr, blend, mode, type);
+            fprintf(stderr, "MISMATCH\n");
+            for(j = 0; j < 4; ++j) {
+               char channel = "RGBA"[j];
+               fprintf(stderr, "  Src%c: ", channel);
+               dump_vec(stderr, type, src + j*stride);
+               fprintf(stderr, "\n");
 
-         fprintf(stderr, "  Con: ");
-         dump_vec(stderr, type, con);
-         fprintf(stderr, "\n");
+               fprintf(stderr, "  Dst%c: ", channel);
+               dump_vec(stderr, type, dst + j*stride);
+               fprintf(stderr, "\n");
 
-         fprintf(stderr, "  Res: ");
-         dump_vec(stderr, type, res);
-         fprintf(stderr, "\n");
+               fprintf(stderr, "  Con%c: ", channel);
+               dump_vec(stderr, type, con + j*stride);
+               fprintf(stderr, "\n");
 
-         fprintf(stderr, "  Ref: ");
-         dump_vec(stderr, type, ref);
-         fprintf(stderr, "\n");
+               fprintf(stderr, "  Res%c: ", channel);
+               dump_vec(stderr, type, res + j*stride);
+               fprintf(stderr, "\n");
+
+               fprintf(stderr, "  Ref%c: ", channel);
+               dump_vec(stderr, type, ref + j*stride);
+               fprintf(stderr, "\n");
+            }
+         }
       }
    }
 
@@ -569,7 +705,7 @@ test_one(unsigned verbose,
    }
 
    if(fp)
-      write_tsv_row(fp, blend, type, cycles_avg, success);
+      write_tsv_row(fp, blend, mode, type, cycles_avg, success);
 
    if (!success) {
       if(verbose < 2)
@@ -650,6 +786,7 @@ test_all(unsigned verbose, FILE *fp)
    const unsigned *alpha_src_factor;
    const unsigned *alpha_dst_factor;
    struct pipe_blend_state blend;
+   enum vector_mode mode;
    const union lp_type *type;
    bool success = TRUE;
 
@@ -659,24 +796,26 @@ test_all(unsigned verbose, FILE *fp)
             for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
                for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
                   for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
-                     for(type = blend_types; type < &blend_types[num_types]; ++type) {
-
-                        if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
-                           *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
-                           continue;
-
-                        memset(&blend, 0, sizeof blend);
-                        blend.blend_enable      = 1;
-                        blend.rgb_func          = *rgb_func;
-                        blend.rgb_src_factor    = *rgb_src_factor;
-                        blend.rgb_dst_factor    = *rgb_dst_factor;
-                        blend.alpha_func        = *alpha_func;
-                        blend.alpha_src_factor  = *alpha_src_factor;
-                        blend.alpha_dst_factor  = *alpha_dst_factor;
-
-                        if(!test_one(verbose, fp, &blend, *type))
-                          success = FALSE;
-
+                     for(mode = 0; mode < 2; ++mode) {
+                        for(type = blend_types; type < &blend_types[num_types]; ++type) {
+
+                           if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+                              *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+                              continue;
+
+                           memset(&blend, 0, sizeof blend);
+                           blend.blend_enable      = 1;
+                           blend.rgb_func          = *rgb_func;
+                           blend.rgb_src_factor    = *rgb_src_factor;
+                           blend.rgb_dst_factor    = *rgb_dst_factor;
+                           blend.alpha_func        = *alpha_func;
+                           blend.alpha_src_factor  = *alpha_src_factor;
+                           blend.alpha_dst_factor  = *alpha_dst_factor;
+
+                           if(!test_one(verbose, fp, &blend, mode, *type))
+                             success = FALSE;
+
+                        }
                      }
                   }
                }
@@ -699,6 +838,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
    const unsigned *alpha_src_factor;
    const unsigned *alpha_dst_factor;
    struct pipe_blend_state blend;
+   enum vector_mode mode;
    const union lp_type *type;
    unsigned long i;
    bool success = TRUE;
@@ -717,20 +857,21 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
          alpha_dst_factor = &blend_factors[random() % num_factors];
       } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
 
-      for(type = blend_types; type < &blend_types[num_types]; ++type) {
+      mode = random() & 1;
 
-         memset(&blend, 0, sizeof blend);
-         blend.blend_enable      = 1;
-         blend.rgb_func          = *rgb_func;
-         blend.rgb_src_factor    = *rgb_src_factor;
-         blend.rgb_dst_factor    = *rgb_dst_factor;
-         blend.alpha_func        = *alpha_func;
-         blend.alpha_src_factor  = *alpha_src_factor;
-         blend.alpha_dst_factor  = *alpha_dst_factor;
+      type = &blend_types[random() % num_types];
 
-         if(!test_one(verbose, fp, &blend, *type))
-           success = FALSE;
-      }
+      memset(&blend, 0, sizeof blend);
+      blend.blend_enable      = 1;
+      blend.rgb_func          = *rgb_func;
+      blend.rgb_src_factor    = *rgb_src_factor;
+      blend.rgb_dst_factor    = *rgb_dst_factor;
+      blend.alpha_func        = *alpha_func;
+      blend.alpha_src_factor  = *alpha_src_factor;
+      blend.alpha_dst_factor  = *alpha_dst_factor;
+
+      if(!test_one(verbose, fp, &blend, mode, *type))
+        success = FALSE;
    }
 
    return success;
-- 
cgit v1.2.3


From 0318f3e53eed88f0feea6e7a4fd8a8d9becc9774 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 9 Aug 2009 17:22:01 +0100
Subject: llvmpipe: Split the texture cache from the color/depth/stencil cache.

---
 src/gallium/drivers/llvmpipe/SConscript         |   1 +
 src/gallium/drivers/llvmpipe/lp_context.c       |   5 +-
 src/gallium/drivers/llvmpipe/lp_context.h       |   3 +-
 src/gallium/drivers/llvmpipe/lp_flush.c         |   3 +-
 src/gallium/drivers/llvmpipe/lp_state_derived.c |   3 +-
 src/gallium/drivers/llvmpipe/lp_state_sampler.c |   4 +-
 src/gallium/drivers/llvmpipe/lp_tex_cache.c     | 332 ++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_tex_cache.h     | 154 +++++++++++
 src/gallium/drivers/llvmpipe/lp_tex_sample.c    |  20 +-
 src/gallium/drivers/llvmpipe/lp_tex_sample.h    |   5 +-
 src/gallium/drivers/llvmpipe/lp_texture.c       |   2 +-
 src/gallium/drivers/llvmpipe/lp_tile_cache.c    | 127 ---------
 src/gallium/drivers/llvmpipe/lp_tile_cache.h    |  22 --
 13 files changed, 513 insertions(+), 168 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_tex_cache.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_tex_cache.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 0a8e6e8fad..8565c7e011 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -47,6 +47,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_state_surface.c',
 		'lp_state_vertex.c',
 		'lp_surface.c',
+		'lp_tex_cache.c',
 		'lp_tex_sample.c',
 		'lp_texture.c',
 		'lp_tile_cache.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 7f2c2b6acd..a30db444d4 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -42,6 +42,7 @@
 #include "lp_state.h"
 #include "lp_surface.h"
 #include "lp_tile_cache.h"
+#include "lp_tex_cache.h"
 #include "lp_texture.h"
 #include "lp_winsys.h"
 #include "lp_query.h"
@@ -97,7 +98,7 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
    lp_destroy_tile_cache(llvmpipe->zsbuf_cache);
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
-      lp_destroy_tile_cache(llvmpipe->tex_cache[i]);
+      lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]);
 
    for (i = 0; i < Elements(llvmpipe->constants); i++) {
       if (llvmpipe->constants[i].buffer) {
@@ -220,7 +221,7 @@ llvmpipe_create( struct pipe_screen *screen )
    llvmpipe->zsbuf_cache = lp_create_tile_cache( screen );
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
-      llvmpipe->tex_cache[i] = lp_create_tile_cache( screen );
+      llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen );
 
 
    /* setup quad rendering stages */
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 036585808d..7b5da6ee91 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -43,6 +43,7 @@ struct llvmpipe_vbuf_render;
 struct draw_context;
 struct draw_stage;
 struct llvmpipe_tile_cache;
+struct llvmpipe_tex_tile_cache;
 struct lp_fragment_shader;
 struct lp_vertex_shader;
 
@@ -141,7 +142,7 @@ struct llvmpipe_context {
    struct llvmpipe_tile_cache *zsbuf_cache;
    
    unsigned tex_timestamp;
-   struct llvmpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
+   struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
 
    unsigned use_sse : 1;
    unsigned dump_fs : 1;
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 866d4fb099..f3b43cfce9 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -37,6 +37,7 @@
 #include "lp_surface.h"
 #include "lp_state.h"
 #include "lp_tile_cache.h"
+#include "lp_tex_cache.h"
 #include "lp_winsys.h"
 
 
@@ -52,7 +53,7 @@ llvmpipe_flush( struct pipe_context *pipe,
 
    if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
       for (i = 0; i < llvmpipe->num_textures; i++) {
-         lp_flush_tile_cache(llvmpipe->tex_cache[i]);
+         lp_flush_tex_tile_cache(llvmpipe->tex_cache[i]);
       }
    }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index a2bf27cc67..79861b2d13 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -33,6 +33,7 @@
 #include "draw/draw_private.h"
 #include "lp_context.h"
 #include "lp_screen.h"
+#include "lp_tex_cache.h"
 #include "lp_state.h"
 
 
@@ -211,7 +212,7 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe )
    }
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
-      lp_tile_cache_validate_texture( llvmpipe->tex_cache[i] );
+      lp_tex_tile_cache_validate_texture( llvmpipe->tex_cache[i] );
    }
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 02ae2c17e1..4fef541b1e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -37,7 +37,7 @@
 #include "lp_context.h"
 #include "lp_state.h"
 #include "lp_texture.h"
-#include "lp_tile_cache.h"
+#include "lp_tex_cache.h"
 #include "draw/draw_context.h"
 
 
@@ -97,7 +97,7 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
       struct pipe_texture *tex = i < num ? texture[i] : NULL;
 
       pipe_texture_reference(&llvmpipe->texture[i], tex);
-      lp_tile_cache_set_texture(llvmpipe->tex_cache[i], tex);
+      lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex);
    }
 
    llvmpipe->num_textures = num;
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
new file mode 100644
index 0000000000..984f71688b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -0,0 +1,332 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * Texture tile caching.
+ *
+ * Author:
+ *    Brian Paul
+ */
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_tile.h"
+#include "lp_context.h"
+#include "lp_surface.h"
+#include "lp_texture.h"
+#include "lp_tex_cache.h"
+
+
+
+/**
+ * Return the position in the cache for the tile that contains win pos (x,y).
+ * We currently use a direct mapped cache so this is like a hash key.
+ * At some point we should investigate something more sophisticated, like
+ * an LRU replacement policy.
+ */
+#define CACHE_POS(x, y) \
+   (((x) + (y) * 5) % NUM_ENTRIES)
+
+
+
+/**
+ * Is the tile at (x,y) in cleared state?
+ */
+static INLINE uint
+is_clear_flag_set(const uint *bitvec, union tex_tile_address addr)
+{
+   int pos, bit;
+   pos = addr.bits.y * (MAX_TEX_WIDTH / TEX_TILE_SIZE) + addr.bits.x;
+   assert(pos / 32 < (MAX_TEX_WIDTH / TEX_TILE_SIZE) * (MAX_TEX_HEIGHT / TEX_TILE_SIZE) / 32);
+   bit = bitvec[pos / 32] & (1 << (pos & 31));
+   return bit;
+}
+   
+
+/**
+ * Mark the tile at (x,y) as not cleared.
+ */
+static INLINE void
+clear_clear_flag(uint *bitvec, union tex_tile_address addr)
+{
+   int pos;
+   pos = addr.bits.y * (MAX_TEX_WIDTH / TEX_TILE_SIZE) + addr.bits.x;
+   assert(pos / 32 < (MAX_TEX_WIDTH / TEX_TILE_SIZE) * (MAX_TEX_HEIGHT / TEX_TILE_SIZE) / 32);
+   bitvec[pos / 32] &= ~(1 << (pos & 31));
+}
+   
+
+struct llvmpipe_tex_tile_cache *
+lp_create_tex_tile_cache( struct pipe_screen *screen )
+{
+   struct llvmpipe_tex_tile_cache *tc;
+   uint pos;
+
+   tc = CALLOC_STRUCT( llvmpipe_tex_tile_cache );
+   if (tc) {
+      tc->screen = screen;
+      for (pos = 0; pos < NUM_ENTRIES; pos++) {
+         tc->entries[pos].addr.bits.invalid = 1;
+      }
+      tc->last_tile = &tc->entries[0]; /* any tile */
+   }
+   return tc;
+}
+
+
+void
+lp_destroy_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc)
+{
+   struct pipe_screen *screen;
+   uint pos;
+
+   for (pos = 0; pos < NUM_ENTRIES; pos++) {
+      /*assert(tc->entries[pos].x < 0);*/
+   }
+   if (tc->transfer) {
+      screen = tc->transfer->texture->screen;
+      screen->tex_transfer_destroy(tc->transfer);
+   }
+   if (tc->tex_trans) {
+      screen = tc->tex_trans->texture->screen;
+      screen->tex_transfer_destroy(tc->tex_trans);
+   }
+
+   FREE( tc );
+}
+
+
+void
+lp_tex_tile_cache_map_transfers(struct llvmpipe_tex_tile_cache *tc)
+{
+   if (tc->transfer && !tc->transfer_map)
+      tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer);
+
+   if (tc->tex_trans && !tc->tex_trans_map)
+      tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans);
+}
+
+
+void
+lp_tex_tile_cache_unmap_transfers(struct llvmpipe_tex_tile_cache *tc)
+{
+   if (tc->transfer_map) {
+      tc->screen->transfer_unmap(tc->screen, tc->transfer);
+      tc->transfer_map = NULL;
+   }
+
+   if (tc->tex_trans_map) {
+      tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
+      tc->tex_trans_map = NULL;
+   }
+}
+
+void
+lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc)
+{
+   if (tc->texture) {
+      struct llvmpipe_texture *lpt = llvmpipe_texture(tc->texture);
+      if (lpt->timestamp != tc->timestamp) {
+         /* texture was modified, invalidate all cached tiles */
+         uint i;
+         _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
+         for (i = 0; i < NUM_ENTRIES; i++) {
+            tc->entries[i].addr.bits.invalid = 1;
+         }
+
+         tc->timestamp = lpt->timestamp;
+      }
+   }
+}
+
+/**
+ * Specify the texture to cache.
+ */
+void
+lp_tex_tile_cache_set_texture(struct llvmpipe_tex_tile_cache *tc,
+                          struct pipe_texture *texture)
+{
+   uint i;
+
+   assert(!tc->transfer);
+
+   if (tc->texture != texture) {
+      pipe_texture_reference(&tc->texture, texture);
+
+      if (tc->tex_trans) {
+         struct pipe_screen *screen = tc->tex_trans->texture->screen;
+         
+         if (tc->tex_trans_map) {
+            screen->transfer_unmap(screen, tc->tex_trans);
+            tc->tex_trans_map = NULL;
+         }
+
+         screen->tex_transfer_destroy(tc->tex_trans);
+         tc->tex_trans = NULL;
+      }
+
+      /* mark all entries as invalid/empty */
+      /* XXX we should try to avoid this when the teximage hasn't changed */
+      for (i = 0; i < NUM_ENTRIES; i++) {
+         tc->entries[i].addr.bits.invalid = 1;
+      }
+
+      tc->tex_face = -1; /* any invalid value here */
+   }
+}
+
+
+/**
+ * Flush the tile cache: write all dirty tiles back to the transfer.
+ * any tiles "flagged" as cleared will be "really" cleared.
+ */
+void
+lp_flush_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc)
+{
+   struct pipe_transfer *pt = tc->transfer;
+   int inuse = 0, pos;
+
+   if (pt) {
+      /* caching a drawing transfer */
+      for (pos = 0; pos < NUM_ENTRIES; pos++) {
+         struct llvmpipe_cached_tex_tile *tile = tc->entries + pos;
+         if (!tile->addr.bits.invalid) {
+            pipe_put_tile_rgba(pt,
+                               tile->addr.bits.x * TEX_TILE_SIZE,
+                               tile->addr.bits.y * TEX_TILE_SIZE,
+                               TEX_TILE_SIZE, TEX_TILE_SIZE,
+                               (float *) tile->color);
+            tile->addr.bits.invalid = 1;  /* mark as empty */
+            inuse++;
+         }
+      }
+   }
+   else if (tc->texture) {
+      /* caching a texture, mark all entries as empty */
+      for (pos = 0; pos < NUM_ENTRIES; pos++) {
+         tc->entries[pos].addr.bits.invalid = 1;
+      }
+      tc->tex_face = -1;
+   }
+
+#if 0
+   debug_printf("flushed tiles in use: %d\n", inuse);
+#endif
+}
+
+
+/**
+ * Given the texture face, level, zslice, x and y values, compute
+ * the cache entry position/index where we'd hope to find the
+ * cached texture tile.
+ * This is basically a direct-map cache.
+ * XXX There's probably lots of ways in which we can improve this.
+ */
+static INLINE uint
+tex_cache_pos( union tex_tile_address addr )
+{
+   uint entry = (addr.bits.x + 
+                 addr.bits.y * 9 + 
+                 addr.bits.z * 3 + 
+                 addr.bits.face + 
+                 addr.bits.level * 7);
+
+   return entry % NUM_ENTRIES;
+}
+
+/**
+ * Similar to lp_get_cached_tile() but for textures.
+ * Tiles are read-only and indexed with more params.
+ */
+const struct llvmpipe_cached_tex_tile *
+lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
+                        union tex_tile_address addr )
+{
+   struct pipe_screen *screen = tc->screen;
+   struct llvmpipe_cached_tex_tile *tile;
+   
+   tile = tc->entries + tex_cache_pos( addr );
+
+   if (addr.value != tile->addr.value) {
+
+      /* cache miss.  Most misses are because we've invalidated the
+       * texture cache previously -- most commonly on binding a new
+       * texture.  Currently we effectively flush the cache on texture
+       * bind.
+       */
+#if 0
+      _debug_printf("miss at %u:  x=%d y=%d z=%d face=%d level=%d\n"
+                    "   tile %u:  x=%d y=%d z=%d face=%d level=%d\n",
+                    pos, x/TEX_TILE_SIZE, y/TEX_TILE_SIZE, z, face, level,
+                    pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level);
+#endif
+
+      /* check if we need to get a new transfer */
+      if (!tc->tex_trans ||
+          tc->tex_face != addr.bits.face ||
+          tc->tex_level != addr.bits.level ||
+          tc->tex_z != addr.bits.z) {
+         /* get new transfer (view into texture) */
+
+         if (tc->tex_trans) {
+            if (tc->tex_trans_map) {
+               tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
+               tc->tex_trans_map = NULL;
+            }
+
+            screen->tex_transfer_destroy(tc->tex_trans);
+            tc->tex_trans = NULL;
+         }
+
+         tc->tex_trans = 
+            screen->get_tex_transfer(screen, tc->texture, 
+                                     addr.bits.face, 
+                                     addr.bits.level, 
+                                     addr.bits.z, 
+                                     PIPE_TRANSFER_READ, 0, 0,
+                                     tc->texture->width[addr.bits.level],
+                                     tc->texture->height[addr.bits.level]);
+
+         tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
+
+         tc->tex_face = addr.bits.face;
+         tc->tex_level = addr.bits.level;
+         tc->tex_z = addr.bits.z;
+      }
+
+      /* get tile from the transfer (view into texture) */
+      pipe_get_tile_rgba(tc->tex_trans,
+                         addr.bits.x * TEX_TILE_SIZE,
+                         addr.bits.y * TEX_TILE_SIZE,
+                         TEX_TILE_SIZE, TEX_TILE_SIZE,
+                         (float *) tile->color);
+      tile->addr = addr;
+   }
+
+   tc->last_tile = tile;
+   return tile;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h
new file mode 100644
index 0000000000..f521b2ae0b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.h
@@ -0,0 +1,154 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef LP_TEX_CACHE_H
+#define LP_TEX_CACHE_H
+
+
+#include "pipe/p_compiler.h"
+
+
+struct llvmpipe_context;
+struct llvmpipe_tex_tile_cache;
+
+
+/**
+ * Cache tile size (width and height). This needs to be a power of two.
+ */
+#define TEX_TILE_SIZE 64
+
+
+/* If we need to support > 4096, just expand this to be a 64 bit
+ * union, or consider tiling in Z as well.
+ */
+union tex_tile_address {
+   struct {
+      unsigned x:6;             /* 4096 / TEX_TILE_SIZE */
+      unsigned y:6;             /* 4096 / TEX_TILE_SIZE */
+      unsigned z:12;            /* 4096 -- z not tiled */
+      unsigned face:3;
+      unsigned level:4;
+      unsigned invalid:1;
+   } bits;
+   unsigned value;
+};
+
+
+struct llvmpipe_cached_tex_tile
+{
+   union tex_tile_address addr;
+   float color[TEX_TILE_SIZE][TEX_TILE_SIZE][4];
+};
+
+#define NUM_ENTRIES 50
+
+
+/** XXX move these */
+#define MAX_TEX_WIDTH 2048
+#define MAX_TEX_HEIGHT 2048
+
+
+struct llvmpipe_tex_tile_cache
+{
+   struct pipe_screen *screen;
+   struct pipe_surface *surface;  /**< the surface we're caching */
+   struct pipe_transfer *transfer;
+   void *transfer_map;
+
+   struct pipe_texture *texture;  /**< if caching a texture */
+   unsigned timestamp;
+
+   struct llvmpipe_cached_tex_tile entries[NUM_ENTRIES];
+
+   struct pipe_transfer *tex_trans;
+   void *tex_trans_map;
+   int tex_face, tex_level, tex_z;
+
+   struct llvmpipe_cached_tex_tile *last_tile;  /**< most recently retrieved tile */
+};
+
+
+extern struct llvmpipe_tex_tile_cache *
+lp_create_tex_tile_cache( struct pipe_screen *screen );
+
+extern void
+lp_destroy_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc);
+
+extern void
+lp_tex_tile_cache_map_transfers(struct llvmpipe_tex_tile_cache *tc);
+
+extern void
+lp_tex_tile_cache_unmap_transfers(struct llvmpipe_tex_tile_cache *tc);
+
+extern void
+lp_tex_tile_cache_set_texture(struct llvmpipe_tex_tile_cache *tc,
+                          struct pipe_texture *texture);
+
+void
+lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc);
+
+extern void
+lp_flush_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc);
+
+extern const struct llvmpipe_cached_tex_tile *
+lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
+                        union tex_tile_address addr );
+
+static INLINE const union tex_tile_address
+tex_tile_address( unsigned x,
+                  unsigned y,
+                  unsigned z,
+                  unsigned face,
+                  unsigned level )
+{
+   union tex_tile_address addr;
+
+   addr.value = 0;
+   addr.bits.x = x / TEX_TILE_SIZE;
+   addr.bits.y = y / TEX_TILE_SIZE;
+   addr.bits.z = z;
+   addr.bits.face = face;
+   addr.bits.level = level;
+      
+   return addr;
+}
+
+/* Quickly retrieve tile if it matches last lookup.
+ */
+static INLINE const struct llvmpipe_cached_tex_tile *
+lp_get_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
+                       union tex_tile_address addr )
+{
+   if (tc->last_tile->addr.value == addr.value)
+      return tc->last_tile;
+
+   return lp_find_cached_tex_tile( tc, addr );
+}
+
+
+#endif /* LP_TEX_CACHE_H */
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
index d24845cac9..ff2dbce66b 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
@@ -38,7 +38,7 @@
 #include "lp_surface.h"
 #include "lp_texture.h"
 #include "lp_tex_sample.h"
-#include "lp_tile_cache.h"
+#include "lp_tex_cache.h"
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
@@ -680,17 +680,17 @@ get_texel(const struct tgsi_sampler *tgsi_sampler,
       rgba[3][j] = sampler->border_color[3];
    }
    else {
-      const unsigned tx = x % TILE_SIZE;
-      const unsigned ty = y % TILE_SIZE;
-      const struct llvmpipe_cached_tile *tile;
+      const unsigned tx = x % TEX_TILE_SIZE;
+      const unsigned ty = y % TEX_TILE_SIZE;
+      const struct llvmpipe_cached_tex_tile *tile;
 
-      tile = lp_get_cached_tile_tex(samp->cache, 
-                                    tile_address(x, y, z, face, level));
+      tile = lp_get_cached_tex_tile(samp->cache,
+                                    tex_tile_address(x, y, z, face, level));
 
-      rgba[0][j] = tile->data.color[ty][tx][0];
-      rgba[1][j] = tile->data.color[ty][tx][1];
-      rgba[2][j] = tile->data.color[ty][tx][2];
-      rgba[3][j] = tile->data.color[ty][tx][3];
+      rgba[0][j] = tile->color[ty][tx][0];
+      rgba[1][j] = tile->color[ty][tx][1];
+      rgba[2][j] = tile->color[ty][tx][2];
+      rgba[3][j] = tile->color[ty][tx][3];
       if (0)
       {
          debug_printf("Get texel %f %f %f %f from %s\n",
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 08f1451331..727d56056f 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -32,6 +32,9 @@
 #include "tgsi/tgsi_exec.h"
 
 
+struct llvmpipe_tex_tile_cache;
+
+
 /**
  * Subclass of tgsi_sampler
  */
@@ -42,7 +45,7 @@ struct lp_shader_sampler
    const struct pipe_texture *texture;
    const struct pipe_sampler_state *sampler;
 
-   struct llvmpipe_tile_cache *cache;
+   struct llvmpipe_tex_tile_cache *cache;
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 542e313445..0fad1fcfdf 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -40,7 +40,7 @@
 #include "lp_context.h"
 #include "lp_state.h"
 #include "lp_texture.h"
-#include "lp_tile_cache.h"
+#include "lp_tex_cache.h"
 #include "lp_screen.h"
 #include "lp_winsys.h"
 
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index 96bfe733e1..01ba843806 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -200,60 +200,6 @@ lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc)
    }
 }
 
-void
-lp_tile_cache_validate_texture(struct llvmpipe_tile_cache *tc)
-{
-   if (tc->texture) {
-      struct llvmpipe_texture *lpt = llvmpipe_texture(tc->texture);
-      if (lpt->timestamp != tc->timestamp) {
-         /* texture was modified, invalidate all cached tiles */
-         uint i;
-         _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
-         for (i = 0; i < NUM_ENTRIES; i++) {
-            tc->entries[i].addr.bits.invalid = 1;
-         }
-
-         tc->timestamp = lpt->timestamp;
-      }
-   }
-}
-
-/**
- * Specify the texture to cache.
- */
-void
-lp_tile_cache_set_texture(struct llvmpipe_tile_cache *tc,
-                          struct pipe_texture *texture)
-{
-   uint i;
-
-   assert(!tc->transfer);
-
-   if (tc->texture != texture) {
-      pipe_texture_reference(&tc->texture, texture);
-
-      if (tc->tex_trans) {
-         struct pipe_screen *screen = tc->tex_trans->texture->screen;
-         
-         if (tc->tex_trans_map) {
-            screen->transfer_unmap(screen, tc->tex_trans);
-            tc->tex_trans_map = NULL;
-         }
-
-         screen->tex_transfer_destroy(tc->tex_trans);
-         tc->tex_trans = NULL;
-      }
-
-      /* mark as entries as invalid/empty */
-      /* XXX we should try to avoid this when the teximage hasn't changed */
-      for (i = 0; i < NUM_ENTRIES; i++) {
-         tc->entries[i].addr.bits.invalid = 1;
-      }
-
-      tc->tex_face = -1; /* any invalid value here */
-   }
-}
-
 
 /**
  * Set pixels in a tile to the given clear color/value, float.
@@ -507,79 +453,6 @@ tex_cache_pos( union tile_address addr )
    return entry % NUM_ENTRIES;
 }
 
-/**
- * Similar to lp_get_cached_tile() but for textures.
- * Tiles are read-only and indexed with more params.
- */
-const struct llvmpipe_cached_tile *
-lp_find_cached_tile_tex(struct llvmpipe_tile_cache *tc, 
-                        union tile_address addr )
-{
-   struct pipe_screen *screen = tc->screen;
-   struct llvmpipe_cached_tile *tile;
-   
-   tile = tc->entries + tex_cache_pos( addr );
-
-   if (addr.value != tile->addr.value) {
-
-      /* cache miss.  Most misses are because we've invaldiated the
-       * texture cache previously -- most commonly on binding a new
-       * texture.  Currently we effectively flush the cache on texture
-       * bind.
-       */
-#if 0
-      _debug_printf("miss at %u:  x=%d y=%d z=%d face=%d level=%d\n"
-                    "   tile %u:  x=%d y=%d z=%d face=%d level=%d\n",
-                    pos, x/TILE_SIZE, y/TILE_SIZE, z, face, level,
-                    pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level);
-#endif
-
-      /* check if we need to get a new transfer */
-      if (!tc->tex_trans ||
-          tc->tex_face != addr.bits.face ||
-          tc->tex_level != addr.bits.level ||
-          tc->tex_z != addr.bits.z) {
-         /* get new transfer (view into texture) */
-
-         if (tc->tex_trans) {
-            if (tc->tex_trans_map) {
-               tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
-               tc->tex_trans_map = NULL;
-            }
-
-            screen->tex_transfer_destroy(tc->tex_trans);
-            tc->tex_trans = NULL;
-         }
-
-         tc->tex_trans = 
-            screen->get_tex_transfer(screen, tc->texture, 
-                                     addr.bits.face, 
-                                     addr.bits.level, 
-                                     addr.bits.z, 
-                                     PIPE_TRANSFER_READ, 0, 0,
-                                     tc->texture->width[addr.bits.level],
-                                     tc->texture->height[addr.bits.level]);
-
-         tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
-
-         tc->tex_face = addr.bits.face;
-         tc->tex_level = addr.bits.level;
-         tc->tex_z = addr.bits.z;
-      }
-
-      /* get tile from the transfer (view into texture) */
-      pipe_get_tile_rgba(tc->tex_trans,
-                         addr.bits.x * TILE_SIZE, 
-                         addr.bits.y * TILE_SIZE,
-                         TILE_SIZE, TILE_SIZE,
-                         (float *) tile->data.color);
-      tile->addr = addr;
-   }
-
-   tc->last_tile = tile;
-   return tile;
-}
-
 
 /**
  * When a whole surface is being cleared to a value we can avoid
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h
index 1cc5a17bb5..19676392dc 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.h
@@ -126,13 +126,6 @@ lp_tile_cache_map_transfers(struct llvmpipe_tile_cache *tc);
 extern void
 lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc);
 
-extern void
-lp_tile_cache_set_texture(struct llvmpipe_tile_cache *tc,
-                          struct pipe_texture *texture);
-
-void
-lp_tile_cache_validate_texture(struct llvmpipe_tile_cache *tc);
-
 extern void
 lp_flush_tile_cache(struct llvmpipe_tile_cache *tc);
 
@@ -144,10 +137,6 @@ extern struct llvmpipe_cached_tile *
 lp_find_cached_tile(struct llvmpipe_tile_cache *tc, 
                     union tile_address addr );
 
-extern const struct llvmpipe_cached_tile *
-lp_find_cached_tile_tex(struct llvmpipe_tile_cache *tc, 
-                         union tile_address addr );
-
 static INLINE const union tile_address
 tile_address( unsigned x,
               unsigned y,
@@ -169,17 +158,6 @@ tile_address( unsigned x,
 
 /* Quickly retrieve tile if it matches last lookup.
  */
-static INLINE const struct llvmpipe_cached_tile *
-lp_get_cached_tile_tex(struct llvmpipe_tile_cache *tc, 
-                         union tile_address addr )
-{
-   if (tc->last_tile->addr.value == addr.value)
-      return tc->last_tile;
-
-   return lp_find_cached_tile_tex( tc, addr );
-}
-
-
 static INLINE struct llvmpipe_cached_tile *
 lp_get_cached_tile(struct llvmpipe_tile_cache *tc, 
                    int x, int y )
-- 
cgit v1.2.3


From 3ce1abf950b7175d65a32fb9d182561a9d3d57f7 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 9 Aug 2009 23:58:06 +0100
Subject: llvmpipe: Store tile color in SoA.

---
 src/gallium/drivers/llvmpipe/SConscript      |   1 +
 src/gallium/drivers/llvmpipe/lp_quad_blend.c |   4 +-
 src/gallium/drivers/llvmpipe/lp_tile_cache.c |  42 +-
 src/gallium/drivers/llvmpipe/lp_tile_cache.h |   5 +-
 src/gallium/drivers/llvmpipe/lp_tile_soa.c   | 934 +++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_tile_soa.h   |  60 ++
 6 files changed, 1019 insertions(+), 27 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_tile_soa.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_tile_soa.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 8565c7e011..e3106763cb 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -51,6 +51,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_tex_sample.c',
 		'lp_texture.c',
 		'lp_tile_cache.c',
+		'lp_tile_soa.c',
 	])
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_blend.c b/src/gallium/drivers/llvmpipe/lp_quad_blend.c
index 2f4c46d5cb..fbfd51f600 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_blend.c
@@ -183,7 +183,7 @@ blend_run(struct quad_stage *qs,
             int x = itx + (j & 1);
             int y = ity + (j >> 1);
             for (i = 0; i < 4; i++) {
-               dest[i][j] = tile->data.color[y][x][i];
+               dest[i][j] = tile->data.color[i][y][x];
             }
          }
 
@@ -207,7 +207,7 @@ blend_run(struct quad_stage *qs,
                int x = itx + (j & 1);
                int y = ity + (j >> 1);
                for (i = 0; i < 4; i++) { /* loop over color chans */
-                  tile->data.color[y][x][i] = quadColor[i][j];
+                  tile->data.color[i][y][x] = quadColor[i][j];
                }
             }
          }
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index 01ba843806..65b62c44dc 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -38,6 +38,7 @@
 #include "lp_context.h"
 #include "lp_surface.h"
 #include "lp_texture.h"
+#include "lp_tile_soa.h"
 #include "lp_tile_cache.h"
 
 
@@ -216,15 +217,11 @@ clear_tile_rgba(struct llvmpipe_cached_tile *tile,
       memset(tile->data.color, 0, sizeof(tile->data.color));
    }
    else {
-      uint i, j;
-      for (i = 0; i < TILE_SIZE; i++) {
-         for (j = 0; j < TILE_SIZE; j++) {
-            tile->data.color[i][j][0] = clear_value[0];
-            tile->data.color[i][j][1] = clear_value[1];
-            tile->data.color[i][j][2] = clear_value[2];
-            tile->data.color[i][j][3] = clear_value[3];
-         }
-      }
+      uint i, x, y;
+      for (i = 0; i < 4; ++i)
+         for (y = 0; y < TILE_SIZE; y++)
+            for (x = 0; x < TILE_SIZE; x++)
+               tile->data.color[i][y][x] = clear_value[i];
    }
 }
 
@@ -334,11 +331,10 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc)
                                  tile->data.depth32, 0/*STRIDE*/);
             }
             else {
-               pipe_put_tile_rgba(pt,
-                                  tile->addr.bits.x * TILE_SIZE, 
-                                  tile->addr.bits.y * TILE_SIZE, 
-                                  TILE_SIZE, TILE_SIZE,
-                                  (float *) tile->data.color);
+               lp_put_tile_rgba_soa(pt,
+                                    tile->addr.bits.x * TILE_SIZE,
+                                    tile->addr.bits.y * TILE_SIZE,
+                                    tile->data.color);
             }
             tile->addr.bits.invalid = 1;  /* mark as empty */
             inuse++;
@@ -390,11 +386,10 @@ lp_find_cached_tile(struct llvmpipe_tile_cache *tc,
                               tile->data.depth32, 0/*STRIDE*/);
          }
          else {
-            pipe_put_tile_rgba(pt,
-                               tile->addr.bits.x * TILE_SIZE,
-                               tile->addr.bits.y * TILE_SIZE,
-                               TILE_SIZE, TILE_SIZE,
-                               (float *) tile->data.color);
+            lp_put_tile_rgba_soa(pt,
+                                 tile->addr.bits.x * TILE_SIZE,
+                                 tile->addr.bits.y * TILE_SIZE,
+                                 tile->data.color);
          }
       }
 
@@ -420,11 +415,10 @@ lp_find_cached_tile(struct llvmpipe_tile_cache *tc,
                               tile->data.depth32, 0/*STRIDE*/);
          }
          else {
-            pipe_get_tile_rgba(pt,
-                               tile->addr.bits.x * TILE_SIZE, 
-                               tile->addr.bits.y * TILE_SIZE,
-                               TILE_SIZE, TILE_SIZE,
-                               (float *) tile->data.color);
+            lp_get_tile_rgba_soa(pt,
+                                 tile->addr.bits.x * TILE_SIZE,
+                                 tile->addr.bits.y * TILE_SIZE,
+                                 tile->data.color);
          }
       }
    }
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h
index 19676392dc..2f904d9e53 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.h
@@ -64,7 +64,10 @@ struct llvmpipe_cached_tile
 {
    union tile_address addr;
    union {
-      float color[TILE_SIZE][TILE_SIZE][4];
+
+      /** color in SOA format (rrrr, gggg, bbbb, aaaa) */
+      float color[4][TILE_SIZE][TILE_SIZE];
+
       uint color32[TILE_SIZE][TILE_SIZE];
       uint depth32[TILE_SIZE][TILE_SIZE];
       ushort depth16[TILE_SIZE][TILE_SIZE];
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.c b/src/gallium/drivers/llvmpipe/lp_tile_soa.c
new file mode 100644
index 0000000000..9d37cf6805
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.c
@@ -0,0 +1,934 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * RGBA/float tile get/put functions for llvmpipe's SoA tile layout
+ * (one TILE_SIZE x TILE_SIZE plane per color channel).
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_rect.h"
+#include "util/u_tile.h"
+#include "lp_tile_soa.h"
+
+/** Texel (_x, _y), channel _c of SoA tile _p: one TILE_SIZE^2 plane per channel */
+#define PIXEL(_p, _x, _y, _c) ((_p)[(_c)*TILE_SIZE*TILE_SIZE + (_y)*TILE_SIZE + (_x)])
+
+
+/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */
+#define SHORT_TO_FLOAT(S)   ((2.0F * (S) + 1.0F) * (1.0F/65535.0F))
+/** Clamp float f to [-1,1], scale by 32767 and store the result in us */
+#define UNCLAMPED_FLOAT_TO_SHORT(us, f)  \
+   us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) )
+
+
+
+/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
+
+/** Unpack w x h A8R8G8B8 texels from src into the SoA float tile p. */
+static void
+a8r8g8b8_get_tile_rgba(const unsigned *src,
+                       unsigned w, unsigned h,
+                       float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const unsigned argb = *src;
+         PIXEL(p, x, y, 3) = ubyte_to_float((argb >> 24) & 0xff);
+         PIXEL(p, x, y, 0) = ubyte_to_float((argb >> 16) & 0xff);
+         PIXEL(p, x, y, 1) = ubyte_to_float((argb >>  8) & 0xff);
+         PIXEL(p, x, y, 2) = ubyte_to_float((argb >>  0) & 0xff);
+      }
+   }
+}
+
+
+/** Pack the SoA float tile p into w x h A8R8G8B8 texels at dst. */
+static void
+a8r8g8b8_put_tile_rgba(unsigned *dst,
+                       unsigned w, unsigned h,
+                       const float *p)
+{
+   unsigned x, y;
+
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         const unsigned red   = float_to_ubyte(PIXEL(p, x, y, 0));
+         const unsigned green = float_to_ubyte(PIXEL(p, x, y, 1));
+         const unsigned blue  = float_to_ubyte(PIXEL(p, x, y, 2));
+         const unsigned alpha = float_to_ubyte(PIXEL(p, x, y, 3));
+         *dst = (alpha << 24) | (red << 16) | (green << 8) | blue;
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_X8R8G8B8_UNORM ***/
+
+/** Unpack w x h X8R8G8B8 texels into SoA tile p; alpha is taken as 0xff. */
+static void
+x8r8g8b8_get_tile_rgba(const unsigned *src,
+                       unsigned w, unsigned h,
+                       float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const unsigned xrgb = *src;
+         PIXEL(p, x, y, 3) = ubyte_to_float(0xff);
+         PIXEL(p, x, y, 0) = ubyte_to_float((xrgb >> 16) & 0xff);
+         PIXEL(p, x, y, 1) = ubyte_to_float((xrgb >>  8) & 0xff);
+         PIXEL(p, x, y, 2) = ubyte_to_float((xrgb >>  0) & 0xff);
+      }
+   }
+}
+
+
+/** Pack SoA tile p into w x h X8R8G8B8 texels at dst; the X byte is 0xff. */
+static void
+x8r8g8b8_put_tile_rgba(unsigned *dst,
+                       unsigned w, unsigned h,
+                       const float *p)
+{
+   unsigned x, y;
+
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         const unsigned red   = float_to_ubyte(PIXEL(p, x, y, 0));
+         const unsigned green = float_to_ubyte(PIXEL(p, x, y, 1));
+         const unsigned blue  = float_to_ubyte(PIXEL(p, x, y, 2));
+         *dst = (0xff << 24) | (red << 16) | (green << 8) | blue;
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
+
+/** Unpack w x h B8G8R8A8 texels from src into the SoA float tile p. */
+static void
+b8g8r8a8_get_tile_rgba(const unsigned *src,
+                       unsigned w, unsigned h,
+                       float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const unsigned bgra = *src;
+         PIXEL(p, x, y, 2) = ubyte_to_float((bgra >> 24) & 0xff);
+         PIXEL(p, x, y, 1) = ubyte_to_float((bgra >> 16) & 0xff);
+         PIXEL(p, x, y, 0) = ubyte_to_float((bgra >>  8) & 0xff);
+         PIXEL(p, x, y, 3) = ubyte_to_float((bgra >>  0) & 0xff);
+      }
+   }
+}
+
+
+/** Pack the SoA float tile p into w x h B8G8R8A8 texels at dst. */
+static void
+b8g8r8a8_put_tile_rgba(unsigned *dst,
+                       unsigned w, unsigned h,
+                       const float *p)
+{
+   unsigned x, y;
+
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         const unsigned red   = float_to_ubyte(PIXEL(p, x, y, 0));
+         const unsigned green = float_to_ubyte(PIXEL(p, x, y, 1));
+         const unsigned blue  = float_to_ubyte(PIXEL(p, x, y, 2));
+         const unsigned alpha = float_to_ubyte(PIXEL(p, x, y, 3));
+         *dst = (blue << 24) | (green << 16) | (red << 8) | alpha;
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/
+
+/** Unpack w x h A1R5G5B5 texels from src into the SoA float tile p. */
+static void
+a1r5g5b5_get_tile_rgba(const ushort *src,
+                       unsigned w, unsigned h,
+                       float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const ushort texel = *src;
+         PIXEL(p, x, y, 3) = ((texel >> 15)       ) * 1.0f;
+         PIXEL(p, x, y, 0) = ((texel >> 10) & 0x1f) * (1.0f / 31.0f);
+         PIXEL(p, x, y, 1) = ((texel >>  5) & 0x1f) * (1.0f / 31.0f);
+         PIXEL(p, x, y, 2) = ((texel      ) & 0x1f) * (1.0f / 31.0f);
+      }
+   }
+}
+
+
+/** Pack the SoA float tile p into w x h A1R5G5B5 texels at dst. */
+static void
+a1r5g5b5_put_tile_rgba(ushort *dst,
+                       unsigned w, unsigned h,
+                       const float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         /* quantize to 8 bits, then keep only the most significant bits */
+         const unsigned r8 = float_to_ubyte(PIXEL(p, x, y, 0));
+         const unsigned g8 = float_to_ubyte(PIXEL(p, x, y, 1));
+         const unsigned b8 = float_to_ubyte(PIXEL(p, x, y, 2));
+         const unsigned a8 = float_to_ubyte(PIXEL(p, x, y, 3));
+         const unsigned r5 = r8 >> 3;
+         const unsigned g5 = g8 >> 3;
+         const unsigned b5 = b8 >> 3;
+         const unsigned a1 = a8 >> 7;
+         *dst = (a1 << 15) | (r5 << 10) | (g5 << 5) | b5;
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/
+
+/** Unpack w x h A4R4G4B4 texels from src into the SoA float tile p. */
+static void
+a4r4g4b4_get_tile_rgba(const ushort *src,
+                       unsigned w, unsigned h,
+                       float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const ushort texel = *src;
+         PIXEL(p, x, y, 3) = ((texel >> 12)      ) * (1.0f / 15.0f);
+         PIXEL(p, x, y, 0) = ((texel >>  8) & 0xf) * (1.0f / 15.0f);
+         PIXEL(p, x, y, 1) = ((texel >>  4) & 0xf) * (1.0f / 15.0f);
+         PIXEL(p, x, y, 2) = ((texel      ) & 0xf) * (1.0f / 15.0f);
+      }
+   }
+}
+
+
+/**
+ * Pack the SoA float tile p into w x h A4R4G4B4 texels at dst.
+ * Red is stored in bits 8..11, where a4r4g4b4_get_tile_rgba() reads it
+ * back; a larger shift would push it out of the 16-bit texel.
+ */
+static void
+a4r4g4b4_put_tile_rgba(ushort *dst,
+                       unsigned w, unsigned h,
+                       const float *p)
+{
+   unsigned i, j;
+   for (i = 0; i < h; i++) {
+      for (j = 0; j < w; j++) {
+         /* 8 bits per channel, reduced to the 4 most significant bits */
+         const unsigned r = float_to_ubyte(PIXEL(p, j, i, 0)) >> 4;
+         const unsigned g = float_to_ubyte(PIXEL(p, j, i, 1)) >> 4;
+         const unsigned b = float_to_ubyte(PIXEL(p, j, i, 2)) >> 4;
+         const unsigned a = float_to_ubyte(PIXEL(p, j, i, 3)) >> 4;
+         *dst++ = (a << 12) | (r << 8) | (g << 4) | b;
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_R5G6B5_UNORM ***/
+
+/** Unpack w x h R5G6B5 texels from src into SoA tile p; alpha is 1.0. */
+static void
+r5g6b5_get_tile_rgba(const ushort *src,
+                     unsigned w, unsigned h,
+                     float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const ushort texel = *src;
+         PIXEL(p, x, y, 3) = 1.0f;
+         PIXEL(p, x, y, 0) = ((texel >> 11) & 0x1f) * (1.0f / 31.0f);
+         PIXEL(p, x, y, 1) = ((texel >>  5) & 0x3f) * (1.0f / 63.0f);
+         PIXEL(p, x, y, 2) = ((texel      ) & 0x1f) * (1.0f / 31.0f);
+      }
+   }
+}
+
+
+/** Pack the RGB channels of SoA tile p into w x h R5G6B5 texels at dst. */
+static void
+r5g6b5_put_tile_rgba(ushort *dst,
+                     unsigned w, unsigned h,
+                     const float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         const uint red   = (uint) (CLAMP(PIXEL(p, x, y, 0), 0.0, 1.0) * 31.0);
+         const uint green = (uint) (CLAMP(PIXEL(p, x, y, 1), 0.0, 1.0) * 63.0);
+         const uint blue  = (uint) (CLAMP(PIXEL(p, x, y, 2), 0.0, 1.0) * 31.0);
+         *dst = (red << 11) | (green << 5) | (blue);
+      }
+   }
+}
+
+
+
+/*** PIPE_FORMAT_Z16_UNORM ***/
+
+/**
+ * Replicate each 16-bit Z value, scaled by 1/65535, into all four channels.
+ */
+static void
+z16_get_tile_rgba(const ushort *src,
+                  unsigned w, unsigned h,
+                  float *p)
+{
+   const float scale = 1.0f / 65535.0f;
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const float z = *src * scale;
+         PIXEL(p, x, y, 0) = z;
+         PIXEL(p, x, y, 1) = z;
+         PIXEL(p, x, y, 2) = z;
+         PIXEL(p, x, y, 3) = z;
+      }
+   }
+}
+
+
+
+
+/*** PIPE_FORMAT_L8_UNORM ***/
+
+/** Unpack w x h L8 texels: luminance goes to R, G and B, alpha is 1.0. */
+static void
+l8_get_tile_rgba(const ubyte *src,
+                 unsigned w, unsigned h,
+                 float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         PIXEL(p, x, y, 3) = 1.0;
+         PIXEL(p, x, y, 0) =
+         PIXEL(p, x, y, 1) =
+         PIXEL(p, x, y, 2) = ubyte_to_float(*src);
+      }
+   }
+}
+
+
+/** Pack the red channel of SoA tile p into w x h L8 texels at dst. */
+static void
+l8_put_tile_rgba(ubyte *dst,
+                 unsigned w, unsigned h,
+                 const float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         const unsigned lum = float_to_ubyte(PIXEL(p, x, y, 0));
+         *dst = (ubyte) lum;
+      }
+   }
+}
+
+
+
+
+/*** PIPE_FORMAT_A8_UNORM ***/
+
+/** Unpack w x h A8 texels: R, G and B are 0, alpha comes from the texel. */
+static void
+a8_get_tile_rgba(const ubyte *src,
+                 unsigned w, unsigned h,
+                 float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         PIXEL(p, x, y, 3) = ubyte_to_float(*src);
+         PIXEL(p, x, y, 0) =
+         PIXEL(p, x, y, 1) =
+         PIXEL(p, x, y, 2) = 0.0;
+      }
+   }
+}
+
+
+/** Pack the alpha channel of SoA tile p into w x h A8 texels at dst. */
+static void
+a8_put_tile_rgba(ubyte *dst,
+                 unsigned w, unsigned h,
+                 const float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         const unsigned alpha = float_to_ubyte(PIXEL(p, x, y, 3));
+         *dst = (ubyte) alpha;
+      }
+   }
+}
+
+
+
+
+/*** PIPE_FORMAT_R16_SNORM ***/
+
+/** Unpack w x h signed 16-bit R texels: G and B are 0, alpha is 1.0. */
+static void
+r16_get_tile_rgba(const short *src,
+                  unsigned w, unsigned h,
+                  float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         PIXEL(p, x, y, 0) = SHORT_TO_FLOAT(src[0]);
+         PIXEL(p, x, y, 1) = 0.0;
+         PIXEL(p, x, y, 2) = 0.0;
+         PIXEL(p, x, y, 3) = 1.0;
+      }
+   }
+}
+
+
+/** Pack the red channel of SoA tile p into w x h signed 16-bit texels. */
+static void
+r16_put_tile_rgba(short *dst,
+                  unsigned w, unsigned h,
+                  const float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         UNCLAMPED_FLOAT_TO_SHORT(dst[0], PIXEL(p, x, y, 0));
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/
+
+/** Unpack w x h texels of four signed shorts (R,G,B,A) into SoA tile p. */
+static void
+r16g16b16a16_get_tile_rgba(const short *src,
+                           unsigned w, unsigned h,
+                           float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src += 4) {
+         PIXEL(p, x, y, 0) = SHORT_TO_FLOAT(src[0]);
+         PIXEL(p, x, y, 1) = SHORT_TO_FLOAT(src[1]);
+         PIXEL(p, x, y, 2) = SHORT_TO_FLOAT(src[2]);
+         PIXEL(p, x, y, 3) = SHORT_TO_FLOAT(src[3]);
+      }
+   }
+}
+
+
+/** Pack SoA tile p into w x h texels of four signed shorts (R,G,B,A). */
+static void
+r16g16b16a16_put_tile_rgba(short *dst,
+                           unsigned w, unsigned h,
+                           const float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst += 4) {
+         UNCLAMPED_FLOAT_TO_SHORT(dst[0], PIXEL(p, x, y, 0));
+         UNCLAMPED_FLOAT_TO_SHORT(dst[1], PIXEL(p, x, y, 1));
+         UNCLAMPED_FLOAT_TO_SHORT(dst[2], PIXEL(p, x, y, 2));
+         UNCLAMPED_FLOAT_TO_SHORT(dst[3], PIXEL(p, x, y, 3));
+      }
+   }
+}
+
+
+
+/*** PIPE_FORMAT_I8_UNORM ***/
+
+/** Unpack w x h I8 texels: the value is copied to all four channels of p. */
+static void
+i8_get_tile_rgba(const ubyte *src,
+                 unsigned w, unsigned h,
+                 float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         PIXEL(p, x, y, 0) =
+         PIXEL(p, x, y, 1) =
+         PIXEL(p, x, y, 2) =
+         PIXEL(p, x, y, 3) = ubyte_to_float(*src);
+      }
+   }
+}
+
+
+/** Pack the red channel of SoA tile p into w x h I8 texels at dst. */
+static void
+i8_put_tile_rgba(ubyte *dst,
+                 unsigned w, unsigned h,
+                 const float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         const unsigned intensity = float_to_ubyte(PIXEL(p, x, y, 0));
+         *dst = (ubyte) intensity;
+      }
+   }
+}
+
+
+
+/*** PIPE_FORMAT_A8L8_UNORM ***/
+
+/** Unpack w x h A8L8 texels: low byte to R, G and B, high byte to alpha. */
+static void
+a8l8_get_tile_rgba(const ushort *src,
+                   unsigned w, unsigned h,
+                   float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const ushort al = *src;
+         PIXEL(p, x, y, 3) = ubyte_to_float(al >> 8);
+         PIXEL(p, x, y, 0) =
+         PIXEL(p, x, y, 1) =
+         PIXEL(p, x, y, 2) = ubyte_to_float(al & 0xff);
+      }
+   }
+}
+
+
+/** Pack red (as luminance) and alpha of SoA tile p into w x h A8L8 texels. */
+static void
+a8l8_put_tile_rgba(ushort *dst,
+                   unsigned w, unsigned h,
+                   const float *p)
+{
+   unsigned x, y;
+
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, dst++) {
+         const unsigned lum   = float_to_ubyte(PIXEL(p, x, y, 0));
+         const unsigned alpha = float_to_ubyte(PIXEL(p, x, y, 3));
+         *dst = (alpha << 8) | lum;
+      }
+   }
+}
+
+
+
+
+/*** PIPE_FORMAT_Z32_UNORM ***/
+
+/**
+ * Replicate each 32-bit Z value, scaled by 1/0xffffffff, into four channels.
+ */
+static void
+z32_get_tile_rgba(const unsigned *src,
+                  unsigned w, unsigned h,
+                  float *p)
+{
+   const double scale = 1.0 / (double) 0xffffffff;
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const float z = (float) (*src * scale);
+         PIXEL(p, x, y, 0) = z;
+         PIXEL(p, x, y, 1) = z;
+         PIXEL(p, x, y, 2) = z;
+         PIXEL(p, x, y, 3) = z;
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_S8Z24_UNORM ***/
+
+/**
+ * Replicate the scaled low 24 bits (Z) of each texel into four channels.
+ */
+static void
+s8z24_get_tile_rgba(const unsigned *src,
+                    unsigned w, unsigned h,
+                    float *p)
+{
+   const double scale = 1.0 / ((1 << 24) - 1);
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const float z = (float) (scale * (*src & 0xffffff));
+         PIXEL(p, x, y, 0) = z;
+         PIXEL(p, x, y, 1) = z;
+         PIXEL(p, x, y, 2) = z;
+         PIXEL(p, x, y, 3) = z;
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_Z24S8_UNORM ***/
+
+/**
+ * Replicate the scaled high 24 bits (Z) of each texel into four channels.
+ */
+static void
+z24s8_get_tile_rgba(const unsigned *src,
+                    unsigned w, unsigned h,
+                    float *p)
+{
+   const double scale = 1.0 / ((1 << 24) - 1);
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const float z = (float) (scale * (*src >> 8));
+         PIXEL(p, x, y, 0) = z;
+         PIXEL(p, x, y, 1) = z;
+         PIXEL(p, x, y, 2) = z;
+         PIXEL(p, x, y, 3) = z;
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_Z32_FLOAT ***/
+
+/**
+ * Copy each float Z value, unmodified, into all four channels of p.
+ */
+static void
+z32f_get_tile_rgba(const float *src,
+                   unsigned w, unsigned h,
+                   float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++, src++) {
+         const float z = *src;
+         PIXEL(p, x, y, 0) = z;
+         PIXEL(p, x, y, 1) = z;
+         PIXEL(p, x, y, 2) = z;
+         PIXEL(p, x, y, 3) = z;
+      }
+   }
+}
+
+
+/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/
+
+/**
+ * Convert one YCbCr sample to RGB and store it, with alpha 1.0, in the
+ * SoA float tile.
+ *
+ * \param p    SoA float tile to write
+ * \param x    texel column inside the tile
+ * \param y    texel row inside the tile
+ * \param lum  luminance (Y) byte
+ * \param cb   chroma U byte
+ * \param cr   chroma V byte
+ */
+static void
+ycbcr_put_texel_rgba(float *p, unsigned x, unsigned y,
+                     ubyte lum, ubyte cb, ubyte cr)
+{
+   /* NOTE(review): weights look like the ITU-R BT.601 video-range matrix
+    * -- confirm.  The results are not clamped to [0,1]. */
+   const float scale = 1.0f / 255.0f;
+   const float r = 1.164f * (lum-16) + 1.596f * (cr-128);
+   const float g = 1.164f * (lum-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
+   const float b = 1.164f * (lum-16) + 2.018f * (cb-128);
+
+   PIXEL(p, x, y, 0) = r * scale;
+   PIXEL(p, x, y, 1) = g * scale;
+   PIXEL(p, x, y, 2) = b * scale;
+   PIXEL(p, x, y, 3) = 1.0f;
+}
+
+
+/**
+ * Convert YCbCr (or YCrCb) to RGBA.
+ *
+ * Texels are consumed in pairs: each 16-bit texel carries its own luminance
+ * in the high byte and one chroma byte in the low byte, and both texels of
+ * a pair share the two chroma values.
+ *
+ * \param p    SoA float tile receiving the converted texels
+ * \param rev  if true, Cb comes from the second texel of a pair and Cr from
+ *             the first; otherwise the other way round
+ *
+ * NOTE(review): for odd w the trailing texel still reads src[1], and src is
+ * not advanced past that texel before the next row -- kept as-is, confirm
+ * against the raw tile layout.
+ */
+static void
+ycbcr_get_tile_rgba(const ushort *src,
+                    unsigned w, unsigned h,
+                    float *p,
+                    boolean rev)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      /* do two texels at a time */
+      for (j = 0; j < (w & ~1); j += 2, src += 2) {
+         const ushort t0 = src[0];
+         const ushort t1 = src[1];
+         const ubyte y0 = (t0 >> 8) & 0xff;          /* luminance */
+         const ubyte y1 = (t1 >> 8) & 0xff;          /* luminance */
+         const ubyte cb = (rev ? t1 : t0) & 0xff;    /* chroma U */
+         const ubyte cr = (rev ? t0 : t1) & 0xff;    /* chroma V */
+
+         ycbcr_put_texel_rgba(p, j, i, y0, cb, cr);      /* even pixel */
+         ycbcr_put_texel_rgba(p, j + 1, i, y1, cb, cr);  /* odd pixel */
+      }
+      /* do the last texel */
+      if (w & 1) {
+         const ushort t0 = src[0];
+         const ushort t1 = src[1];
+         const ubyte y0 = (t0 >> 8) & 0xff;          /* luminance */
+         const ubyte cb = (rev ? t1 : t0) & 0xff;    /* chroma U */
+         const ubyte cr = (rev ? t0 : t1) & 0xff;    /* chroma V */
+
+         ycbcr_put_texel_rgba(p, j, i, y0, cb, cr);
+      }
+   }
+}
+
+
+/** Fill the w x h tile with a 0/1 checkerboard; src is never read. */
+static void
+fake_get_tile_rgba(const ushort *src,
+                   unsigned w, unsigned h,
+                   float *p)
+{
+   unsigned x, y;
+   for (y = 0; y < h; y++) {
+      for (x = 0; x < w; x++) {
+         PIXEL(p, x, y, 0) =
+         PIXEL(p, x, y, 1) =
+         PIXEL(p, x, y, 2) =
+         PIXEL(p, x, y, 3) = ((x ^ y) & 1) ? 1.0f : 0.0f;
+      }
+   }
+}
+
+/** Convert a w x h block of packed 'format' texels at src to SoA floats in p. */
+static void
+lp_tile_raw_to_rgba_soa(enum pipe_format format,
+                        void *src,
+                        uint w, uint h,
+                        float *p)
+{
+   switch (format) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      a1r5g5b5_get_tile_rgba((ushort *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      a4r4g4b4_get_tile_rgba((ushort *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      r5g6b5_get_tile_rgba((ushort *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_L8_UNORM:
+      l8_get_tile_rgba((ubyte *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_A8_UNORM:
+      a8_get_tile_rgba((ubyte *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_I8_UNORM:
+      i8_get_tile_rgba((ubyte *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_A8L8_UNORM:
+      a8l8_get_tile_rgba((ushort *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_R16_SNORM:
+      r16_get_tile_rgba((short *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      r16g16b16a16_get_tile_rgba((short *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_Z16_UNORM:
+      z16_get_tile_rgba((ushort *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_Z32_UNORM:
+      z32_get_tile_rgba((unsigned *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+      s8z24_get_tile_rgba((unsigned *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+      z24s8_get_tile_rgba((unsigned *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_Z32_FLOAT:
+      z32f_get_tile_rgba((float *) src, w, h, p);
+      break;
+   case PIPE_FORMAT_YCBCR:
+      ycbcr_get_tile_rgba((ushort *) src, w, h, p, FALSE);
+      break;
+   case PIPE_FORMAT_YCBCR_REV:
+      ycbcr_get_tile_rgba((ushort *) src, w, h, p, TRUE);
+      break;
+   default:
+      debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format));
+      fake_get_tile_rgba(src, w, h, p);
+   }
+}
+
+/** Read the tile at (x, y) from pt and unpack it into the SoA float tile p. */
+void
+lp_get_tile_rgba_soa(struct pipe_transfer *pt,
+                     uint x, uint y,
+                     float *p)
+{
+   uint w = TILE_SIZE, h = TILE_SIZE;
+   void *packed;
+
+   if (pipe_clip_tile(x, y, &w, &h, pt))
+      return;
+   /* scratch buffer for the tile in the transfer's own packed format */
+   packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
+   /* on allocation failure p is left untouched */
+   if (!packed)
+      return;
+   /* the YCbCr unpacker consumes texels in pairs, so x must be even */
+   if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV)
+      assert((x & 1) == 0);
+
+   pipe_get_tile_raw(pt, x, y, w, h, packed, 0);
+
+   lp_tile_raw_to_rgba_soa(pt->format, packed, w, h, p);
+
+   FREE(packed);
+}
+
+
+/**
+ * Pack the SoA float tile p into pt's format and write it to pt at (x, y).
+ * Formats without a packer are skipped: the scratch buffer is uninitialized.
+ */
+void
+lp_put_tile_rgba_soa(struct pipe_transfer *pt,
+                     uint x, uint y,
+                     const float *p)
+{
+   uint w = TILE_SIZE, h = TILE_SIZE;
+   boolean packed_valid = TRUE;
+   void *packed;
+
+   if (pipe_clip_tile(x, y, &w, &h, pt))
+      return;
+   packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
+   if (!packed)
+      return;
+
+   switch (pt->format) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      r5g6b5_put_tile_rgba((ushort *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      assert(0);
+      packed_valid = FALSE;
+      break;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_L8_UNORM:
+      l8_put_tile_rgba((ubyte *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_A8_UNORM:
+      a8_put_tile_rgba((ubyte *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_I8_UNORM:
+      i8_put_tile_rgba((ubyte *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_A8L8_UNORM:
+      a8l8_put_tile_rgba((ushort *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_R16_SNORM:
+      r16_put_tile_rgba((short *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      r16g16b16a16_put_tile_rgba((short *) packed, w, h, p);
+      break;
+   case PIPE_FORMAT_Z16_UNORM:
+   case PIPE_FORMAT_Z32_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+      /* depth formats have no rgba packer yet, so nothing was packed */
+      packed_valid = FALSE;
+      break;
+   default:
+      debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format));
+      packed_valid = FALSE;
+   }
+
+   if (packed_valid)
+      pipe_put_tile_raw(pt, x, y, w, h, packed, 0);
+   FREE(packed);
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
new file mode 100644
index 0000000000..23b27b77eb
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -0,0 +1,60 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef LP_TILE_SOA_H
+#define LP_TILE_SOA_H
+
+#include "pipe/p_compiler.h"
+
+#include "lp_tile_cache.h"
+
+
+struct pipe_transfer;
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+void
+lp_get_tile_rgba_soa(struct pipe_transfer *pt,
+                     uint x, uint y,
+                     float *p);
+
+void
+lp_put_tile_rgba_soa(struct pipe_transfer *pt,
+                     uint x, uint y,
+                     const float *p);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
-- 
cgit v1.2.3


From 63b07618b3e4034e11968f1c5323445dc4a0377f Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 11 Aug 2009 13:17:24 +0100
Subject: llvmpipe: TGSI -> LLVM SoA IR conversion.

Based on tgsi_sse2.c.
---
 src/gallium/drivers/llvmpipe/SConscript        |    1 +
 src/gallium/drivers/llvmpipe/lp_bld_tgsi.h     |   53 +
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 1534 ++++++++++++++++++++++++
 3 files changed, 1588 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index e3106763cb..705a8bdfd4 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -23,6 +23,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_loop.c',
 		'lp_bld_logicop.c',
 		'lp_bld_swizzle.c',
+		'lp_bld_tgsi_soa.c',		
 		'lp_bld_type.c',
 		'lp_clear.c',
 		'lp_context.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
new file mode 100644
index 0000000000..020db003c2
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -0,0 +1,53 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_TGSI_H
+#define LP_BLD_TGSI_H
+
+#include <llvm-c/Core.h>
+
+
+struct tgsi_token;
+union lp_type;
+struct lp_build_context;
+
+void PIPE_CDECL
+lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
+                               uint32_t unit, /* index into samplers */
+                               float *store ); /* in: texture coordinates; out: overwritten with the sampled rgba */
+/* NOTE(review): presumably inputs/outputs are indexed [register][channel], as the emitters in lp_bld_tgsi_soa.c do -- confirm. */
+void
+lp_build_tgsi_soa(LLVMBuilderRef builder,
+                  const struct tgsi_token *tokens,
+                  union lp_type type,
+                  LLVMValueRef (*inputs)[4],
+                  LLVMValueRef consts_ptr,
+                  LLVMValueRef (*outputs)[4],
+                  LLVMValueRef samplers_ptr);
+
+
+#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
new file mode 100644
index 0000000000..d5acafe840
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -0,0 +1,1534 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_config.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_tgsi.h"
+
+
+#define LP_MAX_TEMPS 256 /* first dimension of lp_build_tgsi_soa_context::temps */
+#define LP_MAX_IMMEDIATES 256 /* first dimension of lp_build_tgsi_soa_context::immediates */
+
+/* Loop CHAN over every channel index in [0, NUM_CHANNELS). */
+#define FOR_EACH_CHANNEL( CHAN )\
+   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
+/* Non-zero when bit CHAN is set in the write mask of destination register 0 of INST. */
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+/* `if` guard: the statement that follows runs only when channel CHAN is write-enabled. */
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+/* Loop over all channels; the statement that follows runs for write-enabled channels only. */
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+   FOR_EACH_CHANNEL( CHAN )\
+      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+/* Channel indices of the x, y, z and w components. */
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+
+struct lp_build_tgsi_soa_context
+{
+   struct lp_build_context base; /* supplies the builder, type, zero, one and undef used by the emitters */
+
+   LLVMValueRef (*inputs)[4]; /* read by emit_fetch as inputs[register index][channel] */
+   LLVMValueRef consts_ptr; /* emit_fetch loads element (register index*4 + channel) and broadcasts it */
+   LLVMValueRef (*outputs)[4]; /* written by emit_store as outputs[register index][channel] */
+   LLVMValueRef samplers_ptr; /* passed as the first argument of the "fetch_texel" call in emit_tex */
+
+   LLVMValueRef immediates[LP_MAX_IMMEDIATES][4]; /* read by emit_fetch as immediates[register index][channel] */
+   LLVMValueRef temps[LP_MAX_TEMPS][4]; /* latest value stored per temp channel; a NULL entry is fetched as undef */
+};
+
+
+/**
+ * Function call helpers.
+ */
+
+/**
+ * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be 
+ * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee
+ * that the stack pointer is 16 byte aligned, as expected.
+ */
+static void
+emit_func_call(
+   struct lp_build_tgsi_soa_context *bld,
+   const LLVMValueRef *args,
+   unsigned nr_args,
+   void (PIPE_CDECL *code)() )
+{
+#if 0 /* disabled: only the LLVMAddGlobalMapping prototype; presumably a hint at how `code` would be bound -- confirm */
+   LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
+                             void* Addr);
+#endif
+   /* Stub: nothing is emitted yet; bld, args, nr_args and code are all unused. */
+}
+
+
+/**
+ * Register fetch: return the value of channel chan_index of source register
+ * reg, with the register's extended swizzle and sign mode applied.
+ */
+static LLVMValueRef
+emit_fetch(
+   struct lp_build_tgsi_soa_context *bld,
+   const struct tgsi_full_src_register *reg,
+   const unsigned chan_index )
+{
+   unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+   LLVMValueRef res = bld->base.undef; /* defined fallback for the assert(0) defaults below when asserts are compiled out */
+
+   switch (swizzle) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      /* the swizzle selects a real component: read it from the register file */
+      switch (reg->SrcRegister.File) {
+      case TGSI_FILE_CONSTANT: { /* load scalar (Index*4 + swizzle) from consts_ptr and broadcast it */
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
+         LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
+         LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+         res = lp_build_broadcast_scalar(&bld->base, scalar);
+         break;
+      }
+
+      case TGSI_FILE_IMMEDIATE:
+         res = bld->immediates[reg->SrcRegister.Index][swizzle];
+         assert(res);
+         break;
+
+      case TGSI_FILE_INPUT:
+         res = bld->inputs[reg->SrcRegister.Index][swizzle];
+         assert(res);
+         break;
+
+      case TGSI_FILE_TEMPORARY:
+         res = bld->temps[reg->SrcRegister.Index][swizzle];
+         if(!res) /* no value recorded for this temp channel: return undef, skipping the sign mode */
+            return bld->base.undef;
+         break;
+
+      default: /* unsupported register file; res stays undef if the assert is compiled out */
+         assert( 0 );
+      }
+      break;
+
+   case TGSI_EXTSWIZZLE_ZERO:
+      res = bld->base.zero;
+      break;
+
+   case TGSI_EXTSWIZZLE_ONE:
+      res = bld->base.one;
+      break;
+
+   default: /* unknown swizzle; res stays undef if the assert is compiled out */
+      assert( 0 );
+   }
+
+   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
+   case TGSI_UTIL_SIGN_CLEAR: /* |res| */
+      res = lp_build_abs( &bld->base, res );
+      break;
+
+   case TGSI_UTIL_SIGN_SET: /* -|res| */
+      res = lp_build_abs( &bld->base, res );
+      res = LLVMBuildNeg( bld->base.builder, res, "" );
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE: /* -res */
+      res = LLVMBuildNeg( bld->base.builder, res, "" );
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP: /* res unchanged */
+      break;
+   }
+
+   return res;
+}
+/* FETCH: emit_fetch() for channel CHAN of source register INDEX of INST; FUNC is the build context. */
+#define FETCH( FUNC, INST, INDEX, CHAN )\
+   emit_fetch( FUNC, &(INST).FullSrcRegisters[INDEX], CHAN )
+
+/**
+ * Register store: record value as the new contents of channel chan_index of
+ * destination register reg. No LLVM instructions are emitted here.
+ */
+static void
+emit_store(
+   struct lp_build_tgsi_soa_context *bld,
+   const struct tgsi_full_dst_register *reg,
+   const struct tgsi_full_instruction *inst,
+   unsigned chan_index,
+   LLVMValueRef value)
+{
+   switch( inst->Instruction.Saturate ) {
+   case TGSI_SAT_NONE:
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+      /* FIXME: saturation is not applied, value is stored unclamped -- assert( 0 ); is disabled */
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      assert( 0 );
+      break;
+   }
+
+   switch( reg->DstRegister.File ) {
+   case TGSI_FILE_OUTPUT:
+      bld->outputs[reg->DstRegister.Index][chan_index] = value;
+      break;
+
+   case TGSI_FILE_TEMPORARY: /* NOTE(review): Index is not checked against LP_MAX_TEMPS here */
+      bld->temps[reg->DstRegister.Index][chan_index] = value;
+      break;
+
+   case TGSI_FILE_ADDRESS:
+      /* FIXME: stores to the address register file are not handled */
+      assert(0);
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
+/* STORE: emit_store() of VAL to channel CHAN of destination register INDEX of INST; FUNC is the build context. */
+#define STORE( FUNC, INST, INDEX, CHAN, VAL )\
+   emit_store( FUNC, &(INST).FullDstRegisters[INDEX], &(INST), CHAN, VAL )
+
+
+void PIPE_CDECL
+lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
+                               uint32_t unit,
+                               float *store )
+{
+   struct tgsi_sampler *sampler = samplers[unit];
+   /* Sample through samplers[unit]: store holds the coordinates on entry and is overwritten with the rgba result. */
+#if 0
+   uint j;
+
+   debug_printf("%s sampler: %p (%p) store: %p\n", 
+                __FUNCTION__,
+                sampler, *sampler,
+                store );
+
+   debug_printf("lodbias %f\n", store[12]);
+
+   for (j = 0; j < 4; j++)
+      debug_printf("sample %d texcoord %f %f\n", 
+                   j, 
+                   store[0+j],
+                   store[4+j]);
+#endif
+   /* NOTE(review): offsets 0/4/8 assume 4 floats per coordinate (presumably QUAD_SIZE == 4); the lod bias is hard-coded to 0.0f. */
+   {
+      float rgba[NUM_CHANNELS][QUAD_SIZE];
+      sampler->get_samples(sampler,
+                           &store[0],
+                           &store[4],
+                           &store[8],
+                           0.0f, /*store[12],  lodbias */
+                           rgba);
+      memcpy(store, rgba, sizeof rgba); /* copy the whole rgba array back over store */
+   }
+
+#if 0
+   for (j = 0; j < 4; j++)
+      debug_printf("sample %d result %f %f %f %f\n", 
+                   j, 
+                   store[0+j],
+                   store[4+j],
+                   store[8+j],
+                   store[12+j]);
+#endif
+}
+
+/**
+ * High-level instruction translators.
+ */
+
+static void
+emit_tex( struct lp_build_tgsi_soa_context *bld,
+          const struct tgsi_full_instruction *inst,
+          boolean apply_lodbias,
+          boolean projected)
+{
+   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
+   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; /* sampler unit comes from source register 1 */
+   LLVMValueRef lodbias; /* NOTE(review): assigned below but never passed to the call */
+   LLVMValueRef oow; /* 1/w of source register 0; only set and used when projected */
+   LLVMValueRef store_ptr; /* scratch array shared by the call's inputs and outputs */
+   LLVMValueRef args[3];
+   unsigned count; /* number of texture coordinates written to the scratch array */
+   unsigned i;
+   /* Emit a "fetch_texel" call: coordinates go in through the scratch array, the texel is read back from it. */
+   switch (inst->InstructionExtTexture.Texture) {
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
+      count = 1;
+      break;
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+      count = 2;
+      break;
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+      count = 3;
+      break;
+   default: /* unknown texture target: emit nothing */
+      assert(0);
+      return;
+   }
+
+   if(apply_lodbias)
+      lodbias = FETCH( bld, *inst, 0, 3 ); /* w channel of source register 0 */
+   else
+      lodbias = bld->base.zero;
+   /* scratch array of 4 vectors of the build type */
+   store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
+                                    vec_type,
+                                    LLVMConstInt(LLVMInt32Type(), 4, 0),
+                                    "store");
+
+   if (projected) {
+      oow = FETCH( bld, *inst, 0, 3 ); /* w channel of source register 0 */
+      oow = lp_build_rcp(&bld->base, oow);
+   }
+   /* store coordinate i (times 1/w when projected) into scratch slot i; slots >= count are left unwritten */
+   for (i = 0; i < count; i++) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, store_ptr, &index, 1, "");
+      LLVMValueRef coord;
+
+      coord = FETCH( bld, *inst, 0, i );
+
+      if (projected)
+         coord = lp_build_mul(&bld->base, coord, oow);
+
+      LLVMBuildStore(bld->base.builder, coord, coord_ptr);
+   }
+   /* call arguments: samplers pointer, sampler unit, scratch array */
+   args[0] = bld->samplers_ptr;
+   args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+   args[2] = store_ptr;
+   /* NOTE(review): "fetch_texel" is presumably resolved to lp_build_tgsi_fetch_texel_soa -- confirm where it is bound */
+   lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
+   /* read the result of each write-enabled channel back from scratch slot i and store it to the destination */
+   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, i ) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, store_ptr, &index, 1, "");
+      LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
+      STORE( bld, *inst, 0, i, res );
+   }
+}
+
+
+static void
+emit_kil(
+   struct lp_build_tgsi_soa_context *bld,
+   const struct tgsi_full_src_register *reg )
+{
+#if 0 /* Not implemented: the disabled body below is x86/SSE code generation not yet converted to LLVM; nothing is emitted. */
+   unsigned uniquemask;
+   unsigned unique_count = 0;
+   unsigned chan_index;
+   unsigned i;
+
+   /* This mask stores component bits that were already tested. Note that
+    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
+    * tested. */
+   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+   FOR_EACH_CHANNEL( chan_index ) {
+      unsigned swizzle;
+
+      /* unswizzle channel */
+      swizzle = tgsi_util_get_full_src_register_extswizzle(
+         reg,
+         chan_index );
+
+      /* check if the component has not been already tested */
+      if( !(uniquemask & (1 << swizzle)) ) {
+         uniquemask |= 1 << swizzle;
+
+         /* allocate register */
+         emit_fetch(
+            bld,
+            unique_count++,
+            reg,
+            chan_index );
+      }
+   }
+
+   x86_push(
+      bld,
+      x86_make_reg( file_REG32, reg_AX ) );
+   x86_push(
+      bld,
+      x86_make_reg( file_REG32, reg_DX ) );
+
+   for (i = 0 ; i < unique_count; i++ ) {
+      LLVMValueRef dataXMM = make_xmm(i);
+
+      sse_cmpps(
+         bld,
+         dataXMM,
+         get_temp(
+            TGSI_EXEC_TEMP_00000000_I,
+            TGSI_EXEC_TEMP_00000000_C ),
+         cc_LessThan );
+      
+      if( i == 0 ) {
+         sse_movmskps(
+            bld,
+            x86_make_reg( file_REG32, reg_AX ),
+            dataXMM );
+      }
+      else {
+         sse_movmskps(
+            bld,
+            x86_make_reg( file_REG32, reg_DX ),
+            dataXMM );
+         x86_or(
+            bld,
+            x86_make_reg( file_REG32, reg_AX ),
+            x86_make_reg( file_REG32, reg_DX ) );
+      }
+   }
+
+   x86_or(
+      bld,
+      get_temp(
+         TGSI_EXEC_TEMP_KILMASK_I,
+         TGSI_EXEC_TEMP_KILMASK_C ),
+      x86_make_reg( file_REG32, reg_AX ) );
+
+   x86_pop(
+      bld,
+      x86_make_reg( file_REG32, reg_DX ) );
+   x86_pop(
+      bld,
+      x86_make_reg( file_REG32, reg_AX ) );
+#endif
+}
+
+
+static void
+emit_kilp(
+   struct lp_build_tgsi_soa_context *bld )
+{
+   /* XXX todo / fix me: not implemented -- bld is unused and nothing is emitted */
+}
+
+
+/**
+ * Return TRUE if any source or destination register of inst uses indirect
+ * addressing into the temporary register file, FALSE otherwise.
+ */
+static boolean
+indirect_temp_reference(const struct tgsi_full_instruction *inst)
+{
+   uint i;
+   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { /* scan the source operands */
+      const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
+      if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
+          reg->SrcRegister.Indirect)
+         return TRUE;
+   }
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) { /* scan the destination operands */
+      const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
+      if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
+          reg->DstRegister.Indirect)
+         return TRUE;
+   }
+   return FALSE; /* no indirectly addressed temporary found */
+}
+
+
+static int
+emit_instruction(
+   struct lp_build_tgsi_soa_context *bld,
+   struct tgsi_full_instruction *inst )
+{
+   unsigned chan_index;
+   LLVMValueRef tmp;
+
+   /* we can't handle indirect addressing into temp register file yet */
+   if (indirect_temp_reference(inst))
+      return FALSE;
+
+   switch (inst->Instruction.Opcode) {
+#if 0
+   case TGSI_OPCODE_ARL:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         emit_flr(bld, 0, 0);
+         emit_f2it( bld, 0 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+#endif
+
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_SWZ:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, chan_index, FETCH( bld, *inst, 0, chan_index ) );
+      }
+      break;
+
+#if 0
+   case TGSI_OPCODE_LIT:
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+         emit_tempf(
+            bld,
+            0,
+            TEMP_ONE_I,
+            TEMP_ONE_C);
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+            STORE( bld, *inst, 0, 0, CHAN_X );
+         }
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+            STORE( bld, *inst, 0, 0, CHAN_W );
+         }
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+            tmp = FETCH( bld, *inst, 0, 0, CHAN_X );
+            sse_maxps(
+               bld,
+               make_xmm( 0 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_00000000_I,
+                  TGSI_EXEC_TEMP_00000000_C ) );
+            STORE( bld, *inst, 0, 0, CHAN_Y );
+         }
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+            /* XMM[1] = SrcReg[0].yyyy */
+            FETCH( bld, *inst, 1, 0, CHAN_Y );
+            /* XMM[1] = max(XMM[1], 0) */
+            sse_maxps(
+               bld,
+               make_xmm( 1 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_00000000_I,
+                  TGSI_EXEC_TEMP_00000000_C ) );
+            /* XMM[2] = SrcReg[0].wwww */
+            FETCH( bld, *inst, 2, 0, CHAN_W );
+            /* XMM[2] = min(XMM[2], 128.0) */
+            sse_minps(
+               bld,
+               make_xmm( 2 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_128_I,
+                  TGSI_EXEC_TEMP_128_C ) );
+            /* XMM[2] = max(XMM[2], -128.0) */
+            sse_maxps(
+               bld,
+               make_xmm( 2 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_MINUS_128_I,
+                  TGSI_EXEC_TEMP_MINUS_128_C ) );
+            emit_pow( bld, 3, 1, 1, 2 );
+            FETCH( bld, *inst, 0, 0, CHAN_X );
+            sse_xorps(
+               bld,
+               make_xmm( 2 ),
+               make_xmm( 2 ) );
+            sse_cmpps(
+               bld,
+               make_xmm( 2 ),
+               make_xmm( 0 ),
+               cc_LessThan );
+            sse_andps(
+               bld,
+               make_xmm( 2 ),
+               make_xmm( 1 ) );
+            STORE( bld, *inst, 2, 0, CHAN_Z );
+         }
+      }
+      break;
+#endif
+
+   case TGSI_OPCODE_RCP:
+   /* TGSI_OPCODE_RECIP */
+      tmp = FETCH( bld, *inst, 0, CHAN_X );
+      tmp = lp_build_rcp(&bld->base, tmp);
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, chan_index, tmp );
+      }
+      break;
+
+   case TGSI_OPCODE_RSQ:
+   /* TGSI_OPCODE_RECIPSQRT */
+      tmp = FETCH( bld, *inst, 0, CHAN_X );
+      tmp = lp_build_abs(&bld->base, tmp);
+      tmp = lp_build_rsqrt(&bld->base, tmp);
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, chan_index, tmp );
+      }
+      break;
+
+#if 0
+   case TGSI_OPCODE_EXP:
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         FETCH( bld, *inst, 0, 0, CHAN_X );
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+            emit_MOV( bld, 1, 0 );
+            emit_flr( bld, 2, 1 );
+            /* dst.x = ex2(floor(src.x)) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
+               emit_MOV( bld, 2, 1 );
+               emit_ex2( bld, 3, 2 );
+               STORE( bld, *inst, 2, 0, CHAN_X );
+            }
+            /* dst.y = src.x - floor(src.x) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+               emit_MOV( bld, 2, 0 );
+               emit_sub( bld, 2, 1 );
+               STORE( bld, *inst, 2, 0, CHAN_Y );
+            }
+         }
+         /* dst.z = ex2(src.x) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+            emit_ex2( bld, 3, 0 );
+            STORE( bld, *inst, 0, 0, CHAN_Z );
+         }
+      }
+      /* dst.w = 1.0 */
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C );
+         STORE( bld, *inst, 0, 0, CHAN_W );
+      }
+      break;
+#endif
+
+#if 0
+   case TGSI_OPCODE_LOG:
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         FETCH( bld, *inst, 0, 0, CHAN_X );
+         emit_abs( bld, 0 );
+         emit_MOV( bld, 1, 0 );
+         emit_lg2( bld, 2, 1 );
+         /* dst.z = lg2(abs(src.x)) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+            STORE( bld, *inst, 1, 0, CHAN_Z );
+         }
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+            emit_flr( bld, 2, 1 );
+            /* dst.x = floor(lg2(abs(src.x))) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
+               STORE( bld, *inst, 1, 0, CHAN_X );
+            }
+            /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+               emit_ex2( bld, 2, 1 );
+               emit_rcp( bld, 1, 1 );
+               emit_mul( bld, 0, 1 );
+               STORE( bld, *inst, 0, 0, CHAN_Y );
+            }
+         }
+      }
+      /* dst.w = 1.0 */
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C );
+         STORE( bld, *inst, 0, 0, CHAN_W );
+      }
+      break;
+#endif
+
+   case TGSI_OPCODE_MUL:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         LLVMValueRef a = FETCH( bld, *inst, 0, chan_index );
+         LLVMValueRef b = FETCH( bld, *inst, 1, chan_index );
+         tmp = lp_build_mul(&bld->base, a, b);
+         STORE( bld, *inst, 0, chan_index, tmp );
+      }
+      break;
+
+   case TGSI_OPCODE_ADD:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         LLVMValueRef a = FETCH( bld, *inst, 0, chan_index );
+         LLVMValueRef b = FETCH( bld, *inst, 1, chan_index );
+         tmp = lp_build_add(&bld->base, a, b);
+         STORE( bld, *inst, 0, chan_index, tmp );
+      }
+      break;
+
+#if 0
+   case TGSI_OPCODE_DP3:
+   /* TGSI_OPCODE_DOT3 */
+      FETCH( bld, *inst, 0, 0, CHAN_X );
+      FETCH( bld, *inst, 1, 1, CHAN_X );
+      emit_mul( bld, 0, 1 );
+      FETCH( bld, *inst, 1, 0, CHAN_Y );
+      FETCH( bld, *inst, 2, 1, CHAN_Y );
+      emit_mul( bld, 1, 2 );
+      emit_add( bld, 0, 1 );
+      FETCH( bld, *inst, 1, 0, CHAN_Z );
+      FETCH( bld, *inst, 2, 1, CHAN_Z );
+      emit_mul( bld, 1, 2 );
+      emit_add( bld, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP4:
+   /* TGSI_OPCODE_DOT4 */
+      FETCH( bld, *inst, 0, 0, CHAN_X );
+      FETCH( bld, *inst, 1, 1, CHAN_X );
+      emit_mul( bld, 0, 1 );
+      FETCH( bld, *inst, 1, 0, CHAN_Y );
+      FETCH( bld, *inst, 2, 1, CHAN_Y );
+      emit_mul( bld, 1, 2 );
+      emit_add( bld, 0, 1 );
+      FETCH( bld, *inst, 1, 0, CHAN_Z );
+      FETCH( bld, *inst, 2, 1, CHAN_Z );
+      emit_mul(bld, 1, 2 );
+      emit_add(bld, 0, 1 );
+      FETCH( bld, *inst, 1, 0, CHAN_W );
+      FETCH( bld, *inst, 2, 1, CHAN_W );
+      emit_mul( bld, 1, 2 );
+      emit_add( bld, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DST:
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         emit_tempf(
+            bld,
+            0,
+            TEMP_ONE_I,
+            TEMP_ONE_C );
+         STORE( bld, *inst, 0, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         FETCH( bld, *inst, 0, 0, CHAN_Y );
+         FETCH( bld, *inst, 1, 1, CHAN_Y );
+         emit_mul( bld, 0, 1 );
+         STORE( bld, *inst, 0, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+         FETCH( bld, *inst, 0, 0, CHAN_Z );
+         STORE( bld, *inst, 0, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+         FETCH( bld, *inst, 0, 1, CHAN_W );
+         STORE( bld, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MIN:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         FETCH( bld, *inst, 1, 1, chan_index );
+         sse_minps(
+            bld,
+            make_xmm( 0 ),
+            make_xmm( 1 ) );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MAX:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         FETCH( bld, *inst, 1, 1, chan_index );
+         sse_maxps(
+            bld,
+            make_xmm( 0 ),
+            make_xmm( 1 ) );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLT:
+   /* TGSI_OPCODE_SETLT */
+      emit_setcc( bld, inst, cc_LessThan );
+      break;
+
+   case TGSI_OPCODE_SGE:
+   /* TGSI_OPCODE_SETGE */
+      emit_setcc( bld, inst, cc_NotLessThan );
+      break;
+
+   case TGSI_OPCODE_MAD:
+   /* TGSI_OPCODE_MADD */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         FETCH( bld, *inst, 1, 1, chan_index );
+         FETCH( bld, *inst, 2, 2, chan_index );
+         emit_mul( bld, 0, 1 );
+         emit_add( bld, 0, 2 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SUB:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         FETCH( bld, *inst, 1, 1, chan_index );
+         emit_sub( bld, 0, 1 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LRP:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         FETCH( bld, *inst, 1, 1, chan_index );
+         FETCH( bld, *inst, 2, 2, chan_index );
+         emit_sub( bld, 1, 2 );
+         emit_mul( bld, 0, 1 );
+         emit_add( bld, 0, 2 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CND0:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DP2A:
+      FETCH( bld, *inst, 0, 0, CHAN_X );  /* xmm0 = src[0].x */
+      FETCH( bld, *inst, 1, 1, CHAN_X );  /* xmm1 = src[1].x */
+      emit_mul( bld, 0, 1 );              /* xmm0 = xmm0 * xmm1 */
+      FETCH( bld, *inst, 1, 0, CHAN_Y );  /* xmm1 = src[0].y */
+      FETCH( bld, *inst, 2, 1, CHAN_Y );  /* xmm2 = src[1].y */
+      emit_mul( bld, 1, 2 );              /* xmm1 = xmm1 * xmm2 */
+      emit_add( bld, 0, 1 );              /* xmm0 = xmm0 + xmm1 */
+      FETCH( bld, *inst, 1, 2, CHAN_X );  /* xmm1 = src[2].x */
+      emit_add( bld, 0, 1 );              /* xmm0 = xmm0 + xmm1 */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );  /* dest[ch] = xmm0 */
+      }
+      break;
+
+   case TGSI_OPCODE_FRC:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         emit_frc( bld, 0, 0 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CLAMP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_FLR:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         emit_flr( bld, 0, 0 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ROUND:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         emit_rnd( bld, 0, 0 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EX2:
+      FETCH( bld, *inst, 0, 0, CHAN_X );
+      emit_ex2( bld, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LG2:
+      FETCH( bld, *inst, 0, 0, CHAN_X );
+      emit_lg2( bld, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_POW:
+      FETCH( bld, *inst, 0, 0, CHAN_X );
+      FETCH( bld, *inst, 1, 1, CHAN_X );
+      emit_pow( bld, 0, 0, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_XPD:
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         FETCH( bld, *inst, 1, 1, CHAN_Z );
+         FETCH( bld, *inst, 3, 0, CHAN_Z );
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         FETCH( bld, *inst, 0, 0, CHAN_Y );
+         FETCH( bld, *inst, 4, 1, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         emit_MOV( bld, 2, 0 );
+         emit_mul( bld, 2, 1 );
+         emit_MOV( bld, 5, 3 );
+         emit_mul( bld, 5, 4 );
+         emit_sub( bld, 2, 5 );
+         STORE( bld, *inst, 2, 0, CHAN_X );
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         FETCH( bld, *inst, 2, 1, CHAN_X );
+         FETCH( bld, *inst, 5, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         emit_mul( bld, 3, 2 );
+         emit_mul( bld, 1, 5 );
+         emit_sub( bld, 3, 1 );
+         STORE( bld, *inst, 3, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+         emit_mul( bld, 5, 4 );
+         emit_mul( bld, 0, 2 );
+         emit_sub( bld, 5, 0 );
+         STORE( bld, *inst, 5, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+	 emit_tempf(
+	    bld,
+	    0,
+	    TEMP_ONE_I,
+	    TEMP_ONE_C );
+         STORE( bld, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_ABS:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         emit_abs( bld, 0) ;
+
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RCC:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DPH:
+      FETCH( bld, *inst, 0, 0, CHAN_X );
+      FETCH( bld, *inst, 1, 1, CHAN_X );
+      emit_mul( bld, 0, 1 );
+      FETCH( bld, *inst, 1, 0, CHAN_Y );
+      FETCH( bld, *inst, 2, 1, CHAN_Y );
+      emit_mul( bld, 1, 2 );
+      emit_add( bld, 0, 1 );
+      FETCH( bld, *inst, 1, 0, CHAN_Z );
+      FETCH( bld, *inst, 2, 1, CHAN_Z );
+      emit_mul( bld, 1, 2 );
+      emit_add( bld, 0, 1 );
+      FETCH( bld, *inst, 1, 1, CHAN_W );
+      emit_add( bld, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_COS:
+      FETCH( bld, *inst, 0, 0, CHAN_X );
+      emit_cos( bld, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DDY:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_KILP:
+      /* predicated kill */
+      emit_kilp( bld );
+      return 0; /* XXX fix me */
+      break;
+
+   case TGSI_OPCODE_KIL:
+      /* conditional kill */
+      emit_kil( bld, &inst->FullSrcRegisters[0] );
+      break;
+
+   case TGSI_OPCODE_PK2H:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK2US:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK4B:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK4UB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_RFL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SEQ:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SFL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SGT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SIN:
+      FETCH( bld, *inst, 0, 0, CHAN_X );
+      emit_sin( bld, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SNE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_STR:
+      return 0;
+      break;
+#endif
+
+   case TGSI_OPCODE_TEX:
+      emit_tex( bld, inst, FALSE, FALSE );
+      break;
+
+#if 0
+   case TGSI_OPCODE_TXD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP2H:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP2US:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP4B:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP4UB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_X2D:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ARA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ARR:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         emit_rnd( bld, 0, 0 );
+         emit_f2it( bld, 0 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_BRA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CAL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_RET:
+      emit_ret( bld );
+      break;
+#endif
+
+   case TGSI_OPCODE_END:
+      break;
+
+#if 0
+   case TGSI_OPCODE_SSG:
+   /* TGSI_OPCODE_SGN */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         emit_sgn( bld, 0, 0 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CMP:
+      emit_cmp (bld, inst);
+      break;
+
+   case TGSI_OPCODE_SCS:
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         FETCH( bld, *inst, 0, 0, CHAN_X );
+         emit_cos( bld, 0, 0 );
+         STORE( bld, *inst, 0, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         FETCH( bld, *inst, 0, 0, CHAN_X );
+         emit_sin( bld, 0, 0 );
+         STORE( bld, *inst, 0, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+	 emit_tempf(
+	    bld,
+	    0,
+	    TGSI_EXEC_TEMP_00000000_I,
+	    TGSI_EXEC_TEMP_00000000_C );
+         STORE( bld, *inst, 0, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+	 emit_tempf(
+	    bld,
+	    0,
+	    TEMP_ONE_I,
+	    TEMP_ONE_C );
+         STORE( bld, *inst, 0, 0, CHAN_W );
+      }
+      break;
+#endif
+
+   case TGSI_OPCODE_TXB:
+      emit_tex( bld, inst, TRUE, FALSE );
+      break;
+
+#if 0
+   case TGSI_OPCODE_NRM:
+      /* fall-through */
+   case TGSI_OPCODE_NRM4:
+      /* 3 or 4-component normalization */
+      {
+         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
+
+         if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+             IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+             IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) ||
+             (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) {
+
+            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
+
+            /* xmm4 = src.x */
+            /* xmm0 = src.x * src.x */
+            FETCH(bld, *inst, 0, 0, CHAN_X);
+            if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+               emit_MOV(bld, 4, 0);
+            }
+            emit_mul(bld, 0, 0);
+
+            /* xmm5 = src.y */
+            /* xmm0 = xmm0 + src.y * src.y */
+            FETCH(bld, *inst, 1, 0, CHAN_Y);
+            if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+               emit_MOV(bld, 5, 1);
+            }
+            emit_mul(bld, 1, 1);
+            emit_add(bld, 0, 1);
+
+            /* xmm6 = src.z */
+            /* xmm0 = xmm0 + src.z * src.z */
+            FETCH(bld, *inst, 1, 0, CHAN_Z);
+            if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+               emit_MOV(bld, 6, 1);
+            }
+            emit_mul(bld, 1, 1);
+            emit_add(bld, 0, 1);
+
+            if (dims == 4) {
+               /* xmm7 = src.w */
+               /* xmm0 = xmm0 + src.w * src.w */
+               FETCH(bld, *inst, 1, 0, CHAN_W);
+               if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+                  emit_MOV(bld, 7, 1);
+               }
+               emit_mul(bld, 1, 1);
+               emit_add(bld, 0, 1);
+            }
+
+            /* xmm1 = 1 / sqrt(xmm0) */
+            emit_rsqrt(bld, 1, 0);
+
+            /* dst.x = xmm1 * src.x */
+            if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+               emit_mul(bld, 4, 1);
+               STORE(bld, *inst, 4, 0, CHAN_X);
+            }
+
+            /* dst.y = xmm1 * src.y */
+            if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+               emit_mul(bld, 5, 1);
+               STORE(bld, *inst, 5, 0, CHAN_Y);
+            }
+
+            /* dst.z = xmm1 * src.z */
+            if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+               emit_mul(bld, 6, 1);
+               STORE(bld, *inst, 6, 0, CHAN_Z);
+            }
+
+            /* dst.w = xmm1 * src.w */
+            if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) {
+               emit_mul(bld, 7, 1);
+               STORE(bld, *inst, 7, 0, CHAN_W);
+            }
+         }
+
+         /* dst0.w = 1.0 */
+         if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) {
+            emit_tempf(bld, 0, TEMP_ONE_I, TEMP_ONE_C);
+            STORE(bld, *inst, 0, 0, CHAN_W);
+         }
+      }
+      break;
+
+   case TGSI_OPCODE_DIV:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DP2:
+      FETCH( bld, *inst, 0, 0, CHAN_X );  /* xmm0 = src[0].x */
+      FETCH( bld, *inst, 1, 1, CHAN_X );  /* xmm1 = src[1].x */
+      emit_mul( bld, 0, 1 );              /* xmm0 = xmm0 * xmm1 */
+      FETCH( bld, *inst, 1, 0, CHAN_Y );  /* xmm1 = src[0].y */
+      FETCH( bld, *inst, 2, 1, CHAN_Y );  /* xmm2 = src[1].y */
+      emit_mul( bld, 1, 2 );              /* xmm1 = xmm1 * xmm2 */
+      emit_add( bld, 0, 1 );              /* xmm0 = xmm0 + xmm1 */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( bld, *inst, 0, 0, chan_index );  /* dest[ch] = xmm0 */
+      }
+      break;
+#endif
+
+   case TGSI_OPCODE_TXL:
+      emit_tex( bld, inst, TRUE, FALSE );
+      break;
+
+   case TGSI_OPCODE_TXP:
+      emit_tex( bld, inst, FALSE, TRUE );
+      break;
+      
+#if 0
+   case TGSI_OPCODE_BRK:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_IF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_LOOP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_REP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ELSE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDIF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDLOOP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDREP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PUSHA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_POPA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CEIL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_I2F:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NOT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TRUNC:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( bld, *inst, 0, 0, chan_index );
+         emit_f2it( bld, 0 );
+         emit_i2f( bld, 0 );
+         STORE( bld, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SHL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SHR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_AND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_OR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_MOD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_XOR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SAD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXQ:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CONT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_EMIT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDPRIM:
+      return 0;
+      break;
+#endif
+
+   default:
+      return 0;
+   }
+   
+   return 1;
+}
+
+static void
+emit_declaration(
+   struct lp_build_tgsi_soa_context *bld,
+   struct tgsi_full_declaration *decl )
+{
+#if 0 /* Whole body is compiled out: the function is currently a no-op and leaves bld->inputs untouched. */
+   if( decl->Declaration.File == TGSI_FILE_INPUT ) {   /* only input declarations are handled */
+      unsigned first, last, mask;
+      unsigned i, j;
+      LLVMValueRef tmp;
+
+      first = decl->DeclarationRange.First;   /* first register index of the declared range */
+      last = decl->DeclarationRange.Last;   /* last register index, inclusive (see loop below) */
+      mask = decl->Declaration.UsageMask;   /* one bit per channel, tested as (1 << j) */
+
+      for( i = first; i <= last; i++ ) {
+         for( j = 0; j < NUM_CHANNELS; j++ ) {
+            if( mask & (1 << j) ) {   /* skip channels not named in the usage mask */
+               switch( decl->Declaration.Interpolate ) {
+               case TGSI_INTERPOLATE_CONSTANT:   /* input = a0 */
+                  bld->inputs[i][j] = bld->interp_coefs[i].a0[j];
+                  break;
+
+               case TGSI_INTERPOLATE_LINEAR:   /* input = a0 + pos[0]*dadx + pos[1]*dady */
+                  tmp = bld->interp_coefs[i].a0[j];
+                  tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j]));
+                  tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j]));
+                  bld->inputs[i][j] = tmp;
+                  break;
+
+               case TGSI_INTERPOLATE_PERSPECTIVE:   /* same sum as the linear case, then divided by pos[3] */
+                  tmp = bld->interp_coefs[i].a0[j];
+                  tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j]));
+                  tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j]));
+                  tmp = lp_build_div(&bld->base, tmp, bld->pos[3]);
+                  bld->inputs[i][j] = tmp;
+                  break;
+
+               default:   /* any other interpolation mode is treated as a programming error */
+                  assert( 0 );
+		  break;
+               }
+            }
+         }
+      }
+   }
+#endif
+}
+
+/**
+ * Translate a TGSI shader into LLVM IR.
+ *
+ * Fragment shader declarations go to emit_declaration(), instructions to
+ * emit_instruction(); failures are only logged since nothing is returned.
+ * \param builder  LLVM builder the build context is initialised with
+ * \param tokens  the TGSI input shader
+ * \param type  type used for the build context and for the immediates
+ * \param inputs,outputs  stored in the context; each row holds four values
+ * \param consts_ptr,samplers_ptr  stored in the build context as given
+ */
+void
+lp_build_tgsi_soa(LLVMBuilderRef builder,
+                  const struct tgsi_token *tokens,
+                  union lp_type type,
+                  LLVMValueRef (*inputs)[4],
+                  LLVMValueRef consts_ptr,
+                  LLVMValueRef (*outputs)[4],
+                  LLVMValueRef samplers_ptr)
+{
+   struct lp_build_tgsi_soa_context bld;
+   struct tgsi_parse_context parse;
+   uint num_immediates = 0;
+   unsigned i, size;
+
+   /* Setup build context */
+   memset(&bld, 0, sizeof bld);
+   lp_build_context_init(&bld.base, builder, type);
+   bld.inputs = inputs;
+   bld.outputs = outputs;
+   bld.consts_ptr = consts_ptr;
+   bld.samplers_ptr = samplers_ptr;
+
+   tgsi_parse_init( &parse, tokens );
+
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT)
+            emit_declaration( &bld, &parse.FullToken.FullDeclaration );
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
+            debug_printf("failed to translate tgsi opcode %d to LLVM (%s)\n",
+                         parse.FullToken.FullInstruction.Instruction.Opcode,
+                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
+                         "vertex shader" : "fragment shader");
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* copy the immediate values into the next immediates[] slot */
+         size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+         assert(size <= 4);
+         assert(num_immediates < LP_MAX_IMMEDIATES);
+         if (num_immediates >= LP_MAX_IMMEDIATES)
+            break; /* asserts vanish in release builds; never overrun immediates[] */
+         for( i = 0; i < 4; ++i )   /* bounded by 4 even if size is bogus */
+            bld.immediates[num_immediates][i] = i < size ?
+               lp_build_const_uni(type, parse.FullToken.FullImmediate.u[i].Float) :
+               bld.base.undef;
+         num_immediates++;
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   tgsi_parse_free( &parse );
+}
+
-- 
cgit v1.2.3


From 73af91e938eb27b001404f11195fb06ff9b08903 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 14 Aug 2009 10:27:32 +0100
Subject: llvmpipe: Eliminate non-LLVM fs execution paths.

---
 src/gallium/drivers/llvmpipe/Makefile      |   3 -
 src/gallium/drivers/llvmpipe/SConscript    |   3 -
 src/gallium/drivers/llvmpipe/lp_context.c  |   8 -
 src/gallium/drivers/llvmpipe/lp_context.h  |   4 +-
 src/gallium/drivers/llvmpipe/lp_fs.h       |  54 -----
 src/gallium/drivers/llvmpipe/lp_fs_exec.c  | 162 -------------
 src/gallium/drivers/llvmpipe/lp_fs_llvm.c  | 360 -----------------------------
 src/gallium/drivers/llvmpipe/lp_fs_sse.c   | 169 --------------
 src/gallium/drivers/llvmpipe/lp_quad_fs.c  | 119 +++++++++-
 src/gallium/drivers/llvmpipe/lp_state.h    |  31 ++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 178 ++++++++++++--
 11 files changed, 287 insertions(+), 804 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_fs.h
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_fs_exec.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_fs_llvm.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_fs_sse.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 1b6cd5ed85..236062a5f3 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -4,9 +4,6 @@ include $(TOP)/configs/current
 LIBNAME = llvmpipe
 
 C_SOURCES = \
-	lp_fs_exec.c \
-	lp_fs_sse.c \
-	lp_fs_llvm.c \
 	lp_bld_arit.c \
 	lp_bld_pack.c \
 	lp_bld_unpack.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 705a8bdfd4..154964bf7a 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -7,9 +7,6 @@ env.ParseConfig('llvm-config --cppflags')
 llvmpipe = env.ConvenienceLibrary(
 	target = 'llvmpipe',
 	source = [
-		'lp_fs_exec.c',
-		'lp_fs_sse.c',
-		'lp_fs_llvm.c',
 		'lp_bld_arit.c',
 		'lp_bld_blend_aos.c',
 		'lp_bld_blend_soa.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index a30db444d4..66d0cf7759 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -148,14 +148,6 @@ llvmpipe_create( struct pipe_screen *screen )
 
    util_init_math();
 
-#ifdef PIPE_ARCH_X86
-   llvmpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE );
-#else
-   llvmpipe->use_sse = FALSE;
-#endif
-
-   llvmpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE );
-
    llvmpipe->pipe.winsys = screen->winsys;
    llvmpipe->pipe.screen = screen;
    llvmpipe->pipe.destroy = llvmpipe_destroy;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 7ac83c1e7c..6cda5e602f 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -57,7 +57,7 @@ struct llvmpipe_context {
    const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
    const struct pipe_depth_stencil_alpha_state *depth_stencil;
    const struct pipe_rasterizer_state *rasterizer;
-   const struct lp_fragment_shader *fs;
+   struct lp_fragment_shader *fs;
    const struct lp_vertex_shader *vs;
 
    /** Other rendering state */
@@ -145,8 +145,6 @@ struct llvmpipe_context {
    unsigned tex_timestamp;
    struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
 
-   unsigned use_sse : 1;
-   unsigned dump_fs : 1;
    unsigned no_rast : 1;
 };
 
diff --git a/src/gallium/drivers/llvmpipe/lp_fs.h b/src/gallium/drivers/llvmpipe/lp_fs.h
deleted file mode 100644
index 505e21138f..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_fs.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/* Authors:  Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#ifndef LP_FS_H
-#define LP_FS_H
-
-struct lp_fragment_shader *
-llvmpipe_create_fs_exec(struct llvmpipe_context *llvmpipe,
-		       const struct pipe_shader_state *templ);
-
-struct lp_fragment_shader *
-llvmpipe_create_fs_sse(struct llvmpipe_context *llvmpipe,
-		       const struct pipe_shader_state *templ);
-
-struct lp_fragment_shader *
-llvmpipe_create_fs_llvm(struct llvmpipe_context *llvmpipe,
-			const struct pipe_shader_state *templ);
-
-struct tgsi_interp_coef;
-struct tgsi_exec_vector;
-
-void lp_setup_pos_vector(const struct tgsi_interp_coef *coef,
-			 float x, float y,
-			 struct tgsi_exec_vector *quadpos);
-
-
-#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_fs_exec.c b/src/gallium/drivers/llvmpipe/lp_fs_exec.c
deleted file mode 100644
index ea85697fe2..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_fs_exec.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Execute fragment shader using the TGSI interpreter.
- */
-
-#include "lp_context.h"
-#include "lp_state.h"
-#include "lp_fs.h"
-#include "lp_quad.h"
-
-#include "pipe/p_state.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "tgsi/tgsi_exec.h"
-#include "tgsi/tgsi_parse.h"
-
-
-/**
- * Subclass of lp_fragment_shader
- */
-struct lp_exec_fragment_shader
-{
-   struct lp_fragment_shader base;
-   /* No other members for now */
-};
-
-
-/** cast wrapper */
-static INLINE struct lp_exec_fragment_shader *
-lp_exec_fragment_shader(const struct lp_fragment_shader *base)
-{
-   return (struct lp_exec_fragment_shader *) base;
-}
-
-
-/**
- * Compute quad X,Y,Z,W for the four fragments in a quad.
- *
- * This should really be part of the compiled shader.
- */
-void
-lp_setup_pos_vector(const struct tgsi_interp_coef *coef,
-		    float x, float y,
-		    struct tgsi_exec_vector *quadpos)
-{
-   uint chan;
-   /* do X */
-   quadpos->xyzw[0].f[0] = x;
-   quadpos->xyzw[0].f[1] = x + 1;
-   quadpos->xyzw[0].f[2] = x;
-   quadpos->xyzw[0].f[3] = x + 1;
-
-   /* do Y */
-   quadpos->xyzw[1].f[0] = y;
-   quadpos->xyzw[1].f[1] = y;
-   quadpos->xyzw[1].f[2] = y + 1;
-   quadpos->xyzw[1].f[3] = y + 1;
-
-   /* do Z and W for all fragments in the quad */
-   for (chan = 2; chan < 4; chan++) {
-      const float dadx = coef->dadx[chan];
-      const float dady = coef->dady[chan];
-      const float a0 = coef->a0[chan] + dadx * x + dady * y;
-      quadpos->xyzw[chan].f[0] = a0;
-      quadpos->xyzw[chan].f[1] = a0 + dadx;
-      quadpos->xyzw[chan].f[2] = a0 + dady;
-      quadpos->xyzw[chan].f[3] = a0 + dadx + dady;
-   }
-}
-
-
-static void
-exec_prepare( const struct lp_fragment_shader *base,
-	      struct tgsi_exec_machine *machine,
-	      struct tgsi_sampler **samplers )
-{
-   /*
-    * Bind tokens/shader to the interpreter's machine state.
-    * Avoid redundant binding.
-    */
-   if (machine->Tokens != base->shader.tokens) {
-      tgsi_exec_machine_bind_shader( machine,
-                                     base->shader.tokens,
-                                     PIPE_MAX_SAMPLERS,
-                                     samplers );
-   }
-}
-
-
-/* TODO: hide the machine struct in here somewhere, remove from this
- * interface:
- */
-static unsigned 
-exec_run( const struct lp_fragment_shader *base,
-	  struct tgsi_exec_machine *machine,
-	  struct quad_header *quad )
-{
-   /* Compute X, Y, Z, W vals for this quad */
-   lp_setup_pos_vector(quad->posCoef, 
-		       (float)quad->input.x0, (float)quad->input.y0, 
-		       &machine->QuadPos);
-   
-   return tgsi_exec_machine_run( machine );
-}
-
-
-static void 
-exec_delete( struct lp_fragment_shader *base )
-{
-   FREE((void *) base->shader.tokens);
-   FREE(base);
-}
-
-
-struct lp_fragment_shader *
-llvmpipe_create_fs_exec(struct llvmpipe_context *llvmpipe,
-			const struct pipe_shader_state *templ)
-{
-   struct lp_exec_fragment_shader *shader;
-
-   /* Decide whether we'll be codegenerating this shader and if so do
-    * that now.
-    */
-
-   shader = CALLOC_STRUCT(lp_exec_fragment_shader);
-   if (!shader)
-      return NULL;
-
-   /* we need to keep a local copy of the tokens */
-   shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens);
-   shader->base.prepare = exec_prepare;
-   shader->base.run = exec_run;
-   shader->base.delete = exec_delete;
-
-   return &shader->base;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c b/src/gallium/drivers/llvmpipe/lp_fs_llvm.c
deleted file mode 100644
index ef1c8c32c2..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2009 VMware, Inc.
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Execute fragment shader using LLVM code generation.
- */
-
-
-#include "pipe/p_state.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_exec.h"
-#include "tgsi/tgsi_dump.h"
-#include "lp_bld_type.h"
-#include "lp_bld_tgsi.h"
-#include "lp_screen.h"
-#include "lp_context.h"
-#include "lp_state.h"
-#include "lp_fs.h"
-#include "lp_quad.h"
-
-
-typedef void
-(*lp_shader_fs_func)(void *pos,
-                     void *a0,
-                     void *dadx,
-                     void *dady,
-                     void *consts,
-                     void *outputs,
-                     struct tgsi_sampler **samplers);
-
-
-/**
- * Subclass of lp_fragment_shader
- */
-struct lp_llvm_fragment_shader
-{
-   struct lp_fragment_shader base;
-
-   struct llvmpipe_screen *screen;
-
-   LLVMValueRef function;
-
-   lp_shader_fs_func jit_function;
-
-   union tgsi_exec_channel ALIGN16_ATTRIB pos[NUM_CHANNELS];
-   union tgsi_exec_channel ALIGN16_ATTRIB a0[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-   union tgsi_exec_channel ALIGN16_ATTRIB dadx[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-   union tgsi_exec_channel ALIGN16_ATTRIB dady[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-
-   uint32_t magic;
-};
-
-
-/** cast wrapper */
-static INLINE struct lp_llvm_fragment_shader *
-lp_llvm_fragment_shader(const struct lp_fragment_shader *base)
-{
-   return (struct lp_llvm_fragment_shader *) base;
-}
-
-
-static void
-shader_generate(struct llvmpipe_screen *screen,
-                struct lp_llvm_fragment_shader *shader)
-{
-   const struct tgsi_token *tokens = shader->base.shader.tokens;
-   union lp_type type;
-   LLVMTypeRef elem_type;
-   LLVMTypeRef vec_type;
-   LLVMTypeRef arg_types[7];
-   LLVMTypeRef func_type;
-   LLVMValueRef pos_ptr;
-   LLVMValueRef a0_ptr;
-   LLVMValueRef dadx_ptr;
-   LLVMValueRef dady_ptr;
-   LLVMValueRef consts_ptr;
-   LLVMValueRef outputs_ptr;
-   LLVMValueRef samplers_ptr;
-   LLVMBasicBlockRef block;
-   LLVMBuilderRef builder;
-   LLVMValueRef pos[NUM_CHANNELS];
-   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
-   char name[32];
-   unsigned i, j;
-
-   type.value = 0;
-   type.floating = TRUE;
-   type.sign = TRUE;
-   type.norm = FALSE;
-   type.width = 32;
-   type.length = 4;
-
-   elem_type = lp_build_elem_type(type);
-   vec_type = lp_build_vec_type(type);
-
-   arg_types[0] = LLVMPointerType(vec_type, 0);        /* pos */
-   arg_types[1] = LLVMPointerType(vec_type, 0);        /* a0 */
-   arg_types[2] = LLVMPointerType(vec_type, 0);        /* dadx */
-   arg_types[3] = LLVMPointerType(vec_type, 0);        /* dady */
-   arg_types[4] = LLVMPointerType(elem_type, 0);       /* consts */
-   arg_types[5] = LLVMPointerType(vec_type, 0);        /* outputs */
-   arg_types[6] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
-
-   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
-
-   shader->function = LLVMAddFunction(screen->module, "shader", func_type);
-   LLVMSetFunctionCallConv(shader->function, LLVMCCallConv);
-
-   pos_ptr = LLVMGetParam(shader->function, 0);
-   a0_ptr = LLVMGetParam(shader->function, 1);
-   dadx_ptr = LLVMGetParam(shader->function, 2);
-   dady_ptr = LLVMGetParam(shader->function, 3);
-   consts_ptr = LLVMGetParam(shader->function, 4);
-   outputs_ptr = LLVMGetParam(shader->function, 5);
-   samplers_ptr = LLVMGetParam(shader->function, 6);
-
-   LLVMSetValueName(pos_ptr, "pos");
-   LLVMSetValueName(a0_ptr, "a0");
-   LLVMSetValueName(dadx_ptr, "dadx");
-   LLVMSetValueName(dady_ptr, "dady");
-   LLVMSetValueName(consts_ptr, "consts");
-   LLVMSetValueName(outputs_ptr, "outputs");
-   LLVMSetValueName(samplers_ptr, "samplers");
-
-   block = LLVMAppendBasicBlock(shader->function, "entry");
-   builder = LLVMCreateBuilder();
-   LLVMPositionBuilderAtEnd(builder, block);
-
-   for(j = 0; j < NUM_CHANNELS; ++j) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), j, 0);
-      util_snprintf(name, sizeof name, "pos.%c", "xyzw"[j]);
-      pos[j] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, pos_ptr, &index, 1, ""), name);
-   }
-
-   memset(outputs, 0, sizeof outputs);
-
-   lp_build_tgsi_soa(builder, tokens, type,
-                     pos, a0_ptr, dadx_ptr, dady_ptr,
-                     consts_ptr, outputs, samplers_ptr);
-
-   for(i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
-      for(j = 0; j < NUM_CHANNELS; ++j) {
-         if(outputs[i][j]) {
-            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*NUM_CHANNELS + j, 0);
-            util_snprintf(name, sizeof name, "output%u.%c", i, "xyzw"[j]);
-            LLVMBuildStore(builder, outputs[i][j], LLVMBuildGEP(builder, outputs_ptr, &index, 1, name));
-         }
-      }
-   }
-
-   LLVMBuildRetVoid(builder);;
-
-   LLVMDisposeBuilder(builder);
-}
-
-
-
-static void
-fs_llvm_prepare( const struct lp_fragment_shader *base,
-		struct tgsi_exec_machine *machine,
-		struct tgsi_sampler **samplers )
-{
-   /*
-    * Bind tokens/shader to the interpreter's machine state.
-    * Avoid redundant binding.
-    */
-   if (machine->Tokens != base->shader.tokens) {
-      tgsi_exec_machine_bind_shader( machine,
-                                     base->shader.tokens,
-                                     PIPE_MAX_SAMPLERS,
-                                     samplers );
-   }
-}
-
-
-
-static void
-setup_pos_vector(struct lp_llvm_fragment_shader *shader,
-                 const struct tgsi_interp_coef *coef,
-                 float x, float y)
-{
-   uint chan;
-
-   /* do X */
-   shader->pos[0].f[0] = x;
-   shader->pos[0].f[1] = x + 1;
-   shader->pos[0].f[2] = x;
-   shader->pos[0].f[3] = x + 1;
-
-   /* do Y */
-   shader->pos[1].f[0] = y;
-   shader->pos[1].f[1] = y;
-   shader->pos[1].f[2] = y + 1;
-   shader->pos[1].f[3] = y + 1;
-
-   /* do Z and W for all fragments in the quad */
-   for (chan = 2; chan < 4; chan++) {
-      const float dadx = coef->dadx[chan];
-      const float dady = coef->dady[chan];
-      const float a0 = coef->a0[chan] + dadx * x + dady * y;
-      shader->pos[chan].f[0] = a0;
-      shader->pos[chan].f[1] = a0 + dadx;
-      shader->pos[chan].f[2] = a0 + dady;
-      shader->pos[chan].f[3] = a0 + dadx + dady;
-   }
-}
-
-
-static void
-setup_coef_vector(struct lp_llvm_fragment_shader *shader,
-                  const struct tgsi_interp_coef *coef)
-{
-   unsigned attrib, chan, i;
-
-   for (attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; ++attrib) {
-      for (chan = 0; chan < NUM_CHANNELS; ++chan) {
-         for( i = 0; i < QUAD_SIZE; ++i ) {
-            shader->a0[attrib][chan].f[i] = coef[attrib].a0[chan];
-            shader->dadx[attrib][chan].f[i] = coef[attrib].dadx[chan];
-            shader->dady[attrib][chan].f[i] = coef[attrib].dady[chan];
-         }
-      }
-   }
-}
-
-
-/* TODO: codegenerate the whole run function, skip this wrapper.
- * TODO: break dependency on tgsi_exec_machine struct
- * TODO: push Position calculation into the generated shader
- * TODO: process >1 quad at a time
- */
-static unsigned 
-fs_llvm_run( const struct lp_fragment_shader *base,
-	    struct tgsi_exec_machine *machine,
-	    struct quad_header *quad )
-{
-   struct lp_llvm_fragment_shader *shader = lp_llvm_fragment_shader(base);
-   unsigned mask;
-
-   /* Compute X, Y, Z, W vals for this quad */
-   setup_pos_vector(shader,
-                    quad->posCoef,
-                   (float)quad->input.x0, (float)quad->input.y0);
-
-   setup_coef_vector(shader,
-                     quad->coef);
-
-   /* init kill mask */
-   tgsi_set_kill_mask(machine, 0x0);
-   tgsi_set_exec_mask(machine, 1, 1, 1, 1);
-
-   memset(machine->Outputs, 0, sizeof machine->Outputs);
-
-   shader->jit_function( shader->pos,
-                         shader->a0, shader->dadx, shader->dady,
-                         machine->Consts,
-                         machine->Outputs,
-                         machine->Samplers);
-
-   /* FIXME */
-   mask = ~0;
-
-   return mask;
-}
-
-
-static void 
-fs_llvm_delete( struct lp_fragment_shader *base )
-{
-   struct lp_llvm_fragment_shader *shader = lp_llvm_fragment_shader(base);
-   struct llvmpipe_screen *screen = shader->screen;
-
-   if(shader->function) {
-      if(shader->jit_function)
-         LLVMFreeMachineCodeForFunction(screen->engine, shader->function);
-      LLVMDeleteFunction(shader->function);
-   }
-
-   FREE((void *) shader->base.shader.tokens);
-   FREE(shader);
-}
-
-
-struct lp_fragment_shader *
-llvmpipe_create_fs_llvm(struct llvmpipe_context *llvmpipe,
-                        const struct pipe_shader_state *templ)
-{
-   struct llvmpipe_screen *screen = llvmpipe_screen(llvmpipe->pipe.screen);
-   struct lp_llvm_fragment_shader *shader;
-   LLVMValueRef fetch_texel;
-
-   shader = CALLOC_STRUCT(lp_llvm_fragment_shader);
-   if (!shader)
-      return NULL;
-
-   /* we need to keep a local copy of the tokens */
-   shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens);
-   shader->base.prepare = fs_llvm_prepare;
-   shader->base.run = fs_llvm_run;
-   shader->base.delete = fs_llvm_delete;
-
-   shader->screen = screen;
-
-   tgsi_dump(templ->tokens, 0);
-
-   shader_generate(screen, shader);
-
-   LLVMRunFunctionPassManager(screen->pass, shader->function);
-
-#if 1
-   LLVMDumpValue(shader->function);
-   debug_printf("\n");
-#endif
-
-   if(LLVMVerifyFunction(shader->function, LLVMPrintMessageAction)) {
-      LLVMDumpValue(shader->function);
-      abort();
-   }
-
-   fetch_texel = LLVMGetNamedFunction(screen->module, "fetch_texel");
-   if(fetch_texel) {
-      static boolean first_time = TRUE;
-      if(first_time) {
-         LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_build_tgsi_fetch_texel_soa);
-         first_time = FALSE;
-      }
-   }
-
-   shader->jit_function = (lp_shader_fs_func)LLVMGetPointerToGlobal(screen->engine, shader->function);
-
-   return &shader->base;
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_fs_sse.c b/src/gallium/drivers/llvmpipe/lp_fs_sse.c
deleted file mode 100644
index 61c40dd4b3..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_fs_sse.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Execute fragment shader using runtime SSE code generation.
- */
-
-#include "lp_context.h"
-#include "lp_state.h"
-#include "lp_fs.h"
-#include "lp_quad.h"
-
-#include "pipe/p_state.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "tgsi/tgsi_exec.h"
-#include "tgsi/tgsi_sse2.h"
-
-
-#if defined(PIPE_ARCH_X86)
-
-#include "rtasm/rtasm_x86sse.h"
-
-
-
-/**
- * Subclass of lp_fragment_shader
- */
-struct lp_sse_fragment_shader
-{
-   struct lp_fragment_shader base;
-   struct x86_function sse2_program;
-   tgsi_sse2_fs_function func;
-   float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
-};
-
-
-/** cast wrapper */
-static INLINE struct lp_sse_fragment_shader *
-lp_sse_fragment_shader(const struct lp_fragment_shader *base)
-{
-   return (struct lp_sse_fragment_shader *) base;
-}
-
-
-static void
-fs_sse_prepare( const struct lp_fragment_shader *base,
-		struct tgsi_exec_machine *machine,
-		struct tgsi_sampler **samplers )
-{
-   machine->Samplers = samplers;
-}
-
-
-/* TODO: codegenerate the whole run function, skip this wrapper.
- * TODO: break dependency on tgsi_exec_machine struct
- * TODO: push Position calculation into the generated shader
- * TODO: process >1 quad at a time
- */
-static unsigned 
-fs_sse_run( const struct lp_fragment_shader *base,
-	    struct tgsi_exec_machine *machine,
-	    struct quad_header *quad )
-{
-   struct lp_sse_fragment_shader *shader = lp_sse_fragment_shader(base);
-
-   /* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */
-   lp_setup_pos_vector(quad->posCoef, 
-		       (float)quad->input.x0, (float)quad->input.y0, 
-		       machine->Temps);
-
-   /* init kill mask */
-   tgsi_set_kill_mask(machine, 0x0);
-   tgsi_set_exec_mask(machine, 1, 1, 1, 1);
-
-   shader->func( machine,
-		 machine->Consts,
-                 (const float (*)[4])shader->immediates,
-		 machine->InterpCoefs
-		 //	 , &machine->QuadPos
-      );
-
-   return ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
-}
-
-
-static void 
-fs_sse_delete( struct lp_fragment_shader *base )
-{
-   struct lp_sse_fragment_shader *shader = lp_sse_fragment_shader(base);
-
-   x86_release_func( &shader->sse2_program );
-   FREE(shader);
-}
-
-
-struct lp_fragment_shader *
-llvmpipe_create_fs_sse(struct llvmpipe_context *llvmpipe,
-		       const struct pipe_shader_state *templ)
-{
-   struct lp_sse_fragment_shader *shader;
-
-   if (!llvmpipe->use_sse)
-      return NULL;
-
-   shader = CALLOC_STRUCT(lp_sse_fragment_shader);
-   if (!shader)
-      return NULL;
-
-   x86_init_func( &shader->sse2_program );
-   
-   if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program,
-                        shader->immediates, FALSE )) {
-      FREE(shader);
-      return NULL;
-   }
-
-   shader->func = (tgsi_sse2_fs_function) x86_get_func( &shader->sse2_program );
-   if (!shader->func) {
-      x86_release_func( &shader->sse2_program );
-      FREE(shader);
-      return NULL;
-   }
-
-   shader->base.shader.tokens = NULL; /* don't hold reference to templ->tokens */
-   shader->base.prepare = fs_sse_prepare;
-   shader->base.run = fs_sse_run;
-   shader->base.delete = fs_sse_delete;
-
-   return &shader->base;
-}
-
-
-#else
-
-/* Maybe put this variant in the header file.
- */
-struct lp_fragment_shader *
-llvmpipe_create_fs_sse(struct llvmpipe_context *llvmpipe,
-		       const struct pipe_shader_state *templ)
-{
-   return NULL;
-}
-
-#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_fs.c b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
index 1da50e493b..338a6be80c 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
@@ -1,8 +1,8 @@
 /**************************************************************************
  * 
+ * Copyright 2008-2009 VMware, Inc.
  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
- * Copyright 2008 VMware, Inc.  All rights reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -65,6 +65,114 @@ quad_shade_stage(struct quad_stage *qs)
 }
 
 
+static void
+shader_prepare( const struct lp_fragment_shader *shader,
+                struct tgsi_exec_machine *machine,
+                struct tgsi_sampler **samplers )
+{
+   /*
+    * Bind tokens/shader to the interpreter's machine state.
+    * Avoid redundant binding.
+    */
+   if (machine->Tokens != shader->base.tokens) {
+      tgsi_exec_machine_bind_shader( machine,
+                                     shader->base.tokens,
+                                     PIPE_MAX_SAMPLERS,
+                                     samplers );
+   }
+}
+
+
+
+static void
+setup_pos_vector(struct lp_fragment_shader *shader,
+                 const struct tgsi_interp_coef *coef,
+                 float x, float y)
+{
+   uint chan;
+
+   /* do X */
+   shader->pos[0].f[0] = x;
+   shader->pos[0].f[1] = x + 1;
+   shader->pos[0].f[2] = x;
+   shader->pos[0].f[3] = x + 1;
+
+   /* do Y */
+   shader->pos[1].f[0] = y;
+   shader->pos[1].f[1] = y;
+   shader->pos[1].f[2] = y + 1;
+   shader->pos[1].f[3] = y + 1;
+
+   /* do Z and W for all fragments in the quad */
+   for (chan = 2; chan < 4; chan++) {
+      const float dadx = coef->dadx[chan];
+      const float dady = coef->dady[chan];
+      const float a0 = coef->a0[chan] + dadx * x + dady * y;
+      shader->pos[chan].f[0] = a0;
+      shader->pos[chan].f[1] = a0 + dadx;
+      shader->pos[chan].f[2] = a0 + dady;
+      shader->pos[chan].f[3] = a0 + dadx + dady;
+   }
+}
+
+
+static void
+setup_coef_vector(struct lp_fragment_shader *shader,
+                  const struct tgsi_interp_coef *coef)
+{
+   unsigned attrib, chan, i;
+
+   for (attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; ++attrib) {
+      for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+         for( i = 0; i < QUAD_SIZE; ++i ) {
+            shader->a0[attrib][chan].f[i] = coef[attrib].a0[chan];
+            shader->dadx[attrib][chan].f[i] = coef[attrib].dadx[chan];
+            shader->dady[attrib][chan].f[i] = coef[attrib].dady[chan];
+         }
+      }
+   }
+}
+
+
+/* TODO: codegenerate the whole run function, skip this wrapper.
+ * TODO: break dependency on tgsi_exec_machine struct
+ * TODO: push Position calculation into the generated shader
+ * TODO: process >1 quad at a time
+ */
+static unsigned
+shader_run( struct lp_fragment_shader *shader,
+            struct tgsi_exec_machine *machine,
+            struct quad_header *quad )
+{
+   unsigned mask;
+
+   /* Compute X, Y, Z, W vals for this quad */
+   setup_pos_vector(shader,
+                    quad->posCoef,
+                    (float)quad->input.x0, (float)quad->input.y0);
+
+   setup_coef_vector(shader,
+                     quad->coef);
+
+   /* init kill mask */
+   tgsi_set_kill_mask(machine, 0x0);
+   tgsi_set_exec_mask(machine, 1, 1, 1, 1);
+
+   memset(machine->Outputs, 0, sizeof machine->Outputs);
+
+   shader->jit_function( shader->pos,
+                         shader->a0, shader->dadx, shader->dady,
+                         machine->Consts,
+                         machine->Outputs,
+                         machine->Samplers);
+
+   /* FIXME */
+   mask = ~0;
+
+   return mask;
+}
+
+
 /**
  * Execute fragment shader for the four fragments in the quad.
  */
@@ -77,7 +185,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
    boolean z_written;
 
    /* run shader */
-   quad->inout.mask &= llvmpipe->fs->run( llvmpipe->fs, machine, quad );
+   quad->inout.mask &= shader_run( llvmpipe->fs, machine, quad );
    if (quad->inout.mask == 0)
       return FALSE;
 
@@ -177,10 +285,9 @@ shade_begin(struct quad_stage *qs)
    struct quad_shade_stage *qss = quad_shade_stage(qs);
    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
 
-   llvmpipe->fs->prepare( llvmpipe->fs, 
-			  qss->machine,
-			  (struct tgsi_sampler **)
-                             llvmpipe->tgsi.frag_samplers_list );
+   shader_prepare( llvmpipe->fs,
+                   qss->machine,
+                   (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list );
 
    qs->next->begin(qs->next);
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 6b757a6ba7..8638732b51 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -59,29 +59,36 @@ struct tgsi_exec_machine;
 struct vertex_info;
 
 
+typedef void
+(*lp_shader_fs_func)(void *pos,
+                     void *a0,
+                     void *dadx,
+                     void *dady,
+                     void *consts,
+                     void *outputs,
+                     struct tgsi_sampler **samplers);
+
 /**
  * Subclass of pipe_shader_state (though it doesn't really need to be).
  *
  * This is starting to look an awful lot like a quad pipeline stage...
  */
-struct lp_fragment_shader {
-   struct pipe_shader_state shader;
+struct lp_fragment_shader
+{
+   struct pipe_shader_state base;
 
    struct tgsi_shader_info info;
 
-   void (*prepare)( const struct lp_fragment_shader *shader,
-		    struct tgsi_exec_machine *machine,
-		    struct tgsi_sampler **samplers);
+   struct llvmpipe_screen *screen;
 
-   /* Run the shader - this interface will get cleaned up in the
-    * future:
-    */
-   unsigned (*run)( const struct lp_fragment_shader *shader,
-		    struct tgsi_exec_machine *machine,
-		    struct quad_header *quad );
+   LLVMValueRef function;
 
+   lp_shader_fs_func jit_function;
 
-   void (*delete)( struct lp_fragment_shader * );
+   union tgsi_exec_channel ALIGN16_ATTRIB pos[NUM_CHANNELS];
+   union tgsi_exec_channel ALIGN16_ATTRIB a0[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   union tgsi_exec_channel ALIGN16_ATTRIB dadx[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   union tgsi_exec_channel ALIGN16_ATTRIB dady[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 1dc0dadef6..e6efa426fe 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1,5 +1,6 @@
 /**************************************************************************
  * 
+ * Copyright 2009 VMware, Inc.
  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
  * 
@@ -25,10 +26,6 @@
  * 
  **************************************************************************/
 
-#include "lp_context.h"
-#include "lp_state.h"
-#include "lp_fs.h"
-
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
 #include "pipe/internal/p_winsys_screen.h"
@@ -37,34 +34,159 @@
 #include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_scan.h"
 #include "tgsi/tgsi_parse.h"
+#include "lp_bld_type.h"
+#include "lp_bld_tgsi.h"
+#include "lp_screen.h"
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_quad.h"
+
+
+static void
+shader_generate(struct llvmpipe_screen *screen,
+                struct lp_fragment_shader *shader)
+{
+   const struct tgsi_token *tokens = shader->base.tokens;
+   union lp_type type;
+   LLVMTypeRef elem_type;
+   LLVMTypeRef vec_type;
+   LLVMTypeRef arg_types[7];
+   LLVMTypeRef func_type;
+   LLVMValueRef pos_ptr;
+   LLVMValueRef a0_ptr;
+   LLVMValueRef dadx_ptr;
+   LLVMValueRef dady_ptr;
+   LLVMValueRef consts_ptr;
+   LLVMValueRef outputs_ptr;
+   LLVMValueRef samplers_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   LLVMValueRef pos[NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   char name[32];
+   unsigned i, j;
+
+   type.value = 0;
+   type.floating = TRUE;
+   type.sign = TRUE;
+   type.norm = FALSE;
+   type.width = 32;
+   type.length = 4;
+
+   elem_type = lp_build_elem_type(type);
+   vec_type = lp_build_vec_type(type);
+
+   arg_types[0] = LLVMPointerType(vec_type, 0);        /* pos */
+   arg_types[1] = LLVMPointerType(vec_type, 0);        /* a0 */
+   arg_types[2] = LLVMPointerType(vec_type, 0);        /* dadx */
+   arg_types[3] = LLVMPointerType(vec_type, 0);        /* dady */
+   arg_types[4] = LLVMPointerType(elem_type, 0);       /* consts */
+   arg_types[5] = LLVMPointerType(vec_type, 0);        /* outputs */
+   arg_types[6] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
+
+   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+   shader->function = LLVMAddFunction(screen->module, "shader", func_type);
+   LLVMSetFunctionCallConv(shader->function, LLVMCCallConv);
+
+   pos_ptr = LLVMGetParam(shader->function, 0);
+   a0_ptr = LLVMGetParam(shader->function, 1);
+   dadx_ptr = LLVMGetParam(shader->function, 2);
+   dady_ptr = LLVMGetParam(shader->function, 3);
+   consts_ptr = LLVMGetParam(shader->function, 4);
+   outputs_ptr = LLVMGetParam(shader->function, 5);
+   samplers_ptr = LLVMGetParam(shader->function, 6);
+
+   LLVMSetValueName(pos_ptr, "pos");
+   LLVMSetValueName(a0_ptr, "a0");
+   LLVMSetValueName(dadx_ptr, "dadx");
+   LLVMSetValueName(dady_ptr, "dady");
+   LLVMSetValueName(consts_ptr, "consts");
+   LLVMSetValueName(outputs_ptr, "outputs");
+   LLVMSetValueName(samplers_ptr, "samplers");
+
+   block = LLVMAppendBasicBlock(shader->function, "entry");
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   for(j = 0; j < NUM_CHANNELS; ++j) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), j, 0);
+      util_snprintf(name, sizeof name, "pos.%c", "xyzw"[j]);
+      pos[j] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, pos_ptr, &index, 1, ""), name);
+   }
+
+   memset(outputs, 0, sizeof outputs);
+
+   lp_build_tgsi_soa(builder, tokens, type,
+                     pos, a0_ptr, dadx_ptr, dady_ptr,
+                     consts_ptr, outputs, samplers_ptr);
+
+   for(i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
+      for(j = 0; j < NUM_CHANNELS; ++j) {
+         if(outputs[i][j]) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*NUM_CHANNELS + j, 0);
+            util_snprintf(name, sizeof name, "output%u.%c", i, "xyzw"[j]);
+            LLVMBuildStore(builder, outputs[i][j], LLVMBuildGEP(builder, outputs_ptr, &index, 1, name));
+         }
+      }
+   }
+
+   LLVMBuildRetVoid(builder);;
+
+   LLVMDisposeBuilder(builder);
+}
 
 
 void *
 llvmpipe_create_fs_state(struct pipe_context *pipe,
                          const struct pipe_shader_state *templ)
 {
-   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
-   struct lp_fragment_shader *state;
-
-   /* debug */
-   if (llvmpipe->dump_fs) 
-      tgsi_dump(templ->tokens, 0);
-
-   /* codegen */
-   state = llvmpipe_create_fs_llvm( llvmpipe, templ );
-   if (!state) {
-      state = llvmpipe_create_fs_sse( llvmpipe, templ );
-      if (!state) {
-         state = llvmpipe_create_fs_exec( llvmpipe, templ );
-      }
-   }
+   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
+   struct lp_fragment_shader *shader;
+   LLVMValueRef fetch_texel;
 
-   assert(state);
+   shader = CALLOC_STRUCT(lp_fragment_shader);
+   if (!shader)
+      return NULL;
 
    /* get/save the summary info for this shader */
-   tgsi_scan_shader(templ->tokens, &state->info);
+   tgsi_scan_shader(templ->tokens, &shader->info);
 
-   return state;
+   /* we need to keep a local copy of the tokens */
+   shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+
+   shader->screen = screen;
+
+#ifdef DEBUG
+   tgsi_dump(templ->tokens, 0);
+#endif
+
+   shader_generate(screen, shader);
+
+   LLVMRunFunctionPassManager(screen->pass, shader->function);
+
+#ifdef DEBUG
+   LLVMDumpValue(shader->function);
+   debug_printf("\n");
+#endif
+
+   if(LLVMVerifyFunction(shader->function, LLVMPrintMessageAction)) {
+      LLVMDumpValue(shader->function);
+      abort();
+   }
+
+   fetch_texel = LLVMGetNamedFunction(screen->module, "fetch_texel");
+   if(fetch_texel) {
+      static boolean first_time = TRUE;
+      if(first_time) {
+         LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_build_tgsi_fetch_texel_soa);
+         first_time = FALSE;
+      }
+   }
+
+   shader->jit_function = (lp_shader_fs_func)LLVMGetPointerToGlobal(screen->engine, shader->function);
+
+   return shader;
 }
 
 
@@ -82,11 +204,19 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 void
 llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
 {
-   struct lp_fragment_shader *state = fs;
+   struct lp_fragment_shader *shader = fs;
+   struct llvmpipe_screen *screen = shader->screen;
 
    assert(fs != llvmpipe_context(pipe)->fs);
    
-   state->delete( state );
+   if(shader->function) {
+      if(shader->jit_function)
+         LLVMFreeMachineCodeForFunction(screen->engine, shader->function);
+      LLVMDeleteFunction(shader->function);
+   }
+
+   FREE((void *) shader->base.tokens);
+   FREE(shader);
 }
 
 
-- 
cgit v1.2.3


From 818d444e12bb57568fbf3bf5f06ee24c6c73a61a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 16 Aug 2009 11:50:17 +0100
Subject: llvmpipe: Disassemble generated x86 code.

---
 src/gallium/drivers/llvmpipe/README           | 10 +++-
 src/gallium/drivers/llvmpipe/SConscript       |  2 +
 src/gallium/drivers/llvmpipe/lp_bld_debug.c   | 70 +++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_debug.h   | 37 ++++++++++++++
 src/gallium/drivers/llvmpipe/lp_state_blend.c |  7 ++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c    |  5 ++
 src/gallium/drivers/llvmpipe/lp_test_blend.c  |  4 ++
 src/gallium/drivers/llvmpipe/lp_test_conv.c   |  4 ++
 src/gallium/winsys/xlib/SConscript            |  8 +++
 9 files changed, 145 insertions(+), 2 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_debug.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_debug.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index afa9cbaf3e..af0ef2b317 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -6,11 +6,19 @@ Requirements
 
  - Linux
  
+ - udis86, http://udis86.sourceforge.net/
+ 
+     git clone git://udis86.git.sourceforge.net/gitroot/udis86
+     cd udis86
+     ./configure --with-pic
+     make
+     sudo make install
+ 
  - LLVM. On Debian based distributions do:
  
      aptitude install llvm-dev
 
-   There is a type in one of the llvm-dev 2.5 headers, that causes compilation
+   There is a typo in one of the llvm-dev 2.5 headers, that causes compilation
    errors in the debug build:
 
      --- /usr/include/llvm-c/Core.h.orig	2009-08-10 15:38:54.000000000 +0100
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 154964bf7a..cdd301b029 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -12,6 +12,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_blend_soa.c',
 		'lp_bld_const.c',
 		'lp_bld_conv.c',
+		'lp_bld_debug.c',
 		'lp_bld_intr.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
@@ -55,6 +56,7 @@ llvmpipe = env.ConvenienceLibrary(
 
 env = env.Clone()
 
+env.Prepend(LIBS = 'udis86')
 env['LINK'] = env['CXX']
 env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
 env.Prepend(LIBS = [llvmpipe] + auxiliaries)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
new file mode 100644
index 0000000000..49a6065a8c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -0,0 +1,70 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <udis86.h>
+
+#include "util/u_debug.h"
+#include "lp_bld_debug.h"
+
+
+void
+lp_disassemble(const void* func)
+{
+   ud_t ud_obj;
+
+   ud_init(&ud_obj);
+   /* Code size is unknown: scan at most 0xffff bytes, the loop ends at ret. */
+   ud_set_input_buffer(&ud_obj, (void*)func, 0xffff);
+   ud_set_pc(&ud_obj, (uint64_t) (uintptr_t) func);
+
+#ifdef PIPE_ARCH_X86
+   ud_set_mode(&ud_obj, 32);
+#endif
+#ifdef PIPE_ARCH_X86_64
+   ud_set_mode(&ud_obj, 64);
+#endif
+
+   ud_set_syntax(&ud_obj, UD_SYN_ATT);
+
+   while (ud_disassemble(&ud_obj)) {
+#ifdef PIPE_ARCH_X86
+      debug_printf("%08lx:\t%s\n",
+                   (unsigned long)ud_insn_off(&ud_obj),
+                   ud_insn_asm(&ud_obj));
+#endif
+#ifdef PIPE_ARCH_X86_64
+      debug_printf("%016llx:\t%s\n",
+                   (unsigned long long)ud_insn_off(&ud_obj),
+                   ud_insn_asm(&ud_obj));
+#endif
+      /* Stop after the first return instruction. */
+      if (ud_obj.mnemonic == UD_Iret)
+         break;
+   }
+   debug_printf("\n");
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
new file mode 100644
index 0000000000..d83652504b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
@@ -0,0 +1,37 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_DEBUG_H
+#define LP_BLD_DEBUG_H
+
+
+void
+lp_disassemble(const void* func);
+
+
+#endif /* !LP_BLD_DEBUG_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c
index be3e7b1629..d31fc6c5fa 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -41,6 +41,7 @@
 #include "lp_bld_type.h"
 #include "lp_bld_arit.h"
 #include "lp_bld_blend.h"
+#include "lp_bld_debug.h"
 
 
 static void
@@ -127,7 +128,7 @@ llvmpipe_create_blend_state(struct pipe_context *pipe,
 
    LLVMRunFunctionPassManager(screen->pass, blend->function);
 
-#if 1
+#ifdef DEBUG
    debug_printf("%s=%s %s=%s %s=%s %s=%s %s=%s %s=%s\n",
                 "rgb_func",         debug_dump_blend_func  (blend->base.rgb_func, TRUE),
                 "rgb_src_factor",   debug_dump_blend_factor(blend->base.rgb_src_factor, TRUE),
@@ -146,6 +147,10 @@ llvmpipe_create_blend_state(struct pipe_context *pipe,
 
    blend->jit_function = (lp_blend_func)LLVMGetPointerToGlobal(screen->engine, blend->function);
 
+#ifdef DEBUG
+   lp_disassemble(blend->jit_function);
+#endif
+
    return blend;
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index b9bb7abb0b..157f4eb59c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -36,6 +36,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "lp_bld_type.h"
 #include "lp_bld_tgsi.h"
+#include "lp_bld_debug.h"
 #include "lp_screen.h"
 #include "lp_context.h"
 #include "lp_state.h"
@@ -188,6 +189,10 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
 
    shader->jit_function = (lp_shader_fs_func)LLVMGetPointerToGlobal(screen->engine, shader->function);
 
+#ifdef DEBUG
+   lp_disassemble(shader->jit_function);
+#endif
+
    return shader;
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 645d3880b9..0b6d2da590 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -40,6 +40,7 @@
 #include "lp_bld_type.h"
 #include "lp_bld_arit.h"
 #include "lp_bld_blend.h"
+#include "lp_bld_debug.h"
 #include "lp_test.h"
 
 
@@ -526,6 +527,9 @@ test_one(unsigned verbose,
 
    blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
 
+   if(verbose >= 2)
+      lp_disassemble(blend_test_ptr);
+
    success = TRUE;
    for(i = 0; i < n && success; ++i) {
       if(mode == AoS) {
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 7e8b9347c2..91815509b7 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -36,6 +36,7 @@
 
 #include "lp_bld_type.h"
 #include "lp_bld_conv.h"
+#include "lp_bld_debug.h"
 #include "lp_test.h"
 
 
@@ -217,6 +218,9 @@ test_one(unsigned verbose,
 
    conv_test_ptr = (conv_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
 
+   if(verbose >= 2)
+      lp_disassemble(conv_test_ptr);
+
    success = TRUE;
    for(i = 0; i < n && success; ++i) {
       unsigned src_stride = src_type.length*src_type.width/8;
diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript
index f67a94466a..d2be07b384 100644
--- a/src/gallium/winsys/xlib/SConscript
+++ b/src/gallium/winsys/xlib/SConscript
@@ -29,6 +29,14 @@ if env['platform'] == 'linux' \
         sources += ['xlib_softpipe.c']
         drivers += [softpipe]
 
+    if 'llvmpipe' in env['drivers']:
+        env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE')
+        env.Prepend(LIBS = 'udis86')
+        env.ParseConfig('llvm-config --libs jit interpreter nativecodegen')
+        env['LINK'] = env['CXX']
+        sources += ['xlib_llvmpipe.c']
+        drivers += [llvmpipe]
+
     if 'i965simple' in env['drivers']:
         env.Append(CPPDEFINES = 'GALLIUM_I965SIMPLE')
         sources += [
-- 
cgit v1.2.3


From 1aede69d3a8d288af11c2ef620b51e71c2ce89b2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 16 Aug 2009 20:59:38 +0100
Subject: llvmpipe: Comparisons translation.

---
 src/gallium/drivers/llvmpipe/SConscript     |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_logic.c | 174 ++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_logic.h |  57 +++++++++
 3 files changed, 232 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_logic.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_logic.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index cdd301b029..b880ca0d68 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -19,6 +19,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_load.c',
 		'lp_bld_store.c',
 		'lp_bld_loop.c',
+		'lp_bld_logic.c',
 		'lp_bld_logicop.c',
 		'lp_bld_swizzle.c',
 		'lp_bld_tgsi_soa.c',		
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
new file mode 100644
index 0000000000..dbca2921e6
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -0,0 +1,174 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_defines.h"
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_logic.h"
+
+
+LLVMValueRef
+lp_build_cmp(struct lp_build_context *bld,
+             unsigned func,
+             LLVMValueRef a,
+             LLVMValueRef b)
+{
+   const union lp_type type = bld->type;
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
+   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
+   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
+   LLVMValueRef cond;
+   /* Returns an integer vector: all ones where func holds, else zero. */
+   if(func == PIPE_FUNC_NEVER)
+      return zeros;
+   if(func == PIPE_FUNC_ALWAYS)
+      return ones;
+
+   /* TODO: optimize the constant case */
+
+   /* XXX: It is not clear if we should use the ordered or unordered operators */
+   /* SSE fast path: cmpps compares exactly four 32-bit floats. */
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   if(type.width * type.length == 128) {
+      if(type.floating && type.width == 32) {
+         LLVMValueRef args[3];
+         unsigned cc;
+         boolean swap;
+         LLVMValueRef res;
+
+         swap = FALSE;
+         switch(func) {
+         case PIPE_FUNC_EQUAL:
+            cc = 0;
+            break;
+         case PIPE_FUNC_NOTEQUAL:
+            cc = 4;
+            break;
+         case PIPE_FUNC_LESS:
+            cc = 1;
+            break;
+         case PIPE_FUNC_LEQUAL:
+            cc = 2;
+            break;
+         case PIPE_FUNC_GREATER:
+            cc = 1;
+            swap = TRUE;
+            break;
+         case PIPE_FUNC_GEQUAL:
+            cc = 2;
+            swap = TRUE;
+            break;
+         default:
+            assert(0);
+            return bld->undef;
+         }
+
+         if(swap) {
+            args[0] = b;
+            args[1] = a;
+         }
+         else {
+            args[0] = a;
+            args[1] = b;
+         }
+
+         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
+         res = lp_build_intrinsic(bld->builder,
+                                  "llvm.x86.sse.cmp.ps",
+                                  vec_type,
+                                  args, 3);
+         res = LLVMBuildBitCast(bld->builder, res, int_vec_type, "");
+         return res;
+      }
+   }
+#endif
+   /* Generic path: build a boolean compare, then select ones/zeros. */
+   if(type.floating) {
+      LLVMRealPredicate op;
+      switch(func) {
+      case PIPE_FUNC_NEVER:   /* unreachable: returned early above */
+         op = LLVMRealPredicateFalse;
+         break;
+      case PIPE_FUNC_ALWAYS:  /* unreachable: returned early above */
+         op = LLVMRealPredicateTrue;
+         break;
+      case PIPE_FUNC_EQUAL:
+         op = LLVMRealUEQ;
+         break;
+      case PIPE_FUNC_NOTEQUAL:
+         op = LLVMRealUNE;
+         break;
+      case PIPE_FUNC_LESS:
+         op = LLVMRealULT;
+         break;
+      case PIPE_FUNC_LEQUAL:
+         op = LLVMRealULE;
+         break;
+      case PIPE_FUNC_GREATER:
+         op = LLVMRealUGT;
+         break;
+      case PIPE_FUNC_GEQUAL:
+         op = LLVMRealUGE;
+         break;
+      default:
+         assert(0);
+         return bld->undef;
+      }
+      cond = LLVMBuildFCmp(bld->builder, op, a, b, "");
+   }
+   else {
+      LLVMIntPredicate op;
+      switch(func) {
+      case PIPE_FUNC_EQUAL:
+         op = LLVMIntEQ;
+         break;
+      case PIPE_FUNC_NOTEQUAL:
+         op = LLVMIntNE;
+         break;
+      case PIPE_FUNC_LESS:
+         op = type.sign ? LLVMIntSLT : LLVMIntULT;
+         break;
+      case PIPE_FUNC_LEQUAL:
+         op = type.sign ? LLVMIntSLE : LLVMIntULE;
+         break;
+      case PIPE_FUNC_GREATER:
+         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
+         break;
+      case PIPE_FUNC_GEQUAL:
+         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
+         break;
+      default:
+         assert(0);
+         return bld->undef;
+      }
+      cond = LLVMBuildICmp(bld->builder, op, a, b, "");
+   }
+
+   return LLVMBuildSelect(bld->builder, cond, ones, zeros, "");
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
new file mode 100644
index 0000000000..a8c10d670a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for logical operations.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_LOGIC_H
+#define LP_BLD_LOGIC_H
+
+
+#include <llvm-c/Core.h>  
+
+
+union lp_type;
+struct lp_build_context;
+
+
+/**
+ * @param func is one of PIPE_FUNC_xxx
+ */
+LLVMValueRef
+lp_build_cmp(struct lp_build_context *bld,
+             unsigned func,
+             LLVMValueRef a,
+             LLVMValueRef b);
+
+
+#endif /* !LP_BLD_LOGIC_H */
-- 
cgit v1.2.3


From 556eecea67354068f6e328da6564bef6cb74cb4e Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 18 Aug 2009 21:08:38 +0100
Subject: llvmpipe: Allow to build without udis86.

---
 scons/udis86.py                             | 42 +++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/SConscript     |  2 +-
 src/gallium/drivers/llvmpipe/lp_bld_debug.c |  6 +++++
 src/gallium/winsys/xlib/SConscript          |  2 +-
 4 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 scons/udis86.py

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/scons/udis86.py b/scons/udis86.py
new file mode 100644
index 0000000000..ba71d4eb0b
--- /dev/null
+++ b/scons/udis86.py
@@ -0,0 +1,42 @@
+"""udis86
+
+Tool-specific initialization for udis86
+
+"""
+
+#
+# Copyright (c) 2009 VMware, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+def generate(env):
+    conf = env.Configure()
+    # Define HAVE_UDIS86 and link udis86 only when its header is found.
+    if conf.CheckHeader('udis86.h'): # and conf.CheckLib('udis86'):
+        env.Append(CPPDEFINES = [('HAVE_UDIS86', '1')])
+        env.Prepend(LIBS = ['udis86'])
+
+    conf.Finish()
+
+def exists(env):
+    return True  # unconditional; generate() performs the header probe
+
+# vim:set ts=4 sw=4 et:
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index b880ca0d68..97af1b95c3 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -2,6 +2,7 @@ Import('*')
 
 env = env.Clone()
 
+env.Tool('udis86')
 env.ParseConfig('llvm-config --cppflags')
 
 llvmpipe = env.ConvenienceLibrary(
@@ -57,7 +58,6 @@ llvmpipe = env.ConvenienceLibrary(
 
 env = env.Clone()
 
-env.Prepend(LIBS = 'udis86')
 env['LINK'] = env['CXX']
 env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
 env.Prepend(LIBS = [llvmpipe] + auxiliaries)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
index f8da1c9f8e..ccbafca8b8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -26,7 +26,9 @@
  **************************************************************************/
 
 
+#ifdef HAVE_UDIS86
 #include <udis86.h>
+#endif
 
 #include "util/u_debug.h"
 #include "lp_bld_debug.h"
@@ -35,6 +37,7 @@
 void
 lp_disassemble(const void* func)
 {
+#ifdef HAVE_UDIS86
    ud_t ud_obj;
 
    ud_init(&ud_obj);
@@ -69,4 +72,7 @@ lp_disassemble(const void* func)
          break;
    }
    debug_printf("\n");
+#else
+   (void)func;
+#endif
 }
diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript
index d2be07b384..518fd2b5a8 100644
--- a/src/gallium/winsys/xlib/SConscript
+++ b/src/gallium/winsys/xlib/SConscript
@@ -31,7 +31,7 @@ if env['platform'] == 'linux' \
 
     if 'llvmpipe' in env['drivers']:
         env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE')
-        env.Prepend(LIBS = 'udis86')
+        env.Tool('udis86')
         env.ParseConfig('llvm-config --libs jit interpreter nativecodegen')
         env['LINK'] = env['CXX']
         sources += ['xlib_llvmpipe.c']
-- 
cgit v1.2.3


From 9ae47069b4a2b67e381b02d805f1ca74f31ea7b8 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 19 Aug 2009 20:42:50 +0100
Subject: llvmpipe: Code generate alpha testing and append to generated
 fragment shader.

---
 src/gallium/drivers/llvmpipe/Makefile             |   1 +
 src/gallium/drivers/llvmpipe/SConscript           |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_alpha.c       |  68 +++++++++
 src/gallium/drivers/llvmpipe/lp_bld_alpha.h       |  52 +++++++
 src/gallium/drivers/llvmpipe/lp_quad_depth_test.c |  81 +----------
 src/gallium/drivers/llvmpipe/lp_quad_fs.c         |  25 ++--
 src/gallium/drivers/llvmpipe/lp_state.h           |  27 +++-
 src/gallium/drivers/llvmpipe/lp_state_derived.c   |   5 +
 src/gallium/drivers/llvmpipe/lp_state_fs.c        | 166 ++++++++++++++--------
 9 files changed, 279 insertions(+), 147 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_alpha.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_alpha.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 170eefd51a..96e0380497 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -4,6 +4,7 @@ include $(TOP)/configs/current
 LIBNAME = llvmpipe
 
 C_SOURCES = \
+	lp_bld_alpha.c \
 	lp_bld_arit.c \
 	lp_bld_blend_aos.c \
 	lp_bld_blend_soa.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 97af1b95c3..68989792aa 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -8,6 +8,7 @@ env.ParseConfig('llvm-config --cppflags')
 llvmpipe = env.ConvenienceLibrary(
 	target = 'llvmpipe',
 	source = [
+		'lp_bld_alpha.c',
 		'lp_bld_arit.c',
 		'lp_bld_blend_aos.c',
 		'lp_bld_blend_soa.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
new file mode 100644
index 0000000000..a3faa22b99
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -0,0 +1,68 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Alpha testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_state.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_alpha.h"
+
+
+LLVMValueRef
+lp_build_alpha_test(LLVMBuilderRef builder,
+                    const struct pipe_alpha_state *state,
+                    union lp_type type,
+                    LLVMValueRef alpha,
+                    LLVMValueRef mask)
+{
+   struct lp_build_context bld;
+
+   lp_build_context_init(&bld, builder, type);
+   /* If alpha test is disabled, the incoming mask is returned unchanged. */
+   if(state->enabled) {
+      LLVMValueRef ref = lp_build_const_uni(type, state->ref_value);
+      LLVMValueRef test = lp_build_cmp(&bld, state->func, alpha, ref);
+
+      lp_build_name(test, "alpha_mask");
+      /* AND into the caller's mask, or use the test result if none given. */
+      if(mask)
+         mask = LLVMBuildAnd(builder, mask, test, "");
+      else
+         mask = test;
+   }
+
+   return mask;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
new file mode 100644
index 0000000000..f3fa8b6053
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -0,0 +1,52 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Alpha testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_ALPHA_H
+#define LP_BLD_ALPHA_H
+
+
+#include <llvm-c/Core.h>  
+
+struct pipe_alpha_state;
+union lp_type;
+
+
+LLVMValueRef
+lp_build_alpha_test(LLVMBuilderRef builder,
+                    const struct pipe_alpha_state *state,
+                    union lp_type type,
+                    LLVMValueRef alpha,
+                    LLVMValueRef mask);
+
+
+#endif /* !LP_BLD_ALPHA_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c b/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
index fefb99c1ff..124301688f 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
@@ -564,75 +564,6 @@ depth_stencil_test_quad(struct quad_stage *qs,
 }
 
 
-#define ALPHATEST( FUNC, COMP )                                         \
-   static int                                                          \
-   alpha_test_quads_##FUNC( struct quad_stage *qs,                      \
-                           struct quad_header *quads[],                 \
-                           unsigned nr )                                \
-   {                                                                    \
-      const float ref = qs->llvmpipe->depth_stencil->alpha.ref_value;   \
-      const uint cbuf = 0; /* only output[0].alpha is tested */         \
-      unsigned pass_nr = 0;                                             \
-      unsigned i;                                                       \
-                                                                        \
-      for (i = 0; i < nr; i++) {                                        \
-         const float *aaaa = quads[i]->output.color[cbuf][3];           \
-         unsigned passMask = 0;                                         \
-                                                                        \
-         if (!quads[i]->inout.mask)                                     \
-            continue;                                                   \
-                                                                        \
-         if (aaaa[0] COMP ref) passMask |= (1 << 0);                    \
-         if (aaaa[1] COMP ref) passMask |= (1 << 1);                    \
-         if (aaaa[2] COMP ref) passMask |= (1 << 2);                    \
-         if (aaaa[3] COMP ref) passMask |= (1 << 3);                    \
-                                                                        \
-         quads[i]->inout.mask &= passMask;                              \
-                                                                        \
-         if (quads[i]->inout.mask)                                      \
-            ++pass_nr;                                                  \
-      }                                                                 \
-                                                                        \
-      return pass_nr;                                                   \
-   }
-
-
-ALPHATEST( LESS,     < )
-ALPHATEST( EQUAL,    == )
-ALPHATEST( LEQUAL,   <= )
-ALPHATEST( GREATER,  > )
-ALPHATEST( NOTEQUAL, != )
-ALPHATEST( GEQUAL,   >= )
-
-
-/* XXX: Incorporate into shader using KILP.
- */
-static int
-alpha_test_quads(struct quad_stage *qs, 
-                 struct quad_header *quads[], 
-                 unsigned nr)
-{
-   switch (qs->llvmpipe->depth_stencil->alpha.func) {
-   case PIPE_FUNC_LESS:
-      return alpha_test_quads_LESS( qs, quads, nr );
-   case PIPE_FUNC_EQUAL:
-      return alpha_test_quads_EQUAL( qs, quads, nr );
-      break;
-   case PIPE_FUNC_LEQUAL:
-      return alpha_test_quads_LEQUAL( qs, quads, nr );
-   case PIPE_FUNC_GREATER:
-      return alpha_test_quads_GREATER( qs, quads, nr );
-   case PIPE_FUNC_NOTEQUAL:
-      return alpha_test_quads_NOTEQUAL( qs, quads, nr );
-   case PIPE_FUNC_GEQUAL:
-      return alpha_test_quads_GEQUAL( qs, quads, nr );
-   case PIPE_FUNC_ALWAYS:
-      return nr;
-   case PIPE_FUNC_NEVER:
-   default:
-      return 0;
-   }
-}
 
 static unsigned mask_count[0x8] = 
 {
@@ -659,10 +590,6 @@ depth_test_quads_fallback(struct quad_stage *qs,
    struct depth_data data;
 
 
-   if (qs->llvmpipe->depth_stencil->alpha.enabled) {
-      alpha_test_quads(qs, quads, nr);
-   }
-
    if (qs->llvmpipe->framebuffer.zsbuf && 
        (qs->llvmpipe->depth_stencil->depth.enabled ||
         qs->llvmpipe->depth_stencil->stencil[0].enabled)) {
@@ -801,8 +728,6 @@ choose_depth_test(struct quad_stage *qs,
 {
    boolean interp_depth = !qs->llvmpipe->fs->info.writes_z;
 
-   boolean alpha = qs->llvmpipe->depth_stencil->alpha.enabled;
-
    boolean depth = (qs->llvmpipe->framebuffer.zsbuf && 
                     qs->llvmpipe->depth_stencil->depth.enabled);
 
@@ -815,13 +740,11 @@ choose_depth_test(struct quad_stage *qs,
 
    qs->run = depth_test_quads_fallback;
 
-   if (!alpha &&
-       !depth &&
+   if (!depth &&
        !stencil) {
       qs->run = depth_noop;
    }
-   else if (!alpha && 
-            interp_depth && 
+   else if (interp_depth &&
             depth && 
             depthfunc == PIPE_FUNC_LESS && 
             depthwrite && 
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_fs.c b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
index 78b4e1bab6..2736efc956 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
@@ -76,10 +76,15 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
 {
    struct quad_shade_stage *qss = quad_shade_stage( qs );
    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+   struct lp_fragment_shader *fs = llvmpipe->fs;
    void *constants;
    struct tgsi_sampler **samplers;
    unsigned chan_index;
 
+   assert(fs->current);
+   if(!fs->current)
+      return FALSE;
+
    constants = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
    samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
 
@@ -87,16 +92,16 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
       qss->mask[chan_index] = ~0;
 
    /* run shader */
-   llvmpipe->fs->jit_function( quad->input.x0,
-                               quad->input.y0,
-                               quad->coef->a0,
-                               quad->coef->dadx,
-                               quad->coef->dady,
-                               constants,
-                               qss->mask,
-                               quad->output.color,
-                               quad->output.depth,
-                               samplers);
+   fs->current->jit_function( quad->input.x0,
+                              quad->input.y0,
+                              quad->coef->a0,
+                              quad->coef->dadx,
+                              quad->coef->dady,
+                              constants,
+                              qss->mask,
+                              quad->output.color,
+                              quad->output.depth,
+                              samplers);
 
    for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
       if(!qss->mask[chan_index])
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index f8b3793a59..db21096f21 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -70,6 +70,28 @@ typedef void
                      void *depth,
                      struct tgsi_sampler **samplers);
 
+
+struct lp_fragment_shader;
+
+
+/**
+ * A fragment shader compiled for one particular alpha test state.
+ *
+ * This is starting to look an awful lot like a quad pipeline stage...
+ */
+struct lp_fragment_shader_variant
+{
+   struct lp_fragment_shader *shader;   /* shader this variant was generated from */
+   struct pipe_alpha_state alpha;       /* copy of the alpha state used as lookup key */
+
+   LLVMValueRef function;               /* LLVM IR function added to the screen's module */
+
+   lp_shader_fs_func jit_function;      /* pointer obtained from the execution engine for 'function' */
+
+   struct lp_fragment_shader_variant *next;   /* next entry in shader->variants */
+};
+
+
 /**
  * Subclass of pipe_shader_state (though it doesn't really need to be).
  *
@@ -83,9 +105,9 @@ struct lp_fragment_shader
 
    struct llvmpipe_screen *screen;
 
-   LLVMValueRef function;
+   struct lp_fragment_shader_variant *variants;
 
-   lp_shader_fs_func jit_function;
+   struct lp_fragment_shader_variant *current;
 };
 
 
@@ -183,6 +205,7 @@ void llvmpipe_set_vertex_buffers(struct pipe_context *,
                                  unsigned count,
                                  const struct pipe_vertex_buffer *);
 
+void llvmpipe_update_fs(struct llvmpipe_context *lp);
 
 void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe );
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 79861b2d13..b42e6b1502 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -244,6 +244,11 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
                           LP_NEW_FRAMEBUFFER))
       compute_cliprect(llvmpipe);
 
+   if (llvmpipe->dirty & (LP_NEW_FS |
+                          LP_NEW_DEPTH_STENCIL_ALPHA))
+      llvmpipe_update_fs( llvmpipe );
+
+
    if (llvmpipe->dirty & (LP_NEW_BLEND |
                           LP_NEW_DEPTH_STENCIL_ALPHA |
                           LP_NEW_FRAMEBUFFER |
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index ffcc8336b1..702be42916 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -28,6 +28,7 @@
 
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
+#include "util/u_debug_dump.h"
 #include "pipe/internal/p_winsys_screen.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
@@ -36,6 +37,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "lp_bld_type.h"
 #include "lp_bld_tgsi.h"
+#include "lp_bld_alpha.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_debug.h"
 #include "lp_screen.h"
@@ -103,10 +105,12 @@ setup_pos_vector(LLVMBuilderRef builder,
 }
 
 
-static void
+static struct lp_fragment_shader_variant *
 shader_generate(struct llvmpipe_screen *screen,
-                struct lp_fragment_shader *shader)
+                struct lp_fragment_shader *shader,
+                const struct pipe_alpha_state *alpha)
 {
+   struct lp_fragment_shader_variant *variant;
    const struct tgsi_token *tokens = shader->base.tokens;
    union lp_type type;
    LLVMTypeRef elem_type;
@@ -129,10 +133,25 @@ shader_generate(struct llvmpipe_screen *screen,
    LLVMValueRef pos[NUM_CHANNELS];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
    LLVMValueRef mask;
+   LLVMValueRef fetch_texel;
    unsigned i;
    unsigned attrib;
    unsigned chan;
 
+#ifdef DEBUG
+   tgsi_dump(shader->base.tokens, 0);
+   debug_printf("alpha.enabled = %u\n", alpha->enabled);
+   debug_printf("alpha.func = %s\n", debug_dump_func(alpha->func, TRUE));
+   debug_printf("alpha.ref_value = %f\n", alpha->ref_value);
+#endif
+
+   variant = CALLOC_STRUCT(lp_fragment_shader_variant);
+   if(!variant)
+      return NULL;
+
+   variant->shader = shader;
+   memcpy(&variant->alpha, alpha, sizeof *alpha);
+
    type.value = 0;
    type.floating = TRUE; /* floating point values */
    type.sign = TRUE;     /* values are signed */
@@ -157,22 +176,22 @@ shader_generate(struct llvmpipe_screen *screen,
 
    func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
 
-   shader->function = LLVMAddFunction(screen->module, "shader", func_type);
-   LLVMSetFunctionCallConv(shader->function, LLVMCCallConv);
+   variant->function = LLVMAddFunction(screen->module, "shader", func_type);
+   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
    for(i = 0; i < Elements(arg_types); ++i)
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         LLVMAddAttribute(LLVMGetParam(shader->function, i), LLVMNoAliasAttribute);
-
-   x            = LLVMGetParam(shader->function, 0);
-   y            = LLVMGetParam(shader->function, 1);
-   a0_ptr       = LLVMGetParam(shader->function, 2);
-   dadx_ptr     = LLVMGetParam(shader->function, 3);
-   dady_ptr     = LLVMGetParam(shader->function, 4);
-   consts_ptr   = LLVMGetParam(shader->function, 5);
-   mask_ptr     = LLVMGetParam(shader->function, 6);
-   color_ptr    = LLVMGetParam(shader->function, 7);
-   depth_ptr    = LLVMGetParam(shader->function, 8);
-   samplers_ptr = LLVMGetParam(shader->function, 9);
+         LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
+
+   x            = LLVMGetParam(variant->function, 0);
+   y            = LLVMGetParam(variant->function, 1);
+   a0_ptr       = LLVMGetParam(variant->function, 2);
+   dadx_ptr     = LLVMGetParam(variant->function, 3);
+   dady_ptr     = LLVMGetParam(variant->function, 4);
+   consts_ptr   = LLVMGetParam(variant->function, 5);
+   mask_ptr     = LLVMGetParam(variant->function, 6);
+   color_ptr    = LLVMGetParam(variant->function, 7);
+   depth_ptr    = LLVMGetParam(variant->function, 8);
+   samplers_ptr = LLVMGetParam(variant->function, 9);
 
    lp_build_name(x, "x");
    lp_build_name(y, "y");
@@ -185,7 +204,7 @@ shader_generate(struct llvmpipe_screen *screen,
    lp_build_name(depth_ptr, "depth");
    lp_build_name(samplers_ptr, "samplers");
 
-   block = LLVMAppendBasicBlock(shader->function, "entry");
+   block = LLVMAppendBasicBlock(variant->function, "entry");
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
@@ -210,6 +229,12 @@ shader_generate(struct llvmpipe_screen *screen,
                   LLVMValueRef output_ptr = LLVMBuildGEP(builder, color_ptr, &index, 1, "");
                   lp_build_name(outputs[attrib][chan], "color%u.%c", attrib, "rgba"[chan]);
                   LLVMBuildStore(builder, outputs[attrib][chan], output_ptr);
+
+                  /* Alpha test */
+                  /* XXX: should the alpha reference value be passed separately? */
+                  if(cbuf == 0 && chan == 3)
+                     mask = lp_build_alpha_test(builder, alpha, type, outputs[attrib][chan], mask);
+
                   break;
                }
 
@@ -228,44 +253,16 @@ shader_generate(struct llvmpipe_screen *screen,
    LLVMBuildRetVoid(builder);;
 
    LLVMDisposeBuilder(builder);
-}
-
 
-void *
-llvmpipe_create_fs_state(struct pipe_context *pipe,
-                         const struct pipe_shader_state *templ)
-{
-   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
-   struct lp_fragment_shader *shader;
-   LLVMValueRef fetch_texel;
-
-   shader = CALLOC_STRUCT(lp_fragment_shader);
-   if (!shader)
-      return NULL;
-
-   /* get/save the summary info for this shader */
-   tgsi_scan_shader(templ->tokens, &shader->info);
-
-   /* we need to keep a local copy of the tokens */
-   shader->base.tokens = tgsi_dup_tokens(templ->tokens);
-
-   shader->screen = screen;
+   LLVMRunFunctionPassManager(screen->pass, variant->function);
 
 #ifdef DEBUG
-   tgsi_dump(templ->tokens, 0);
-#endif
-
-   shader_generate(screen, shader);
-
-   LLVMRunFunctionPassManager(screen->pass, shader->function);
-
-#ifdef DEBUG
-   LLVMDumpValue(shader->function);
+   LLVMDumpValue(variant->function);
    debug_printf("\n");
 #endif
 
-   if(LLVMVerifyFunction(shader->function, LLVMPrintMessageAction)) {
-      LLVMDumpValue(shader->function);
+   if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
+      LLVMDumpValue(variant->function);
       abort();
    }
 
@@ -278,12 +275,38 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
       }
    }
 
-   shader->jit_function = (lp_shader_fs_func)LLVMGetPointerToGlobal(screen->engine, shader->function);
+   variant->jit_function = (lp_shader_fs_func)LLVMGetPointerToGlobal(screen->engine, variant->function);
 
 #ifdef DEBUG
-   lp_disassemble(shader->jit_function);
+   lp_disassemble(variant->jit_function);
 #endif
 
+   variant->next = shader->variants;
+   shader->variants = variant;
+
+   return variant;
+}
+
+
+void *
+llvmpipe_create_fs_state(struct pipe_context *pipe,
+                         const struct pipe_shader_state *templ)
+{
+   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
+   struct lp_fragment_shader *shader;
+
+   shader = CALLOC_STRUCT(lp_fragment_shader);
+   if (!shader)
+      return NULL;
+
+   /* get/save the summary info for this shader */
+   tgsi_scan_shader(templ->tokens, &shader->info);
+
+   /* we need to keep a local copy of the tokens */
+   shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+
+   shader->screen = screen;
+
    return shader;
 }
 
@@ -303,14 +326,24 @@ void
 llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct lp_fragment_shader *shader = fs;
+   struct lp_fragment_shader_variant *variant;
    struct llvmpipe_screen *screen = shader->screen;
 
    assert(fs != llvmpipe_context(pipe)->fs);
-   
-   if(shader->function) {
-      if(shader->jit_function)
-         LLVMFreeMachineCodeForFunction(screen->engine, shader->function);
-      LLVMDeleteFunction(shader->function);
+
+   variant = shader->variants;
+   while(variant) {
+      struct lp_fragment_shader_variant *next = variant->next;
+
+      if(variant->function) {
+         if(variant->jit_function)
+            LLVMFreeMachineCodeForFunction(screen->engine, variant->function);
+         LLVMDeleteFunction(variant->function);
+      }
+
+      FREE(variant);
+
+      variant = next;
    }
 
    FREE((void *) shader->base.tokens);
@@ -395,3 +428,24 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
 
    llvmpipe->dirty |= LP_NEW_CONSTANTS;
 }
+
+
+void llvmpipe_update_fs(struct llvmpipe_context *lp)
+{
+   struct lp_fragment_shader *fs = lp->fs;
+   const struct pipe_alpha_state *key = &lp->depth_stencil->alpha;
+   struct lp_fragment_shader_variant *match;
+
+   /* Look for an existing variant of the bound fragment shader whose
+    * stored alpha state is byte-for-byte equal to the current one. */
+   for(match = fs->variants; match; match = match->next)
+      if(!memcmp(&match->alpha, key, sizeof *key))
+         break;
+
+   /* Nothing generated for this alpha state yet: build it now.  The
+    * result is NULL if shader_generate() could not allocate a variant. */
+   if(match == NULL)
+      match = shader_generate(fs->screen, fs, key);
+
+   fs->current = match;
+}
-- 
cgit v1.2.3


From 343ccc8dd0d3578aeeb9b635f0933c9f323c7fda Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 21 Aug 2009 07:43:49 +0100
Subject: llvmpipe: Depth test to LLVM IR translation.

---
 src/gallium/drivers/llvmpipe/Makefile       |   2 +-
 src/gallium/drivers/llvmpipe/SConscript     |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_depth.c | 181 ++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_depth.h |  65 ++++++++++
 4 files changed, 248 insertions(+), 2 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_depth.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_depth.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 96e0380497..ed24a1c7b9 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -11,6 +11,7 @@ C_SOURCES = \
 	lp_bld_const.c \
 	lp_bld_conv.c \
 	lp_bld_debug.c \
+	lp_bld_depth.c \
 	lp_bld_intr.c \
 	lp_bld_pack.c \
 	lp_bld_unpack.c \
@@ -31,7 +32,6 @@ C_SOURCES = \
 	lp_setup.c \
 	lp_quad_blend.c \
 	lp_quad_pipe.c \
-	lp_quad_depth_test.c \
 	lp_quad_fs.c \
 	lp_quad_stipple.c \
 	lp_query.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 68989792aa..a2987c11f3 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -15,6 +15,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_const.c',
 		'lp_bld_conv.c',
 		'lp_bld_debug.c',
+		'lp_bld_depth.c',
 		'lp_bld_intr.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
@@ -35,7 +36,6 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_setup.c',
 		'lp_quad_blend.c',
 		'lp_quad_pipe.c',
-		'lp_quad_depth_test.c',
 		'lp_quad_fs.c',
 		'lp_quad_stipple.c',
 		'lp_query.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
new file mode 100644
index 0000000000..0cd6a5a95a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -0,0 +1,181 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Depth/stencil testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_depth.h"
+
+/* Describe, as an lp_type, the element type used to depth-test pixels of a ZS format. */
+union lp_type
+lp_depth_type(const struct util_format_description *format_desc,
+              unsigned length)
+{
+   union lp_type type;
+   unsigned swizzle;
+
+   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+   /* swizzle[0] selects the channel that is treated as Z */
+   swizzle = format_desc->swizzle[0];
+   assert(swizzle < 4);
+   /* one element per pixel block, so the element is as wide as the block */
+   type.value = 0;
+   type.width = format_desc->block.bits;
+
+   if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
+      type.floating = TRUE;
+      assert(swizzle == 0);   /* a float Z channel must be the first (and only) channel */
+      assert(format_desc->channel[swizzle].size == format_desc->block.bits);
+   }
+   else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+      assert(format_desc->block.bits <= 32);
+      if(format_desc->channel[swizzle].normalized)
+         type.norm = TRUE;
+   }
+   else
+      assert(0);   /* only float and unsigned Z channels are handled */
+   /* length is in the same unit as type.width (bits); at least one element must fit */
+   assert(type.width <= length);
+   type.length = length / type.width;
+
+   return type;
+}
+
+/* Emit IR that compares src against the Z loaded from dst_ptr, ANDs the result into *mask, and optionally stores Z. */
+void
+lp_build_depth_test(LLVMBuilderRef builder,
+                    const struct pipe_depth_state *state,
+                    union lp_type type,
+                    const struct util_format_description *format_desc,
+                    LLVMValueRef *mask,
+                    LLVMValueRef src,
+                    LLVMValueRef dst_ptr)
+{
+   struct lp_build_context bld;
+   unsigned z_swizzle;
+   LLVMValueRef dst;
+   LLVMValueRef z_bitmask = NULL;   /* stays NULL unless Z shares the block with other bits */
+   LLVMValueRef test;
+
+   if(!state->enabled)
+      return;
+
+   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+
+   z_swizzle = format_desc->swizzle[0];
+   if(z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+      return;
+
+   /* Sanity checking */
+   assert(z_swizzle < 4);
+   assert(format_desc->block.bits == type.width);
+   if(type.floating) {
+      assert(z_swizzle == 0);
+      assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT);
+      assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
+   }
+   else {
+      assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
+      assert(format_desc->channel[z_swizzle].normalized);
+      assert(!type.fixed);
+      assert(!type.sign);
+      assert(type.norm);
+   }
+
+   /* Setup build context */
+   lp_build_context_init(&bld, builder, type);
+
+   dst = LLVMBuildLoad(builder, dst_ptr, "");
+
+   lp_build_name(dst, "zsbuf");
+
+   /* Align the source depth bits with the destination's, and mask out any
+    * stencil or padding bits from both */
+   if(format_desc->channel[z_swizzle].size == format_desc->block.bits) {
+      assert(z_swizzle == 0);
+      /* nothing to do */
+   }
+   else {
+      unsigned padding_left;
+      unsigned padding_right;
+      unsigned chan;
+
+      assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+      assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
+      assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits);
+      assert(format_desc->channel[z_swizzle].normalized);
+      /* padding_right: bits of the channels before Z; padding_left: whatever remains beyond Z */
+      padding_right = 0;
+      for(chan = 0; chan < z_swizzle; ++chan)
+         padding_right += format_desc->channel[chan].size;
+      padding_left = format_desc->block.bits - (padding_right + format_desc->channel[z_swizzle].size);
+
+      if(padding_left || padding_right) {
+         const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1;
+         const long long mask_right = ((long long)1 << (padding_right)) - 1;
+         z_bitmask = lp_build_int_const_uni(type, mask_left ^ mask_right);   /* exactly the Z bits */
+      }
+      /* presumably src arrives using the full block width, hence the shift down under Z's top bit -- TODO confirm */
+      if(padding_left)
+         src = LLVMBuildLShr(builder, src, lp_build_int_const_uni(type, padding_left), "");
+      if(padding_right)
+         src = LLVMBuildAnd(builder, src, z_bitmask, "");
+      if(padding_left || padding_right)
+         dst = LLVMBuildAnd(builder, dst, z_bitmask, "");
+   }
+
+   lp_build_name(dst, "zsbuf.z");
+
+   test = lp_build_cmp(&bld, state->func, src, dst);
+   lp_build_mask_and(bld.builder, mask, test);
+
+   if(state->writemask) {
+      if(z_bitmask)
+         z_bitmask = LLVMBuildAnd(builder, *mask, z_bitmask, "");
+      else
+         z_bitmask = *mask;
+      /* NOTE(review): dst had its non-Z bits cleared above, so they appear to be stored as 0 -- confirm intended */
+      dst = lp_build_select(&bld, z_bitmask, src, dst);
+      LLVMBuildStore(builder, dst, dst_ptr);
+   }
+
+   assert(!state->occlusion_count);   /* occlusion counting is not handled by this code */
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
new file mode 100644
index 0000000000..4df6cdf984
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -0,0 +1,65 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Depth/stencil testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_DEPTH_H
+#define LP_BLD_DEPTH_H
+
+
+#include <llvm-c/Core.h>  
+
+ 
+struct pipe_depth_state;
+struct util_format_description;
+union lp_type;
+
+
+/**
+ * Return a type appropriate for depth testing.
+ */
+union lp_type
+lp_depth_type(const struct util_format_description *format_desc,
+              unsigned length);
+
+
+void
+lp_build_depth_test(LLVMBuilderRef builder,
+                    const struct pipe_depth_state *state,
+                    union lp_type type,
+                    const struct util_format_description *format_desc,
+                    LLVMValueRef *mask,
+                    LLVMValueRef src,
+                    LLVMValueRef dst_ptr);
+
+
+#endif /* !LP_BLD_DEPTH_H */
-- 
cgit v1.2.3


From c5531f575b85e39a63578cd48f70193a4888cfb5 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 21 Aug 2009 10:57:48 +0100
Subject: llvmpipe: Split control flow function declarations and notes.

---
 src/gallium/drivers/llvmpipe/Makefile         |   2 +-
 src/gallium/drivers/llvmpipe/README           |   9 ++
 src/gallium/drivers/llvmpipe/SConscript       |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld.h         | 122 --------------------------
 src/gallium/drivers/llvmpipe/lp_bld_flow.c    |  86 ++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_flow.h    |  69 +++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_format.h  | 101 +++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_load.c    |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_loop.c    |  91 -------------------
 src/gallium/drivers/llvmpipe/lp_bld_pack.c    |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_store.c   |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_unpack.c  |   2 +-
 src/gallium/drivers/llvmpipe/lp_test_format.c |   3 +-
 13 files changed, 273 insertions(+), 220 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld.h
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_flow.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_flow.h
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format.h
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_loop.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index ed24a1c7b9..91a2e2ee74 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -12,12 +12,12 @@ C_SOURCES = \
 	lp_bld_conv.c \
 	lp_bld_debug.c \
 	lp_bld_depth.c \
+	lp_bld_flow.c \
 	lp_bld_intr.c \
 	lp_bld_pack.c \
 	lp_bld_unpack.c \
 	lp_bld_load.c \
 	lp_bld_store.c \
-	lp_bld_loop.c \
 	lp_bld_logic.c \
 	lp_bld_logicop.c \
 	lp_bld_swizzle.c \
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 6e4edaa947..677352eaa1 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -106,3 +106,12 @@ for posterior analysis, e.g.:
 
   build/linux-x86_64/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
 
+
+Development Notes
+=================
+
+- We use LLVM-C bindings for now. They are not documented, but follow the C++
+  interfaces very closely, and appear to be complete enough for code
+  generation. See 
+  http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
+  for a standalone example.
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index a2987c11f3..b8b577fe5a 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -16,12 +16,12 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_conv.c',
 		'lp_bld_debug.c',
 		'lp_bld_depth.c',
+		'lp_bld_flow.c',
 		'lp_bld_intr.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
 		'lp_bld_load.c',
 		'lp_bld_store.c',
-		'lp_bld_loop.c',
 		'lp_bld_logic.c',
 		'lp_bld_logicop.c',
 		'lp_bld_swizzle.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld.h b/src/gallium/drivers/llvmpipe/lp_bld.h
deleted file mode 100644
index c2dea1036f..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_bld.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef LP_BLD_H
-#define LP_BLD_H
-
-
-/**
- * @file
- * LLVM IR building helpers interfaces.
- *
- * We use LLVM-C bindings for now. They are not documented, but follow the C++
- * interfaces very closely, and appear to be complete enough for code
- * genration. See
- * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- * for a standalone example.
- */
-
-#include <llvm-c/Core.h>  
- 
-#include "pipe/p_format.h"
-
-
-union lp_type;
-
-
-/**
- * Unpack a pixel into its RGBA components.
- *
- * @param packed integer.
- *
- * @return RGBA in a 4 floats vector.
- */
-LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
-                     enum pipe_format format, 
-                     LLVMValueRef packed);
-
-
-/**
- * Pack a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format,
-                   LLVMValueRef rgba);
-
-
-/**
- * Load a pixel into its RGBA components.
- *
- * @param ptr value with the pointer to the packed pixel. Pointer type is
- * irrelevant.
- *
- * @return RGBA in a 4 floats vector.
- */
-LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format, 
-                   LLVMValueRef ptr);
-
-
-/**
- * Store a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-void 
-lp_build_store_rgba(LLVMBuilderRef builder,
-                    enum pipe_format format,
-                    LLVMValueRef ptr,
-                    LLVMValueRef rgba);
-
-
-struct lp_build_loop_state
-{
-  LLVMBasicBlockRef block;
-  LLVMValueRef counter;
-};
-
-
-void
-lp_build_loop_begin(LLVMBuilderRef builder,
-                    LLVMValueRef start,
-                    struct lp_build_loop_state *state);
-
-
-void
-lp_build_loop_end(LLVMBuilderRef builder,
-                  LLVMValueRef end,
-                  LLVMValueRef step,
-                  struct lp_build_loop_state *state);
-
-
-
-#endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
new file mode 100644
index 0000000000..5fc85a19b8
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -0,0 +1,86 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * LLVM control flow build helpers.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "lp_bld_flow.h"
+
+
+
+void
+lp_build_loop_begin(LLVMBuilderRef builder,
+                    LLVMValueRef start,
+                    struct lp_build_loop_state *state)
+{
+   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+   LLVMValueRef function = LLVMGetBasicBlockParent(block);
+
+   state->block = LLVMAppendBasicBlock(function, "loop");
+
+   LLVMBuildBr(builder, state->block);
+
+   LLVMPositionBuilderAtEnd(builder, state->block);
+
+   state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), "");
+
+   LLVMAddIncoming(state->counter, &start, &block, 1);
+
+}
+
+
+void
+lp_build_loop_end(LLVMBuilderRef builder,
+                  LLVMValueRef end,
+                  LLVMValueRef step,
+                  struct lp_build_loop_state *state)
+{
+   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+   LLVMValueRef function = LLVMGetBasicBlockParent(block);
+   LLVMValueRef next;
+   LLVMValueRef cond;
+   LLVMBasicBlockRef after_block;
+
+   if (!step)
+      step = LLVMConstInt(LLVMTypeOf(end), 1, 0);
+
+   next = LLVMBuildAdd(builder, state->counter, step, "");
+
+   cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, "");
+
+   after_block = LLVMAppendBasicBlock(function, "");
+
+   LLVMBuildCondBr(builder, cond, after_block, state->block);
+
+   LLVMAddIncoming(state->counter, &next, &block, 1);
+
+   LLVMPositionBuilderAtEnd(builder, after_block);
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
new file mode 100644
index 0000000000..7281b278a0
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
@@ -0,0 +1,69 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * LLVM control flow build helpers.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_FLOW_H
+#define LP_BLD_FLOW_H
+
+
+#include <llvm-c/Core.h>  
+
+
+/**
+ * LLVM's IR doesn't represent for-loops directly. Furthermore,
+ * building one requires creating basic blocks, branches and phi nodes,
+ * so it takes a fair amount of code.
+ *
+ * @sa http://www.llvm.org/docs/tutorial/LangImpl5.html#for
+ */
+struct lp_build_loop_state
+{
+  LLVMBasicBlockRef block;
+  LLVMValueRef counter;
+};
+
+
+void
+lp_build_loop_begin(LLVMBuilderRef builder,
+                    LLVMValueRef start,
+                    struct lp_build_loop_state *state);
+
+
+void
+lp_build_loop_end(LLVMBuilderRef builder,
+                  LLVMValueRef end,
+                  LLVMValueRef step,
+                  struct lp_build_loop_state *state);
+
+
+
+#endif /* !LP_BLD_FLOW_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
new file mode 100644
index 0000000000..01c8a752d1
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_H
+#define LP_BLD_H
+
+
+/**
+ * @file
+ * LLVM IR building helpers interfaces.
+ *
+ * We use LLVM-C bindings for now. They are not documented, but follow the C++
+ * interfaces very closely, and appear to be complete enough for code
+ * generation. See
+ * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
+ * for a standalone example.
+ */
+
+#include <llvm-c/Core.h>  
+ 
+#include "pipe/p_format.h"
+
+
+union lp_type;
+
+
+/**
+ * Unpack a pixel into its RGBA components.
+ *
+ * @param packed integer.
+ *
+ * @return RGBA in a 4 floats vector.
+ */
+LLVMValueRef
+lp_build_unpack_rgba(LLVMBuilderRef builder,
+                     enum pipe_format format, 
+                     LLVMValueRef packed);
+
+
+/**
+ * Pack a pixel.
+ *
+ * @param rgba 4 float vector with the unpacked components.
+ */
+LLVMValueRef
+lp_build_pack_rgba(LLVMBuilderRef builder,
+                   enum pipe_format format,
+                   LLVMValueRef rgba);
+
+
+/**
+ * Load a pixel into its RGBA components.
+ *
+ * @param ptr value with the pointer to the packed pixel. Pointer type is
+ * irrelevant.
+ *
+ * @return RGBA in a 4 floats vector.
+ */
+LLVMValueRef
+lp_build_load_rgba(LLVMBuilderRef builder,
+                   enum pipe_format format, 
+                   LLVMValueRef ptr);
+
+
+/**
+ * Store a pixel.
+ *
+ * @param rgba 4 float vector with the unpacked components.
+ */
+void 
+lp_build_store_rgba(LLVMBuilderRef builder,
+                    enum pipe_format format,
+                    LLVMValueRef ptr,
+                    LLVMValueRef rgba);
+
+
+#endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_load.c b/src/gallium/drivers/llvmpipe/lp_bld_load.c
index b9734bdbed..27db7b2db0 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_load.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_load.c
@@ -28,7 +28,7 @@
 
 #include "util/u_format.h"
 
-#include "lp_bld.h"
+#include "lp_bld_format.h"
 
 
 LLVMValueRef
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_loop.c b/src/gallium/drivers/llvmpipe/lp_bld_loop.c
deleted file mode 100644
index eb6126e5f0..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_bld_loop.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "lp_bld.h"
-
-
-/**
- * @file
- * Auxiliaries to build loops.
- *
- * LLVM's IR doesn't represent for-loops directly. Furthermore it
- * it requires creating code blocks, branches, phi variables, so it
- * requires a fair amount of code.
- *
- * @sa http://www.llvm.org/docs/tutorial/LangImpl5.html#for
- */
-
-
-void
-lp_build_loop_begin(LLVMBuilderRef builder,
-                    LLVMValueRef start,
-                    struct lp_build_loop_state *state)
-{
-   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
-   LLVMValueRef function = LLVMGetBasicBlockParent(block);
-
-   state->block = LLVMAppendBasicBlock(function, "loop");
-
-   LLVMBuildBr(builder, state->block);
-
-   LLVMPositionBuilderAtEnd(builder, state->block);
-
-   state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), "");
-
-   LLVMAddIncoming(state->counter, &start, &block, 1);
-
-}
-
-
-void
-lp_build_loop_end(LLVMBuilderRef builder,
-                  LLVMValueRef end,
-                  LLVMValueRef step,
-                  struct lp_build_loop_state *state)
-{
-   LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
-   LLVMValueRef function = LLVMGetBasicBlockParent(block);
-   LLVMValueRef next;
-   LLVMValueRef cond;
-   LLVMBasicBlockRef after_block;
-
-   if (!step)
-      step = LLVMConstInt(LLVMTypeOf(end), 1, 0);
-
-   next = LLVMBuildAdd(builder, state->counter, step, "");
-
-   cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, "");
-
-   after_block = LLVMAppendBasicBlock(function, "");
-
-   LLVMBuildCondBr(builder, cond, after_block, state->block);
-
-   LLVMAddIncoming(state->counter, &next, &block, 1);
-
-   LLVMPositionBuilderAtEnd(builder, after_block);
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
index f436f09737..71261e4f39 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
@@ -28,7 +28,7 @@
 
 #include "util/u_format.h"
 
-#include "lp_bld.h"
+#include "lp_bld_format.h"
 
 
 LLVMValueRef
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_store.c b/src/gallium/drivers/llvmpipe/lp_bld_store.c
index 6273c9ee62..1da6dac8a2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_store.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_store.c
@@ -28,7 +28,7 @@
 
 #include "util/u_format.h"
 
-#include "lp_bld.h"
+#include "lp_bld_format.h"
 
 
 void
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_unpack.c b/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
index 3545bdf225..d70faac1ba 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
@@ -28,7 +28,7 @@
 
 #include "util/u_format.h"
 
-#include "lp_bld.h"
+#include "lp_bld_format.h"
 
 
 LLVMValueRef
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 3086bf871b..1d192355ee 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -37,7 +37,8 @@
 
 #include "util/u_format.h"
 
-#include "lp_bld.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_format.h"
 
 
 struct pixel_test_case
-- 
cgit v1.2.3


From 53f9a1180ef5a24cd8ffe235e716a9061a129bb3 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 21 Aug 2009 19:11:50 +0100
Subject: llvmpipe: Delete the quad polygon stipple stage.

Not used now -- stipple done by the draw module. May code generate later.
---
 src/gallium/drivers/llvmpipe/Makefile          |  1 -
 src/gallium/drivers/llvmpipe/SConscript        |  1 -
 src/gallium/drivers/llvmpipe/lp_quad_stipple.c | 85 --------------------------
 3 files changed, 87 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad_stipple.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 91a2e2ee74..fbd1300c85 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -33,7 +33,6 @@ C_SOURCES = \
 	lp_quad_blend.c \
 	lp_quad_pipe.c \
 	lp_quad_fs.c \
-	lp_quad_stipple.c \
 	lp_query.c \
 	lp_screen.c \
 	lp_state_blend.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index b8b577fe5a..614d92b0f5 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -37,7 +37,6 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_quad_blend.c',
 		'lp_quad_pipe.c',
 		'lp_quad_fs.c',
-		'lp_quad_stipple.c',
 		'lp_query.c',
 		'lp_screen.c',
 		'lp_state_blend.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_stipple.c b/src/gallium/drivers/llvmpipe/lp_quad_stipple.c
deleted file mode 100644
index b89978fd5f..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_quad_stipple.c
+++ /dev/null
@@ -1,85 +0,0 @@
-
-/**
- * quad polygon stipple stage
- */
-
-#include "lp_context.h"
-#include "lp_quad.h"
-#include "lp_quad_pipe.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-
-
-/**
- * Apply polygon stipple to quads produced by triangle rasterization
- */
-static void
-stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
-{
-   static const uint bit31 = 1 << 31;
-   static const uint bit30 = 1 << 30;
-   unsigned pass = nr;
-
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   unsigned q;
-
-   pass = 0;
-
-   for (q = 0; q < nr; q++)  {
-      struct quad_header *quad = quads[q];
-
-      const int col0 = quad->input.x0 % 32;
-      const int y0 = quad->input.y0;
-      const int y1 = y0 + 1;
-      const uint stipple0 = llvmpipe->poly_stipple.stipple[y0 % 32];
-      const uint stipple1 = llvmpipe->poly_stipple.stipple[y1 % 32];
-
-      if (!quad->inout.mask)
-         continue;
-
-      /* turn off quad mask bits that fail the stipple test */
-      if ((stipple0 & (bit31 >> col0)) == 0)
-         quad->inout.mask &= ~MASK_TOP_LEFT;
-
-      if ((stipple0 & (bit30 >> col0)) == 0)
-         quad->inout.mask &= ~MASK_TOP_RIGHT;
-
-      if ((stipple1 & (bit31 >> col0)) == 0)
-         quad->inout.mask &= ~MASK_BOTTOM_LEFT;
-
-      if ((stipple1 & (bit30 >> col0)) == 0)
-         quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
-
-      if (quad->inout.mask)
-         ++pass;
-   }
-
-   if(pass)
-      qs->next->run(qs->next, quads, nr);
-}
-
-
-static void stipple_begin(struct quad_stage *qs)
-{
-   qs->next->begin(qs->next);
-}
-
-
-static void stipple_destroy(struct quad_stage *qs)
-{
-   FREE( qs );
-}
-
-
-struct quad_stage *
-lp_quad_polygon_stipple_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
-   stage->llvmpipe = llvmpipe;
-   stage->begin = stipple_begin;
-   stage->run = stipple_quad;
-   stage->destroy = stipple_destroy;
-
-   return stage;
-}
-- 
cgit v1.2.3


From 98971802798354cdba45c421cc340ec938143e03 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 22 Aug 2009 12:39:44 +0100
Subject: llvmpipe: Generate the fragment pipeline into a single function.

Still hackish. Will document and optimize later.
---
 src/gallium/drivers/llvmpipe/Makefile           |   1 -
 src/gallium/drivers/llvmpipe/SConscript         |   1 -
 src/gallium/drivers/llvmpipe/lp_bld_flow.c      |   3 +
 src/gallium/drivers/llvmpipe/lp_context.c       |   2 -
 src/gallium/drivers/llvmpipe/lp_context.h       |   1 -
 src/gallium/drivers/llvmpipe/lp_quad.h          |   1 -
 src/gallium/drivers/llvmpipe/lp_quad_blend.c    | 132 --------
 src/gallium/drivers/llvmpipe/lp_quad_fs.c       | 105 +++----
 src/gallium/drivers/llvmpipe/lp_quad_pipe.c     |  13 +-
 src/gallium/drivers/llvmpipe/lp_quad_pipe.h     |   7 -
 src/gallium/drivers/llvmpipe/lp_state.h         |   1 +
 src/gallium/drivers/llvmpipe/lp_state_derived.c |   5 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c      | 394 +++++++++++++++++-------
 13 files changed, 327 insertions(+), 339 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad_blend.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index fbd1300c85..fb77f2a4c9 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -30,7 +30,6 @@ C_SOURCES = \
 	lp_prim_setup.c \
 	lp_prim_vbuf.c \
 	lp_setup.c \
-	lp_quad_blend.c \
 	lp_quad_pipe.c \
 	lp_quad_fs.c \
 	lp_query.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 614d92b0f5..f9c09f7074 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -34,7 +34,6 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_prim_setup.c',
 		'lp_prim_vbuf.c',
 		'lp_setup.c',
-		'lp_quad_blend.c',
 		'lp_quad_pipe.c',
 		'lp_quad_fs.c',
 		'lp_query.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index d94af0dea4..9d99e1a9d9 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -66,6 +66,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
    else
       mask->value = value;
 
+   /* FIXME: disabled until we have proper control flow helpers */
+#if 0
    cond = LLVMBuildICmp(mask->builder,
                         LLVMIntEQ,
                         LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""),
@@ -95,6 +97,7 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
    LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block);
 
    LLVMPositionBuilderAtEnd(mask->builder, new_block);
+#endif
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 39019ab3f8..b9fd681e73 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -86,7 +86,6 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
       draw_destroy( llvmpipe->draw );
 
       llvmpipe->quad.shade->destroy( llvmpipe->quad.shade );
-      llvmpipe->quad.blend->destroy( llvmpipe->quad.blend );
 
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
       lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]);
@@ -217,7 +216,6 @@ llvmpipe_create( struct pipe_screen *screen )
 
    /* setup quad rendering stages */
       llvmpipe->quad.shade = lp_quad_shade_stage(llvmpipe);
-      llvmpipe->quad.blend = lp_quad_blend_stage(llvmpipe);
 
    /* vertex shader samplers */
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index c31df0bf39..9de21d0cd0 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -117,7 +117,6 @@ struct llvmpipe_context {
    /** Software quad rendering pipeline */
    struct {
       struct quad_stage *shade;
-      struct quad_stage *blend;
 
       struct quad_stage *first; /**< points to one of the above stages */
    } quad;
diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h
index 0902716739..7eb05de77a 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad.h
+++ b/src/gallium/drivers/llvmpipe/lp_quad.h
@@ -105,7 +105,6 @@ struct quad_interp_coef
 struct quad_header {
    struct quad_header_input input;
    struct quad_header_inout inout;
-   struct quad_header_output output;
 
    /* Redundant/duplicated:
     */
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_blend.c b/src/gallium/drivers/llvmpipe/lp_quad_blend.c
deleted file mode 100644
index ba12322a29..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_quad_blend.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Quad blending.
- *
- * @author Jose Fonseca <jfonseca@vmware.com>
- * @author Brian Paul
- */
-
-#include "pipe/p_defines.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "util/u_debug_dump.h"
-#include "lp_context.h"
-#include "lp_state.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_tile_cache.h"
-#include "lp_tile_soa.h"
-#include "lp_quad_pipe.h"
-
-
-static void blend_begin(struct quad_stage *qs)
-{
-}
-
-
-static void
-blend_run(struct quad_stage *qs,
-          struct quad_header *quads[],
-          unsigned nr)
-{
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   struct lp_blend_state *blend = llvmpipe->blend;
-   unsigned cbuf;
-   uint q, i, j;
-
-   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) 
-   {
-      unsigned x0 = quads[0]->input.x0;
-      unsigned y0 = quads[0]->input.y0;
-      uint8_t ALIGN16_ATTRIB src[NUM_CHANNELS][TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH];
-      uint8_t ALIGN16_ATTRIB mask[16];
-      uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf], x0, y0);
-      uint8_t *dst;
-
-      assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
-
-      assert(x0 % TILE_VECTOR_WIDTH == 0);
-      assert(y0 % TILE_VECTOR_HEIGHT == 0);
-
-      dst = &TILE_PIXEL(tile, x0 & (TILE_SIZE-1), y0 & (TILE_SIZE-1), 0);
-
-      for (q = 0; q < nr; ++q) {
-         struct quad_header *quad = quads[q];
-         const int itx = (quad->input.x0 & (TILE_SIZE-1));
-         const int ity = (quad->input.y0 & (TILE_SIZE-1));
-
-         assert(quad->input.x0 == x0 + q*2);
-         assert(quad->input.y0 == y0);
-
-         /* get/swizzle src/dest colors
-          */
-         for (j = 0; j < QUAD_SIZE; j++) {
-            int x = itx + (j & 1);
-            int y = ity + (j >> 1);
-
-            assert(x < TILE_SIZE);
-            assert(y < TILE_SIZE);
-
-            for (i = 0; i < 4; i++) {
-               src[i][4*q + j] = float_to_ubyte(quad->output.color[cbuf][i][j]);
-            }
-            mask[4*q + j] = quad->inout.mask & (1 << j) ? ~0 : 0;
-         }
-      }
-
-      assert(blend->jit_function);
-      assert((((uintptr_t)src) & 0xf) == 0);
-      assert((((uintptr_t)dst) & 0xf) == 0);
-      assert((((uintptr_t)llvmpipe->blend_color) & 0xf) == 0);
-      if(blend->jit_function)
-         blend->jit_function( mask,
-                              &src[0][0],
-                              &llvmpipe->blend_color[0][0],
-                              dst );
-   }
-}
-
-
-static void blend_destroy(struct quad_stage *qs)
-{
-   FREE( qs );
-}
-
-
-struct quad_stage *lp_quad_blend_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
-   stage->llvmpipe = llvmpipe;
-   stage->begin = blend_begin;
-   stage->run = blend_run;
-   stage->destroy = blend_destroy;
-
-   return stage;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_fs.c b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
index 4f7a061fd6..9ead0864a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
@@ -46,6 +46,8 @@
 #include "lp_quad.h"
 #include "lp_quad_pipe.h"
 #include "lp_texture.h"
+#include "lp_tile_cache.h"
+#include "lp_tile_soa.h"
 
 
 struct quad_shade_stage
@@ -69,30 +71,48 @@ quad_shade_stage(struct quad_stage *qs)
 /**
  * Execute fragment shader for the four fragments in the quad.
  */
-static boolean
-shade_quad(struct quad_stage *qs, struct quad_header *quad)
+static void
+shade_quads(struct quad_stage *qs,
+                 struct quad_header *quads[],
+                 unsigned nr)
 {
    struct quad_shade_stage *qss = quad_shade_stage( qs );
    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
    struct lp_fragment_shader *fs = llvmpipe->fs;
    void *constants;
    struct tgsi_sampler **samplers;
+   struct quad_header *quad = quads[0];
    const unsigned x = quad->input.x0;
    const unsigned y = quad->input.y0;
+   uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
+   uint8_t *color;
    void *depth;
-   uint32_t ALIGN16_ATTRIB mask[NUM_CHANNELS];
+   uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
    unsigned chan_index;
+   unsigned q;
 
    assert(fs->current);
    if(!fs->current)
-      return FALSE;
+      return;
+
+   /* Sanity checks */
+   assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
+   assert(x % TILE_VECTOR_WIDTH == 0);
+   assert(y % TILE_VECTOR_HEIGHT == 0);
+   for (q = 0; q < nr; ++q) {
+      assert(quads[q]->input.x0 == x + q*2);
+      assert(quads[q]->input.y0 == y);
+   }
 
-   constants = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
-   samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
+   /* mask */
+   for (q = 0; q < 4; ++q)
+      for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
+         mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;
 
-   for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
-      mask[chan_index] = quad->inout.mask & (1 << chan_index) ? ~0 : 0;
+   /* color buffer */
+   color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
 
+   /* depth buffer */
    if(qss->map) {
       assert((x % 2) == 0);
       assert((y % 2) == 0);
@@ -103,9 +123,14 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
    else
       depth = NULL;
 
+   constants = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
+   samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
+   /* TODO: blend color */
+
    assert((((uintptr_t)mask) & 0xf) == 0);
-   assert((((uintptr_t)quad->output.color) & 0xf) == 0);
    assert((((uintptr_t)depth) & 0xf) == 0);
+   assert((((uintptr_t)color) & 0xf) == 0);
+   assert((((uintptr_t)llvmpipe->blend_color) & 0xf) == 0);
 
    /* run shader */
    fs->current->jit_function( x,
@@ -114,71 +139,14 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
                               quad->coef->dadx,
                               quad->coef->dady,
                               constants,
-                              mask,
-                              quad->output.color,
+                              &mask[0][0],
+                              color,
                               depth,
                               samplers);
-
-   for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
-      if(!mask[chan_index])
-         quad->inout.mask &= ~(1 << chan_index);
-
-   if (quad->inout.mask == 0)
-      return FALSE;
-
-   return TRUE;
-}
-
-
-
-static void
-coverage_quad(struct quad_stage *qs, struct quad_header *quad)
-{
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   uint cbuf;
-
-   /* loop over colorbuffer outputs */
-   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
-      float (*quadColor)[4] = quad->output.color[cbuf];
-      unsigned j;
-      for (j = 0; j < QUAD_SIZE; j++) {
-         assert(quad->input.coverage[j] >= 0.0);
-         assert(quad->input.coverage[j] <= 1.0);
-         quadColor[3][j] *= quad->input.coverage[j];
-      }
-   }
 }
 
 
-static void
-shade_quads(struct quad_stage *qs, 
-                 struct quad_header *quads[],
-                 unsigned nr)
-{
-   unsigned i, pass = 0;
-   
-   for (i = 0; i < nr; i++) {
-      if(!quads[i]->inout.mask)
-         continue;
-
-      if (!shade_quad(qs, quads[i]))
-         continue;
-
-      if (/*do_coverage*/ 0)
-         coverage_quad( qs, quads[i] );
-
-      ++pass;
-   }
-   
-   if (pass)
-      qs->next->run(qs->next, quads, nr);
-}
-   
-
-
-
-
 /**
  * Per-primitive (or per-begin?) setup
  */
@@ -210,7 +178,6 @@ shade_begin(struct quad_stage *qs)
 
    }
 
-   qs->next->begin(qs->next);
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c b/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
index 70d3ad39a6..e672dc9e03 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
@@ -30,20 +30,9 @@
 #include "lp_state.h"
 #include "pipe/p_shader_tokens.h"
 
-static void
-lp_push_quad_first( struct llvmpipe_context *lp,
-                    struct quad_stage *quad )
-{
-   quad->next = lp->quad.first;
-   lp->quad.first = quad;
-}
-
-
 void
 lp_build_quad_pipeline(struct llvmpipe_context *lp)
 {
-   lp->quad.first = lp->quad.blend;
-
-   lp_push_quad_first( lp, lp->quad.shade );
+   lp->quad.first = lp->quad.shade;
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_pipe.h b/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
index 52d4d68661..ff4747f33f 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
+++ b/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
@@ -55,14 +55,7 @@ struct quad_stage {
 };
 
 
-struct quad_stage *lp_quad_polygon_stipple_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_earlyz_stage( struct llvmpipe_context *llvmpipe );
 struct quad_stage *lp_quad_shade_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_stencil_test_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_occlusion_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_coverage_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_blend_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_output_stage( struct llvmpipe_context *llvmpipe );
 
 void lp_build_quad_pipeline(struct llvmpipe_context *lp);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 2d6add8f3a..83dace30ce 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -78,6 +78,7 @@ struct lp_fragment_shader_variant_key
 {
    struct pipe_depth_state depth;
    struct pipe_alpha_state alpha;
+   struct pipe_blend_state blend;
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 35b24a12ff..9f5d2ffb11 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -247,13 +247,12 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
       compute_cliprect(llvmpipe);
 
    if (llvmpipe->dirty & (LP_NEW_FS |
+                          LP_NEW_BLEND |
                           LP_NEW_DEPTH_STENCIL_ALPHA))
       llvmpipe_update_fs( llvmpipe );
 
 
-   if (llvmpipe->dirty & (LP_NEW_BLEND |
-                          LP_NEW_DEPTH_STENCIL_ALPHA |
-                          LP_NEW_FRAMEBUFFER |
+   if (llvmpipe->dirty & (LP_NEW_FRAMEBUFFER |
                           LP_NEW_FS))
       lp_build_quad_pipeline(llvmpipe);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 521700acd5..9b0e7cdd37 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -38,9 +38,11 @@
 #include "tgsi/tgsi_parse.h"
 #include "lp_bld_type.h"
 #include "lp_bld_conv.h"
+#include "lp_bld_logic.h"
 #include "lp_bld_depth.h"
 #include "lp_bld_tgsi.h"
 #include "lp_bld_alpha.h"
+#include "lp_bld_blend.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_flow.h"
 #include "lp_bld_debug.h"
@@ -55,13 +57,13 @@ static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
 
 
 static void
-setup_pos_vector(LLVMBuilderRef builder,
-                 LLVMValueRef x,
-                 LLVMValueRef y,
-                 LLVMValueRef a0_ptr,
-                 LLVMValueRef dadx_ptr,
-                 LLVMValueRef dady_ptr,
-                 LLVMValueRef *pos)
+generate_pos(LLVMBuilderRef builder,
+             LLVMValueRef x,
+             LLVMValueRef y,
+             LLVMValueRef a0_ptr,
+             LLVMValueRef dadx_ptr,
+             LLVMValueRef dady_ptr,
+             LLVMValueRef *pos)
 {
    LLVMTypeRef int_elem_type = LLVMInt32Type();
    LLVMTypeRef int_vec_type = LLVMVectorType(int_elem_type, QUAD_SIZE);
@@ -110,13 +112,13 @@ setup_pos_vector(LLVMBuilderRef builder,
 
 
 static void
-depth_test_generate(struct llvmpipe_context *lp,
-                    LLVMBuilderRef builder,
-                    const struct pipe_depth_state *state,
-                    union lp_type src_type,
-                    struct lp_build_mask_context *mask,
-                    LLVMValueRef src,
-                    LLVMValueRef dst_ptr)
+generate_depth(struct llvmpipe_context *lp,
+               LLVMBuilderRef builder,
+               const struct pipe_depth_state *state,
+               union lp_type src_type,
+               struct lp_build_mask_context *mask,
+               LLVMValueRef src,
+               LLVMValueRef dst_ptr)
 {
    const struct util_format_description *format_desc;
    union lp_type dst_type;
@@ -151,18 +153,177 @@ depth_test_generate(struct llvmpipe_context *lp,
 }
 
 
-static struct lp_fragment_shader_variant *
-shader_generate(struct llvmpipe_context *lp,
-                struct lp_fragment_shader *shader,
-                const struct lp_fragment_shader_variant_key *key)
+/**
+ * Generate the fragment shader, depth/stencil and alpha tests.
+ */
+static void
+generate_fs(struct llvmpipe_context *lp,
+            struct lp_fragment_shader *shader,
+            const struct lp_fragment_shader_variant_key *key,
+            LLVMBuilderRef builder,
+            union lp_type type,
+            unsigned i,
+            LLVMValueRef x,
+            LLVMValueRef y,
+            LLVMValueRef a0_ptr,
+            LLVMValueRef dadx_ptr,
+            LLVMValueRef dady_ptr,
+            LLVMValueRef consts_ptr,
+            LLVMValueRef *pmask,
+            LLVMValueRef *color,
+            LLVMValueRef depth_ptr,
+            LLVMValueRef samplers_ptr)
 {
-   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
-   struct lp_fragment_shader_variant *variant;
    const struct tgsi_token *tokens = shader->base.tokens;
-   union lp_type type;
    LLVMTypeRef elem_type;
    LLVMTypeRef vec_type;
    LLVMTypeRef int_vec_type;
+   LLVMValueRef pos[NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   struct lp_build_mask_context mask;
+   boolean early_depth_test;
+   unsigned attrib;
+   unsigned chan;
+
+   elem_type = lp_build_elem_type(type);
+   vec_type = lp_build_vec_type(type);
+   int_vec_type = lp_build_int_vec_type(type);
+
+   generate_pos(builder, x, y, a0_ptr, dadx_ptr, dady_ptr, pos);
+
+   lp_build_mask_begin(&mask, builder, type, *pmask);
+
+   early_depth_test =
+      lp->depth_stencil->depth.enabled &&
+      lp->framebuffer.zsbuf &&
+      !lp->depth_stencil->alpha.enabled &&
+      !lp->fs->info.uses_kill &&
+      !lp->fs->info.writes_z;
+
+   if(early_depth_test)
+      generate_depth(lp, builder, &key->depth,
+                          type, &mask,
+                          pos[2], depth_ptr);
+
+   memset(outputs, 0, sizeof outputs);
+
+   lp_build_tgsi_soa(builder, tokens, type, &mask,
+                     pos, a0_ptr, dadx_ptr, dady_ptr,
+                     consts_ptr, outputs, samplers_ptr);
+
+   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(outputs[attrib][chan]) {
+            lp_build_name(outputs[attrib][chan], "output%u.%u.%c", i, attrib, "xyzw"[chan]);
+
+            switch (shader->info.output_semantic_name[attrib]) {
+            case TGSI_SEMANTIC_COLOR:
+               {
+                  unsigned cbuf = shader->info.output_semantic_index[attrib];
+
+                  lp_build_name(outputs[attrib][chan], "color%u.%u.%c", i, attrib, "rgba"[chan]);
+
+                  /* Alpha test */
+                  /* XXX: should the alpha reference value be passed separately? */
+                  if(cbuf == 0 && chan == 3)
+                     lp_build_alpha_test(builder, &key->alpha, type,
+                                         &mask,
+                                         outputs[attrib][chan]);
+
+                  if(cbuf == 0)
+                     color[chan] = outputs[attrib][chan];
+
+                  break;
+               }
+
+            case TGSI_SEMANTIC_POSITION:
+               if(chan == 2)
+                  pos[2] = outputs[attrib][chan];
+               break;
+            }
+         }
+      }
+   }
+
+   if(!early_depth_test)
+      generate_depth(lp, builder, &key->depth,
+                          type, &mask,
+                          pos[2], depth_ptr);
+
+   lp_build_mask_end(&mask);
+
+   *pmask = mask.value;
+
+}
+
+
+/**
+ * Generate blending code according to blend->base state.
+ * The blend function will look like:
+ *    blend(mask, src_color, constant color, dst_color)
+ * dst_color will be modified and contain the result of the blend func.
+ */
+static void
+generate_blend(const struct pipe_blend_state *blend,
+               LLVMBuilderRef builder,
+               union lp_type type,
+               LLVMValueRef mask,
+               LLVMValueRef *src,
+               LLVMValueRef const_ptr,
+               LLVMValueRef dst_ptr)
+{
+   struct lp_build_context bld;
+   LLVMTypeRef vec_type;
+   LLVMTypeRef int_vec_type;
+   LLVMValueRef con[4];
+   LLVMValueRef dst[4];
+   LLVMValueRef res[4];
+   unsigned chan;
+
+   vec_type = lp_build_vec_type(type);
+   int_vec_type = lp_build_int_vec_type(type);
+
+   lp_build_context_init(&bld, builder, type);
+
+   for(chan = 0; chan < 4; ++chan) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+
+      if(const_ptr)
+         con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
+      else
+         con[chan] = LLVMGetUndef(vec_type); /* FIXME */
+
+      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
+
+      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
+      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
+   }
+
+   lp_build_blend_soa(builder, blend, type, src, dst, con, res);
+
+   for(chan = 0; chan < 4; ++chan) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+      lp_build_name(res[chan], "res.%c", "rgba"[chan]);
+      res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
+      LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+   }
+}
+
+
+static struct lp_fragment_shader_variant *
+generate_fragment(struct llvmpipe_context *lp,
+                  struct lp_fragment_shader *shader,
+                  const struct lp_fragment_shader_variant_key *key)
+{
+   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
+   struct lp_fragment_shader_variant *variant;
+   union lp_type fs_type;
+   union lp_type blend_type;
+   LLVMTypeRef fs_elem_type;
+   LLVMTypeRef fs_vec_type;
+   LLVMTypeRef fs_int_vec_type;
+   LLVMTypeRef blend_vec_type;
+   LLVMTypeRef blend_int_vec_type;
    LLVMTypeRef arg_types[10];
    LLVMTypeRef func_type;
    LLVMValueRef x;
@@ -177,24 +338,38 @@ shader_generate(struct llvmpipe_context *lp,
    LLVMValueRef samplers_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
-   LLVMValueRef pos[NUM_CHANNELS];
-   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
-   struct lp_build_mask_context mask;
-   boolean early_depth_test;
+   LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef blend_mask;
+   LLVMValueRef blend_in_color[NUM_CHANNELS];
    LLVMValueRef fetch_texel;
+   unsigned num_fs;
    unsigned i;
-   unsigned attrib;
    unsigned chan;
 
 #ifdef DEBUG
    tgsi_dump(shader->base.tokens, 0);
-   debug_printf("depth.enabled = %u\n", key->depth.enabled);
-   debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
-   debug_printf("depth.writemask = %u\n", key->depth.writemask);
-   debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count);
-   debug_printf("alpha.enabled = %u\n", key->alpha.enabled);
-   debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
-   debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+   if(key->depth.enabled) {
+      debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
+      debug_printf("depth.writemask = %u\n", key->depth.writemask);
+      debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count);
+   }
+   if(key->alpha.enabled) {
+      debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
+      debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+   }
+   if(key->blend.logicop_enable) {
+      debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
+   }
+   else if(key->blend.blend_enable) {
+      debug_printf("blend.rgb_func = %s\n",   debug_dump_blend_func  (key->blend.rgb_func, TRUE));
+      debug_printf("rgb_src_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
+      debug_printf("rgb_dst_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
+      debug_printf("alpha_func = %s\n",       debug_dump_blend_func  (key->blend.alpha_func, TRUE));
+      debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
+      debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
+   }
+   debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
 #endif
 
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -204,26 +379,37 @@ shader_generate(struct llvmpipe_context *lp,
    variant->shader = shader;
    memcpy(&variant->key, key, sizeof *key);
 
-   type.value = 0;
-   type.floating = TRUE; /* floating point values */
-   type.sign = TRUE;     /* values are signed */
-   type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
-   type.width = 32;      /* 32-bit float */
-   type.length = 4;      /* 4 element per vector */
+   fs_type.value = 0;
+   fs_type.floating = TRUE; /* floating point values */
+   fs_type.sign = TRUE;     /* values are signed */
+   fs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
+   fs_type.width = 32;      /* 32-bit float */
+   fs_type.length = 4;      /* 4 element per vector */
+   num_fs = 4;
 
-   elem_type = lp_build_elem_type(type);
-   vec_type = lp_build_vec_type(type);
-   int_vec_type = lp_build_int_vec_type(type);
+   blend_type.value = 0;
+   blend_type.floating = FALSE; /* values are integers */
+   blend_type.sign = FALSE;     /* values are unsigned */
+   blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */
+   blend_type.width = 8;        /* 8-bit ubyte values */
+   blend_type.length = 16;      /* 16 elements per vector */
+
+   fs_elem_type = lp_build_elem_type(fs_type);
+   fs_vec_type = lp_build_vec_type(fs_type);
+   fs_int_vec_type = lp_build_int_vec_type(fs_type);
+
+   blend_vec_type = lp_build_vec_type(blend_type);
+   blend_int_vec_type = lp_build_int_vec_type(blend_type);
 
    arg_types[0] = LLVMInt32Type();                     /* x */
    arg_types[1] = LLVMInt32Type();                     /* y */
-   arg_types[2] = LLVMPointerType(elem_type, 0);       /* a0 */
-   arg_types[3] = LLVMPointerType(elem_type, 0);       /* dadx */
-   arg_types[4] = LLVMPointerType(elem_type, 0);       /* dady */
-   arg_types[5] = LLVMPointerType(elem_type, 0);       /* consts */
-   arg_types[6] = LLVMPointerType(int_vec_type, 0);    /* mask */
-   arg_types[7] = LLVMPointerType(vec_type, 0);        /* color */
-   arg_types[8] = LLVMPointerType(int_vec_type, 0);    /* depth */
+   arg_types[2] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
+   arg_types[3] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
+   arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* dady */
+   arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* consts */
+   arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */
+   arg_types[7] = LLVMPointerType(blend_vec_type, 0);  /* color */
+   arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
    arg_types[9] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
 
    func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
@@ -260,70 +446,57 @@ shader_generate(struct llvmpipe_context *lp,
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   setup_pos_vector(builder, x, y, a0_ptr, dadx_ptr, dady_ptr, pos);
-
-   lp_build_mask_begin(&mask, builder, type,
-                       LLVMBuildLoad(builder, mask_ptr, ""));
-
-   early_depth_test =
-      lp->depth_stencil->depth.enabled &&
-      lp->framebuffer.zsbuf &&
-      !lp->depth_stencil->alpha.enabled &&
-      !lp->fs->info.uses_kill &&
-      !lp->fs->info.writes_z;
-
-   if(early_depth_test)
-      depth_test_generate(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
-
-   memset(outputs, 0, sizeof outputs);
-
-   lp_build_tgsi_soa(builder, tokens, type, &mask,
-                     pos, a0_ptr, dadx_ptr, dady_ptr,
-                     consts_ptr, outputs, samplers_ptr);
-
-   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
-      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-         if(outputs[attrib][chan]) {
-            lp_build_name(outputs[attrib][chan], "output%u.%c", attrib, "xyzw"[chan]);
-
-            switch (shader->info.output_semantic_name[attrib]) {
-            case TGSI_SEMANTIC_COLOR:
-               {
-                  unsigned cbuf = shader->info.output_semantic_index[attrib];
-                  LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf*NUM_CHANNELS + chan, 0);
-                  LLVMValueRef output_ptr = LLVMBuildGEP(builder, color_ptr, &index, 1, "");
-                  lp_build_name(outputs[attrib][chan], "color%u.%c", attrib, "rgba"[chan]);
-                  LLVMBuildStore(builder, outputs[attrib][chan], output_ptr);
-
-                  /* Alpha test */
-                  /* XXX: should the alpha reference value be passed separately? */
-                  if(cbuf == 0 && chan == 3)
-                     lp_build_alpha_test(builder, &key->alpha, type,
-                                         &mask,
-                                         outputs[attrib][chan]);
-
-                  break;
-               }
+   for(i = 0; i < num_fs; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef out_color[NUM_CHANNELS];
+      LLVMValueRef x_i;
+      LLVMValueRef depth_ptr_i;
+
+      /* TODO: Reuse position interpolation */
+      x_i = LLVMBuildAdd(builder, x, LLVMConstInt(LLVMInt32Type(), 2*i, 0), "");
+
+      fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
+      depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
+
+      generate_fs(lp,
+                  shader,
+                  key,
+                  builder,
+                  fs_type,
+                  i,
+                  x_i,
+                  y,
+                  a0_ptr,
+                  dadx_ptr,
+                  dady_ptr,
+                  consts_ptr,
+                  &fs_mask[i],
+                  out_color,
+                  depth_ptr_i,
+                  samplers_ptr);
+
+      for(chan = 0; chan < NUM_CHANNELS; ++chan)
+         fs_out_color[chan][i] = out_color[chan];
+   }
 
-            case TGSI_SEMANTIC_POSITION:
-               if(chan == 2)
-                  pos[2] = outputs[attrib][chan];
-               break;
-            }
-         }
-      }
+   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+      lp_build_conv(builder, fs_type, blend_type,
+                    fs_out_color[chan], num_fs,
+                    &blend_in_color[chan], 1);
+      lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]);
    }
 
-   if(!early_depth_test)
-      depth_test_generate(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
+   lp_build_conv_mask(builder, fs_type, blend_type,
+                               fs_mask, num_fs,
+                               &blend_mask, 1);
 
-   lp_build_mask_end(&mask);
-   if(mask.value)
-      LLVMBuildStore(builder, mask.value, mask_ptr);
+   generate_blend(&key->blend,
+                  builder,
+                  blend_type,
+                  blend_mask,
+                  blend_in_color,
+                  NULL /* FIXME: blend_const_color */,
+                  color_ptr);
 
    LLVMBuildRetVoid(builder);;
 
@@ -512,6 +685,7 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp)
    memset(&key, 0, sizeof key);
    memcpy(&key.depth, &lp->depth_stencil->depth, sizeof &key.depth);
    memcpy(&key.alpha, &lp->depth_stencil->alpha, sizeof &key.alpha);
+   memcpy(&key.blend, &lp->blend->base, sizeof key.blend);
 
    variant = shader->variants;
    while(variant) {
@@ -522,7 +696,7 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp)
    }
 
    if(!variant)
-      variant = shader_generate(lp, shader, &key);
+      variant = generate_fragment(lp, shader, &key);
 
    shader->current = variant;
 }
-- 
cgit v1.2.3


From 3f36f4b0519f7a568d6de9919de1001880ab5c8a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 22 Aug 2009 19:07:56 +0100
Subject: llvmpipe: Split off vs stuff from lp_state_fs.c.

lp_state_fs.c is already too big without it.
---
 src/gallium/drivers/llvmpipe/Makefile      |  1 +
 src/gallium/drivers/llvmpipe/SConscript    |  1 +
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 60 -------------------
 src/gallium/drivers/llvmpipe/lp_state_vs.c | 96 ++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 60 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_state_vs.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index fb77f2a4c9..c6c8754dad 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -42,6 +42,7 @@ C_SOURCES = \
 	lp_state_sampler.c \
 	lp_state_surface.c \
 	lp_state_vertex.c \
+	lp_state_vs.c \
 	lp_surface.c \
 	lp_tex_cache.c \
 	lp_tex_sample.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index f9c09f7074..a9501b4765 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -46,6 +46,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_state_sampler.c',
 		'lp_state_surface.c',
 		'lp_state_vertex.c',
+		'lp_state_vs.c',
 		'lp_surface.c',
 		'lp_tex_cache.c',
 		'lp_tex_sample.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 9b0e7cdd37..5bd0d7b982 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -597,66 +597,6 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
 }
 
 
-void *
-llvmpipe_create_vs_state(struct pipe_context *pipe,
-                         const struct pipe_shader_state *templ)
-{
-   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
-   struct lp_vertex_shader *state;
-
-   state = CALLOC_STRUCT(lp_vertex_shader);
-   if (state == NULL ) 
-      goto fail;
-
-   /* copy shader tokens, the ones passed in will go away.
-    */
-   state->shader.tokens = tgsi_dup_tokens(templ->tokens);
-   if (state->shader.tokens == NULL)
-      goto fail;
-
-   state->draw_data = draw_create_vertex_shader(llvmpipe->draw, templ);
-   if (state->draw_data == NULL) 
-      goto fail;
-
-   return state;
-
-fail:
-   if (state) {
-      FREE( (void *)state->shader.tokens );
-      FREE( state->draw_data );
-      FREE( state );
-   }
-   return NULL;
-}
-
-
-void
-llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs)
-{
-   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
-
-   llvmpipe->vs = (const struct lp_vertex_shader *)vs;
-
-   draw_bind_vertex_shader(llvmpipe->draw,
-                           (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL));
-
-   llvmpipe->dirty |= LP_NEW_VS;
-}
-
-
-void
-llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
-{
-   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
-
-   struct lp_vertex_shader *state =
-      (struct lp_vertex_shader *)vs;
-
-   draw_delete_vertex_shader(llvmpipe->draw, state->draw_data);
-   FREE( state );
-}
-
-
 
 void
 llvmpipe_set_constant_buffer(struct pipe_context *pipe,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
new file mode 100644
index 0000000000..15c3029614
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -0,0 +1,96 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+
+#include "lp_context.h"
+#include "lp_state.h"
+
+
+void *
+llvmpipe_create_vs_state(struct pipe_context *pipe,
+                         const struct pipe_shader_state *templ)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_vertex_shader *state;
+
+   state = CALLOC_STRUCT(lp_vertex_shader);
+   if (state == NULL ) 
+      goto fail;
+
+   /* copy shader tokens, the ones passed in will go away.
+    */
+   state->shader.tokens = tgsi_dup_tokens(templ->tokens);
+   if (state->shader.tokens == NULL)
+      goto fail;
+
+   state->draw_data = draw_create_vertex_shader(llvmpipe->draw, templ);
+   if (state->draw_data == NULL) 
+      goto fail;
+
+   return state;
+
+fail:
+   if (state) {
+      FREE( (void *)state->shader.tokens );
+      FREE( state->draw_data );
+      FREE( state );
+   }
+   return NULL;
+}
+
+
+void
+llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+   llvmpipe->vs = (const struct lp_vertex_shader *)vs;
+
+   draw_bind_vertex_shader(llvmpipe->draw,
+                           (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL));
+
+   llvmpipe->dirty |= LP_NEW_VS;
+}
+
+
+void
+llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_vertex_shader *state = (struct lp_vertex_shader *)vs;
+
+   draw_delete_vertex_shader(llvmpipe->draw, state->draw_data);
+   /* the tokens were duplicated in llvmpipe_create_vs_state; free them too */
+   FREE( (void *)state->shader.tokens );
+   FREE( state );
+}
-- 
cgit v1.2.3


From 5811ed87d732101ab8cfbd087bc99d8c6c963f30 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 22 Aug 2009 22:26:55 +0100
Subject: llvmpipe: Add a bunch of comments.

Description/rationale/to-do items, while I still remember them...
---
 src/gallium/drivers/llvmpipe/Makefile              |   2 +-
 src/gallium/drivers/llvmpipe/README                |  71 +++++++++-----
 src/gallium/drivers/llvmpipe/SConscript            |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c    |  11 ++-
 .../drivers/llvmpipe/lp_bld_blend_logicop.c        | 108 +++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c    |  36 ++++++-
 src/gallium/drivers/llvmpipe/lp_bld_conv.c         |  93 +++++++++++++-----
 src/gallium/drivers/llvmpipe/lp_bld_depth.c        |  32 ++++++
 src/gallium/drivers/llvmpipe/lp_bld_depth.h        |   3 -
 src/gallium/drivers/llvmpipe/lp_bld_intr.c         |  19 ++--
 src/gallium/drivers/llvmpipe/lp_bld_intr.h         |   3 +
 src/gallium/drivers/llvmpipe/lp_bld_logic.c        |   7 ++
 src/gallium/drivers/llvmpipe/lp_bld_logicop.c      | 100 -------------------
 src/gallium/drivers/llvmpipe/lp_bld_swizzle.c      |   7 ++
 src/gallium/drivers/llvmpipe/lp_bld_swizzle.h      |   2 +-
 src/gallium/drivers/llvmpipe/lp_bld_tgsi.h         |   7 ++
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c     |  10 ++
 src/gallium/drivers/llvmpipe/lp_state_fs.c         | 103 ++++++++++++++++++--
 18 files changed, 446 insertions(+), 170 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_logicop.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index c6c8754dad..102227f0f8 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -7,6 +7,7 @@ C_SOURCES = \
 	lp_bld_alpha.c \
 	lp_bld_arit.c \
 	lp_bld_blend_aos.c \
+	lp_bld_blend_logicop.c \
 	lp_bld_blend_soa.c \
 	lp_bld_const.c \
 	lp_bld_conv.c \
@@ -19,7 +20,6 @@ C_SOURCES = \
 	lp_bld_load.c \
 	lp_bld_store.c \
 	lp_bld_logic.c \
-	lp_bld_logicop.c \
 	lp_bld_swizzle.c \
 	lp_bld_tgsi_soa.c \
 	lp_bld_type.c \
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 677352eaa1..498d21dea6 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -6,31 +6,40 @@ Status
 
 Done so far is:
 
-- TGSI -> LLVM fragment shader translation
-  - same level of support as the TGSI SSE2 exec machine
-  - texture sampling via an intrinsic call
-  - done in SoA
-  - input interpolation also code generated
-
-- blend -> LLVM (including logic ops)
-  - SoA and AoS, but only the former used
-
-- code is generic
-  - intermediates can be vectors of floats, ubytes, fixed point, etc, and of
-    any width and length
-  - not all operations are implemented for these types yet though
+ - the whole fragment pipeline is code generated in a single function
+ 
+   - depth testing
+ 
+   - fragment shader TGSI translation
+     - same level of support as the TGSI SSE2 exec machine, except that we
+       don't fall back to TGSI interpretation when an unsupported opcode is
+       found, but just ignore it
+     - texture sampling via an intrinsic call
+     - done in SoA layout
+     - input interpolation also code generated
+ 
+   - alpha testing
+ 
+   - blend (including logic ops)
+     - both in SoA and AoS layouts, but only the former used for now
+ 
+ - code is generic
+   - intermediates can be vectors of floats, ubytes, fixed point, etc, and of
+     any width and length
+   - not all operations are implemented for these types yet though
 
 Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch,
 which includes hand written fast implementations for common cases.
 
 To do (probably by this order):
-- code generate the rest of the fragment pipeline, namely the
-  depth/alpha/stencil state
-- concatenate the fragment pipeline (shader + depth/stencil/alpha + blend) in a
-  single function
-- code generate texture sampling
-- translate TGSI control flow instructions
-- code generate the triangle setup and rasterization
+
+ - code generate stipple and stencil testing
+
+ - code generate texture sampling
+
+ - translate TGSI control flow instructions, and all other remaining opcodes
+
+ - code generate the triangle setup and rasterization
 
 
 Requirements
@@ -70,7 +79,7 @@ Requirements
    instructions. This is necessary because we emit several SSE intrinsics for
    convenience. See /proc/cpuinfo to know what your CPU supports.
  
- - scons (although it should be straightforward to fix the Makefiles as well)
+ - scons
 
 
 Building
@@ -80,6 +89,12 @@ To build everything invoke scons as:
 
   scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false -k
 
+Alternatively, you can build it with GNU make, if you prefer, by invoking it as
+
+  make linux-llvm
+
+but the rest of these instructions assume scons is used.
+
 
 Using
 =====
@@ -87,9 +102,12 @@ Using
 Building will create a drop-in alternative for libGL.so. To use it set the
 environment variables:
 
-  export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH
   export LD_LIBRARY_PATH=$PWD/build/linux-x86_64-debug/lib:$LD_LIBRARY_PATH
 
+or
+
+  export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH
+
 
 Unit testing
 ============
@@ -104,12 +122,19 @@ build/linux-???-debug/gallium/drivers/llvmpipe:
 Some of this tests can output results and benchmarks to a tab-seperated-file
 for posterior analysis, e.g.:
 
-  build/linux-x86_64/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
+  build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
 
 
 Development Notes
 =================
 
+- When looking at this code for the first time, start in lp_state_fs.c, and
+  then skim through the lp_bld_* functions called in there, and the comments
+  at the top of the lp_bld_*.c files.
+
+- All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be 
+  put in a standalone Gallium state -> LLVM IR translation module.
+
 - We use LLVM-C bindings for now. They are not documented, but follow the C++
   interfaces very closely, and appear to be complete enough for code
   generation. See 
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index a9501b4765..84fd8fe95a 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -11,6 +11,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_alpha.c',
 		'lp_bld_arit.c',
 		'lp_bld_blend_aos.c',
+		'lp_bld_blend_logicop.c',
 		'lp_bld_blend_soa.c',
 		'lp_bld_const.c',
 		'lp_bld_conv.c',
@@ -23,7 +24,6 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_load.c',
 		'lp_bld_store.c',
 		'lp_bld_logic.c',
-		'lp_bld_logicop.c',
 		'lp_bld_swizzle.c',
 		'lp_bld_tgsi_soa.c',		
 		'lp_bld_type.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 87ba456065..c11a9398f8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -28,7 +28,16 @@
 
 /**
  * @file
- * Blend LLVM IR generation -- AOS form.
+ * Blend LLVM IR generation -- AoS layout.
+ *
+ * AoS blending is in general much slower than SoA, but there are some cases
+ * where it might be faster. In particular, if a pixel is rendered only once
+ * then the overhead of tiling and untiling will dominate over the speedup that
+ * SoA gives. So we might want to detect such cases and fallback to AoS in the
+ * future, but for now this function is here for historical/benchmarking
+ * purposes.
+ *
+ * Run lp_test_blend after any change to this file.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c
new file mode 100644
index 0000000000..88321f62a2
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c
@@ -0,0 +1,108 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- logic ops.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+
+#include "lp_bld_blend.h"
+
+
+LLVMValueRef /* value holding the result of logicop_func applied to src/dst */
+lp_build_logicop(LLVMBuilderRef builder,
+                 unsigned logicop_func, /* one of the PIPE_LOGICOP_* values */
+                 LLVMValueRef src,
+                 LLVMValueRef dst)
+{
+   LLVMTypeRef type;
+   LLVMValueRef res;
+
+   type = LLVMTypeOf(src); /* constant results (CLEAR/SET) use src's type */
+
+   switch (logicop_func) {
+   case PIPE_LOGICOP_CLEAR: /* 0 */
+      res = LLVMConstNull(type);
+      break;
+   case PIPE_LOGICOP_NOR: /* ~(src | dst) */
+      res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_AND_INVERTED: /* ~src & dst */
+      res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, "");
+      break;
+   case PIPE_LOGICOP_COPY_INVERTED: /* ~src */
+      res = LLVMBuildNot(builder, src, "");
+      break;
+   case PIPE_LOGICOP_AND_REVERSE: /* src & ~dst */
+      res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_INVERT: /* ~dst */
+      res = LLVMBuildNot(builder, dst, "");
+      break;
+   case PIPE_LOGICOP_XOR: /* src ^ dst */
+      res = LLVMBuildXor(builder, src, dst, "");
+      break;
+   case PIPE_LOGICOP_NAND: /* ~(src & dst) */
+      res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_AND: /* src & dst */
+      res = LLVMBuildAnd(builder, src, dst, "");
+      break;
+   case PIPE_LOGICOP_EQUIV: /* ~(src ^ dst) */
+      res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_NOOP: /* dst, no instruction emitted */
+      res = dst;
+      break;
+   case PIPE_LOGICOP_OR_INVERTED: /* ~src | dst */
+      res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, "");
+      break;
+   case PIPE_LOGICOP_COPY: /* src, no instruction emitted */
+      res = src;
+      break;
+   case PIPE_LOGICOP_OR_REVERSE: /* src | ~dst */
+      res = LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), "");
+      break;
+   case PIPE_LOGICOP_OR: /* src | dst */
+      res = LLVMBuildOr(builder, src, dst, "");
+      break;
+   case PIPE_LOGICOP_SET: /* all bits set */
+      res = LLVMConstAllOnes(type);
+      break;
+   default: /* unrecognized logicop_func value */
+      assert(0);
+      res = src; /* if the assert does not abort, behave like COPY */
+   }
+
+   return res;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
index 73516fd81b..b92254a7d6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -28,7 +28,41 @@
 
 /**
  * @file
- * Blend LLVM IR generation -- SoA.
+ * Blend LLVM IR generation -- SoA layout.
+ *
+ * Blending in SoA is much faster than AoS, especially when separate rgb/alpha
+ * factors/functions are used, since no channel masking/shuffling is necessary
+ * and we can achieve the full throughput of the SIMD operations. Furthermore
+ * the fragment shader output is also in SoA, so it fits nicely with the rest of
+ * the fragment pipeline.
+ *
+ * The drawback is that to be displayed the color buffer needs to be in AoS
+ * layout, so we need to tile/untile the color buffer before/after rendering.
+ * A color buffer like
+ *
+ *  R11 G11 B11 A11 R12 G12 B12 A12  R13 G13 B13 A13 R14 G14 B14 A14  ...
+ *  R21 G21 B21 A21 R22 G22 B22 A22  R23 G23 B23 A23 R24 G24 B24 A24  ...
+ *
+ *  R31 G31 B31 A31 R32 G32 B32 A32  R33 G33 B33 A33 R34 G34 B34 A34  ...
+ *  R41 G41 B41 A41 R42 G42 B42 A42  R43 G43 B43 A43 R44 G44 B44 A44  ...
+ *
+ *  ... ... ... ... ... ... ... ...  ... ... ... ... ... ... ... ...  ...
+ *
+ * will actually be stored in memory as
+ *
+ *  R11 R12 R21 R22 R13 R14 R23 R24 ... G11 G12 G21 G22 G13 G14 G23 G24 ... B11 B12 B21 B22 B13 B14 B23 B24 ... A11 A12 A21 A22 A13 A14 A23 A24 ...
+ *  R31 R32 R41 R42 R33 R34 R43 R44 ... G31 G32 G41 G42 G33 G34 G43 G44 ... B31 B32 B41 B42 B33 B34 B43 B44 ... A31 A32 A41 A42 A33 A34 A43 A44 ...
+ *  ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
+ *
+ * NOTE: Run lp_test_blend after any change to this file.
+ *
+ * You can also run lp_test_blend to obtain AoS vs SoA benchmarks. Invoking it
+ * as:
+ *
+ *  lp_test_blend -o blend.tsv
+ *
+ * will generate a tab-separated file with the test results and performance
+ * measurements.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index 54d2e13d34..3a54272cbd 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -28,18 +28,34 @@
 
 /**
  * @file
- * Helper
+ * Helper functions for type conversions.
  *
- * LLVM IR doesn't support all basic arithmetic operations we care about (most
- * notably min/max and saturated operations), and it is often necessary to
- * resort machine-specific intrinsics directly. The functions here hide all
- * these implementation details from the other modules.
+ * We want to use the fastest type for a given computation whenever feasible.
+ * The other side of this is that we need to be able to convert between several
+ * types accurately and efficiently.
  *
- * We also do simple expressions simplification here. Reasons are:
- * - it is very easy given we have all necessary information readily available
- * - LLVM optimization passes fail to simplify several vector expressions
- * - We often know value constraints which the optimization passes have no way
- *   of knowing, such as when source arguments are known to be in [0, 1] range.
+ * Conversion between types of different bit width is quite complex.
+ *
+ * Remember that there are a few invariants in type conversions:
+ *
+ * - register width must remain constant:
+ *
+ *     src_type.width * src_type.length == dst_type.width * dst_type.length
+ *
+ * - total number of elements must remain constant:
+ *
+ *     src_type.length * num_srcs == dst_type.length * num_dsts
+ *
+ * It is not always possible to do the conversion both accurately and
+ * efficiently, usually due to lack of adequate machine instructions. In these
+ * cases it is important not to take shortcuts here and sacrifice accuracy, as
+ * these functions can be used anywhere. In the future we might have a
+ * precision parameter which can gauge the accuracy vs efficiency compromise,
+ * but for now if the data conversion between two stages happens to be the
+ * bottleneck, then most likely we should just avoid converting at all and run
+ * both stages with the same type.
+ *
+ * Make sure to run lp_test_conv unit test after any change to this file.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
@@ -55,6 +71,19 @@
 #include "lp_bld_conv.h"
 
 
+/**
+ * Special case for converting clamped IEEE-754 floats to unsigned norms.
+ *
+ * The mathematical voodoo below may seem excessive but it is actually
+ * paramount we do it this way for several reasons. First, there is no single
+ * precision FP to unsigned integer conversion Intel SSE instruction. Second,
+ * even if there was, since the FP's mantissa takes only a fraction of the
+ * register bits, the typical scale and cast approach would require double
+ * precision for accurate results, and therefore half the throughput.
+ *
+ * Although the result values can be scaled to an arbitrary bit width specified
+ * by dst_width, the actual result type will have the same width.
+ */
 LLVMValueRef
 lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
                                         union lp_type src_type,
@@ -118,7 +147,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
 
 
 /**
- * Inverse of lp_build_clamped_float_to_unsigned_norm.
+ * Inverse of lp_build_clamped_float_to_unsigned_norm above.
  */
 LLVMValueRef
 lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
@@ -139,7 +168,6 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
 
    mantissa = lp_mantissa(dst_type);
 
-   /* We cannot carry more bits than the mantissa */
    n = MIN2(mantissa, src_width);
 
    ubound = ((unsigned long long)1 << n);
@@ -212,6 +240,12 @@ lp_build_const_pack_shuffle(unsigned n)
 }
 
 
+/**
+ * Expand the bit width.
+ *
+ * This will only change the number of bits used to represent the values, not
+ * the values themselves.
+ */
 static void
 lp_build_expand(LLVMBuilderRef builder,
                union lp_type src_type,
@@ -270,9 +304,13 @@ lp_build_expand(LLVMBuilderRef builder,
 /**
  * Non-interleaved pack.
  *
- * lo =   __ l0 __ l1 __ l2 __..  __ ln
- * hi  =  __ h0 __ h1 __ h2 __..  __ hn
- * res =  l0 l1 l2 .. ln h0 h1 h2 .. hn
+ * This will move values as
+ *
+ *   lo =   __ l0 __ l1 __ l2 __..  __ ln
+ *   hi =   __ h0 __ h1 __ h2 __..  __ hn
+ *   res =  l0 l1 l2 .. ln h0 h1 h2 .. hn
+ *
+ * TODO: handle saturation consistently.
  */
 static LLVMValueRef
 lp_build_pack2(LLVMBuilderRef builder,
@@ -347,6 +385,11 @@ lp_build_pack2(LLVMBuilderRef builder,
 }
 
 
+/**
+ * Truncate the bit width.
+ *
+ * TODO: Handle saturation consistently.
+ */
 static LLVMValueRef
 lp_build_trunc(LLVMBuilderRef builder,
                union lp_type src_type,
@@ -392,13 +435,10 @@ lp_build_trunc(LLVMBuilderRef builder,
 
 
 /**
- * Convert between two SIMD types.
+ * Generic type conversion.
  *
- * Converting between SIMD types of different element width poses a problem:
- * SIMD registers have a fixed number of bits, so different element widths
- * imply different vector lengths. Therefore we must multiplex the multiple
- * incoming sources into a single destination vector, or demux a single incoming
- * vector into multiple vectors.
+ * TODO: Take a precision argument, or even better, add a new precision member
+ * to the lp_type union.
  */
 void
 lp_build_conv(LLVMBuilderRef builder,
@@ -605,7 +645,14 @@ lp_build_conv(LLVMBuilderRef builder,
 
 
 /**
- * Convenience wrapper around lp_build_conv for bit masks.
+ * Bit mask conversion.
+ *
+ * This will convert the integer masks that match the given types.
+ *
+ * The mask values should be 0 or -1, i.e., all bits either set to zero or one.
+ * Any other value will likely cause unpredictable results.
+ *
+ * This is basically a very trimmed down version of lp_build_conv.
  */
 void
 lp_build_conv_mask(LLVMBuilderRef builder,
@@ -621,6 +668,8 @@ lp_build_conv_mask(LLVMBuilderRef builder,
    assert(src_type.length * num_srcs == dst_type.length * num_dsts);
 
    /*
+    * Drop
+    *
     * We assume all values are 0 or -1
     */
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 118c7c5213..6018feda1c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -26,8 +26,34 @@
  **************************************************************************/
 
 /**
+ * @file
  * Depth/stencil testing to LLVM IR translation.
  *
+ * To be done accurately/efficiently the depth/stencil test must be done with
+ * the same type/format of the depth/stencil buffer, which implies massaging
+ * the incoming depths to fit into place. Using a more straightforward
+ * type/format for depth/stencil values internally and only convert when
+ * flushing would avoid this, but it would most likely result in depth fighting
+ * artifacts.
+ *
+ * We are free to use a different pixel layout though. Since our basic
+ * processing unit is a quad (2x2 pixel block) we store the depth/stencil
+ * values tiled, a quad at a time. That is, a depth buffer containing
+ *
+ *  Z11 Z12 Z13 Z14 ...
+ *  Z21 Z22 Z23 Z24 ...
+ *  Z31 Z32 Z33 Z34 ...
+ *  Z41 Z42 Z43 Z44 ...
+ *  ... ... ... ... ...
+ *
+ * will actually be stored in memory as
+ *
+ *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
+ *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
+ *  ... ... ... ... ... ... ... ... ...
+ *
+ * FIXME: Code generate stencil test
+ *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
 
@@ -42,6 +68,9 @@
 #include "lp_bld_depth.h"
 
 
+/**
+ * Return a type appropriate for depth/stencil testing.
+ */
 union lp_type
 lp_depth_type(const struct util_format_description *format_desc,
               unsigned length)
@@ -79,6 +108,9 @@ lp_depth_type(const struct util_format_description *format_desc,
 }
 
 
+/**
+ * Depth test.
+ */
 void
 lp_build_depth_test(LLVMBuilderRef builder,
                     const struct pipe_depth_state *state,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index a5de698ebb..5d2e042fcc 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -45,9 +45,6 @@ union lp_type;
 struct lp_build_mask_context;
 
 
-/**
- * Return a type appropriate for depth testing.
- */
 union lp_type
 lp_depth_type(const struct util_format_description *format_desc,
               unsigned length);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_intr.c b/src/gallium/drivers/llvmpipe/lp_bld_intr.c
index 4f03ce7d0a..42fd57fdf0 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_intr.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_intr.c
@@ -28,18 +28,17 @@
 
 /**
  * @file
- * Helper
+ * Helpers for emitting intrinsic calls.
  *
- * LLVM IR doesn't support all basic arithmetic operations we care about (most
- * notably min/max and saturated operations), and it is often necessary to
- * resort machine-specific intrinsics directly. The functions here hide all
- * these implementation details from the other modules.
+ * LLVM vanilla IR doesn't represent all basic arithmetic operations we care
+ * about, and it is often necessary to resort to target-specific intrinsics for
+ * performance or convenience.
  *
- * We also do simple expressions simplification here. Reasons are:
- * - it is very easy given we have all necessary information readily available
- * - LLVM optimization passes fail to simplify several vector expressions
- * - We often know value constraints which the optimization passes have no way
- *   of knowing, such as when source arguments are known to be in [0, 1] range.
+ * Ideally we would like to stay away from target specific intrinsics and
+ * move all the instruction selection logic into upstream LLVM where it belongs.
+ *
+ * These functions are also used for calling C functions provided by us from
+ * generated LLVM code.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_intr.h b/src/gallium/drivers/llvmpipe/lp_bld_intr.h
index 1e8e0edd83..3608988dc4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_intr.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_intr.h
@@ -40,6 +40,9 @@
 #include <llvm-c/Core.h>  
 
 
+/**
+ * Max number of arguments in an intrinsic.
+ */
 #define LP_MAX_FUNC_ARGS 32
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
index 5b8efb0577..b99fa89be3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -25,6 +25,13 @@
  *
  **************************************************************************/
 
+/**
+ * @file
+ * Helper functions for logical operations.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
 
 #include "pipe/p_defines.h"
 #include "lp_bld_type.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logicop.c b/src/gallium/drivers/llvmpipe/lp_bld_logicop.c
deleted file mode 100644
index f9202d1a83..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_bld_logicop.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "pipe/p_state.h"
-
-#include "lp_bld_blend.h"
-
-
-LLVMValueRef
-lp_build_logicop(LLVMBuilderRef builder,
-                 unsigned logicop_func,
-                 LLVMValueRef src,
-                 LLVMValueRef dst)
-{
-   LLVMTypeRef type;
-   LLVMValueRef res;
-
-   type = LLVMTypeOf(src);
-
-   switch (logicop_func) {
-   case PIPE_LOGICOP_CLEAR:
-      res = LLVMConstNull(type);
-      break;
-   case PIPE_LOGICOP_NOR:
-      res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), "");
-      break;
-   case PIPE_LOGICOP_AND_INVERTED:
-      res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, "");
-      break;
-   case PIPE_LOGICOP_COPY_INVERTED:
-      res = LLVMBuildNot(builder, src, "");
-      break;
-   case PIPE_LOGICOP_AND_REVERSE:
-      res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), "");
-      break;
-   case PIPE_LOGICOP_INVERT:
-      res = LLVMBuildNot(builder, dst, "");
-      break;
-   case PIPE_LOGICOP_XOR:
-      res = LLVMBuildXor(builder, src, dst, "");
-      break;
-   case PIPE_LOGICOP_NAND:
-      res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), "");
-      break;
-   case PIPE_LOGICOP_AND:
-      res = LLVMBuildAnd(builder, src, dst, "");
-      break;
-   case PIPE_LOGICOP_EQUIV:
-      res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), "");
-      break;
-   case PIPE_LOGICOP_NOOP:
-      res = dst;
-      break;
-   case PIPE_LOGICOP_OR_INVERTED:
-      res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, "");
-      break;
-   case PIPE_LOGICOP_COPY:
-      res = src;
-      break;
-   case PIPE_LOGICOP_OR_REVERSE:
-      res = LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), "");
-      break;
-   case PIPE_LOGICOP_OR:
-      res = LLVMBuildOr(builder, src, dst, "");
-      break;
-   case PIPE_LOGICOP_SET:
-      res = LLVMConstAllOnes(type);
-      break;
-   default:
-      assert(0);
-      res = src;
-   }
-
-   return res;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
index 5204a851d6..27ca9b0edc 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
@@ -25,6 +25,13 @@
  *
  **************************************************************************/
 
+/**
+ * @file
+ * Helper functions for swizzling/shuffling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
 
 #include "util/u_debug.h"
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index 7a4aa88382..d7dd6a8a60 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -27,7 +27,7 @@
 
 /**
  * @file
- * Helper functions for constant building.
+ * Helper functions for swizzling/shuffling.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index e77cf26de3..8aaf494d2b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -25,6 +25,13 @@
  *
  **************************************************************************/
 
+/**
+ * @file
+ * TGSI to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
 #ifndef LP_BLD_TGSI_H
 #define LP_BLD_TGSI_H
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index d35c8c6b7b..a37776aa7f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -26,6 +26,16 @@
  * 
  **************************************************************************/
 
+/**
+ * @file
+ * TGSI to LLVM IR translation -- SoA.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ *
+ * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
+ * Brian Paul, and others.
+ */
+
 #include "pipe/p_config.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_debug.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 5bd0d7b982..a9b2d48244 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -26,6 +26,38 @@
  * 
  **************************************************************************/
 
+/**
+ * @file
+ * Code generate the whole fragment pipeline.
+ *
+ * The fragment pipeline consists of the following stages:
+ * - stipple (TBI)
+ * - early depth test
+ * - fragment shader
+ * - alpha test
+ * - depth/stencil test (stencil TBI)
+ * - blending
+ *
+ * This file has only the glue to assemble the fragment pipeline.  The actual
+ * plumbing of converting Gallium state into LLVM IR is done elsewhere, in the
+ * lp_bld_*.[ch] files, and in a completely generic and reusable way. Here we
+ * muster the LLVM JIT execution engine to create a function that follows an
+ * established binary interface and that can be called from C directly.
+ *
+ * A big source of complexity here is that we often want to run different
+ * stages with different data types and precisions. For example, the fragment
+ * shader typically needs to be done in floats, but the depth/stencil test and
+ * blending are better done in the type that most closely matches the
+ * depth/stencil and color buffer respectively.
+ *
+ * Since the width of a SIMD vector register stays the same regardless of the
+ * element type, different types imply different number of elements, so we must
+ * code generate more instances of the stages with larger types to be able to
+ * feed/consume the stages with smaller types.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
 #include "util/u_format.h"
@@ -56,6 +88,14 @@ static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
 static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
 
 
+/**
+ * Generate the position vectors.
+ *
+ * TODO: This should be called only once per fragment pipeline, for the first
+ * quad, and the neighboring quad positions obtained by additions.
+ *
+ * Parameter x, y are the integer values with the quad upper left coordinates.
+ */
 static void
 generate_pos(LLVMBuilderRef builder,
              LLVMValueRef x,
@@ -74,6 +114,11 @@ generate_pos(LLVMBuilderRef builder,
    unsigned chan;
    unsigned i;
 
+   /*
+    * Derive from the quad's upper left scalar coordinates the coordinates for
+    * all other quad pixels
+    */
+
    x = lp_build_broadcast(builder, int_vec_type, x);
    y = lp_build_broadcast(builder, int_vec_type, y);
 
@@ -91,6 +136,10 @@ generate_pos(LLVMBuilderRef builder,
    pos[0] = x;
    pos[1] = y;
 
+   /* 
+    * Calculate z and w from the interpolation factors.
+    */
+
    for(chan = 2; chan < NUM_CHANNELS; ++chan) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
       LLVMValueRef a0   = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr,   &index, 1, ""), "");
@@ -111,6 +160,9 @@ generate_pos(LLVMBuilderRef builder,
 }
 
 
+/**
+ * Generate the depth test.
+ */
 static void
 generate_depth(struct llvmpipe_context *lp,
                LLVMBuilderRef builder,
@@ -129,8 +181,10 @@ generate_depth(struct llvmpipe_context *lp,
    format_desc = util_format_description(lp->framebuffer.zsbuf->format);
    assert(format_desc);
 
+   /* Pick the depth type. */
    dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
 
+   /* FIXME: Cope with a depth test type with a different bit width. */
    assert(dst_type.width == src_type.width);
    assert(dst_type.length == src_type.length);
 
@@ -154,7 +208,7 @@ generate_depth(struct llvmpipe_context *lp,
 
 
 /**
- * Generate the fragment shader, depth/stencil and alpha tests.
+ * Generate the fragment shader, depth/stencil test, and alpha tests.
  */
 static void
 generate_fs(struct llvmpipe_context *lp,
@@ -258,10 +312,7 @@ generate_fs(struct llvmpipe_context *lp,
 
 
 /**
- * Generate blending code according to blend->base state.
- * The blend function will look like:
- *    blend(mask, src_color, constant color, dst_color)
- * dst_color will be modified and contain the result of the blend func.
+ * Generate color blending and color output.
  */
 static void
 generate_blend(const struct pipe_blend_state *blend,
@@ -310,6 +361,9 @@ generate_blend(const struct pipe_blend_state *blend,
 }
 
 
+/**
+ * Generate the runtime callable function for the whole fragment pipeline.
+ */
 static struct lp_fragment_shader_variant *
 generate_fragment(struct llvmpipe_context *lp,
                   struct lp_fragment_shader *shader,
@@ -379,6 +433,9 @@ generate_fragment(struct llvmpipe_context *lp,
    variant->shader = shader;
    memcpy(&variant->key, key, sizeof *key);
 
+   /* TODO: actually pick these based on the fs and color buffer
+    * characteristics. */
+
    fs_type.value = 0;
    fs_type.floating = TRUE; /* floating point values */
    fs_type.sign = TRUE;     /* values are signed */
@@ -394,6 +451,11 @@ generate_fragment(struct llvmpipe_context *lp,
    blend_type.width = 8;        /* 8-bit ubyte values */
    blend_type.length = 16;      /* 16 elements per vector */
 
+   /* 
+    * Generate the function prototype. Any change here must be reflected in
+    * lp_state.h's lp_shader_fs_func function pointer type, and vice-versa.
+    */
+
    fs_elem_type = lp_build_elem_type(fs_type);
    fs_vec_type = lp_build_vec_type(fs_type);
    fs_int_vec_type = lp_build_int_vec_type(fs_type);
@@ -442,6 +504,10 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(depth_ptr, "depth");
    lp_build_name(samplers_ptr, "samplers");
 
+   /*
+    * Function body
+    */
+
    block = LLVMAppendBasicBlock(variant->function, "entry");
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
@@ -479,6 +545,10 @@ generate_fragment(struct llvmpipe_context *lp,
          fs_out_color[chan][i] = out_color[chan];
    }
 
+   /* 
+    * Convert the fs's output color and mask to fit to the blending type. 
+    */
+
    for(chan = 0; chan < NUM_CHANNELS; ++chan) {
       lp_build_conv(builder, fs_type, blend_type,
                     fs_out_color[chan], num_fs,
@@ -490,6 +560,10 @@ generate_fragment(struct llvmpipe_context *lp,
                                fs_mask, num_fs,
                                &blend_mask, 1);
 
+   /*
+    * Blending.
+    */
+
    generate_blend(&key->blend,
                   builder,
                   blend_type,
@@ -498,10 +572,14 @@ generate_fragment(struct llvmpipe_context *lp,
                   NULL /* FIXME: blend_const_color */,
                   color_ptr);
 
-   LLVMBuildRetVoid(builder);;
+   LLVMBuildRetVoid(builder);
 
    LLVMDisposeBuilder(builder);
 
+   /*
+    * Translate the LLVM IR into machine code.
+    */
+
    LLVMRunFunctionPassManager(screen->pass, variant->function);
 
 #ifdef DEBUG
@@ -514,6 +592,9 @@ generate_fragment(struct llvmpipe_context *lp,
       abort();
    }
 
+   /* Tell where the fetch_texel function is, if the shader refers to it.
+    * TODO: this should be done elsewhere.
+    */
    fetch_texel = LLVMGetNamedFunction(screen->module, "fetch_texel");
    if(fetch_texel) {
       static boolean first_time = TRUE;
@@ -616,12 +697,20 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
 }
 
 
-void llvmpipe_update_fs(struct llvmpipe_context *lp)
+void 
+llvmpipe_update_fs(struct llvmpipe_context *lp)
 {
    struct lp_fragment_shader *shader = lp->fs;
    struct lp_fragment_shader_variant_key key;
    struct lp_fragment_shader_variant *variant;
 
+   /* We need to generate several variants of the fragment pipeline to match
+    * all the combinations of the contributing state atoms.
+    *
+    * TODO: there is actually no reason to tie this to context state -- the
+    * generated code could be cached globally in the screen.
+    */
+
    memset(&key, 0, sizeof key);
    memcpy(&key.depth, &lp->depth_stencil->depth, sizeof &key.depth);
    memcpy(&key.alpha, &lp->depth_stencil->alpha, sizeof &key.alpha);
-- 
cgit v1.2.3


From 4d2b0eb19e432f83845a55d552b5d1d61f040459 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 22 Aug 2009 23:01:58 +0100
Subject: llvmpipe: Squash the quad pipeline.

It had been reduced to one fixed stage.
---
 src/gallium/drivers/llvmpipe/Makefile           |   2 -
 src/gallium/drivers/llvmpipe/SConscript         |   2 -
 src/gallium/drivers/llvmpipe/lp_context.c       |  28 ++-
 src/gallium/drivers/llvmpipe/lp_context.h       |  12 +-
 src/gallium/drivers/llvmpipe/lp_quad_fs.c       | 222 ------------------------
 src/gallium/drivers/llvmpipe/lp_quad_pipe.c     |  38 ----
 src/gallium/drivers/llvmpipe/lp_quad_pipe.h     |  62 -------
 src/gallium/drivers/llvmpipe/lp_setup.c         |  88 +++++++++-
 src/gallium/drivers/llvmpipe/lp_state_derived.c |   4 -
 src/gallium/drivers/llvmpipe/lp_state_surface.c |  13 ++
 10 files changed, 120 insertions(+), 351 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad_fs.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad_pipe.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad_pipe.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 102227f0f8..9290720aaf 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -30,8 +30,6 @@ C_SOURCES = \
 	lp_prim_setup.c \
 	lp_prim_vbuf.c \
 	lp_setup.c \
-	lp_quad_pipe.c \
-	lp_quad_fs.c \
 	lp_query.c \
 	lp_screen.c \
 	lp_state_blend.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 84fd8fe95a..4161edb5cf 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -34,8 +34,6 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_prim_setup.c',
 		'lp_prim_vbuf.c',
 		'lp_setup.c',
-		'lp_quad_pipe.c',
-		'lp_quad_fs.c',
 		'lp_query.c',
 		'lp_screen.c',
 		'lp_state_blend.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index b9fd681e73..233d1df0e1 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -55,11 +55,24 @@
 void
 llvmpipe_map_transfers(struct llvmpipe_context *lp)
 {
+   struct pipe_screen *screen = lp->pipe.screen;
+   struct pipe_surface *zsbuf = lp->framebuffer.zsbuf;
    unsigned i;
 
    for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
       lp_tile_cache_map_transfers(lp->cbuf_cache[i]);
    }
+
+   if(zsbuf) {
+      if(!lp->zsbuf_transfer)
+         lp->zsbuf_transfer = screen->get_tex_transfer(screen, zsbuf->texture,
+                                                       zsbuf->face, zsbuf->level, zsbuf->zslice,
+                                                       PIPE_TRANSFER_READ_WRITE,
+                                                       0, 0, zsbuf->width, zsbuf->height);
+      if(lp->zsbuf_transfer && !lp->zsbuf_map)
+         lp->zsbuf_map = screen->transfer_map(screen, lp->zsbuf_transfer);
+
+   }
 }
 
 
@@ -74,6 +87,15 @@ llvmpipe_unmap_transfers(struct llvmpipe_context *lp)
    for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
       lp_tile_cache_unmap_transfers(lp->cbuf_cache[i]);
    }
+
+   if(lp->zsbuf_transfer) {
+      struct pipe_screen *screen = lp->pipe.screen;
+
+      if(lp->zsbuf_map) {
+         screen->transfer_unmap(screen, lp->zsbuf_transfer);
+         lp->zsbuf_map = NULL;
+      }
+   }
 }
 
 
@@ -85,8 +107,6 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
    if (llvmpipe->draw)
       draw_destroy( llvmpipe->draw );
 
-      llvmpipe->quad.shade->destroy( llvmpipe->quad.shade );
-
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
       lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]);
 
@@ -205,7 +225,6 @@ llvmpipe_create( struct pipe_screen *screen )
 
    /*
     * Alloc caches for accessing drawing surfaces and textures.
-    * Must be before quad stage setup!
     */
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
       llvmpipe->cbuf_cache[i] = lp_create_tile_cache( screen );
@@ -214,9 +233,6 @@ llvmpipe_create( struct pipe_screen *screen )
       llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen );
 
 
-   /* setup quad rendering stages */
-      llvmpipe->quad.shade = lp_quad_shade_stage(llvmpipe);
-
    /* vertex shader samplers */
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
       llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 9de21d0cd0..1d0896a568 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -35,7 +35,6 @@
 
 #include "draw/draw_vertex.h"
 
-#include "lp_quad_pipe.h"
 #include "lp_tex_sample.h"
 
 
@@ -114,13 +113,6 @@ struct llvmpipe_context {
 
    unsigned line_stipple_counter;
 
-   /** Software quad rendering pipeline */
-   struct {
-      struct quad_stage *shade;
-
-      struct quad_stage *first; /**< points to one of the above stages */
-   } quad;
-
    /** TGSI exec things */
    struct {
       struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS];
@@ -139,6 +131,10 @@ struct llvmpipe_context {
    
    struct llvmpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS];
    
+   /* TODO: we shouldn't be using external interfaces internally like this */
+   struct pipe_transfer *zsbuf_transfer;
+   uint8_t *zsbuf_map;
+
    unsigned tex_timestamp;
    struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
 
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_fs.c b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
deleted file mode 100644
index 9ead0864a6..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_quad_fs.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008-2009 VMware, Inc.
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/* Vertices are just an array of floats, with all the attributes
- * packed.  We currently assume a layout like:
- *
- * attr[0][0..3] - window position
- * attr[1..n][0..3] - remaining attributes.
- *
- * Attributes are assumed to be 4 floats wide but are packed so that
- * all the enabled attributes run contiguously.
- */
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_screen.h"
-
-#include "lp_context.h"
-#include "lp_state.h"
-#include "lp_quad.h"
-#include "lp_quad_pipe.h"
-#include "lp_texture.h"
-#include "lp_tile_cache.h"
-#include "lp_tile_soa.h"
-
-
-struct quad_shade_stage
-{
-   struct quad_stage stage;  /**< base class */
-
-   struct pipe_transfer *transfer;
-   uint8_t *map;
-};
-
-
-/** cast wrapper */
-static INLINE struct quad_shade_stage *
-quad_shade_stage(struct quad_stage *qs)
-{
-   return (struct quad_shade_stage *) qs;
-}
-
-
-
-/**
- * Execute fragment shader for the four fragments in the quad.
- */
-static void
-shade_quads(struct quad_stage *qs,
-                 struct quad_header *quads[],
-                 unsigned nr)
-{
-   struct quad_shade_stage *qss = quad_shade_stage( qs );
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   struct lp_fragment_shader *fs = llvmpipe->fs;
-   void *constants;
-   struct tgsi_sampler **samplers;
-   struct quad_header *quad = quads[0];
-   const unsigned x = quad->input.x0;
-   const unsigned y = quad->input.y0;
-   uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
-   uint8_t *color;
-   void *depth;
-   uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
-   unsigned chan_index;
-   unsigned q;
-
-   assert(fs->current);
-   if(!fs->current)
-      return;
-
-   /* Sanity checks */
-   assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
-   assert(x % TILE_VECTOR_WIDTH == 0);
-   assert(y % TILE_VECTOR_HEIGHT == 0);
-   for (q = 0; q < nr; ++q) {
-      assert(quads[q]->input.x0 == x + q*2);
-      assert(quads[q]->input.y0 == y);
-   }
-
-   /* mask */
-   for (q = 0; q < 4; ++q)
-      for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
-         mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;
-
-   /* color buffer */
-   color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
-
-   /* depth buffer */
-   if(qss->map) {
-      assert((x % 2) == 0);
-      assert((y % 2) == 0);
-      depth = qss->map +
-              y*qss->transfer->stride +
-              2*x*qss->transfer->block.size;
-   }
-   else
-      depth = NULL;
-
-   constants = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
-   samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
-   /* TODO: blend color */
-
-   assert((((uintptr_t)mask) & 0xf) == 0);
-   assert((((uintptr_t)depth) & 0xf) == 0);
-   assert((((uintptr_t)color) & 0xf) == 0);
-   assert((((uintptr_t)llvmpipe->blend_color) & 0xf) == 0);
-
-   /* run shader */
-   fs->current->jit_function( x,
-                              y,
-                              quad->coef->a0,
-                              quad->coef->dadx,
-                              quad->coef->dady,
-                              constants,
-                              &mask[0][0],
-                              color,
-                              depth,
-                              samplers);
-}
-
-
-
-/**
- * Per-primitive (or per-begin?) setup
- */
-static void
-shade_begin(struct quad_stage *qs)
-{
-   struct quad_shade_stage *qss = quad_shade_stage( qs );
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   struct pipe_screen *screen = llvmpipe->pipe.screen;
-   struct pipe_surface *zsbuf = llvmpipe->framebuffer.zsbuf;
-
-   if(qss->transfer) {
-      if(qss->map) {
-         screen->transfer_unmap(screen, qss->transfer);
-         qss->map = NULL;
-      }
-
-      screen->tex_transfer_destroy(qss->transfer);
-      qss->transfer = NULL;
-   }
-
-   if(zsbuf) {
-      qss->transfer = screen->get_tex_transfer(screen, zsbuf->texture,
-                                               zsbuf->face, zsbuf->level, zsbuf->zslice,
-                                               PIPE_TRANSFER_READ_WRITE,
-                                               0, 0, zsbuf->width, zsbuf->height);
-      if(qss->transfer)
-         qss->map = screen->transfer_map(screen, qss->transfer);
-
-   }
-
-}
-
-
-static void
-shade_destroy(struct quad_stage *qs)
-{
-   struct quad_shade_stage *qss = quad_shade_stage( qs );
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   struct pipe_screen *screen = llvmpipe->pipe.screen;
-
-   if(qss->transfer) {
-      if(qss->map) {
-         screen->transfer_unmap(screen, qss->transfer);
-         qss->map = NULL;
-      }
-
-      screen->tex_transfer_destroy(qss->transfer);
-      qss->transfer = NULL;
-   }
-
-   align_free( qs );
-}
-
-
-struct quad_stage *
-lp_quad_shade_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct quad_shade_stage *qss;
-
-   qss = align_malloc(sizeof(struct quad_shade_stage), 16);
-   if (!qss)
-      return NULL;
-
-   memset(qss, 0, sizeof *qss);
-
-   qss->stage.llvmpipe = llvmpipe;
-   qss->stage.begin = shade_begin;
-   qss->stage.run = shade_quads;
-   qss->stage.destroy = shade_destroy;
-
-   return &qss->stage;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c b/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
deleted file mode 100644
index e672dc9e03..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include "lp_context.h"
-#include "lp_state.h"
-#include "pipe/p_shader_tokens.h"
-
-void
-lp_build_quad_pipeline(struct llvmpipe_context *lp)
-{
-   lp->quad.first = lp->quad.shade;
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_pipe.h b/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
deleted file mode 100644
index ff4747f33f..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/* Authors:  Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#ifndef LP_QUAD_PIPE_H
-#define LP_QUAD_PIPE_H
-
-
-struct llvmpipe_context;
-struct quad_header;
-
-
-/**
- * Fragment processing is performed on 2x2 blocks of pixels called "quads".
- * Quad processing is performed with a pipeline of stages represented by
- * this type.
- */
-struct quad_stage {
-   struct llvmpipe_context *llvmpipe;
-
-   struct quad_stage *next;
-
-   void (*begin)(struct quad_stage *qs);
-
-   /** the stage action */
-   void (*run)(struct quad_stage *qs, struct quad_header *quad[], unsigned nr);
-
-   void (*destroy)(struct quad_stage *qs);
-};
-
-
-struct quad_stage *lp_quad_shade_stage( struct llvmpipe_context *llvmpipe );
-
-void lp_build_quad_pipeline(struct llvmpipe_context *lp);
-
-#endif /* LP_QUAD_PIPE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 84e1963c44..421cccd302 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -35,7 +35,6 @@
 #include "lp_context.h"
 #include "lp_prim_setup.h"
 #include "lp_quad.h"
-#include "lp_quad_pipe.h"
 #include "lp_setup.h"
 #include "lp_state.h"
 #include "draw/draw_context.h"
@@ -45,6 +44,7 @@
 #include "pipe/p_thread.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "lp_tile_cache.h"
 #include "lp_tile_soa.h"
 
 
@@ -111,6 +111,83 @@ struct setup_context {
 
 
+/**
+ * Run the current JIT-compiled fragment function on a block of nr quads.
+ */
+static void
+shade_quads(struct llvmpipe_context *llvmpipe,
+            struct quad_header *quads[],
+            unsigned nr)
+{
+   struct lp_fragment_shader *fs = llvmpipe->fs;
+   void *constants;
+   struct tgsi_sampler **samplers;
+   struct quad_header *quad = quads[0];
+   const unsigned x = quad->input.x0;
+   const unsigned y = quad->input.y0;
+   uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
+   uint8_t *color;
+   void *depth;
+   uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
+   unsigned chan_index;
+   unsigned q;
+
+   assert(fs->current);
+   if(!fs->current)
+      return;
+
+   /* Sanity checks: quads are aligned and laid out left to right from (x, y) */
+   assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
+   assert(x % TILE_VECTOR_WIDTH == 0);
+   assert(y % TILE_VECTOR_HEIGHT == 0);
+   for (q = 0; q < nr; ++q) {
+      assert(quads[q]->input.x0 == x + q*2);
+      assert(quads[q]->input.y0 == y);
+   }
+
+   /* mask: expand each coverage bit of the first 4 quads to ~0 or 0 */
+   for (q = 0; q < 4; ++q)
+      for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
+         mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;
+
+   /* color buffer: address of (x, y) within the cached tile of cbuf 0 */
+   color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
+
+   /* depth buffer: offset into the mapped zsbuf, NULL when it is not mapped */
+   if(llvmpipe->zsbuf_map) {
+      assert((x % 2) == 0);
+      assert((y % 2) == 0);
+      depth = llvmpipe->zsbuf_map +
+              y*llvmpipe->zsbuf_transfer->stride +
+              2*x*llvmpipe->zsbuf_transfer->block.size;
+   }
+   else
+      depth = NULL;
+
+   constants = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
+   samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
+   /* TODO: blend color */
+
+   assert((((uintptr_t)mask) & 0xf) == 0);
+   assert((((uintptr_t)depth) & 0xf) == 0);
+   assert((((uintptr_t)color) & 0xf) == 0);
+   assert((((uintptr_t)llvmpipe->blend_color) & 0xf) == 0);
+
+   /* run shader */
+   fs->current->jit_function( x,
+                              y,
+                              quad->coef->a0,
+                              quad->coef->dadx,
+                              quad->coef->dady,
+                              constants,
+                              &mask[0][0],
+                              color,
+                              depth,
+                              samplers);
+}
+
+
+
 
 /**
  * Do triangle cull test using tri determinant (sign indicates orientation)
@@ -217,9 +294,9 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
          quad_ptrs[i] = &quads[i];
       }
 
-      lp->quad.first->run( lp->quad.first, quad_ptrs, nr_quads );
+      shade_quads( lp, quad_ptrs, nr_quads );
 #else
-      lp->quad.first->run( lp->quad.first, &quad, 1 );
+      shade_quads( lp, &quad, 1 );
 #endif
    }
 }
@@ -235,7 +312,6 @@ static void flush_spans( struct setup_context *setup )
    const int xleft1 = setup->span.left[1];
    const int xright0 = setup->span.right[0];
    const int xright1 = setup->span.right[1];
-   struct quad_stage *pipe = setup->llvmpipe->quad.first;
 
 
    int minleft = block_x(MIN2(xleft0, xleft1));
@@ -275,7 +351,7 @@ static void flush_spans( struct setup_context *setup )
          }
          assert(!(mask0 | mask1));
 
-         pipe->run( pipe, setup->quad_ptrs, nr_quads );
+         shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads );
       }
    }
 
@@ -1365,8 +1441,6 @@ void llvmpipe_setup_prepare( struct setup_context *setup )
       llvmpipe_update_derived(lp);
    }
 
-   lp->quad.first->begin( lp->quad.first );
-
    if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
        lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
        lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 9f5d2ffb11..5f800eb17f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -252,9 +252,5 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
       llvmpipe_update_fs( llvmpipe );
 
 
-   if (llvmpipe->dirty & (LP_NEW_FRAMEBUFFER |
-                          LP_NEW_FS))
-      lp_build_quad_pipeline(llvmpipe);
-
    llvmpipe->dirty = 0;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index 00c61c47ef..177a26b7b1 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -67,6 +67,19 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
 
    /* zbuf changing? */
    if (lp->framebuffer.zsbuf != fb->zsbuf) {
+
+      if(lp->zsbuf_transfer) {
+         struct pipe_screen *screen = pipe->screen;
+
+         if(lp->zsbuf_map) {
+            screen->transfer_unmap(screen, lp->zsbuf_transfer);
+            lp->zsbuf_map = NULL;
+         }
+
+         screen->tex_transfer_destroy(lp->zsbuf_transfer);
+         lp->zsbuf_transfer = NULL;
+      }
+
       /* assign new */
       lp->framebuffer.zsbuf = fb->zsbuf;
 
-- 
cgit v1.2.3


From 08dd41fd6899bd6b3289d30dc31f8b2998406889 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 23 Aug 2009 05:52:20 +0100
Subject: llvmpipe: Centralize the C <-> JIT interfaces in one place.

---
 src/gallium/drivers/llvmpipe/Makefile      |  1 +
 src/gallium/drivers/llvmpipe/SConscript    |  1 +
 src/gallium/drivers/llvmpipe/lp_jit.c      | 77 ++++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_jit.h      | 67 ++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_screen.c   | 30 ++----------
 src/gallium/drivers/llvmpipe/lp_state.h    | 16 +------
 src/gallium/drivers/llvmpipe/lp_state_fs.c |  4 +-
 7 files changed, 153 insertions(+), 43 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_jit.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_jit.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 9290720aaf..5603f06b39 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -27,6 +27,7 @@ C_SOURCES = \
 	lp_context.c \
 	lp_draw_arrays.c \
 	lp_flush.c \
+	lp_jit.c \
 	lp_prim_setup.c \
 	lp_prim_vbuf.c \
 	lp_setup.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 4161edb5cf..ed23660a68 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -31,6 +31,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_context.c',
 		'lp_draw_arrays.c',
 		'lp_flush.c',
+		'lp_jit.c',
 		'lp_prim_setup.c',
 		'lp_prim_vbuf.c',
 		'lp_setup.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
new file mode 100644
index 0000000000..c3ba03a5a1
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -0,0 +1,77 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * C - JIT interfaces
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "lp_screen.h"
+#include "lp_jit.h"
+
+
+void
+lp_jit_screen_cleanup(struct llvmpipe_screen *screen)
+{
+   if(screen->engine)
+      LLVMDisposeExecutionEngine(screen->engine);
+
+   if(screen->pass)
+      LLVMDisposePassManager(screen->pass);
+}
+
+
+void
+lp_jit_screen_init(struct llvmpipe_screen *screen)
+{
+   char *error = NULL;
+
+   screen->module = LLVMModuleCreateWithName("llvmpipe");
+
+   screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module);
+
+   if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
+      fprintf(stderr, "%s\n", error);
+      LLVMDisposeMessage(error);
+      abort();
+   }
+
+   screen->pass = LLVMCreateFunctionPassManager(screen->provider);
+   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(screen->engine), screen->pass);
+   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+    * but there are more on SVN. */
+   LLVMAddConstantPropagationPass(screen->pass);
+   LLVMAddInstructionCombiningPass(screen->pass);
+   LLVMAddPromoteMemoryToRegisterPass(screen->pass);
+   LLVMAddGVNPass(screen->pass);
+   LLVMAddCFGSimplificationPass(screen->pass);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
new file mode 100644
index 0000000000..03ab268d0c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * C - JIT interfaces
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_JIT_H
+#define LP_JIT_H
+
+
+#include <llvm-c/Core.h>
+
+
+struct tgsi_sampler;
+struct llvmpipe_screen;
+
+
+typedef void
+(*lp_jit_frag_func)(uint32_t x,
+                    uint32_t y,
+                    const void *a0,
+                    const void *dadx,
+                    const void *dady,
+                    const void *consts,
+                    uint32_t *mask,
+                    void *color,
+                    void *depth,
+                    struct tgsi_sampler **samplers);
+
+
+void
+lp_jit_screen_cleanup(struct llvmpipe_screen *screen);
+
+
+void
+lp_jit_screen_init(struct llvmpipe_screen *screen);
+
+
+#endif /* LP_JIT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 750573a4a9..f302b99ad7 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -26,8 +26,6 @@
  **************************************************************************/
 
 
-#include <llvm-c/Transforms/Scalar.h>
-
 #include "util/u_memory.h"
 #include "util/u_simple_screen.h"
 #include "pipe/internal/p_winsys_screen.h"
@@ -36,6 +34,7 @@
 
 #include "lp_texture.h"
 #include "lp_winsys.h"
+#include "lp_jit.h"
 #include "lp_screen.h"
 
 
@@ -162,11 +161,7 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
 
    struct pipe_winsys *winsys = _screen->winsys;
 
-   if(screen->engine)
-      LLVMDisposeExecutionEngine(screen->engine);
-
-   if(screen->pass)
-      LLVMDisposePassManager(screen->pass);
+   lp_jit_screen_cleanup(screen);
 
    if(winsys->destroy)
       winsys->destroy(winsys);
@@ -184,7 +179,6 @@ struct pipe_screen *
 llvmpipe_create_screen(struct pipe_winsys *winsys)
 {
    struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen);
-   char *error = NULL;
 
    if (!screen)
       return NULL;
@@ -202,25 +196,7 @@ llvmpipe_create_screen(struct pipe_winsys *winsys)
    llvmpipe_init_screen_texture_funcs(&screen->base);
    u_simple_screen_init(&screen->base);
 
-   screen->module = LLVMModuleCreateWithName("llvmpipe");
-
-   screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module);
-
-   if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
-      fprintf(stderr, "%s\n", error);
-      LLVMDisposeMessage(error);
-      abort();
-   }
-
-   screen->pass = LLVMCreateFunctionPassManager(screen->provider);
-   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(screen->engine), screen->pass);
-   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
-    * but there are more on SVN. */
-   LLVMAddConstantPropagationPass(screen->pass);
-   LLVMAddInstructionCombiningPass(screen->pass);
-   LLVMAddPromoteMemoryToRegisterPass(screen->pass);
-   LLVMAddGVNPass(screen->pass);
-   LLVMAddCFGSimplificationPass(screen->pass);
+   lp_jit_screen_init(screen);
 
    return &screen->base;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 2b1f2e452d..fb10329887 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -35,6 +35,7 @@
 
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
+#include "lp_jit.h"
 
 
 #define LP_NEW_VIEWPORT      0x1
@@ -58,19 +59,6 @@ struct tgsi_sampler;
 struct vertex_info;
 
 
-typedef void
-(*lp_shader_fs_func)(uint32_t x,
-                     uint32_t y,
-                     const void *a0,
-                     const void *dadx,
-                     const void *dady,
-                     const void *consts,
-                     uint32_t *mask,
-                     void *color,
-                     void *depth,
-                     struct tgsi_sampler **samplers);
-
-
 struct lp_fragment_shader;
 
 
@@ -90,7 +78,7 @@ struct lp_fragment_shader_variant
 
    LLVMValueRef function;
 
-   lp_shader_fs_func jit_function;
+   lp_jit_frag_func jit_function;
 
    struct lp_fragment_shader_variant *next;
 };
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index cf0a90bc18..f77b488e6d 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -453,7 +453,7 @@ generate_fragment(struct llvmpipe_context *lp,
 
    /* 
     * Generate the function prototype. Any change here must be reflected in
-    * lp_state.h's lp_shader_fs_func function pointer type, and vice-versa.
+    * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa.
     */
 
    fs_elem_type = lp_build_elem_type(fs_type);
@@ -604,7 +604,7 @@ generate_fragment(struct llvmpipe_context *lp,
       }
    }
 
-   variant->jit_function = (lp_shader_fs_func)LLVMGetPointerToGlobal(screen->engine, variant->function);
+   variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function);
 
 #ifdef DEBUG
    lp_disassemble(variant->jit_function);
-- 
cgit v1.2.3


From 8c7c108d712f17a5f307b9c8914b4abc4d5f148e Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 23 Aug 2009 06:18:28 +0100
Subject: llvmpipe: Structure manipulation helpers.

---
 src/gallium/drivers/llvmpipe/Makefile        |  1 +
 src/gallium/drivers/llvmpipe/SConscript      |  1 +
 src/gallium/drivers/llvmpipe/lp_bld_struct.c | 54 ++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_struct.h | 63 ++++++++++++++++++++++++++++
 4 files changed, 119 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_struct.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_struct.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 5603f06b39..ca0a8bf619 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -21,6 +21,7 @@ C_SOURCES = \
 	lp_bld_store.c \
 	lp_bld_logic.c \
 	lp_bld_swizzle.c \
+	lp_bld_struct.c \
 	lp_bld_tgsi_soa.c \
 	lp_bld_type.c \
 	lp_clear.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index ed23660a68..10ead09eec 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -23,6 +23,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_unpack.c',
 		'lp_bld_load.c',
 		'lp_bld_store.c',
+		'lp_bld_struct.c',
 		'lp_bld_logic.c',
 		'lp_bld_swizzle.c',
 		'lp_bld_tgsi_soa.c',		
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
new file mode 100644
index 0000000000..4877c4d3f7
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
@@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for manipulating structures.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+
+#include "lp_bld_struct.h"
+
+/* Load member `member` of the struct that ptr points to; name labels the result. */
+LLVMValueRef
+lp_build_struct_get(LLVMBuilderRef builder,
+                    LLVMValueRef ptr,
+                    unsigned member,
+                    const char *name)
+{
+   LLVMValueRef indices[2];
+   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); /* step through ptr itself */
+   indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); /* select the member */
+   ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+   return LLVMBuildLoad(builder, ptr, name);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
new file mode 100644
index 0000000000..cbefdc9f81
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for manipulating structures.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_STRUCT_H
+#define LP_BLD_STRUCT_H
+
+
+#include <llvm-c/Core.h>  
+#include <llvm-c/Target.h>
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+
+/* Assertions that an LLVM struct type is laid out like its C counterpart. */
+#define LP_CHECK_STRUCT_SIZE(_ctype, _ltarget, _ltype) \
+      assert(LLVMABISizeOfType(_ltarget, _ltype) == \
+             sizeof(_ctype))
+
+#define LP_CHECK_MEMBER_OFFSET(_ctype, _cmember, _ltarget, _ltype, _lindex) \
+      assert(LLVMOffsetOfElement(_ltarget, _ltype, _lindex) == \
+             offsetof(_ctype, _cmember))
+
+/* Emit a load of struct member `member` through the struct pointer ptr. */
+LLVMValueRef
+lp_build_struct_get(LLVMBuilderRef builder,
+                    LLVMValueRef ptr,
+                    unsigned member,
+                    const char *name);
+
+
+#endif /* !LP_BLD_STRUCT_H */
-- 
cgit v1.2.3


From f85c5f8621382ba1c8baa1582d87b46b388258d2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 23 Aug 2009 12:28:34 +0100
Subject: llvmpipe: Factor out and optimize the input interpolation.

Special attention is given to the interpolation of side by side quads.
Multiplications are made only for the first quad. Interpolation of
inputs for posterior quads are done exclusively with additions, and
perspective divide if necessary.
---
 src/gallium/drivers/llvmpipe/Makefile          |   1 +
 src/gallium/drivers/llvmpipe/SConscript        |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_interp.c   | 377 +++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_interp.h   |  99 +++++++
 src/gallium/drivers/llvmpipe/lp_bld_tgsi.h     |   6 +-
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 106 +------
 src/gallium/drivers/llvmpipe/lp_state_fs.c     | 100 +++----
 7 files changed, 521 insertions(+), 169 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_interp.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_interp.h

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index ca0a8bf619..4f31788e75 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -14,6 +14,7 @@ C_SOURCES = \
 	lp_bld_debug.c \
 	lp_bld_depth.c \
 	lp_bld_flow.c \
+	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_pack.c \
 	lp_bld_unpack.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 10ead09eec..ec82be14c8 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -18,6 +18,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_debug.c',
 		'lp_bld_depth.c',
 		'lp_bld_flow.c',
+		'lp_bld_interp.c',
 		'lp_bld_intr.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
new file mode 100644
index 0000000000..cfe20a0d75
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -0,0 +1,377 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_parse.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_interp.h"
+
+/* Label val "pos.<chan><suffix>" for attrib 0, "input<attrib-1>.<chan><suffix>" otherwise. */
+static void
+attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
+{
+   if(attrib == 0)
+      lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
+   else
+      lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
+}
+
+/* Load and broadcast the a0/dadx/dady coefficients of every enabled attribute channel. */
+static void
+coeffs_init(struct lp_build_interp_soa_context *bld,
+            LLVMValueRef a0_ptr,
+            LLVMValueRef dadx_ptr,
+            LLVMValueRef dady_ptr)
+{
+   LLVMBuilderRef builder = bld->base.builder;
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
+            LLVMValueRef a0 = NULL;
+            LLVMValueRef dadx = NULL;
+            LLVMValueRef dady = NULL;
+
+            switch( mode ) {
+            case TGSI_INTERPOLATE_PERSPECTIVE:
+               /* fall-through: perspective loads the same coefficients as linear */
+
+            case TGSI_INTERPOLATE_LINEAR:
+               dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
+               dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
+               dadx = lp_build_broadcast_scalar(&bld->base, dadx);
+               dady = lp_build_broadcast_scalar(&bld->base, dady);
+               attrib_name(dadx, attrib, chan, ".dadx");
+               attrib_name(dady, attrib, chan, ".dady");
+               /* fall-through: every mode needs a0 */
+
+            case TGSI_INTERPOLATE_CONSTANT:
+               a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
+               a0 = lp_build_broadcast_scalar(&bld->base, a0);
+               attrib_name(a0, attrib, chan, ".a0");
+               break;
+
+            default:
+               assert(0);
+               break;
+            }
+
+            bld->a0  [attrib][chan] = a0;
+            bld->dadx[attrib][chan] = dadx;
+            bld->dady[attrib][chan] = dady;
+         }
+      }
+   }
+}
+
+
+/**
+ * Multiply coeff by the integer step; steps 0, 1 and 2 need no multiplication.
+ *
+ * TODO: Should be elsewhere.
+ */
+static LLVMValueRef
+coeff_multiply(struct lp_build_interp_soa_context *bld,
+               LLVMValueRef coeff,
+               int step)
+{
+   LLVMValueRef factor;
+
+   switch(step) {
+   case 0:
+      return bld->base.zero;
+   case 1:
+      return coeff;
+   case 2:
+      return lp_build_add(&bld->base, coeff, coeff); /* coeff * 2 as an addition */
+   default:
+      factor = lp_build_const_scalar(bld->base.type, (double)step);
+      return lp_build_mul(&bld->base, coeff, factor);
+   }
+}
+
+
+/**
+ * Scale dadx by xstep and dady by ystep for every non-constant attribute.
+ */
+static void
+coeffs_update(struct lp_build_interp_soa_context *bld)
+{
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      if (mode != TGSI_INTERPOLATE_CONSTANT) { /* constant attributes have no dadx/dady */
+         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+            if(mask & (1 << chan)) {
+               bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
+               bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
+            }
+         }
+      }
+   }
+}
+
+/* Emit a0 + x*dadx + y*dady for each enabled channel, then the perspective divide. */
+static void
+attribs_init(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef x = bld->pos[0];
+   LLVMValueRef y = bld->pos[1];
+   LLVMValueRef oow = NULL; /* 1/w, built on first use and then reused */
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            LLVMValueRef a0   = bld->a0  [attrib][chan];
+            LLVMValueRef dadx = bld->dadx[attrib][chan];
+            LLVMValueRef dady = bld->dady[attrib][chan];
+            LLVMValueRef res;
+
+            res = a0;
+
+            if (mode != TGSI_INTERPOLATE_CONSTANT) {
+               res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx));
+               res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady));
+            }
+
+            /* Keep the value of the attribute before perspective divide for faster updates */
+            bld->attribs_pre[attrib][chan] = res;
+
+            if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
+               LLVMValueRef w = bld->pos[3];
+               assert(attrib != 0);
+               if(!oow)
+                  oow = lp_build_rcp(&bld->base, w);
+               res = lp_build_mul(&bld->base, res, oow);
+            }
+
+            attrib_name(res, attrib, chan, "");
+
+            bld->attribs[attrib][chan] = res;
+         }
+      }
+   }
+}
+
+/* Step each non-constant attribute by dadx/dady, then redo the perspective divide. */
+static void
+attribs_update(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef oow = NULL; /* 1/w, built on first use and then reused */
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+
+      if (mode != TGSI_INTERPOLATE_CONSTANT) {
+         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+            if(mask & (1 << chan)) {
+               LLVMValueRef dadx = bld->dadx[attrib][chan];
+               LLVMValueRef dady = bld->dady[attrib][chan];
+               LLVMValueRef res;
+
+               res = bld->attribs_pre[attrib][chan];
+
+               if(bld->xstep)
+                  res = lp_build_add(&bld->base, res, dadx);
+
+               if(bld->ystep)
+                  res = lp_build_add(&bld->base, res, dady);
+
+               bld->attribs_pre[attrib][chan] = res;
+
+               if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
+                  LLVMValueRef w = bld->pos[3]; /* attribute 0 was already stepped above */
+                  assert(attrib != 0);
+                  if(!oow)
+                     oow = lp_build_rcp(&bld->base, w);
+                  res = lp_build_mul(&bld->base, res, oow);
+               }
+
+               attrib_name(res, attrib, chan, "");
+
+               bld->attribs[attrib][chan] = res;
+            }
+         }
+      }
+   }
+}
+
+
+/**
+ * Store the initial position values as the x and y channels of attribute 0.
+ *
+ * Parameter x0, y0 are the integer values with the quad upper left coordinates.
+ */
+static void
+pos_init(struct lp_build_interp_soa_context *bld,
+         LLVMValueRef x0,
+         LLVMValueRef y0)
+{
+   lp_build_name(x0, "pos.x");
+   lp_build_name(y0, "pos.y");
+
+   bld->attribs[0][0] = x0;
+   bld->attribs[0][1] = y0;
+}
+
+/* Advance pos.x by xstep and pos.y by ystep; a zero step emits no addition. */
+static void
+pos_update(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef x = bld->attribs[0][0];
+   LLVMValueRef y = bld->attribs[0][1];
+
+   if(bld->xstep)
+      x = lp_build_add(&bld->base, x, lp_build_const_scalar(bld->base.type, bld->xstep));
+
+   if(bld->ystep)
+      y = lp_build_add(&bld->base, y, lp_build_const_scalar(bld->base.type, bld->ystep));
+
+   lp_build_name(x, "pos.x");
+   lp_build_name(y, "pos.y");
+
+   bld->attribs[0][0] = x;
+   bld->attribs[0][1] = y;
+}
+
+/* Set up bld: scan the shader's input declarations and emit the initial interpolation. */
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         const struct tgsi_token *tokens,
+                         LLVMBuilderRef builder,
+                         union lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x0,
+                         LLVMValueRef y0,
+                         int xstep,
+                         int ystep)
+{
+   struct tgsi_parse_context parse;
+   struct tgsi_full_declaration *decl;
+
+   memset(bld, 0, sizeof *bld);
+
+   lp_build_context_init(&bld->base, builder, type);
+
+   /* Convenience aliases: pos is attribute 0, inputs start at attribute 1 */
+   bld->pos = bld->attribs[0];
+   bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
+
+   /* Position: only z and w are interpolated; x and y are set by pos_init() */
+   bld->num_attribs = 1;
+   bld->mask[0] = TGSI_WRITEMASK_ZW;
+   bld->mode[0] = TGSI_INTERPOLATE_LINEAR;
+
+   /* Inputs: shader input N is stored as attribute 1 + N */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         decl = &parse.FullToken.FullDeclaration;
+         if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+            unsigned first, last, mask;
+            unsigned attrib;
+
+            first = decl->DeclarationRange.First;
+            last = decl->DeclarationRange.Last;
+            mask = decl->Declaration.UsageMask;
+
+            for( attrib = first; attrib <= last; ++attrib ) {
+               bld->mask[1 + attrib] = mask;
+               bld->mode[1 + attrib] = decl->Declaration.Interpolate;
+            }
+
+            bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+   tgsi_parse_free( &parse );
+
+   coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+
+   pos_init(bld, x0, y0);
+
+   attribs_init(bld);
+
+   bld->xstep = xstep;
+   bld->ystep = ystep;
+
+   coeffs_update(bld); /* pre-scale dadx/dady by the steps for later updates */
+}
+
+
+/**
+ * Advance the position and the interpolated inputs by one (xstep, ystep) step.
+ */
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld)
+{
+   pos_update(bld);
+
+   attribs_update(bld);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
new file mode 100644
index 0000000000..9194f6233a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -0,0 +1,99 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * Special attention is given to the interpolation of side by side quads.
+ * Multiplications are made only for the first quad. Interpolation of
+ * inputs for subsequent quads is done exclusively with additions, plus a
+ * perspective divide if necessary.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_INTERP_H
+#define LP_BLD_INTERP_H
+
+
+#include <llvm-c/Core.h>
+
+#include "tgsi/tgsi_exec.h"
+
+#include "lp_bld_type.h"
+
+
+struct tgsi_token;
+
+/* Interpolation state: attribute 0 is the position, attribute 1 + N is shader input N. */
+struct lp_build_interp_soa_context
+{
+   struct lp_build_context base;
+
+   unsigned num_attribs; /* position plus the declared inputs */
+   unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /* enabled channels, one bit per channel */
+   unsigned mode[1 + PIPE_MAX_SHADER_INPUTS]; /* TGSI_INTERPOLATE_x */
+
+   LLVMValueRef a0  [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   int xstep;
+   int ystep;
+
+   /* Attribute values before perspective divide */
+   LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   /*
+    * Convenience pointers into attribs. Callers may access these.
+    */
+   const LLVMValueRef *pos;
+   const LLVMValueRef (*inputs)[NUM_CHANNELS];
+};
+
+/* Initialize bld and emit the interpolation of position and inputs at (x0, y0). */
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         const struct tgsi_token *tokens,
+                         LLVMBuilderRef builder,
+                         union lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x0,
+                         LLVMValueRef y0,
+                         int xstep,
+                         int ystep);
+
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld);
+
+
+#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 86380a1dca..d42ab99cf8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -54,11 +54,9 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const struct tgsi_token *tokens,
                   union lp_type type,
                   struct lp_build_mask_context *mask,
-                  LLVMValueRef *pos,
-                  LLVMValueRef a0_ptr,
-                  LLVMValueRef dadx_ptr,
-                  LLVMValueRef dady_ptr,
                   LLVMValueRef consts_ptr,
+                  const LLVMValueRef *pos,
+                  const LLVMValueRef (*inputs)[4],
                   LLVMValueRef (*outputs)[4],
                   LLVMValueRef samplers_ptr);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 60cf5e9af7..1335ba862e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -83,19 +83,12 @@ struct lp_build_tgsi_soa_context
 {
    struct lp_build_context base;
 
-   LLVMValueRef x, y, w;
-   LLVMValueRef a0_ptr;
-   LLVMValueRef dadx_ptr;
-   LLVMValueRef dady_ptr;
-
    LLVMValueRef consts_ptr;
+   const LLVMValueRef *pos;
+   const LLVMValueRef (*inputs)[NUM_CHANNELS];
    LLVMValueRef (*outputs)[NUM_CHANNELS];
    LLVMValueRef samplers_ptr;
 
-   LLVMValueRef oow;
-
-   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-
    LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
    LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
 
@@ -1350,93 +1343,16 @@ emit_instruction(
    return 1;
 }
 
-static void
-emit_declaration(
-   struct lp_build_tgsi_soa_context *bld,
-   struct tgsi_full_declaration *decl )
-{
-   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
-      LLVMBuilderRef builder = bld->base.builder;
-      unsigned first, last, mask;
-      unsigned attrib, chan;
-
-      first = decl->DeclarationRange.First;
-      last = decl->DeclarationRange.Last;
-      mask = decl->Declaration.UsageMask;
-
-      for( attrib = first; attrib <= last; attrib++ ) {
-         for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
-            LLVMValueRef input = bld->base.undef;
-
-            if( mask & (1 << chan) ) {
-               LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), (1 + attrib)*NUM_CHANNELS + chan, 0);
-               LLVMValueRef a0;
-               LLVMValueRef dadx;
-               LLVMValueRef dady;
-
-               switch( decl->Declaration.Interpolate ) {
-               case TGSI_INTERPOLATE_PERSPECTIVE:
-                  /* fall-through */
-
-               case TGSI_INTERPOLATE_LINEAR: {
-                  LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
-                  LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
-                  dadx = LLVMBuildLoad(builder, dadx_ptr, "");
-                  dady = LLVMBuildLoad(builder, dady_ptr, "");
-                  dadx = lp_build_broadcast_scalar(&bld->base, dadx);
-                  dady = lp_build_broadcast_scalar(&bld->base, dady);
-                  lp_build_name(dadx, "dadx_%u.%c", attrib, "xyzw"[chan]);
-                  lp_build_name(dady, "dady_%u.%c", attrib, "xyzw"[chan]);
-                  /* fall-through */
-               }
-
-               case TGSI_INTERPOLATE_CONSTANT: {
-                  LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
-                  a0 = LLVMBuildLoad(builder, a0_ptr, "");
-                  a0 = lp_build_broadcast_scalar(&bld->base, a0);
-                  lp_build_name(a0, "a0_%u.%c", attrib, "xyzw"[chan]);
-                  break;
-               }
-
-               default:
-                  assert(0);
-                  break;
-               }
-
-               input = a0;
-
-               if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
-                  input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
-                  input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
-               }
-
-               if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
-                  if(!bld->oow)
-                     bld->oow = lp_build_rcp(&bld->base, bld->w);
-                  input = lp_build_mul(&bld->base, input, bld->oow);
-               }
-
-               lp_build_name(input, "input%u.%c", attrib, "xyzw"[chan]);
-            }
-
-            bld->inputs[attrib][chan] = input;
-         }
-      }
-   }
-}
-
 
 void
 lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const struct tgsi_token *tokens,
                   union lp_type type,
                   struct lp_build_mask_context *mask,
-                  LLVMValueRef *pos,
-                  LLVMValueRef a0_ptr,
-                  LLVMValueRef dadx_ptr,
-                  LLVMValueRef dady_ptr,
                   LLVMValueRef consts_ptr,
-                  LLVMValueRef (*outputs)[4],
+                  const LLVMValueRef *pos,
+                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
+                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                   LLVMValueRef samplers_ptr)
 {
    struct lp_build_tgsi_soa_context bld;
@@ -1448,12 +1364,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
    memset(&bld, 0, sizeof bld);
    lp_build_context_init(&bld.base, builder, type);
    bld.mask = mask;
-   bld.x = pos[0];
-   bld.y = pos[1];
-   bld.w = pos[3];
-   bld.a0_ptr = a0_ptr;
-   bld.dadx_ptr = dadx_ptr;
-   bld.dady_ptr = dady_ptr;
+   bld.pos = pos;
+   bld.inputs = inputs;
    bld.outputs = outputs;
    bld.consts_ptr = consts_ptr;
    bld.samplers_ptr = samplers_ptr;
@@ -1465,9 +1377,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
 
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_DECLARATION:
-         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
-            emit_declaration( &bld, &parse.FullToken.FullDeclaration );
-         }
+         /* Input already interpolated */
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index e639f9c20f..361b30699c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -69,9 +69,11 @@
 #include "tgsi/tgsi_scan.h"
 #include "tgsi/tgsi_parse.h"
 #include "lp_bld_type.h"
+#include "lp_bld_const.h"
 #include "lp_bld_conv.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_depth.h"
+#include "lp_bld_interp.h"
 #include "lp_bld_tgsi.h"
 #include "lp_bld_alpha.h"
 #include "lp_bld_blend.h"
@@ -88,22 +90,16 @@ static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
 static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
 
 
-/**
- * Generate the position vectors.
- *
- * TODO: This should be called only once per fragment pipeline, for the first
- * quad, and the neighboring quad positions obtained by additions.
- *
- * Parameter x, y are the integer values with the quad upper left coordinates.
+/*
+ * Derive from the quad's upper left scalar coordinates the coordinates for
+ * all other quad pixels
  */
 static void
-generate_pos(LLVMBuilderRef builder,
-             LLVMValueRef x,
-             LLVMValueRef y,
-             LLVMValueRef a0_ptr,
-             LLVMValueRef dadx_ptr,
-             LLVMValueRef dady_ptr,
-             LLVMValueRef *pos)
+generate_pos0(LLVMBuilderRef builder,
+              LLVMValueRef x,
+              LLVMValueRef y,
+              LLVMValueRef *x0,
+              LLVMValueRef *y0)
 {
    LLVMTypeRef int_elem_type = LLVMInt32Type();
    LLVMTypeRef int_vec_type = LLVMVectorType(int_elem_type, QUAD_SIZE);
@@ -111,14 +107,8 @@ generate_pos(LLVMBuilderRef builder,
    LLVMTypeRef vec_type = LLVMVectorType(elem_type, QUAD_SIZE);
    LLVMValueRef x_offsets[QUAD_SIZE];
    LLVMValueRef y_offsets[QUAD_SIZE];
-   unsigned chan;
    unsigned i;
 
-   /*
-    * Derive from the quad's upper left scalar coordinates the coordinates for
-    * all other quad pixels
-    */
-
    x = lp_build_broadcast(builder, int_vec_type, x);
    y = lp_build_broadcast(builder, int_vec_type, y);
 
@@ -130,33 +120,8 @@ generate_pos(LLVMBuilderRef builder,
    x = LLVMBuildAdd(builder, x, LLVMConstVector(x_offsets, QUAD_SIZE), "");
    y = LLVMBuildAdd(builder, y, LLVMConstVector(y_offsets, QUAD_SIZE), "");
 
-   x = LLVMBuildSIToFP(builder, x, vec_type, "");
-   y = LLVMBuildSIToFP(builder, y, vec_type, "");
-
-   pos[0] = x;
-   pos[1] = y;
-
-   /* 
-    * Calculate z and w from the interpolation factors.
-    */
-
-   for(chan = 2; chan < NUM_CHANNELS; ++chan) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
-      LLVMValueRef a0   = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr,   &index, 1, ""), "");
-      LLVMValueRef dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
-      LLVMValueRef dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
-      LLVMValueRef res;
-      a0   = lp_build_broadcast(builder, vec_type, a0);
-      dadx = lp_build_broadcast(builder, vec_type, dadx);
-      dady = lp_build_broadcast(builder, vec_type, dady);
-      res = a0;
-      res = LLVMBuildAdd(builder, res, LLVMBuildMul(builder, dadx, x, ""), "");
-      res = LLVMBuildAdd(builder, res, LLVMBuildMul(builder, dady, y, ""), "");
-      pos[chan] = res;
-   }
-
-   for(chan = 0; chan < NUM_CHANNELS; ++chan)
-      lp_build_name(pos[chan], "pos.%c", "xyzw"[chan]);
+   *x0 = LLVMBuildSIToFP(builder, x, vec_type, "");
+   *y0 = LLVMBuildSIToFP(builder, y, vec_type, "");
 }
 
 
@@ -218,11 +183,7 @@ generate_fs(struct llvmpipe_context *lp,
             union lp_type type,
             LLVMValueRef context_ptr,
             unsigned i,
-            LLVMValueRef x,
-            LLVMValueRef y,
-            LLVMValueRef a0_ptr,
-            LLVMValueRef dadx_ptr,
-            LLVMValueRef dady_ptr,
+            const struct lp_build_interp_soa_context *interp,
             LLVMValueRef *pmask,
             LLVMValueRef *color,
             LLVMValueRef depth_ptr)
@@ -233,8 +194,8 @@ generate_fs(struct llvmpipe_context *lp,
    LLVMTypeRef int_vec_type;
    LLVMValueRef consts_ptr;
    LLVMValueRef samplers_ptr;
-   LLVMValueRef pos[NUM_CHANNELS];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   LLVMValueRef z = interp->pos[2];
    struct lp_build_mask_context mask;
    boolean early_depth_test;
    unsigned attrib;
@@ -247,8 +208,6 @@ generate_fs(struct llvmpipe_context *lp,
    consts_ptr = lp_jit_context_constants(builder, context_ptr);
    samplers_ptr = lp_jit_context_samplers(builder, context_ptr);
 
-   generate_pos(builder, x, y, a0_ptr, dadx_ptr, dady_ptr, pos);
-
    lp_build_mask_begin(&mask, builder, type, *pmask);
 
    early_depth_test =
@@ -260,14 +219,14 @@ generate_fs(struct llvmpipe_context *lp,
 
    if(early_depth_test)
       generate_depth(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
+                     type, &mask,
+                     z, depth_ptr);
 
    memset(outputs, 0, sizeof outputs);
 
    lp_build_tgsi_soa(builder, tokens, type, &mask,
-                     pos, a0_ptr, dadx_ptr, dady_ptr,
-                     consts_ptr, outputs, samplers_ptr);
+                     consts_ptr, interp->pos, interp->inputs,
+                     outputs, samplers_ptr);
 
    for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -300,7 +259,7 @@ generate_fs(struct llvmpipe_context *lp,
 
             case TGSI_SEMANTIC_POSITION:
                if(chan == 2)
-                  pos[2] = outputs[attrib][chan];
+                  z = outputs[attrib][chan];
                break;
             }
          }
@@ -309,8 +268,8 @@ generate_fs(struct llvmpipe_context *lp,
 
    if(!early_depth_test)
       generate_depth(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
+                     type, &mask,
+                     z, depth_ptr);
 
    lp_build_mask_end(&mask);
 
@@ -400,6 +359,9 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef depth_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
+   LLVMValueRef x0;
+   LLVMValueRef y0;
+   struct lp_build_interp_soa_context interp;
    LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
    LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
    LLVMValueRef blend_mask;
@@ -516,14 +478,19 @@ generate_fragment(struct llvmpipe_context *lp,
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
+   generate_pos0(builder, x, y, &x0, &y0);
+
+   lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type,
+                            a0_ptr, dadx_ptr, dady_ptr,
+                            x0, y0, 2, 0);
+
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
       LLVMValueRef out_color[NUM_CHANNELS];
-      LLVMValueRef x_i;
       LLVMValueRef depth_ptr_i;
 
-      /* TODO: Reuse position interpolation */
-      x_i = LLVMBuildAdd(builder, x, LLVMConstInt(LLVMInt32Type(), 2*i, 0), "");
+      if(i != 0)
+         lp_build_interp_soa_update(&interp);
 
       fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
       depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
@@ -533,8 +500,7 @@ generate_fragment(struct llvmpipe_context *lp,
                   fs_type,
                   context_ptr,
                   i,
-                  x_i, y,
-                  a0_ptr, dadx_ptr, dady_ptr,
+                  &interp,
                   &fs_mask[i],
                   out_color,
                   depth_ptr_i);
-- 
cgit v1.2.3


From f311bacebd167651e5be3bb3cef14fdfb6e1d925 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 23 Aug 2009 14:25:31 +0100
Subject: llvmpipe: Merge all pixel format related files.

---
 src/gallium/drivers/llvmpipe/Makefile            |   5 +-
 src/gallium/drivers/llvmpipe/SConscript          |   5 +-
 src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 303 +++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_load.c       |  59 -----
 src/gallium/drivers/llvmpipe/lp_bld_pack.c       | 130 ----------
 src/gallium/drivers/llvmpipe/lp_bld_store.c      |  58 -----
 src/gallium/drivers/llvmpipe/lp_bld_unpack.c     | 152 ------------
 7 files changed, 305 insertions(+), 407 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_load.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_pack.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_store.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_unpack.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 4f31788e75..0179a0bb26 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -14,12 +14,9 @@ C_SOURCES = \
 	lp_bld_debug.c \
 	lp_bld_depth.c \
 	lp_bld_flow.c \
+	lp_bld_format.c \
 	lp_bld_interp.c \
 	lp_bld_intr.c \
-	lp_bld_pack.c \
-	lp_bld_unpack.c \
-	lp_bld_load.c \
-	lp_bld_store.c \
 	lp_bld_logic.c \
 	lp_bld_swizzle.c \
 	lp_bld_struct.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index ec82be14c8..218d0387df 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -18,12 +18,9 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_debug.c',
 		'lp_bld_depth.c',
 		'lp_bld_flow.c',
+		'lp_bld_format.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
-		'lp_bld_pack.c',
-		'lp_bld_unpack.c',
-		'lp_bld_load.c',
-		'lp_bld_store.c',
 		'lp_bld_struct.c',
 		'lp_bld_logic.c',
 		'lp_bld_swizzle.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
new file mode 100644
index 0000000000..dcbc0076c7
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -0,0 +1,303 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_format.h"
+
+
+/**
+ * Build IR that unpacks one pixel of an arithmetic-layout format of at most
+ * 32 bits into a four-element float vector, applying the format's swizzle.
+ *
+ * Only unsigned channels narrower than 32 bits are handled (asserted below);
+ * normalized channels are scaled by 1/((1 << bits) - 1).
+ */
+LLVMValueRef
+lp_build_unpack_rgba(LLVMBuilderRef builder,
+                     enum pipe_format format,
+                     LLVMValueRef packed)
+{
+   const struct util_format_description *desc;
+   LLVMValueRef shifted, casted, scaled, masked;
+   LLVMValueRef shifts[4], masks[4], scales[4], swizzles[4], aux[4];
+   bool normalized;
+   int empty_channel;
+   unsigned shift;
+   unsigned i;
+
+   desc = util_format_description(format);
+
+   /* FIXME: Support more formats */
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+   assert(desc->block.bits <= 32);
+
+   /* Do the intermediate integer computations with 32bit integers since it
+    * matches floating point size */
+   if (desc->block.bits < 32)
+      packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
+
+   /* Broadcast the packed value to all four channels */
+   packed = LLVMBuildInsertElement(builder,
+                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
+                                   packed,
+                                   LLVMConstNull(LLVMInt32Type()),
+                                   "");
+   packed = LLVMBuildShuffleVector(builder,
+                                   packed,
+                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
+                                   LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
+                                   "");
+
+   /* Initialize vector constants */
+   normalized = FALSE;
+   empty_channel = -1;
+   shift = 0;
+   for (i = 0; i < 4; ++i) {
+      unsigned bits = desc->channel[i].size;
+
+      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
+         shifts[i] = LLVMGetUndef(LLVMInt32Type());
+         masks[i] = LLVMConstNull(LLVMInt32Type());
+         scales[i] =  LLVMConstNull(LLVMFloatType());
+         empty_channel = i; /* masked and scaled to zero; reused for SWIZZLE_0 */
+      }
+      else {
+         unsigned mask = (1u << bits) - 1; /* unsigned shift: bits may be 31 */
+
+         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
+         assert(bits < 32);
+
+         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
+         masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
+
+         if (desc->channel[i].normalized) {
+            scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
+            normalized = TRUE;
+         }
+         else
+            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
+      }
+
+      shift += bits;
+   }
+
+   shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
+   masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
+   /* UIToFP can't be expressed in SSE2; masked values are < 2^31 so SIToFP is safe */
+   casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
+
+   if (normalized)
+      scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
+   else
+      scaled = casted;
+
+   for (i = 0; i < 4; ++i)
+      aux[i] = LLVMGetUndef(LLVMFloatType());
+
+   for (i = 0; i < 4; ++i) {
+      enum util_format_swizzle swizzle = desc->swizzle[i];
+
+      switch (swizzle) {
+      case UTIL_FORMAT_SWIZZLE_X:
+      case UTIL_FORMAT_SWIZZLE_Y:
+      case UTIL_FORMAT_SWIZZLE_Z:
+      case UTIL_FORMAT_SWIZZLE_W:
+         swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
+         break;
+      case UTIL_FORMAT_SWIZZLE_0:
+         assert(empty_channel >= 0);
+         swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
+         break;
+      case UTIL_FORMAT_SWIZZLE_1:
+         swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0); /* selects aux[0] */
+         aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
+         break;
+      case UTIL_FORMAT_SWIZZLE_NONE:
+         swizzles[i] = LLVMGetUndef(LLVMInt32Type()); /* same type as the other mask elements */
+         assert(0);
+         break;
+      }
+   }
+
+   return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
+}
+
+
+/**
+ * Build IR that packs a four-element float vector into one pixel of an
+ * arithmetic-layout format (at most 32 bits); values are not clamped.
+ */
+LLVMValueRef
+lp_build_pack_rgba(LLVMBuilderRef builder,
+                   enum pipe_format format,
+                   LLVMValueRef rgba)
+{
+   const struct util_format_description *desc;
+   LLVMTypeRef type;
+   LLVMValueRef packed = NULL;
+   LLVMValueRef shifted, casted, scaled, unswizzled;
+   LLVMValueRef swizzles[4], shifts[4], scales[4];
+   bool normalized;
+   unsigned shift;
+   unsigned i, j;
+
+   desc = util_format_description(format);
+
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+   assert(desc->block.bits <= 32); /* components are combined in a 32bit integer */
+
+   type = LLVMIntType(desc->block.bits);
+
+   /* Unswizzle the color components into the source vector. */
+   for (i = 0; i < 4; ++i) {
+      for (j = 0; j < 4; ++j) {
+         if (desc->swizzle[j] == i)
+            break;
+      }
+      if (j < 4)
+         swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
+      else
+         swizzles[i] = LLVMGetUndef(LLVMInt32Type());
+   }
+
+   unswizzled = LLVMBuildShuffleVector(builder, rgba,
+                                       LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
+                                       LLVMConstVector(swizzles, 4), "");
+
+   normalized = FALSE;
+   shift = 0;
+   for (i = 0; i < 4; ++i) {
+      unsigned bits = desc->channel[i].size;
+
+      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
+         shifts[i] = LLVMGetUndef(LLVMInt32Type());
+         scales[i] =  LLVMGetUndef(LLVMFloatType());
+      }
+      else {
+         unsigned mask = (1u << bits) - 1; /* unsigned shift: bits may be 31 */
+
+         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
+         assert(bits < 32);
+
+         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
+
+         if (desc->channel[i].normalized) {
+            scales[i] = LLVMConstReal(LLVMFloatType(), mask);
+            normalized = TRUE;
+         }
+         else
+            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
+      }
+
+      shift += bits;
+   }
+
+   if (normalized)
+      scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
+   else
+      scaled = unswizzled;
+
+   casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
+
+   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
+
+   /* Bitwise or all components */
+   for (i = 0; i < 4; ++i) {
+      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
+         packed = packed ? LLVMBuildOr(builder, packed, component, "") : component;
+      }
+   }
+
+   if (!packed)
+      packed = LLVMGetUndef(LLVMInt32Type());
+
+   if (desc->block.bits < 32)
+      packed = LLVMBuildTrunc(builder, packed, type, "");
+
+   return packed;
+}
+
+
+/**
+ * Build IR that loads one packed pixel from ptr and unpacks it to rgba.
+ */
+LLVMValueRef
+lp_build_load_rgba(LLVMBuilderRef builder,
+                   enum pipe_format format,
+                   LLVMValueRef ptr)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   LLVMTypeRef pixel_ptr_type;
+   LLVMValueRef typed_ptr;
+
+   /* FIXME: Support more formats */
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+   assert(desc->block.bits <= 32);
+
+   /* Reinterpret the pointer as pointing to an integer of the pixel's size */
+   pixel_ptr_type = LLVMPointerType(LLVMIntType(desc->block.bits), 0);
+   typed_ptr = LLVMBuildBitCast(builder, ptr, pixel_ptr_type, "");
+
+   return lp_build_unpack_rgba(builder, format,
+                               LLVMBuildLoad(builder, typed_ptr, ""));
+}
+
+
+/**
+ * Build IR that packs an rgba vector and stores it as one pixel at ptr.
+ */
+void
+lp_build_store_rgba(LLVMBuilderRef builder,
+                    enum pipe_format format,
+                    LLVMValueRef ptr,
+                    LLVMValueRef rgba)
+{
+   const struct util_format_description *desc;
+   LLVMTypeRef type;
+   LLVMValueRef packed;
+
+   desc = util_format_description(format);
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+   assert(desc->block.bits <= 32); /* same limit as lp_build_load_rgba */
+
+   type = LLVMIntType(desc->block.bits);
+   packed = lp_build_pack_rgba(builder, format, rgba);
+   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
+   LLVMBuildStore(builder, packed, ptr);
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_load.c b/src/gallium/drivers/llvmpipe/lp_bld_load.c
deleted file mode 100644
index 27db7b2db0..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_bld_load.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "util/u_format.h"
-
-#include "lp_bld_format.h"
-
-
-LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format,
-                   LLVMValueRef ptr)
-{
-   const struct util_format_description *desc;
-   LLVMTypeRef type;
-   LLVMValueRef packed;
-
-   desc = util_format_description(format);
-
-   /* FIXME: Support more formats */
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
-   assert(desc->block.width == 1);
-   assert(desc->block.height == 1);
-   assert(desc->block.bits <= 32);
-
-   type = LLVMIntType(desc->block.bits);
-
-   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
-   packed = LLVMBuildLoad(builder, ptr, "");
-
-   return lp_build_unpack_rgba(builder, format, packed);
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
deleted file mode 100644
index 71261e4f39..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "util/u_format.h"
-
-#include "lp_bld_format.h"
-
-
-LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format,
-                   LLVMValueRef rgba)
-{
-   const struct util_format_description *desc;
-   LLVMTypeRef type;
-   LLVMValueRef packed = NULL;
-   LLVMValueRef swizzles[4];
-   LLVMValueRef shifted, casted, scaled, unswizzled;
-   LLVMValueRef shifts[4];
-   LLVMValueRef scales[4];
-   bool normalized;
-   unsigned shift;
-   unsigned i, j;
-
-   desc = util_format_description(format);
-
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
-   assert(desc->block.width == 1);
-   assert(desc->block.height == 1);
-
-   type = LLVMIntType(desc->block.bits);
-
-   /* Unswizzle the color components into the source vector. */
-   for (i = 0; i < 4; ++i) {
-      for (j = 0; j < 4; ++j) {
-         if (desc->swizzle[j] == i)
-            break;
-      }
-      if (j < 4)
-         swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
-      else
-         swizzles[i] = LLVMGetUndef(LLVMInt32Type());
-   }
-
-   unswizzled = LLVMBuildShuffleVector(builder, rgba,
-                                       LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
-                                       LLVMConstVector(swizzles, 4), "");
-
-   normalized = FALSE;
-   shift = 0;
-   for (i = 0; i < 4; ++i) {
-      unsigned bits = desc->channel[i].size;
-
-      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
-         shifts[i] = LLVMGetUndef(LLVMInt32Type());
-         scales[i] =  LLVMGetUndef(LLVMFloatType());
-      }
-      else {
-         unsigned mask = (1 << bits) - 1;
-
-         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
-         assert(bits < 32);
-
-         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
-
-         if (desc->channel[i].normalized) {
-            scales[i] = LLVMConstReal(LLVMFloatType(), mask);
-            normalized = TRUE;
-         }
-         else
-            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
-      }
-
-      shift += bits;
-   }
-
-   if (normalized)
-      scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
-   else
-      scaled = unswizzled;
-
-   casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
-
-   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
-   
-   /* Bitwise or all components */
-   for (i = 0; i < 4; ++i) {
-      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
-         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
-         if (packed)
-            packed = LLVMBuildOr(builder, packed, component, "");
-         else
-            packed = component;
-      }
-   }
-
-   if (!packed)
-      packed = LLVMGetUndef(LLVMInt32Type());
-
-   if (desc->block.bits < 32)
-      packed = LLVMBuildTrunc(builder, packed, type, "");
-
-   return packed;
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_store.c b/src/gallium/drivers/llvmpipe/lp_bld_store.c
deleted file mode 100644
index 1da6dac8a2..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_bld_store.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "util/u_format.h"
-
-#include "lp_bld_format.h"
-
-
-void
-lp_build_store_rgba(LLVMBuilderRef builder,
-                    enum pipe_format format,
-                    LLVMValueRef ptr,
-                    LLVMValueRef rgba)
-{
-   const struct util_format_description *desc;
-   LLVMTypeRef type;
-   LLVMValueRef packed;
-
-   desc = util_format_description(format);
-
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
-   assert(desc->block.width == 1);
-   assert(desc->block.height == 1);
-
-   type = LLVMIntType(desc->block.bits);
-
-   packed = lp_build_pack_rgba(builder, format, rgba);
-
-   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
-   LLVMBuildStore(builder, packed, ptr);
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_unpack.c b/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
deleted file mode 100644
index d70faac1ba..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_bld_unpack.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "util/u_format.h"
-
-#include "lp_bld_format.h"
-
-
-LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
-                     enum pipe_format format,
-                     LLVMValueRef packed)
-{
-   const struct util_format_description *desc;
-   LLVMTypeRef type;
-   LLVMValueRef shifted, casted, scaled, masked;
-   LLVMValueRef shifts[4];
-   LLVMValueRef masks[4];
-   LLVMValueRef scales[4];
-   LLVMValueRef swizzles[4];
-   LLVMValueRef aux[4];
-   bool normalized;
-   int empty_channel;
-   unsigned shift;
-   unsigned i;
-
-   desc = util_format_description(format);
-
-   /* FIXME: Support more formats */
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
-   assert(desc->block.width == 1);
-   assert(desc->block.height == 1);
-   assert(desc->block.bits <= 32);
-
-   type = LLVMIntType(desc->block.bits);
-
-   /* Do the intermediate integer computations with 32bit integers since it
-    * matches floating point size */
-   if (desc->block.bits < 32)
-      packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
-
-   /* Broadcast the packed value to all four channels */
-   packed = LLVMBuildInsertElement(builder,
-                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
-                                   packed,
-                                   LLVMConstNull(LLVMInt32Type()),
-                                   "");
-   packed = LLVMBuildShuffleVector(builder,
-                                   packed,
-                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
-                                   LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
-                                   "");
-
-   /* Initialize vector constants */
-   normalized = FALSE;
-   empty_channel = -1;
-   shift = 0;
-   for (i = 0; i < 4; ++i) {
-      unsigned bits = desc->channel[i].size;
-
-      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
-         shifts[i] = LLVMGetUndef(LLVMInt32Type());
-         masks[i] = LLVMConstNull(LLVMInt32Type());
-         scales[i] =  LLVMConstNull(LLVMFloatType());
-         empty_channel = i;
-      }
-      else {
-         unsigned mask = (1 << bits) - 1;
-
-         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
-         assert(bits < 32);
-
-         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
-         masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
-
-         if (desc->channel[i].normalized) {
-            scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
-            normalized = TRUE;
-         }
-         else
-            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
-      }
-
-      shift += bits;
-   }
-
-   shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
-   masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
-   // UIToFP can't be expressed in SSE2
-   casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
-
-   if (normalized)
-      scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
-   else
-      scaled = casted;
-
-   for (i = 0; i < 4; ++i)
-      aux[i] = LLVMGetUndef(LLVMFloatType());
-
-   for (i = 0; i < 4; ++i) {
-      enum util_format_swizzle swizzle = desc->swizzle[i];
-
-      switch (swizzle) {
-      case UTIL_FORMAT_SWIZZLE_X:
-      case UTIL_FORMAT_SWIZZLE_Y:
-      case UTIL_FORMAT_SWIZZLE_Z:
-      case UTIL_FORMAT_SWIZZLE_W:
-         swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
-         break;
-      case UTIL_FORMAT_SWIZZLE_0:
-         assert(empty_channel >= 0);
-         swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
-         break;
-      case UTIL_FORMAT_SWIZZLE_1:
-         swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
-         aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
-         break;
-      case UTIL_FORMAT_SWIZZLE_NONE:
-         swizzles[i] = LLVMGetUndef(LLVMFloatType());
-         assert(0);
-         break;
-      }
-   }
-
-   return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
-}
-
-- 
cgit v1.2.3


From 1e6cc1cf3728f715eb9c63c942b09fe42570cfc2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 24 Aug 2009 14:56:51 +0100
Subject: llvmpipe: Fix build.

---
 src/gallium/drivers/llvmpipe/Makefile   | 2 +-
 src/gallium/drivers/llvmpipe/SConscript | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 0179a0bb26..db662c339e 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -14,7 +14,7 @@ C_SOURCES = \
 	lp_bld_debug.c \
 	lp_bld_depth.c \
 	lp_bld_flow.c \
-	lp_bld_format.c \
+	lp_bld_format_aos.c \
 	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_logic.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 218d0387df..26bf8b08fb 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -18,7 +18,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_debug.c',
 		'lp_bld_depth.c',
 		'lp_bld_flow.c',
-		'lp_bld_format.c',
+		'lp_bld_format_aos.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
 		'lp_bld_struct.c',
-- 
cgit v1.2.3


From e173a9bbd64dc38dba6b881ed7a9faea02861042 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 29 Aug 2009 20:02:25 +0100
Subject: llvmpipe: Define a winsys for LLVM. Drop pipe_winsys

lp_winsys will eventually be unified with softpipe's, but we
are free to move quicker since we don't have the myriad of users yet.

Will provide a pipe_winsys adaptor from Keith's softpipe-private-winsys
soon.
---
 src/gallium/drivers/llvmpipe/SConscript       |   1 +
 src/gallium/drivers/llvmpipe/lp_draw_arrays.c |  10 +--
 src/gallium/drivers/llvmpipe/lp_screen.c      |  53 +++++++++++--
 src/gallium/drivers/llvmpipe/lp_screen.h      |   5 ++
 src/gallium/drivers/llvmpipe/lp_texture.c     | 106 +++++++++++++-------------
 src/gallium/drivers/llvmpipe/lp_texture.h     |  13 +++-
 src/gallium/drivers/llvmpipe/lp_winsys.h      |  87 ++++++++++++++++++---
 7 files changed, 196 insertions(+), 79 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 26bf8b08fb..6bceb84da4 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -26,6 +26,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_swizzle.c',
 		'lp_bld_tgsi_soa.c',		
 		'lp_bld_type.c',
+		'lp_buffer.c',
 		'lp_clear.c',
 		'lp_context.c',
 		'lp_draw_arrays.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 6a89b74e3a..0f75afc79b 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -47,13 +47,13 @@
 static void
 llvmpipe_map_constant_buffers(struct llvmpipe_context *lp)
 {
-   struct pipe_winsys *ws = lp->pipe.winsys;
+   struct pipe_screen *screen = lp->pipe.screen;
    uint i, size;
 
    for (i = 0; i < PIPE_SHADER_TYPES; i++) {
       if (lp->constants[i].buffer && lp->constants[i].buffer->size)
-         lp->mapped_constants[i] = ws->buffer_map(ws, lp->constants[i].buffer,
-                                                  PIPE_BUFFER_USAGE_CPU_READ);
+         lp->mapped_constants[i] = screen->buffer_map(screen, lp->constants[i].buffer,
+                                                      PIPE_BUFFER_USAGE_CPU_READ);
    }
 
    if (lp->constants[PIPE_SHADER_VERTEX].buffer)
@@ -72,7 +72,7 @@ llvmpipe_map_constant_buffers(struct llvmpipe_context *lp)
 static void
 llvmpipe_unmap_constant_buffers(struct llvmpipe_context *lp)
 {
-   struct pipe_winsys *ws = lp->pipe.winsys;
+   struct pipe_screen *screen = lp->pipe.screen;
    uint i;
 
    /* really need to flush all prims since the vert/frag shaders const buffers
@@ -86,7 +86,7 @@ llvmpipe_unmap_constant_buffers(struct llvmpipe_context *lp)
 
    for (i = 0; i < 2; i++) {
       if (lp->constants[i].buffer && lp->constants[i].buffer->size)
-         ws->buffer_unmap(ws, lp->constants[i].buffer);
+         screen->buffer_unmap(screen, lp->constants[i].buffer);
       lp->mapped_constants[i] = NULL;
    }
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index f302b99ad7..125035771e 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -33,6 +33,7 @@
 #include "pipe/p_screen.h"
 
 #include "lp_texture.h"
+#include "lp_buffer.h"
 #include "lp_winsys.h"
 #include "lp_jit.h"
 #include "lp_screen.h"
@@ -41,7 +42,7 @@
 static const char *
 llvmpipe_get_vendor(struct pipe_screen *screen)
 {
-   return "Tungsten Graphics, Inc.";
+   return "VMware, Inc.";
 }
 
 
@@ -126,12 +127,15 @@ llvmpipe_get_paramf(struct pipe_screen *screen, int param)
  * \param type  one of PIPE_TEXTURE, PIPE_SURFACE
  */
 static boolean
-llvmpipe_is_format_supported( struct pipe_screen *screen,
+llvmpipe_is_format_supported( struct pipe_screen *_screen,
                               enum pipe_format format, 
                               enum pipe_texture_target target,
                               unsigned tex_usage, 
                               unsigned geom_flags )
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+   struct llvmpipe_winsys *winsys = screen->winsys;
+
    assert(target == PIPE_TEXTURE_1D ||
           target == PIPE_TEXTURE_2D ||
           target == PIPE_TEXTURE_3D ||
@@ -149,8 +153,42 @@ llvmpipe_is_format_supported( struct pipe_screen *screen,
    case PIPE_FORMAT_DXT5_RGBA:
       return FALSE;
    default:
-      return TRUE;
+      break;
    }
+
+   if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
+      return winsys->is_displaytarget_format_supported(winsys, format);
+
+   return TRUE;
+}
+
+
+static struct pipe_buffer *
+llvmpipe_surface_buffer_create(struct pipe_screen *screen,
+                               unsigned width, unsigned height,
+                               enum pipe_format format,
+                               unsigned tex_usage,
+                               unsigned usage,
+                               unsigned *stride)
+{
+   /* This function should never be used */
+   assert(0);
+   return NULL;
+}
+
+
+static void
+llvmpipe_flush_frontbuffer(struct pipe_screen *_screen,
+                           struct pipe_surface *surface,
+                           void *context_private)
+{
+   struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+   struct llvmpipe_winsys *winsys = screen->winsys;
+   struct llvmpipe_texture *texture = llvmpipe_texture(surface->texture);
+
+   assert(texture->dt);
+   if (texture->dt)
+      winsys->displaytarget_display(winsys, texture->dt, context_private);
 }
 
 
@@ -176,14 +214,14 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
  * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
  */
 struct pipe_screen *
-llvmpipe_create_screen(struct pipe_winsys *winsys)
+llvmpipe_create_screen(struct llvmpipe_winsys *winsys)
 {
    struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen);
 
    if (!screen)
       return NULL;
 
-   screen->base.winsys = winsys;
+   screen->winsys = winsys;
 
    screen->base.destroy = llvmpipe_destroy_screen;
 
@@ -193,8 +231,11 @@ llvmpipe_create_screen(struct pipe_winsys *winsys)
    screen->base.get_paramf = llvmpipe_get_paramf;
    screen->base.is_format_supported = llvmpipe_is_format_supported;
 
+   screen->base.surface_buffer_create = llvmpipe_surface_buffer_create;
+   screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
+
    llvmpipe_init_screen_texture_funcs(&screen->base);
-   u_simple_screen_init(&screen->base);
+   llvmpipe_init_screen_buffer_funcs(&screen->base);
 
    lp_jit_screen_init(screen);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h
index 98d2789159..4a1b4d6f3e 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.h
+++ b/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -43,10 +43,15 @@
 #include "pipe/p_defines.h"
 
 
+struct llvmpipe_winsys;
+
+
 struct llvmpipe_screen
 {
    struct pipe_screen base;
 
+   struct llvmpipe_winsys *winsys;
+
    LLVMModuleRef module;
    LLVMExecutionEngineRef engine;
    LLVMModuleProviderRef provider;
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 18e348d69e..724d437833 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -52,7 +52,7 @@
 /* Conventional allocation path for non-display textures:
  */
 static boolean
-llvmpipe_texture_layout(struct pipe_screen *screen,
+llvmpipe_texture_layout(struct llvmpipe_screen *screen,
                         struct llvmpipe_texture * lpt)
 {
    struct pipe_texture *pt = &lpt->base;
@@ -84,34 +84,29 @@ llvmpipe_texture_layout(struct pipe_screen *screen,
       depth = minify(depth);
    }
 
-   lpt->buffer = screen->buffer_create(screen, 32,
-                                       PIPE_BUFFER_USAGE_PIXEL,
-                                       buffer_size);
+   lpt->data = align_malloc(buffer_size, 16);
 
-   return lpt->buffer != NULL;
+   return lpt->data != NULL;
 }
 
 static boolean
-llvmpipe_displaytarget_layout(struct pipe_screen *screen,
+llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
                               struct llvmpipe_texture * lpt)
 {
-   unsigned usage = (PIPE_BUFFER_USAGE_CPU_READ_WRITE |
-                     PIPE_BUFFER_USAGE_GPU_READ_WRITE);
-   unsigned tex_usage = lpt->base.tex_usage;
+   struct llvmpipe_winsys *winsys = screen->winsys;
 
    pf_get_block(lpt->base.format, &lpt->base.block);
    lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);  
    lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);  
 
-   lpt->buffer = screen->surface_buffer_create( screen, 
-                                                lpt->base.width[0], 
-                                                lpt->base.height[0],
-                                                lpt->base.format,
-                                                usage,
-                                                tex_usage,
-                                                &lpt->stride[0]);
+   lpt->dt = winsys->displaytarget_create(winsys,
+                                          lpt->base.format,
+                                          lpt->base.width[0],
+                                          lpt->base.height[0],
+                                          16,
+                                          &lpt->stride[0] );
 
-   return lpt->buffer != NULL;
+   return lpt->dt != NULL;
 }
 
 
@@ -119,16 +114,17 @@ llvmpipe_displaytarget_layout(struct pipe_screen *screen,
 
 
 static struct pipe_texture *
-llvmpipe_texture_create(struct pipe_screen *screen,
+llvmpipe_texture_create(struct pipe_screen *_screen,
                         const struct pipe_texture *templat)
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
    struct llvmpipe_texture *lpt = CALLOC_STRUCT(llvmpipe_texture);
    if (!lpt)
       return NULL;
 
    lpt->base = *templat;
    pipe_reference_init(&lpt->base.reference, 1);
-   lpt->base.screen = screen;
+   lpt->base.screen = &screen->base;
 
    /* XXX: The xlib state tracker is brain-dead and will request
     * PIPE_FORMAT_Z16_UNORM no matter how much we tell it we don't support it.
@@ -160,6 +156,8 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
                          const unsigned *stride,
                          struct pipe_buffer *buffer)
 {
+   /* FIXME */
+#if 0
    struct llvmpipe_texture *lpt;
    assert(screen);
 
@@ -184,15 +182,25 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
    pipe_buffer_reference(&lpt->buffer, buffer);
 
    return &lpt->base;
+#else
+   return NULL;
+#endif
 }
 
 
 static void
 llvmpipe_texture_destroy(struct pipe_texture *pt)
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(pt->screen);
    struct llvmpipe_texture *lpt = llvmpipe_texture(pt);
 
-   pipe_buffer_reference(&lpt->buffer, NULL);
+   if(lpt->dt) {
+      struct llvmpipe_winsys *winsys = screen->winsys;
+      winsys->displaytarget_destroy(winsys, lpt->dt);
+   }
+   else
+      align_free(lpt->data);
+
    FREE(lpt);
 }
 
@@ -333,27 +341,34 @@ llvmpipe_tex_transfer_destroy(struct pipe_transfer *transfer)
 
 
 static void *
-llvmpipe_transfer_map( struct pipe_screen *screen,
+llvmpipe_transfer_map( struct pipe_screen *_screen,
                        struct pipe_transfer *transfer )
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
    ubyte *map, *xfer_map;
    struct llvmpipe_texture *lpt;
-   unsigned flags = 0;
 
    assert(transfer->texture);
    lpt = llvmpipe_texture(transfer->texture);
 
-   if (transfer->usage != PIPE_TRANSFER_READ) {
-      flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
-   }
+   if(lpt->dt) {
+      struct llvmpipe_winsys *winsys = screen->winsys;
+      unsigned flags = 0;
 
-   if (transfer->usage != PIPE_TRANSFER_WRITE) {
-      flags |= PIPE_BUFFER_USAGE_CPU_READ;
-   }
+      if (transfer->usage != PIPE_TRANSFER_READ) {
+         flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
+      }
 
-   map = pipe_buffer_map(screen, lpt->buffer, flags);
-   if (map == NULL)
-      return NULL;
+      if (transfer->usage != PIPE_TRANSFER_WRITE) {
+         flags |= PIPE_BUFFER_USAGE_CPU_READ;
+      }
+
+      map = winsys->displaytarget_map(winsys, lpt->dt, flags);
+      if (map == NULL)
+         return NULL;
+   }
+   else
+      map = lpt->data;
 
    /* May want to different things here depending on read/write nature
     * of the map:
@@ -363,7 +378,7 @@ llvmpipe_transfer_map( struct pipe_screen *screen,
       /* Do something to notify sharing contexts of a texture change.
        * In llvmpipe, that would mean flushing the texture cache.
        */
-      llvmpipe_screen(screen)->timestamp++;
+      screen->timestamp++;
    }
    
    xfer_map = map + llvmpipe_transfer(transfer)->offset +
@@ -375,15 +390,19 @@ llvmpipe_transfer_map( struct pipe_screen *screen,
 
 
 static void
-llvmpipe_transfer_unmap(struct pipe_screen *screen,
+llvmpipe_transfer_unmap(struct pipe_screen *_screen,
                        struct pipe_transfer *transfer)
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
    struct llvmpipe_texture *lpt;
 
    assert(transfer->texture);
    lpt = llvmpipe_texture(transfer->texture);
 
-   pipe_buffer_unmap( screen, lpt->buffer );
+   if(lpt->dt) {
+      struct llvmpipe_winsys *winsys = screen->winsys;
+      winsys->displaytarget_unmap(winsys, lpt->dt);
+   }
 }
 
 
@@ -408,22 +427,3 @@ llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen)
    screen->transfer_map = llvmpipe_transfer_map;
    screen->transfer_unmap = llvmpipe_transfer_unmap;
 }
-
-
-boolean
-llvmpipe_get_texture_buffer( struct pipe_texture *texture,
-                             struct pipe_buffer **buf,
-                             unsigned *stride )
-{
-   struct llvmpipe_texture *tex = (struct llvmpipe_texture *)texture;
-
-   if (!tex)
-      return FALSE;
-
-   pipe_buffer_reference(buf, tex->buffer);
-
-   if (stride)
-      *stride = tex->stride[0];
-
-   return TRUE;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index a1ed6b0ac2..00a20763e4 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -35,7 +35,7 @@
 struct pipe_context;
 struct pipe_screen;
 struct llvmpipe_context;
-
+struct llvmpipe_displaytarget;
 
 struct llvmpipe_texture
 {
@@ -44,9 +44,16 @@ struct llvmpipe_texture
    unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS];
    unsigned stride[PIPE_MAX_TEXTURE_LEVELS];
 
-   /* The data is held here:
+   /**
+    * Display target, for textures with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET
+    * usage.
+    */
+   struct llvmpipe_displaytarget *dt;
+
+   /**
+    * Malloc'ed data for regular textures, or a mapping to dt above.
     */
-   struct pipe_buffer *buffer;
+   void *data;
 
    unsigned timestamp;
 };
diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h
index 268336b690..595481c2cb 100644
--- a/src/gallium/drivers/llvmpipe/lp_winsys.h
+++ b/src/gallium/drivers/llvmpipe/lp_winsys.h
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2009 VMware, Inc.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,9 +25,9 @@
  * 
  **************************************************************************/
 
-/* This is the interface that llvmpipe requires any window system
- * hosting it to implement.  This is the only include file in llvmpipe
- * which is public.
+/**
+ * @file
+ * llvmpipe public interface.
  */
 
 
@@ -35,27 +35,90 @@
 #define LP_WINSYS_H
 
 
+#include "pipe/p_compiler.h" // for boolean
+#include "pipe/p_format.h"
+
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 
 struct pipe_screen;
-struct pipe_winsys;
 struct pipe_context;
 
 
-struct pipe_context *llvmpipe_create( struct pipe_screen * );
+/**
+ * Opaque pointer.
+ */
+struct llvmpipe_displaytarget;
 
 
-struct pipe_screen *
-llvmpipe_create_screen(struct pipe_winsys *);
+/**
+ * This is the interface that llvmpipe expects any window system
+ * hosting it to implement.
+ * 
+ * llvmpipe is for the most part a self sufficient driver. The only thing it
+ * does not know is how to display a surface.
+ */
+struct llvmpipe_winsys
+{
+   void 
+   (*destroy)( struct llvmpipe_winsys *ws );
+
+   boolean
+   (*is_displaytarget_format_supported)( struct llvmpipe_winsys *ws,
+                                         enum pipe_format format );
+   
+   /**
+    * Allocate storage for a render target.
+    * 
+    * Often surfaces which are meant to be blitted to the front screen (i.e.,
+    * display targets) must be allocated with special characteristics, memory 
+    * pools, or obtained directly from the windowing system.
+    *  
+    * This callback is invoked by the pipe_screen when creating a texture marked
+    * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying 
+    * storage.
+    */
+   struct llvmpipe_displaytarget *
+   (*displaytarget_create)( struct llvmpipe_winsys *ws,
+                            enum pipe_format format,
+                            unsigned width, unsigned height,
+                            unsigned alignment,
+                            unsigned *stride );
+
+   void *
+   (*displaytarget_map)( struct llvmpipe_winsys *ws, 
+                         struct llvmpipe_displaytarget *dt,
+                         unsigned flags );
+
+   void
+   (*displaytarget_unmap)( struct llvmpipe_winsys *ws,
+                           struct llvmpipe_displaytarget *dt );
+
+   /**
+    * @sa pipe_screen:flush_frontbuffer.
+    *
+    * This call will likely become asynchronous eventually.
+    */
+   void
+   (*displaytarget_display)( struct llvmpipe_winsys *ws, 
+                             struct llvmpipe_displaytarget *dt,
+                             void *context_private );
+
+   void 
+   (*displaytarget_destroy)( struct llvmpipe_winsys *ws, 
+                             struct llvmpipe_displaytarget *dt );
+};
+
+
+struct pipe_context *
+llvmpipe_create( struct pipe_screen * );
 
 
-boolean
-llvmpipe_get_texture_buffer( struct pipe_texture *texture,
-                             struct pipe_buffer **buf,
-                             unsigned *stride );
+struct pipe_screen *
+llvmpipe_create_screen( struct llvmpipe_winsys * );
 
 
 #ifdef __cplusplus
-- 
cgit v1.2.3


From 836a9f0ae6e03d2f92dc024703015c25a5b3c353 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 1 Sep 2009 12:22:52 +0100
Subject: scons: Tool for LLVM. Gracefully disable llvmpipe if LLVM not found.

---
 scons/llvm.py                           | 81 +++++++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/SConscript |  8 ++--
 src/gallium/winsys/xlib/SConscript      | 12 ++---
 3 files changed, 92 insertions(+), 9 deletions(-)
 create mode 100644 scons/llvm.py

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/scons/llvm.py b/scons/llvm.py
new file mode 100644
index 0000000000..14306bc0fe
--- /dev/null
+++ b/scons/llvm.py
@@ -0,0 +1,81 @@
+"""llvm
+
+Tool-specific initialization for LLVM
+
+"""
+
+#
+# Copyright (c) 2009 VMware, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+import os
+import os.path
+import subprocess
+
+import SCons.Action
+import SCons.Errors
+import SCons.Util
+
+
+def generate(env):
+    try:
+        llvm_dir = os.environ['LLVM']
+    except KeyError:
+        # Do nothing -- use the system headers/libs
+        pass
+    else:
+        if not os.path.isdir(llvm_dir):
+            raise SCons.Errors.InternalError, "Specified LLVM directory not found"
+
+        if env['debug']:
+            llvm_subdir = 'Debug'
+        else:
+            llvm_subdir = 'Release'
+
+        llvm_bin_dir = os.path.join(llvm_dir, llvm_subdir, 'bin')
+        if not os.path.isdir(llvm_bin_dir):
+            raise SCons.Errors.InternalError, "LLVM build directory not found"
+
+        env.PrependENVPath('PATH', llvm_bin_dir)
+
+    if env.Detect('llvm-config'):
+        pipe = SCons.Action._subproc(env, 
+                                     ['llvm-config', '--version'],
+                                     stdin = 'devnull',
+                                     stderr = 'devnull',
+                                     stdout = subprocess.PIPE)
+        if pipe.wait() != 0:
+            return
+        line = pipe.stdout.read().strip()
+        if not line:
+            return
+        env['LLVM_VERSION'] = line
+
+        env.ParseConfig('llvm-config --cppflags')
+        env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
+        env.ParseConfig('llvm-config --ldflags')
+        env['LINK'] = env['CXX']
+
+def exists(env):
+    return True
+
+# vim:set ts=4 sw=4 et:
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 6bceb84da4..5c29bdac56 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -2,8 +2,12 @@ Import('*')
 
 env = env.Clone()
 
+env.Tool('llvm')
+if 'LLVM_VERSION' not in env:
+    print 'warning: LLVM not found: not building llvmpipe'
+    Return()
+
 env.Tool('udis86')
-env.ParseConfig('llvm-config --cppflags')
 
 llvmpipe = env.ConvenienceLibrary(
 	target = 'llvmpipe',
@@ -57,8 +61,6 @@ llvmpipe = env.ConvenienceLibrary(
 
 env = env.Clone()
 
-env['LINK'] = env['CXX']
-env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
 env.Prepend(LIBS = [llvmpipe] + auxiliaries)
 
 env.Program(
diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript
index 518fd2b5a8..467d595d33 100644
--- a/src/gallium/winsys/xlib/SConscript
+++ b/src/gallium/winsys/xlib/SConscript
@@ -30,12 +30,12 @@ if env['platform'] == 'linux' \
         drivers += [softpipe]
 
     if 'llvmpipe' in env['drivers']:
-        env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE')
-        env.Tool('udis86')
-        env.ParseConfig('llvm-config --libs jit interpreter nativecodegen')
-        env['LINK'] = env['CXX']
-        sources += ['xlib_llvmpipe.c']
-        drivers += [llvmpipe]
+        env.Tool('llvm')
+        if 'LLVM_VERSION' in env:
+            env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE')
+            env.Tool('udis86')
+            sources += ['xlib_llvmpipe.c']
+            drivers += [llvmpipe]
 
     if 'i965simple' in env['drivers']:
         env.Append(CPPDEFINES = 'GALLIUM_I965SIMPLE')
-- 
cgit v1.2.3


From 866fbacf2bf93282f622f1f455250491d0b3b63f Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:24:31 +0100
Subject: llvmpipe: SoA pixel unpacking specialization.

---
 src/gallium/drivers/llvmpipe/Makefile            |   1 +
 src/gallium/drivers/llvmpipe/SConscript          |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_format.h     |  62 +++++---
 src/gallium/drivers/llvmpipe/lp_bld_format_aos.c |  30 ++--
 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 193 +++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_test_format.c    |   4 +-
 6 files changed, 252 insertions(+), 39 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 6e63a0c2b7..54bb3a0e73 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -15,6 +15,7 @@ C_SOURCES = \
 	lp_bld_depth.c \
 	lp_bld_flow.c \
 	lp_bld_format_aos.c \
+	lp_bld_format_soa.c \
 	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_logic.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 5c29bdac56..3dcd5cb994 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -23,6 +23,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_depth.c',
 		'lp_bld_flow.c',
 		'lp_bld_format_aos.c',
+		'lp_bld_format_soa.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
 		'lp_bld_struct.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 01c8a752d1..5ee0656093 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -31,20 +31,14 @@
 
 /**
  * @file
- * LLVM IR building helpers interfaces.
- *
- * We use LLVM-C bindings for now. They are not documented, but follow the C++
- * interfaces very closely, and appear to be complete enough for code
- * genration. See
- * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- * for a standalone example.
+ * Pixel format helpers.
  */
 
 #include <llvm-c/Core.h>  
- 
-#include "pipe/p_format.h"
 
+#include "pipe/p_format.h"
 
+struct util_format_description;
 union lp_type;
 
 
@@ -56,9 +50,9 @@ union lp_type;
  * @return RGBA in a 4 floats vector.
  */
 LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
-                     enum pipe_format format, 
-                     LLVMValueRef packed);
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+                         enum pipe_format format,
+                         LLVMValueRef packed);
 
 
 /**
@@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
  * @param rgba 4 float vector with the unpacked components.
  */
 LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format,
-                   LLVMValueRef rgba);
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+                       enum pipe_format format,
+                       LLVMValueRef rgba);
 
 
 /**
@@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
  * @return RGBA in a 4 floats vector.
  */
 LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format, 
-                   LLVMValueRef ptr);
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+                       enum pipe_format format,
+                       LLVMValueRef ptr);
 
 
 /**
@@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder,
  * @param rgba 4 float vector with the unpacked components.
  */
 void 
-lp_build_store_rgba(LLVMBuilderRef builder,
-                    enum pipe_format format,
-                    LLVMValueRef ptr,
-                    LLVMValueRef rgba);
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+                        enum pipe_format format,
+                        LLVMValueRef ptr,
+                        LLVMValueRef rgba);
 
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+                unsigned length,
+                unsigned src_width,
+                unsigned dst_width,
+                LLVMValueRef base_ptr,
+                LLVMValueRef offsets);
+
+
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+                         const struct util_format_description *format_desc,
+                         union lp_type type,
+                         LLVMValueRef packed,
+                         LLVMValueRef *rgba);
+
+
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+                       const struct util_format_description *format_desc,
+                       union lp_type type,
+                       LLVMValueRef base_ptr,
+                       LLVMValueRef offsets,
+                       LLVMValueRef *rgba);
 
 #endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index dcbc0076c7..b9b5d84bed 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -32,9 +32,9 @@
 
 
 LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
-                     enum pipe_format format,
-                     LLVMValueRef packed)
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+                         enum pipe_format format,
+                         LLVMValueRef packed)
 {
    const struct util_format_description *desc;
    LLVMTypeRef type;
@@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
 
 
 LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format,
-                   LLVMValueRef rgba)
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+                       enum pipe_format format,
+                       LLVMValueRef rgba)
 {
    const struct util_format_description *desc;
    LLVMTypeRef type;
@@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
 
 
 LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
-                   enum pipe_format format,
-                   LLVMValueRef ptr)
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+                       enum pipe_format format,
+                       LLVMValueRef ptr)
 {
    const struct util_format_description *desc;
    LLVMTypeRef type;
@@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder,
 
    packed = LLVMBuildLoad(builder, ptr, "");
 
-   return lp_build_unpack_rgba(builder, format, packed);
+   return lp_build_unpack_rgba_aos(builder, format, packed);
 }
 
 
 void
-lp_build_store_rgba(LLVMBuilderRef builder,
-                    enum pipe_format format,
-                    LLVMValueRef ptr,
-                    LLVMValueRef rgba)
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+                        enum pipe_format format,
+                        LLVMValueRef ptr,
+                        LLVMValueRef rgba)
 {
    const struct util_format_description *desc;
    LLVMTypeRef type;
@@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder,
 
    type = LLVMIntType(desc->block.bits);
 
-   packed = lp_build_pack_rgba(builder, format, rgba);
+   packed = lp_build_pack_rgba_aos(builder, format, rgba);
 
    ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
new file mode 100644
index 0000000000..36bac06d2e
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -0,0 +1,193 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_format.h"
+
+
+/**
+ * Gather elements from scattered positions in memory into a single vector.
+ *
+ * @param src_width width in bits of each element as stored in memory
+ * @param dst_width width in bits of each result element (source is zero-extended to fit)
+ * @param length number of elements in the offsets vector and in the result
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with one offset per element, indexed off base_ptr
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+                unsigned length,
+                unsigned src_width,
+                unsigned dst_width,
+                LLVMValueRef base_ptr,
+                LLVMValueRef offsets)
+{
+   LLVMTypeRef src_type = LLVMIntType(src_width);
+   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+   LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+   LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+   LLVMValueRef res;
+   unsigned i;
+
+   res = LLVMGetUndef(dst_vec_type); /* every element is overwritten by the loop below */
+   for(i = 0; i < length; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); /* element number */
+      LLVMValueRef elem_offset;
+      LLVMValueRef elem_ptr;
+      LLVMValueRef elem;
+
+      elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+      elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+      elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); /* load as a src_width-bit integer */
+      elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+      assert(src_width <= dst_width); /* narrowing gathers are not expected */
+      if(src_width > dst_width)
+         elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+      if(src_width < dst_width)
+         elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+      res = LLVMBuildInsertElement(builder, res, elem, index, "");
+   }
+
+   return res;
+}
+
+/** Unpack packed pixels into four SoA channel vectors, written to rgba[0..3] in swizzled order. */
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+                         const struct util_format_description *format_desc, /* pixel layout to decode */
+                         union lp_type type,      /* type of the output vectors */
+                         LLVMValueRef packed,     /* packed pixels (integer vector) */
+                         LLVMValueRef *rgba)      /* out: four channel vectors */
+{
+   LLVMValueRef inputs[4];
+   unsigned start;
+   unsigned chan;
+
+   /* FIXME: Support more formats */
+   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+   assert(format_desc->block.bits <= 32);
+
+   /* Decode the input vector components; channels are consumed from bit 0 upwards */
+   start = 0;
+   for (chan = 0; chan < 4; ++chan) {
+      unsigned width = format_desc->channel[chan].size;
+      unsigned stop = start + width;
+      LLVMValueRef input;
+
+      input = packed;
+
+      switch(format_desc->channel[chan].type) {
+      case UTIL_FORMAT_TYPE_VOID:
+         input = NULL;
+         break;
+
+      case UTIL_FORMAT_TYPE_UNSIGNED:
+         if(type.floating) {
+            if(start)
+               input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), "");
+            if(stop < format_desc->block.bits) {
+               unsigned mask = ((unsigned long long)1 << width) - 1;
+               input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), "");
+            }
+
+            if(format_desc->channel[chan].normalized)
+               input = lp_build_unsigned_norm_to_float(builder, width, type, input);
+            else /* input is an unsigned integer here, so convert int -> float (was FPToSI) */
+               input = LLVMBuildUIToFP(builder, input, lp_build_vec_type(type), "");
+         }
+         else {
+            /* FIXME */
+            assert(0);
+            input = lp_build_undef(type);
+         }
+         break;
+
+      default:
+         /* unsupported channel type: leave the channel undefined */
+         input = lp_build_undef(type);
+         break;
+      }
+
+      inputs[chan] = input;
+
+      start = stop;
+   }
+
+   for (chan = 0; chan < 4; ++chan) { /* apply the format swizzle */
+      enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+
+      switch (swizzle) {
+      case UTIL_FORMAT_SWIZZLE_X:
+      case UTIL_FORMAT_SWIZZLE_Y:
+      case UTIL_FORMAT_SWIZZLE_Z:
+      case UTIL_FORMAT_SWIZZLE_W:
+         rgba[chan] = inputs[swizzle]; /* NULL if that channel is void, undef if unsupported */
+         break;
+      case UTIL_FORMAT_SWIZZLE_0:
+         rgba[chan] = lp_build_zero(type);
+         break;
+      case UTIL_FORMAT_SWIZZLE_1:
+         rgba[chan] = lp_build_one(type);
+         break;
+      case UTIL_FORMAT_SWIZZLE_NONE:
+         rgba[chan] = lp_build_undef(type);
+         break;
+      }
+   }
+}
+
+/** Gather one packed pixel per element from base_ptr + offsets and unpack it into rgba[0..3]. */
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+                       const struct util_format_description *format_desc,
+                       union lp_type type,
+                       LLVMValueRef base_ptr,
+                       LLVMValueRef offsets,
+                       LLVMValueRef *rgba)
+{
+   LLVMValueRef packed;
+
+   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); /* same limits as lp_build_unpack_rgba_soa */
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+   assert(format_desc->block.bits <= 32);
+
+   packed = lp_build_gather(builder, /* block.bits-wide loads, widened to type.width */
+                            type.length, format_desc->block.bits, type.width,
+                            base_ptr, offsets);
+
+   lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 1d192355ee..d8455e5649 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module,
 
    lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
 
-   rgba = lp_build_load_rgba(builder, format, ptr);
+   rgba = lp_build_load_rgba_aos(builder, format, ptr);
    LLVMBuildStore(builder, rgba, rgba_ptr);
 
    lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
@@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module,
 
    rgba = LLVMBuildLoad(builder, rgba_ptr, "");
 
-   lp_build_store_rgba(builder, format, ptr, rgba);
+   lp_build_store_rgba_aos(builder, format, ptr, rgba);
 
    LLVMBuildRetVoid(builder);
 
-- 
cgit v1.2.3


From de8376e2f22a59a0bc18bb7ddab88ee3153678b8 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:43:51 +0100
Subject: llvmpipe: Texture sampling code generation primitives.

Only supports single level 2d textures, with nearest and bilinear
filtering for now.
---
 src/gallium/drivers/llvmpipe/Makefile            |   1 +
 src/gallium/drivers/llvmpipe/SConscript          |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_sample.h     | 134 +++++++++
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 342 +++++++++++++++++++++++
 4 files changed, 478 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample.h
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 54bb3a0e73..c0033dea34 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -19,6 +19,7 @@ C_SOURCES = \
 	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_logic.c \
+	lp_bld_sample_soa.c \
 	lp_bld_swizzle.c \
 	lp_bld_struct.c \
 	lp_bld_tgsi_soa.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 3dcd5cb994..b74c9c4b6e 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -26,6 +26,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_format_soa.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
+		'lp_bld_sample_soa.c',
 		'lp_bld_struct.c',
 		'lp_bld_logic.c',
 		'lp_bld_swizzle.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
new file mode 100644
index 0000000000..5798f191fe
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -0,0 +1,134 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_SAMPLE_H
+#define LP_BLD_SAMPLE_H
+
+
+#include <llvm-c/Core.h>
+
+struct pipe_texture;
+struct pipe_sampler_state;
+union lp_type;
+
+
+/**
+ * Sampler static state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are embedded in the generated code.
+ */
+struct lp_sampler_static_state
+{
+   /* pipe_texture's state */
+   enum pipe_format format;
+   unsigned pot_width:1;   /* util_is_pot(texture->width[0]) */
+   unsigned pot_height:1;  /* util_is_pot(texture->height[0]) */
+   unsigned pot_depth:1;   /* util_is_pot(texture->depth[0]) */
+
+   /* pipe_sampler_state's state, copied from the fields of the same name */
+   unsigned wrap_s:3;
+   unsigned wrap_t:3;
+   unsigned wrap_r:3;
+   unsigned min_img_filter:2;
+   unsigned min_mip_filter:2;
+   unsigned mag_img_filter:2;
+   unsigned compare_mode:1;
+   unsigned compare_func:3;
+   unsigned normalized_coords:1;
+   unsigned prefilter:4;
+};
+
+
+/**
+ * Sampler dynamic state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are computed at runtime.
+ *
+ * These are obtained through callbacks, as we don't want to tie the texture
+ * sampling code generation logic to any particular texture layout or pipe
+ * driver.
+ */
+struct lp_sampler_dynamic_state
+{
+
+   /** Obtain the base texture width. */
+   LLVMValueRef
+   (*width)( struct lp_sampler_dynamic_state *state,
+             LLVMBuilderRef builder,
+             unsigned unit);
+
+   /** Obtain the base texture height. */
+   LLVMValueRef
+   (*height)( struct lp_sampler_dynamic_state *state,
+              LLVMBuilderRef builder,
+              unsigned unit);
+
+   LLVMValueRef /* row stride: multiplied by y to form the texel offset */
+   (*stride)( struct lp_sampler_dynamic_state *state,
+              LLVMBuilderRef builder,
+              unsigned unit);
+
+   LLVMValueRef /* base pointer that texel offsets are applied to */
+   (*data_ptr)( struct lp_sampler_dynamic_state *state,
+                LLVMBuilderRef builder,
+                unsigned unit);
+
+};
+
+
+/**
+ * Derive the sampler static state.
+ */
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+                        const struct pipe_texture *texture,
+                        const struct pipe_sampler_state *sampler);
+
+
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+                    const struct lp_sampler_static_state *static_state,
+                    struct lp_sampler_dynamic_state *dynamic_state,
+                    union lp_type fp_type,
+                    unsigned unit,
+                    unsigned num_coords,
+                    const LLVMValueRef *coords,
+                    LLVMValueRef lodbias,
+                    LLVMValueRef *texel);
+
+
+
+#endif /* LP_BLD_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
new file mode 100644
index 0000000000..25c8d84501
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -0,0 +1,342 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_format.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+/** Derive the sampler static state; *state stays all-zero if texture or sampler is NULL. */
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+                        const struct pipe_texture *texture,
+                        const struct pipe_sampler_state *sampler)
+{
+   memset(state, 0, sizeof *state); /* so the early returns below leave a fully defined state */
+
+   if(!texture)
+      return;
+
+   if(!sampler)
+      return;
+
+   state->format            = texture->format;
+   state->pot_width         = util_is_pot(texture->width[0]);
+   state->pot_height        = util_is_pot(texture->height[0]);
+   state->pot_depth         = util_is_pot(texture->depth[0]);
+
+   state->wrap_s            = sampler->wrap_s;
+   state->wrap_t            = sampler->wrap_t;
+   state->wrap_r            = sampler->wrap_r;
+   state->min_img_filter    = sampler->min_img_filter;
+   state->min_mip_filter    = sampler->min_mip_filter;
+   state->mag_img_filter    = sampler->mag_img_filter;
+   state->compare_mode      = sampler->compare_mode;
+   state->compare_func      = sampler->compare_func;
+   state->normalized_coords = sampler->normalized_coords;
+   state->prefilter         = sampler->prefilter;
+}
+
+
+
+/**
+ * Keep all information for sampling code generation in a single place.
+ */
+struct lp_build_sample_context
+{
+   LLVMBuilderRef builder;
+
+   const struct lp_sampler_static_state *static_state;
+
+   struct lp_sampler_dynamic_state *dynamic_state;
+
+   const struct util_format_description *format_desc; /* util_format_description(static_state->format) */
+
+   /** Incoming coordinates type and build context */
+   union lp_type coord_type;
+   struct lp_build_context coord_bld;
+
+   /** Integer coordinates: lp_int_type(coord_type) and its build context */
+   union lp_type int_coord_type;
+   struct lp_build_context int_coord_bld;
+
+   /** Output texels type and build context */
+   union lp_type texel_type;
+   struct lp_build_context texel_bld;
+};
+
+/** Fetch the texels at integer coordinates (x, y) from data_ptr and unpack them into texel[0..3]. */
+static void
+lp_build_sample_texel(struct lp_build_sample_context *bld,
+                      LLVMValueRef x,
+                      LLVMValueRef y,
+                      LLVMValueRef y_stride,
+                      LLVMValueRef data_ptr,
+                      LLVMValueRef *texel)
+{
+   LLVMValueRef x_stride;
+   LLVMValueRef x_offset;
+   LLVMValueRef y_offset;
+   LLVMValueRef offset;
+
+   x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); /* bytes per pixel */
+
+   x_offset = lp_build_mul(&bld->int_coord_bld, x, x_stride);
+   y_offset = lp_build_mul(&bld->int_coord_bld, y, y_stride);
+
+   offset = lp_build_add(&bld->int_coord_bld, x_offset, y_offset); /* offset = x*x_stride + y*y_stride */
+
+   lp_build_load_rgba_soa(bld->builder,
+                          bld->format_desc,
+                          bld->texel_type,
+                          data_ptr,
+                          offset,
+                          texel);
+}
+
+/** Apply wrap_mode to an integer texel coordinate; only REPEAT and CLAMP are implemented. */
+static LLVMValueRef
+lp_build_sample_wrap(struct lp_build_sample_context *bld,
+                     LLVMValueRef coord,
+                     LLVMValueRef length,
+                     boolean is_pot,
+                     unsigned wrap_mode)
+{
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   LLVMValueRef length_minus_one;
+
+   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+   switch(wrap_mode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      if(is_pot) /* power-of-two length: masking with length - 1 replaces the remainder */
+         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
+      else
+         /* Signed remainder won't give the right results for negative
+          * dividends but unsigned remainder does. */
+         coord = LLVMBuildURem(bld->builder, coord, length, ""); /* NOTE(review): for negative coord, exact only if 2^bits % length == 0 -- confirm */
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP:
+      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+   default:
+      assert(0); /* not implemented; coord is returned unchanged if asserts are compiled out */
+   }
+
+   return coord;
+}
+
+/** Nearest filtering: floor (s, t) to integers, wrap them and fetch a single texel into texel[0..3]. */
+static void
+lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
+                               LLVMValueRef s,
+                               LLVMValueRef t,
+                               LLVMValueRef width,
+                               LLVMValueRef height,
+                               LLVMValueRef stride,
+                               LLVMValueRef data_ptr,
+                               LLVMValueRef *texel)
+{
+   LLVMValueRef x;
+   LLVMValueRef y;
+
+   x = lp_build_ifloor(&bld->coord_bld, s);
+   y = lp_build_ifloor(&bld->coord_bld, t);
+
+   x = lp_build_sample_wrap(bld, x, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+}
+
+/** Bilinear filtering: fetch the 2x2 texels around (s - 0.5, t - 0.5) and lerp them per channel. */
+static void
+lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
+                              LLVMValueRef s,
+                              LLVMValueRef t,
+                              LLVMValueRef width,
+                              LLVMValueRef height,
+                              LLVMValueRef stride,
+                              LLVMValueRef data_ptr,
+                              LLVMValueRef *texel)
+{
+   LLVMValueRef half;
+   LLVMValueRef s_ipart;
+   LLVMValueRef t_ipart;
+   LLVMValueRef s_fpart;
+   LLVMValueRef t_fpart;
+   LLVMValueRef x0, x1;
+   LLVMValueRef y0, y1;
+   LLVMValueRef neighbors[2][2][4]; /* indexed [row][column][channel] */
+   unsigned chan;
+
+   half = lp_build_const_scalar(bld->coord_type, 0.5);
+   s = lp_build_sub(&bld->coord_bld, s, half);
+   t = lp_build_sub(&bld->coord_bld, t, half);
+
+   s_ipart = lp_build_floor(&bld->coord_bld, s);
+   t_ipart = lp_build_floor(&bld->coord_bld, t);
+
+   s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); /* fractional parts are the lerp weights */
+   t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
+
+   x0 = lp_build_int(&bld->coord_bld, s_ipart);
+   y0 = lp_build_int(&bld->coord_bld, t_ipart);
+
+   x0 = lp_build_sample_wrap(bld, x0, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); /* next texel, wrapped again below */
+   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+   x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+   lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+   lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+   lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+
+   /* TODO: Don't interpolate missing channels */
+   for(chan = 0; chan < 4; ++chan) {
+      switch(bld->format_desc->swizzle[chan]) {
+      case UTIL_FORMAT_SWIZZLE_X:
+      case UTIL_FORMAT_SWIZZLE_Y:
+      case UTIL_FORMAT_SWIZZLE_Z:
+      case UTIL_FORMAT_SWIZZLE_W:
+         texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
+                                        s_fpart, t_fpart,
+                                        neighbors[0][0][chan],
+                                        neighbors[0][1][chan],
+                                        neighbors[1][0][chan],
+                                        neighbors[1][1][chan]);
+         break;
+      case UTIL_FORMAT_SWIZZLE_0: /* constant channels need no interpolation */
+         texel[chan] = bld->texel_bld.zero;
+         break;
+      case UTIL_FORMAT_SWIZZLE_1:
+         texel[chan] = bld->texel_bld.one;
+         break;
+      default:
+         assert(0);
+         texel[chan] = bld->texel_bld.undef;
+         break;
+      }
+   }
+}
+
+/** Generate code sampling a 2D texture at coords[0..1] for sampler `unit`, writing texel[0..3]. */
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+                    const struct lp_sampler_static_state *static_state,
+                    struct lp_sampler_dynamic_state *dynamic_state,
+                    union lp_type type,          /* used for coordinates and for texels */
+                    unsigned unit,
+                    unsigned num_coords,         /* number of valid entries in coords */
+                    const LLVMValueRef *coords,
+                    LLVMValueRef lodbias,        /* not referenced by this function */
+                    LLVMValueRef *texel)         /* out: four channel vectors */
+{
+   struct lp_build_sample_context bld;
+   LLVMValueRef width;
+   LLVMValueRef height;
+   LLVMValueRef stride;
+   LLVMValueRef data_ptr;
+   LLVMValueRef s;
+   LLVMValueRef t;
+   LLVMValueRef p;
+
+   /* Setup our build context */
+   memset(&bld, 0, sizeof bld);
+   bld.builder = builder;
+   bld.static_state = static_state;
+   bld.dynamic_state = dynamic_state;
+   bld.format_desc = util_format_description(static_state->format);
+   bld.coord_type = type;
+   bld.int_coord_type = lp_int_type(type);
+   bld.texel_type = type;
+   lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
+   lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
+   lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
+
+   /* Get the dynamic state */
+   width = dynamic_state->width(dynamic_state, builder, unit);
+   height = dynamic_state->height(dynamic_state, builder, unit);
+   stride = dynamic_state->stride(dynamic_state, builder, unit);
+   data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
+
+   s = coords[0];
+   t = coords[1];
+   p = num_coords > 2 ? coords[2] : NULL; /* don't read past the coords supplied; p is unused for now */
+
+   width = lp_build_broadcast_scalar(&bld.int_coord_bld, width);
+   height = lp_build_broadcast_scalar(&bld.int_coord_bld, height);
+   stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride);
+
+   if(static_state->normalized_coords) { /* scale normalized coordinates by the texture size */
+      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type);
+      LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, "");
+      LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, "");
+      s = lp_build_mul(&bld.coord_bld, s, fp_width);
+      t = lp_build_mul(&bld.coord_bld, t, fp_height);
+   }
+
+   switch (static_state->min_img_filter) { /* NOTE(review): no default case, texel is left unwritten for other values */
+   case PIPE_TEX_FILTER_NEAREST:
+      lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+   case PIPE_TEX_FILTER_ANISO:
+      lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+      break;
+   }
+}
-- 
cgit v1.2.3


From e4c76c02f77ed6e86537b546f4200f8f8132d114 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Sep 2009 14:52:39 +0100
Subject: llvmpipe: Code generate the texture sampling inside the shader.

Finally a substantial performance improvement: framerates of apps using
texturing tripled, and furthermore, enabling/disabling texturing only
affects around 15% of the framerate, which means the bottleneck is now
somewhere else.

Generated texture sampling code is not complete though -- we always
sample from the base level -- so final figures will be different.
---
 src/gallium/drivers/llvmpipe/Makefile             |    3 +-
 src/gallium/drivers/llvmpipe/SConscript           |    3 +-
 src/gallium/drivers/llvmpipe/lp_jit.c             |   37 +-
 src/gallium/drivers/llvmpipe/lp_jit.h             |   27 +
 src/gallium/drivers/llvmpipe/lp_state.h           |    6 +-
 src/gallium/drivers/llvmpipe/lp_state_derived.c   |    4 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c        |   15 +-
 src/gallium/drivers/llvmpipe/lp_state_sampler.c   |   10 +
 src/gallium/drivers/llvmpipe/lp_tex_sample.c      | 1713 ---------------------
 src/gallium/drivers/llvmpipe/lp_tex_sample.h      |   11 +
 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c    | 1713 +++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c |  196 +++
 12 files changed, 2019 insertions(+), 1719 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index c0033dea34..06c586e6bb 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -46,7 +46,8 @@ C_SOURCES = \
 	lp_state_vs.c \
 	lp_surface.c \
 	lp_tex_cache.c \
-	lp_tex_sample.c \
+	lp_tex_sample_c.c \
+	lp_tex_sample_llvm.c \
 	lp_texture.c \
 	lp_tile_cache.c \
 	lp_tile_soa.c
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index b74c9c4b6e..ac1b5d6d1d 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -54,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_state_vs.c',
 		'lp_surface.c',
 		'lp_tex_cache.c',
-		'lp_tex_sample.c',
+		'lp_tex_sample_c.c',
+		'lp_tex_sample_llvm.c',
 		'lp_texture.c',
 		'lp_tile_cache.c',
 		'lp_tile_soa.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index d288460a1b..9465f763d5 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -44,15 +44,47 @@
 static void
 lp_jit_init_globals(struct llvmpipe_screen *screen)
 {
-   /* struct lp_jit_context */
+   LLVMTypeRef texture_type;
+
+   /* struct lp_jit_texture */
    {
       LLVMTypeRef elem_types[4];
+
+      elem_types[LP_JIT_TEXTURE_WIDTH]  = LLVMInt32Type();
+      elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
+      elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
+      elem_types[LP_JIT_TEXTURE_DATA]   = LLVMPointerType(LLVMInt8Type(), 0);
+
+      texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
+                             screen->target, texture_type,
+                             LP_JIT_TEXTURE_WIDTH);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
+                             screen->target, texture_type,
+                             LP_JIT_TEXTURE_HEIGHT);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride,
+                             screen->target, texture_type,
+                             LP_JIT_TEXTURE_STRIDE);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data,
+                             screen->target, texture_type,
+                             LP_JIT_TEXTURE_DATA);
+      LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
+                           screen->target, texture_type);
+
+      LLVMAddTypeName(screen->module, "texture", texture_type);
+   }
+
+   /* struct lp_jit_context */
+   {
+      LLVMTypeRef elem_types[5];
       LLVMTypeRef context_type;
 
       elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */
       elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
       elem_types[2] = LLVMFloatType();                     /* alpha_ref_value */
       elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0);  /* blend_color */
+      elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
 
       context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
 
@@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
                              screen->target, context_type, 2);
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
                              screen->target, context_type, 3);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
+                             screen->target, context_type,
+                             LP_JIT_CONTEXT_TEXTURES_INDEX);
       LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
                            screen->target, context_type);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index a7fb60f9f5..c3e3e1af67 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -38,11 +38,31 @@
 
 #include "lp_bld_struct.h"
 
+#include "pipe/p_state.h"
+
 
 struct tgsi_sampler;
 struct llvmpipe_screen;
 
 
+struct lp_jit_texture
+{
+   uint32_t width;
+   uint32_t height;
+   uint32_t stride;
+   const void *data;
+};
+
+
+enum {
+   LP_JIT_TEXTURE_WIDTH = 0,
+   LP_JIT_TEXTURE_HEIGHT,
+   LP_JIT_TEXTURE_STRIDE,
+   LP_JIT_TEXTURE_DATA
+};
+
+
+
 /**
  * This structure is passed directly to the generated fragment shader.
  *
@@ -65,6 +85,8 @@ struct lp_jit_context
 
    /* TODO: blend constant color */
    uint8_t *blend_color;
+
+   struct lp_jit_texture textures[PIPE_MAX_SAMPLERS];
 };
 
 
@@ -80,6 +102,11 @@ struct lp_jit_context
 #define lp_jit_context_blend_color(_builder, _ptr) \
    lp_build_struct_get(_builder, _ptr, 3, "blend_color")
 
+#define LP_JIT_CONTEXT_TEXTURES_INDEX 4
+
+#define lp_jit_context_textures(_builder, _ptr) \
+   lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+
 
 typedef void
 (*lp_jit_frag_func)(struct lp_jit_context *context,
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index fb10329887..0b846ecb13 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -36,6 +36,7 @@
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
 #include "lp_jit.h"
+#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */
 
 
 #define LP_NEW_VIEWPORT      0x1
@@ -57,7 +58,8 @@
 
 struct tgsi_sampler;
 struct vertex_info;
-
+struct pipe_context;
+struct llvmpipe_context;
 
 struct lp_fragment_shader;
 
@@ -67,6 +69,8 @@ struct lp_fragment_shader_variant_key
    struct pipe_depth_state depth;
    struct pipe_alpha_state alpha;
    struct pipe_blend_state blend;
+
+   struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 6fbb057937..e87976b9f3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -250,7 +250,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
 
    if (llvmpipe->dirty & (LP_NEW_FS |
                           LP_NEW_BLEND |
-                          LP_NEW_DEPTH_STENCIL_ALPHA))
+                          LP_NEW_DEPTH_STENCIL_ALPHA |
+                          LP_NEW_SAMPLER |
+                          LP_NEW_TEXTURE))
       llvmpipe_update_fs( llvmpipe );
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 80f705c871..1a3e168245 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -486,7 +486,13 @@ generate_fragment(struct llvmpipe_context *lp,
                             a0_ptr, dadx_ptr, dady_ptr,
                             x0, y0, 2, 0);
 
+#if 0
+   /* C texture sampling */
    sampler = lp_c_sampler_soa_create(context_ptr);
+#else
+   /* code generated texture sampling */
+   sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
+#endif
 
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
@@ -666,8 +672,11 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
  */
 static void
 make_variant_key(struct llvmpipe_context *lp,
+                 struct lp_fragment_shader *shader,
                  struct lp_fragment_shader_variant_key *key)
 {
+   unsigned i;
+
    memset(key, 0, sizeof *key);
 
    memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
@@ -678,6 +687,10 @@ make_variant_key(struct llvmpipe_context *lp,
    /* alpha.ref_value is passed in jit_context */
 
    memcpy(&key->blend, lp->blend, sizeof key->blend);
+
+   for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
+      if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
+         lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]);
 }
 
 
@@ -688,7 +701,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
    struct lp_fragment_shader_variant_key key;
    struct lp_fragment_shader_variant *variant;
 
-   make_variant_key(lp, &key);
+   make_variant_key(lp, shader, &key);
 
    variant = shader->variants;
    while(variant) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 4fef541b1e..c69d90c723 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
 
       pipe_texture_reference(&llvmpipe->texture[i], tex);
       lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex);
+
+      if(tex) {
+         struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
+         struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i];
+         jit_tex->width = tex->width[0];
+         jit_tex->height = tex->height[0];
+         jit_tex->stride = lp_tex->stride[0];
+         if(!lp_tex->dt)
+            jit_tex->data = lp_tex->data;
+      }
    }
 
    llvmpipe->num_textures = num;
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
deleted file mode 100644
index 9a876f404d..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ /dev/null
@@ -1,1713 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2008 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Texture sampling
- *
- * Authors:
- *   Brian Paul
- */
-
-#include "lp_context.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_texture.h"
-#include "lp_tex_sample.h"
-#include "lp_tex_cache.h"
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-
-
-/*
- * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes
- * see 1-pixel bands of improperly weighted linear-filtered textures.
- * The tests/texwrap.c demo is a good test.
- * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
- * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
- */
-#define FRAC(f)  ((f) - util_ifloor(f))
-
-
-/**
- * Linear interpolation macro
- */
-static INLINE float
-lerp(float a, float v0, float v1)
-{
-   return v0 + a * (v1 - v0);
-}
-
-
-/**
- * Do 2D/biliner interpolation of float values.
- * v00, v10, v01 and v11 are typically four texture samples in a square/box.
- * a and b are the horizontal and vertical interpolants.
- * It's important that this function is inlined when compiled with
- * optimization!  If we find that's not true on some systems, convert
- * to a macro.
- */
-static INLINE float
-lerp_2d(float a, float b,
-        float v00, float v10, float v01, float v11)
-{
-   const float temp0 = lerp(a, v00, v10);
-   const float temp1 = lerp(a, v01, v11);
-   return lerp(b, temp0, temp1);
-}
-
-
-/**
- * As above, but 3D interpolation of 8 values.
- */
-static INLINE float
-lerp_3d(float a, float b, float c,
-        float v000, float v100, float v010, float v110,
-        float v001, float v101, float v011, float v111)
-{
-   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
-   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
-   return lerp(c, temp0, temp1);
-}
-
-
-
-/**
- * If A is a signed integer, A % B doesn't give the right value for A < 0
- * (in terms of texture repeat).  Just casting to unsigned fixes that.
- */
-#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
-
-
-/**
- * Apply texture coord wrapping mode and return integer texture indexes
- * for a vector of four texcoords (S or T or P).
- * \param wrapMode  PIPE_TEX_WRAP_x
- * \param s  the incoming texcoords
- * \param size  the texture image size
- * \param icoord  returns the integer texcoords
- * \return  integer texture index
- */
-static INLINE void
-nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
-                   int icoord[4])
-{
-   uint ch;
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_REPEAT:
-      /* s limited to [0,1) */
-      /* i limited to [0,size-1] */
-      for (ch = 0; ch < 4; ch++) {
-         int i = util_ifloor(s[ch] * size);
-         icoord[ch] = REMAINDER(i, size);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP:
-      /* s limited to [0,1] */
-      /* i limited to [0,size-1] */
-      for (ch = 0; ch < 4; ch++) {
-         if (s[ch] <= 0.0F)
-            icoord[ch] = 0;
-         else if (s[ch] >= 1.0F)
-            icoord[ch] = size - 1;
-         else
-            icoord[ch] = util_ifloor(s[ch] * size);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [0, size-1] */
-         const float min = 1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            if (s[ch] < min)
-               icoord[ch] = 0;
-            else if (s[ch] > max)
-               icoord[ch] = size - 1;
-            else
-               icoord[ch] = util_ifloor(s[ch] * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [-1, size] */
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            if (s[ch] <= min)
-               icoord[ch] = -1;
-            else if (s[ch] >= max)
-               icoord[ch] = size;
-            else
-               icoord[ch] = util_ifloor(s[ch] * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_REPEAT:
-      {
-         const float min = 1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            const int flr = util_ifloor(s[ch]);
-            float u;
-            if (flr & 1)
-               u = 1.0F - (s[ch] - (float) flr);
-            else
-               u = s[ch] - (float) flr;
-            if (u < min)
-               icoord[ch] = 0;
-            else if (u > max)
-               icoord[ch] = size - 1;
-            else
-               icoord[ch] = util_ifloor(u * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         /* s limited to [0,1] */
-         /* i limited to [0,size-1] */
-         const float u = fabsf(s[ch]);
-         if (u <= 0.0F)
-            icoord[ch] = 0;
-         else if (u >= 1.0F)
-            icoord[ch] = size - 1;
-         else
-            icoord[ch] = util_ifloor(u * size);
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [0, size-1] */
-         const float min = 1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            const float u = fabsf(s[ch]);
-            if (u < min)
-               icoord[ch] = 0;
-            else if (u > max)
-               icoord[ch] = size - 1;
-            else
-               icoord[ch] = util_ifloor(u * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [0, size-1] */
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            const float u = fabsf(s[ch]);
-            if (u < min)
-               icoord[ch] = -1;
-            else if (u > max)
-               icoord[ch] = size;
-            else
-               icoord[ch] = util_ifloor(u * size);
-         }
-      }
-      return;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * Used to compute texel locations for linear sampling for four texcoords.
- * \param wrapMode  PIPE_TEX_WRAP_x
- * \param s  the texcoords
- * \param size  the texture image size
- * \param icoord0  returns first texture indexes
- * \param icoord1  returns second texture indexes (usually icoord0 + 1)
- * \param w  returns blend factor/weight between texture indexes
- * \param icoord  returns the computed integer texture coords
- */
-static INLINE void
-linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
-                  int icoord0[4], int icoord1[4], float w[4])
-{
-   uint ch;
-
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_REPEAT:
-      for (ch = 0; ch < 4; ch++) {
-         float u = s[ch] * size - 0.5F;
-         icoord0[ch] = REMAINDER(util_ifloor(u), size);
-         icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         float u = CLAMP(s[ch], 0.0F, 1.0F);
-         u = u * size - 0.5f;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      for (ch = 0; ch < 4; ch++) {
-         float u = CLAMP(s[ch], 0.0F, 1.0F);
-         u = u * size - 0.5f;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord0[ch] < 0)
-            icoord0[ch] = 0;
-         if (icoord1[ch] >= (int) size)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      {
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            float u = CLAMP(s[ch], min, max);
-            u = u * size - 0.5f;
-            icoord0[ch] = util_ifloor(u);
-            icoord1[ch] = icoord0[ch] + 1;
-            w[ch] = FRAC(u);
-         }
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_REPEAT:
-      for (ch = 0; ch < 4; ch++) {
-         const int flr = util_ifloor(s[ch]);
-         float u;
-         if (flr & 1)
-            u = 1.0F - (s[ch] - (float) flr);
-         else
-            u = s[ch] - (float) flr;
-         u = u * size - 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord0[ch] < 0)
-            icoord0[ch] = 0;
-         if (icoord1[ch] >= (int) size)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         float u = fabsf(s[ch]);
-         if (u >= 1.0F)
-            u = (float) size;
-         else
-            u *= size;
-         u -= 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-      for (ch = 0; ch < 4; ch++) {
-         float u = fabsf(s[ch]);
-         if (u >= 1.0F)
-            u = (float) size;
-         else
-            u *= size;
-         u -= 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord0[ch] < 0)
-            icoord0[ch] = 0;
-         if (icoord1[ch] >= (int) size)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-      {
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            float u = fabsf(s[ch]);
-            if (u <= min)
-               u = min * size;
-            else if (u >= max)
-               u = max * size;
-            else
-               u *= size;
-            u -= 0.5F;
-            icoord0[ch] = util_ifloor(u);
-            icoord1[ch] = icoord0[ch] + 1;
-            w[ch] = FRAC(u);
-         }
-      }
-      break;;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * For RECT textures / unnormalized texcoords
- * Only a subset of wrap modes supported.
- */
-static INLINE void
-nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
-                          int icoord[4])
-{
-   uint ch;
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         int i = util_ifloor(s[ch]);
-         icoord[ch]= CLAMP(i, 0, (int) size-1);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      /* fall-through */
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      for (ch = 0; ch < 4; ch++) {
-         icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
-      }
-      return;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * For RECT textures / unnormalized texcoords.
- * Only a subset of wrap modes supported.
- */
-static INLINE void
-linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
-                         int icoord0[4], int icoord1[4], float w[4])
-{
-   uint ch;
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         /* Not exactly what the spec says, but it matches NVIDIA output */
-         float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         w[ch] = FRAC(u);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      /* fall-through */
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      for (ch = 0; ch < 4; ch++) {
-         float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
-         u -= 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord1[ch] > (int) size - 1)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-static unsigned
-choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
-{
-   /*
-      major axis
-      direction     target                             sc     tc    ma
-      ----------    -------------------------------    ---    ---   ---
-       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
-       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
-       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
-       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
-       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
-       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
-   */
-   const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
-   unsigned face;
-   float sc, tc, ma;
-
-   if (arx > ary && arx > arz) {
-      if (rx >= 0.0F) {
-         face = PIPE_TEX_FACE_POS_X;
-         sc = -rz;
-         tc = -ry;
-         ma = arx;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_X;
-         sc = rz;
-         tc = -ry;
-         ma = arx;
-      }
-   }
-   else if (ary > arx && ary > arz) {
-      if (ry >= 0.0F) {
-         face = PIPE_TEX_FACE_POS_Y;
-         sc = rx;
-         tc = rz;
-         ma = ary;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_Y;
-         sc = rx;
-         tc = -rz;
-         ma = ary;
-      }
-   }
-   else {
-      if (rz > 0.0F) {
-         face = PIPE_TEX_FACE_POS_Z;
-         sc = rx;
-         tc = -ry;
-         ma = arz;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_Z;
-         sc = -rx;
-         tc = -ry;
-         ma = arz;
-      }
-   }
-
-   *newS = ( sc / ma + 1.0F ) * 0.5F;
-   *newT = ( tc / ma + 1.0F ) * 0.5F;
-
-   return face;
-}
-
-
-/**
- * Examine the quad's texture coordinates to compute the partial
- * derivatives w.r.t X and Y, then compute lambda (level of detail).
- *
- * This is only done for fragment shaders, not vertex shaders.
- */
-static float
-compute_lambda(struct tgsi_sampler *tgsi_sampler,
-               const float s[QUAD_SIZE],
-               const float t[QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               float lodbias)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   float rho, lambda;
-
-   if (samp->processor == TGSI_PROCESSOR_VERTEX)
-      return lodbias;
-
-   assert(sampler->normalized_coords);
-
-   assert(s);
-   {
-      float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
-      float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
-      dsdx = fabsf(dsdx);
-      dsdy = fabsf(dsdy);
-      rho = MAX2(dsdx, dsdy) * texture->width[0];
-   }
-   if (t) {
-      float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
-      float dtdy = t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT];
-      float max;
-      dtdx = fabsf(dtdx);
-      dtdy = fabsf(dtdy);
-      max = MAX2(dtdx, dtdy) * texture->height[0];
-      rho = MAX2(rho, max);
-   }
-   if (p) {
-      float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
-      float dpdy = p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT];
-      float max;
-      dpdx = fabsf(dpdx);
-      dpdy = fabsf(dpdy);
-      max = MAX2(dpdx, dpdy) * texture->depth[0];
-      rho = MAX2(rho, max);
-   }
-
-   lambda = util_fast_log2(rho);
-   lambda += lodbias + sampler->lod_bias;
-   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
-   return lambda;
-}
-
-
-/**
- * Do several things here:
- * 1. Compute lambda from the texcoords, if needed
- * 2. Determine if we're minifying or magnifying
- * 3. If minifying, choose mipmap levels
- * 4. Return image filter to use within mipmap images
- * \param level0  Returns first mipmap level to sample from
- * \param level1  Returns second mipmap level to sample from
- * \param levelBlend  Returns blend factor between levels, in [0,1]
- * \param imgFilter  Returns either the min or mag filter, depending on lambda
- */
-static void
-choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler,
-                     const float s[QUAD_SIZE],
-                     const float t[QUAD_SIZE],
-                     const float p[QUAD_SIZE],
-                     float lodbias,
-                     unsigned *level0, unsigned *level1, float *levelBlend,
-                     unsigned *imgFilter)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-
-   if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
-      /* no mipmap selection needed */
-      *level0 = *level1 = CLAMP((int) sampler->min_lod,
-                                0, (int) texture->last_level);
-
-      if (sampler->min_img_filter != sampler->mag_img_filter) {
-         /* non-mipmapped texture, but still need to determine if doing
-          * minification or magnification.
-          */
-         float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-         if (lambda <= 0.0) {
-            *imgFilter = sampler->mag_img_filter;
-         }
-         else {
-            *imgFilter = sampler->min_img_filter;
-         }
-      }
-      else {
-         *imgFilter = sampler->mag_img_filter;
-      }
-   }
-   else {
-      float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-
-      if (lambda <= 0.0) { /* XXX threshold depends on the filter */
-         /* magnifying */
-         *imgFilter = sampler->mag_img_filter;
-         *level0 = *level1 = 0;
-      }
-      else {
-         /* minifying */
-         *imgFilter = sampler->min_img_filter;
-
-         /* choose mipmap level(s) and compute the blend factor between them */
-         if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
-            /* Nearest mipmap level */
-            const int lvl = (int) (lambda + 0.5);
-            *level0 =
-            *level1 = CLAMP(lvl, 0, (int) texture->last_level);
-         }
-         else {
-            /* Linear interpolation between mipmap levels */
-            const int lvl = (int) lambda;
-            *level0 = CLAMP(lvl,     0, (int) texture->last_level);
-            *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
-            *levelBlend = FRAC(lambda);  /* blending weight between levels */
-         }
-      }
-   }
-}
-
-
-/**
- * Get a texel from a texture, using the texture tile cache.
- *
- * \param face  the cube face in 0..5
- * \param level  the mipmap level
- * \param x  the x coord of texel within 2D image
- * \param y  the y coord of texel within 2D image
- * \param z  which slice of a 3D texture
- * \param rgba  the quad to put the texel/color into
- * \param j  which element of the rgba quad to write to
- *
- * XXX maybe move this into lp_tile_cache.c and merge with the
- * lp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
- */
-static void
-get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
-                  unsigned face, unsigned level, int x, int y, 
-                  const uint8_t *out[4])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-
-   const struct llvmpipe_cached_tex_tile *tile
-      = lp_get_cached_tex_tile(samp->cache,
-                               tex_tile_address(x, y, 0, face, level));
-
-   y %= TEX_TILE_SIZE;
-   x %= TEX_TILE_SIZE;
-      
-   out[0] = &tile->color[y  ][x  ][0];
-   out[1] = &tile->color[y  ][x+1][0];
-   out[2] = &tile->color[y+1][x  ][0];
-   out[3] = &tile->color[y+1][x+1][0];
-}
-
-static INLINE const uint8_t *
-get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
-                 unsigned face, unsigned level, int x, int y)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-
-   const struct llvmpipe_cached_tex_tile *tile
-      = lp_get_cached_tex_tile(samp->cache,
-                               tex_tile_address(x, y, 0, face, level));
-
-   y %= TEX_TILE_SIZE;
-   x %= TEX_TILE_SIZE;
-
-   return &tile->color[y][x][0];
-}
-
-
-static void
-get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
-                     unsigned face, unsigned level, 
-                     int x0, int y0, 
-                     int x1, int y1,
-                     const uint8_t *out[4])
-{
-   unsigned i;
-
-   for (i = 0; i < 4; i++) {
-      unsigned tx = (i & 1) ? x1 : x0;
-      unsigned ty = (i >> 1) ? y1 : y0;
-
-      out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty );
-   }
-}
-
-static void
-get_texel(const struct tgsi_sampler *tgsi_sampler,
-                 unsigned face, unsigned level, int x, int y, int z,
-                 float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-
-   if (x < 0 || x >= (int) texture->width[level] ||
-       y < 0 || y >= (int) texture->height[level] ||
-       z < 0 || z >= (int) texture->depth[level]) {
-      rgba[0][j] = sampler->border_color[0];
-      rgba[1][j] = sampler->border_color[1];
-      rgba[2][j] = sampler->border_color[2];
-      rgba[3][j] = sampler->border_color[3];
-   }
-   else {
-      const unsigned tx = x % TEX_TILE_SIZE;
-      const unsigned ty = y % TEX_TILE_SIZE;
-      const struct llvmpipe_cached_tex_tile *tile;
-
-      tile = lp_get_cached_tex_tile(samp->cache,
-                                    tex_tile_address(x, y, z, face, level));
-
-      rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]);
-      rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]);
-      rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]);
-      rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]);
-      if (0)
-      {
-         debug_printf("Get texel %f %f %f %f from %s\n",
-                      rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
-                      pf_name(texture->format));
-      }
-   }
-}
-
-
-/**
- * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
- * When we sampled the depth texture, the depth value was put into all
- * RGBA channels.  We look at the red channel here.
- * \param rgba  quad of (depth) texel values
- * \param p  texture 'P' components for four pixels in quad
- * \param j  which pixel in the quad to test [0..3]
- */
-static INLINE void
-shadow_compare(const struct pipe_sampler_state *sampler,
-               float rgba[NUM_CHANNELS][QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               uint j)
-{
-   int k;
-   switch (sampler->compare_func) {
-   case PIPE_FUNC_LESS:
-      k = p[j] < rgba[0][j];
-      break;
-   case PIPE_FUNC_LEQUAL:
-      k = p[j] <= rgba[0][j];
-      break;
-   case PIPE_FUNC_GREATER:
-      k = p[j] > rgba[0][j];
-      break;
-   case PIPE_FUNC_GEQUAL:
-      k = p[j] >= rgba[0][j];
-      break;
-   case PIPE_FUNC_EQUAL:
-      k = p[j] == rgba[0][j];
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      k = p[j] != rgba[0][j];
-      break;
-   case PIPE_FUNC_ALWAYS:
-      k = 1;
-      break;
-   case PIPE_FUNC_NEVER:
-      k = 0;
-      break;
-   default:
-      k = 0;
-      assert(0);
-      break;
-   }
-
-   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
-   rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
-   rgba[3][j] = 1.0F;
-}
-
-
-/**
- * As above, but do four z/texture comparisons.
- */
-static INLINE void
-shadow_compare4(const struct pipe_sampler_state *sampler,
-                float rgba[NUM_CHANNELS][QUAD_SIZE],
-                const float p[QUAD_SIZE])
-{
-   int j, k0, k1, k2, k3;
-   float val;
-
-   /* compare four texcoords vs. four texture samples */
-   switch (sampler->compare_func) {
-   case PIPE_FUNC_LESS:
-      k0 = p[0] < rgba[0][0];
-      k1 = p[1] < rgba[0][1];
-      k2 = p[2] < rgba[0][2];
-      k3 = p[3] < rgba[0][3];
-      break;
-   case PIPE_FUNC_LEQUAL:
-      k0 = p[0] <= rgba[0][0];
-      k1 = p[1] <= rgba[0][1];
-      k2 = p[2] <= rgba[0][2];
-      k3 = p[3] <= rgba[0][3];
-      break;
-   case PIPE_FUNC_GREATER:
-      k0 = p[0] > rgba[0][0];
-      k1 = p[1] > rgba[0][1];
-      k2 = p[2] > rgba[0][2];
-      k3 = p[3] > rgba[0][3];
-      break;
-   case PIPE_FUNC_GEQUAL:
-      k0 = p[0] >= rgba[0][0];
-      k1 = p[1] >= rgba[0][1];
-      k2 = p[2] >= rgba[0][2];
-      k3 = p[3] >= rgba[0][3];
-      break;
-   case PIPE_FUNC_EQUAL:
-      k0 = p[0] == rgba[0][0];
-      k1 = p[1] == rgba[0][1];
-      k2 = p[2] == rgba[0][2];
-      k3 = p[3] == rgba[0][3];
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      k0 = p[0] != rgba[0][0];
-      k1 = p[1] != rgba[0][1];
-      k2 = p[2] != rgba[0][2];
-      k3 = p[3] != rgba[0][3];
-      break;
-   case PIPE_FUNC_ALWAYS:
-      k0 = k1 = k2 = k3 = 1;
-      break;
-   case PIPE_FUNC_NEVER:
-      k0 = k1 = k2 = k3 = 0;
-      break;
-   default:
-      k0 = k1 = k2 = k3 = 0;
-      assert(0);
-      break;
-   }
-
-   /* convert four pass/fail values to an intensity in [0,1] */
-   val = 0.25F * (k0 + k1 + k2 + k3);
-
-   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
-   for (j = 0; j < 4; j++) {
-      rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
-      rgba[3][j] = 1.0F;
-   }
-}
-
-
-
-static void
-lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
-                                    const float s[QUAD_SIZE],
-                                    const float t[QUAD_SIZE],
-                                    const float p[QUAD_SIZE],
-                                    float lodbias,
-                                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   unsigned  j;
-   unsigned level = samp->level;
-   unsigned xpot = 1 << (samp->xpot - level);
-   unsigned ypot = 1 << (samp->ypot - level);
-   unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
-   unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
-      
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int c;
-
-      float u = s[j] * xpot - 0.5F;
-      float v = t[j] * ypot - 0.5F;
-
-      int uflr = util_ifloor(u);
-      int vflr = util_ifloor(v);
-
-      float xw = u - (float)uflr;
-      float yw = v - (float)vflr;
-
-      int x0 = uflr & (xpot - 1);
-      int y0 = vflr & (ypot - 1);
-
-      const uint8_t *tx[4];
-      
-
-      /* Can we fetch all four at once:
-       */
-      if (x0 < xmax && y0 < ymax)
-      {
-         get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
-      }
-      else 
-      {
-         unsigned x1 = (x0 + 1) & (xpot - 1);
-         unsigned y1 = (y0 + 1) & (ypot - 1);
-         get_texel_quad_2d_mt(tgsi_sampler, 0, level, 
-                              x0, y0, x1, y1, tx);
-      }
-
-
-      /* interpolate R, G, B, A */
-      for (c = 0; c < 4; c++) {
-         rgba[c][j] = lerp_2d(xw, yw, 
-                              ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]),
-                              ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c]));
-      }
-   }
-}
-
-
-static void
-lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
-                                     const float s[QUAD_SIZE],
-                                     const float t[QUAD_SIZE],
-                                     const float p[QUAD_SIZE],
-                                     float lodbias,
-                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   unsigned  j;
-   unsigned level = samp->level;
-   unsigned xpot = 1 << (samp->xpot - level);
-   unsigned ypot = 1 << (samp->ypot - level);
-
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int c;
-
-      float u = s[j] * xpot;
-      float v = t[j] * ypot;
-
-      int uflr = util_ifloor(u);
-      int vflr = util_ifloor(v);
-
-      int x0 = uflr & (xpot - 1);
-      int y0 = vflr & (ypot - 1);
-
-      const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
-
-      for (c = 0; c < 4; c++) {
-         rgba[c][j] = ubyte_to_float(out[c]);
-      }
-   }
-}
-
-
-static void
-lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
-                                     const float s[QUAD_SIZE],
-                                     const float t[QUAD_SIZE],
-                                     const float p[QUAD_SIZE],
-                                     float lodbias,
-                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   unsigned  j;
-   unsigned level = samp->level;
-   unsigned xpot = 1 << (samp->xpot - level);
-   unsigned ypot = 1 << (samp->ypot - level);
-
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int c;
-
-      float u = s[j] * xpot;
-      float v = t[j] * ypot;
-
-      int x0, y0;
-      const uint8_t *out;
-
-      x0 = util_ifloor(u);
-      if (x0 < 0) 
-         x0 = 0;
-      else if (x0 > xpot - 1)
-         x0 = xpot - 1;
-
-      y0 = util_ifloor(v);
-      if (y0 < 0) 
-         y0 = 0;
-      else if (y0 > ypot - 1)
-         y0 = ypot - 1;
-      
-      out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
-
-      for (c = 0; c < 4; c++) {
-         rgba[c][j] = ubyte_to_float(out[c]);
-      }
-   }
-}
-
-
-static void
-lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
-                                               const float s[QUAD_SIZE],
-                                               const float t[QUAD_SIZE],
-                                               const float p[QUAD_SIZE],
-                                               float lodbias,
-                                               float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   int level0;
-   float lambda;
-
-   lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-   level0 = (int)lambda;
-
-   if (lambda < 0.0) { 
-      samp->level = 0;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba );
-   }
-   else if (level0 >= texture->last_level) {
-      samp->level = texture->last_level;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba );
-   }
-   else {
-      float levelBlend = lambda - level0;
-      float rgba0[4][4];
-      float rgba1[4][4];
-      int c,j;
-
-      samp->level = level0;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba0 );
-
-      samp->level = level0+1;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba1 );
-
-      for (j = 0; j < QUAD_SIZE; j++) {
-         for (c = 0; c < 4; c++) {
-            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
-         }
-      }
-   }
-}
-
-/**
- * Common code for sampling 1D/2D/cube textures.
- * Could probably extend for 3D...
- */
-static void
-lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
-                         const float s[QUAD_SIZE],
-                         const float t[QUAD_SIZE],
-                         const float p[QUAD_SIZE],
-                         float lodbias,
-                         float rgba[NUM_CHANNELS][QUAD_SIZE],
-                         const unsigned faces[4])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   unsigned level0, level1, j, imgFilter;
-   int width, height;
-   float levelBlend;
-
-   choose_mipmap_levels(tgsi_sampler, s, t, p, 
-                        lodbias,
-                        &level0, &level1, &levelBlend, &imgFilter);
-
-   assert(sampler->normalized_coords);
-
-   width = texture->width[level0];
-   height = texture->height[level0];
-
-   assert(width > 0);
-
-   switch (imgFilter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      {
-         int x[4], y[4];
-         nearest_texcoord_4(sampler->wrap_s, s, width, x);
-         nearest_texcoord_4(sampler->wrap_t, t, height, y);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare(sampler, rgba, p, j);
-            }
-
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-               unsigned c;
-               x[j] /= 2;
-               y[j] /= 2;
-               get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
-                         rgba2, j);
-               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
-                  shadow_compare(sampler, rgba2, p, j);
-               }
-
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
-      {
-         int x0[4], y0[4], x1[4], y1[4];
-         float xw[4], yw[4]; /* weights */
-
-         linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
-         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            float tx[4][4]; /* texels */
-            int c;
-            get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
-            get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
-            get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
-            get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare4(sampler, tx, p);
-            }
-
-            /* interpolate R, G, B, A */
-            for (c = 0; c < 4; c++) {
-               rgba[c][j] = lerp_2d(xw[j], yw[j],
-                                    tx[c][0], tx[c][1],
-                                    tx[c][2], tx[c][3]);
-            }
-
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-
-               /* XXX: This is incorrect -- will often end up with (x0
-                *  == x1 && y0 == y1), meaning that we fetch the same
-                *  texel four times and linearly interpolate between
-                *  identical values.  The correct approach would be to
-                *  call linear_texcoord again for the second level.
-                */
-               x0[j] /= 2;
-               y0[j] /= 2;
-               x1[j] /= 2;
-               y1[j] /= 2;
-               get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
-               get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
-               get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
-               get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
-               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
-                  shadow_compare4(sampler, tx, p);
-               }
-
-               /* interpolate R, G, B, A */
-               for (c = 0; c < 4; c++) {
-                  rgba2[c][j] = lerp_2d(xw[j], yw[j],
-                                        tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
-               }
-
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-static INLINE void
-lp_get_samples_1d(struct tgsi_sampler *sampler,
-                  const float s[QUAD_SIZE],
-                  const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias,
-                  float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   static const unsigned faces[4] = {0, 0, 0, 0};
-   static const float tzero[4] = {0, 0, 0, 0};
-   lp_get_samples_2d_common(sampler, s, tzero, NULL,
-                            lodbias, rgba, faces);
-}
-
-
-static INLINE void
-lp_get_samples_2d(struct tgsi_sampler *sampler,
-                  const float s[QUAD_SIZE],
-                  const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias,
-                  float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   static const unsigned faces[4] = {0, 0, 0, 0};
-   lp_get_samples_2d_common(sampler, s, t, p,
-                            lodbias, rgba, faces);
-}
-
-
-static INLINE void
-lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
-                  const float s[QUAD_SIZE],
-                  const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias,
-                  float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   /* get/map pipe_surfaces corresponding to 3D tex slices */
-   unsigned level0, level1, j, imgFilter;
-   int width, height, depth;
-   float levelBlend;
-   const uint face = 0;
-
-   choose_mipmap_levels(tgsi_sampler, s, t, p, 
-                        lodbias,
-                        &level0, &level1, &levelBlend, &imgFilter);
-
-   assert(sampler->normalized_coords);
-
-   width = texture->width[level0];
-   height = texture->height[level0];
-   depth = texture->depth[level0];
-
-   assert(width > 0);
-   assert(height > 0);
-   assert(depth > 0);
-
-   switch (imgFilter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      {
-         int x[4], y[4], z[4];
-         nearest_texcoord_4(sampler->wrap_s, s, width, x);
-         nearest_texcoord_4(sampler->wrap_t, t, height, y);
-         nearest_texcoord_4(sampler->wrap_r, p, depth, z);
-         for (j = 0; j < QUAD_SIZE; j++) {
-            get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-               unsigned c;
-               x[j] /= 2;
-               y[j] /= 2;
-               z[j] /= 2;
-               get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
-      {
-         int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
-         float xw[4], yw[4], zw[4]; /* interpolation weights */
-         linear_texcoord_4(sampler->wrap_s, s, width,  x0, x1, xw);
-         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
-         linear_texcoord_4(sampler->wrap_r, p, depth,  z0, z1, zw);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            int c;
-            float tx0[4][4], tx1[4][4];
-            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
-            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
-            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
-            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
-            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
-            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
-            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
-            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
-
-            /* interpolate R, G, B, A */
-            for (c = 0; c < 4; c++) {
-               rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
-                                    tx0[c][0], tx0[c][1],
-                                    tx0[c][2], tx0[c][3],
-                                    tx1[c][0], tx1[c][1],
-                                    tx1[c][2], tx1[c][3]);
-            }
-
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-               x0[j] /= 2;
-               y0[j] /= 2;
-               z0[j] /= 2;
-               x1[j] /= 2;
-               y1[j] /= 2;
-               z1[j] /= 2;
-               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
-               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
-               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
-               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
-               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
-               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
-               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
-               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
-
-               /* interpolate R, G, B, A */
-               for (c = 0; c < 4; c++) {
-                  rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
-                                        tx0[c][0], tx0[c][1],
-                                        tx0[c][2], tx0[c][3],
-                                        tx1[c][0], tx1[c][1],
-                                        tx1[c][2], tx1[c][3]);
-               }
-
-               /* blend mipmap levels */
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-static void
-lp_get_samples_cube(struct tgsi_sampler *sampler,
-                    const float s[QUAD_SIZE],
-                    const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias,
-                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   unsigned faces[QUAD_SIZE], j;
-   float ssss[4], tttt[4];
-   for (j = 0; j < QUAD_SIZE; j++) {
-      faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
-   }
-   lp_get_samples_2d_common(sampler, ssss, tttt, NULL,
-                            lodbias, rgba, faces);
-}
-
-
-static void
-lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
-                    const float s[QUAD_SIZE],
-                    const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias,
-                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   const uint face = 0;
-   unsigned level0, level1, j, imgFilter;
-   int width, height;
-   float levelBlend;
-
-   choose_mipmap_levels(tgsi_sampler, s, t, p, 
-                        lodbias,
-                        &level0, &level1, &levelBlend, &imgFilter);
-
-   /* texture RECTS cannot be mipmapped */
-   assert(level0 == level1);
-
-   width = texture->width[level0];
-   height = texture->height[level0];
-
-   assert(width > 0);
-
-   switch (imgFilter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      {
-         int x[4], y[4];
-         nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
-         nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
-         for (j = 0; j < QUAD_SIZE; j++) {
-            get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare(sampler, rgba, p, j);
-            }
-         }
-      }
-      break;
-   case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
-      {
-         int x0[4], y0[4], x1[4], y1[4];
-         float xw[4], yw[4]; /* weights */
-         linear_texcoord_unnorm_4(sampler->wrap_s, s, width,  x0, x1, xw);
-         linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
-         for (j = 0; j < QUAD_SIZE; j++) {
-            float tx[4][4]; /* texels */
-            int c;
-            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
-            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
-            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
-            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare4(sampler, tx, p);
-            }
-            for (c = 0; c < 4; c++) {
-               rgba[c][j] = lerp_2d(xw[j], yw[j],
-                                    tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
-            }
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * Error condition handler
- */
-static INLINE void
-lp_get_samples_null(struct tgsi_sampler *tgsi_sampler,
-                    const float s[QUAD_SIZE],
-                    const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias,
-                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   int i,j;
-
-   for (i = 0; i < 4; i++)
-      for (j = 0; j < 4; j++)
-         rgba[i][j] = 1.0;
-}
-
-/**
- * Called via tgsi_sampler::get_samples() when using a sampler for the
- * first time.  Determine the actual sampler function, link it in and
- * call it.
- */
-void
-lp_get_samples(struct tgsi_sampler *tgsi_sampler,
-               const float s[QUAD_SIZE],
-               const float t[QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               float lodbias,
-               float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-
-   /* Default to the 'undefined' case:
-    */
-   tgsi_sampler->get_samples = lp_get_samples_null;
-
-   if (!texture) {
-      assert(0);                /* is this legal?? */
-      goto out;
-   }
-
-   if (!sampler->normalized_coords) {
-      assert (texture->target == PIPE_TEXTURE_2D);
-      tgsi_sampler->get_samples = lp_get_samples_rect;
-      goto out;
-   }
-
-   switch (texture->target) {
-   case PIPE_TEXTURE_1D:
-      tgsi_sampler->get_samples = lp_get_samples_1d;
-      break;
-   case PIPE_TEXTURE_2D:
-      tgsi_sampler->get_samples = lp_get_samples_2d;
-      break;
-   case PIPE_TEXTURE_3D:
-      tgsi_sampler->get_samples = lp_get_samples_3d;
-      break;
-   case PIPE_TEXTURE_CUBE:
-      tgsi_sampler->get_samples = lp_get_samples_cube;
-      break;
-   default:
-      assert(0);
-      break;
-   }
-
-   /* Do this elsewhere: 
-    */
-   samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
-   samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
-
-   /* Try to hook in a faster sampler.  Ultimately we'll have to
-    * code-generate these.  Luckily most of this looks like it is
-    * orthogonal state within the sampler.
-    */
-   if (texture->target == PIPE_TEXTURE_2D &&
-       sampler->min_img_filter == sampler->mag_img_filter &&
-       sampler->wrap_s == sampler->wrap_t &&
-       sampler->compare_mode == FALSE &&
-       sampler->normalized_coords) 
-   {
-      if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
-         samp->level = CLAMP((int) sampler->min_lod,
-                             0, (int) texture->last_level);
-
-         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
-            switch (sampler->min_img_filter) {
-            case PIPE_TEX_FILTER_NEAREST:
-               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT;
-               break;
-            case PIPE_TEX_FILTER_LINEAR:
-               tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT;
-               break;
-            default:
-               break;
-            }
-         } 
-         else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) {
-            switch (sampler->min_img_filter) {
-            case PIPE_TEX_FILTER_NEAREST:
-               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT;
-               break;
-            default:
-               break;
-            }
-         }
-      }
-      else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
-            switch (sampler->min_img_filter) {
-            case PIPE_TEX_FILTER_LINEAR:
-               tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT;
-               break;
-            default:
-               break;
-            }
-         } 
-      }
-   }
-   else if (0) {
-      _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n",
-                    texture->target, PIPE_TEXTURE_2D,
-                    sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
-                    sampler->min_img_filter, sampler->mag_img_filter,
-                    sampler->wrap_s, sampler->wrap_t,
-                    sampler->compare_mode, FALSE,
-                    sampler->normalized_coords, TRUE);
-   }
-
-out:
-   tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
-}
-
-
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
-                    uint32_t unit,
-                    float *store )
-{
-   struct tgsi_sampler *sampler = samplers[unit];
-
-#if 0
-   uint j;
-
-   debug_printf("%s sampler: %p (%p) store: %p\n",
-                __FUNCTION__,
-                sampler, *sampler,
-                store );
-
-   debug_printf("lodbias %f\n", store[12]);
-
-   for (j = 0; j < 4; j++)
-      debug_printf("sample %d texcoord %f %f\n",
-                   j,
-                   store[0+j],
-                   store[4+j]);
-#endif
-
-   {
-      float rgba[NUM_CHANNELS][QUAD_SIZE];
-      sampler->get_samples(sampler,
-                           &store[0],
-                           &store[4],
-                           &store[8],
-                           0.0f, /*store[12],  lodbias */
-                           rgba);
-      memcpy(store, rgba, sizeof rgba);
-   }
-
-#if 0
-   for (j = 0; j < 4; j++)
-      debug_printf("sample %d result %f %f %f %f\n",
-                   j,
-                   store[0+j],
-                   store[4+j],
-                   store[8+j],
-                   store[12+j]);
-#endif
-}
-
-
-#include "lp_bld_type.h"
-#include "lp_bld_intr.h"
-#include "lp_bld_tgsi.h"
-
-
-struct lp_c_sampler_soa
-{
-   struct lp_build_sampler_soa base;
-
-   LLVMValueRef context_ptr;
-
-   LLVMValueRef samplers_ptr;
-
-   /** Coords/texels store */
-   LLVMValueRef store_ptr;
-};
-
-
-static void
-lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
-{
-   FREE(sampler);
-}
-
-
-static void
-lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
-                                  LLVMBuilderRef builder,
-                                  union lp_type type,
-                                  unsigned unit,
-                                  unsigned num_coords,
-                                  const LLVMValueRef *coords,
-                                  LLVMValueRef lodbias,
-                                  LLVMValueRef *texel)
-{
-   struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
-   LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
-   LLVMValueRef args[3];
-   unsigned i;
-
-   if(!sampler->samplers_ptr)
-      sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
-
-   if(!sampler->store_ptr)
-      sampler->store_ptr = LLVMBuildArrayAlloca(builder,
-                                            vec_type,
-                                            LLVMConstInt(LLVMInt32Type(), 4, 0),
-                                            "texel_store");
-
-   for (i = 0; i < num_coords; i++) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
-      LLVMBuildStore(builder, coords[i], coord_ptr);
-   }
-
-   args[0] = sampler->samplers_ptr;
-   args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
-   args[2] = sampler->store_ptr;
-
-   lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
-
-   for (i = 0; i < NUM_CHANNELS; ++i) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
-      texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
-   }
-}
-
-
-struct lp_build_sampler_soa *
-lp_c_sampler_soa_create(LLVMValueRef context_ptr)
-{
-   struct lp_c_sampler_soa *sampler;
-
-   sampler = CALLOC_STRUCT(lp_c_sampler_soa);
-   if(!sampler)
-      return NULL;
-
-   sampler->base.destroy = lp_c_sampler_soa_destroy;
-   sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
-   sampler->context_ptr = context_ptr;
-
-   return &sampler->base;
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 7d1e565885..9ad1bde956 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -35,6 +35,7 @@
 
 
 struct llvmpipe_tex_tile_cache;
+struct lp_sampler_static_state;
 
 
 /**
@@ -87,4 +88,14 @@ struct lp_build_sampler_soa *
 lp_c_sampler_soa_create(LLVMValueRef context_ptr);
 
 
+/**
+ * Pure-LLVM texture sampling code generator.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key,
+                           LLVMValueRef context_ptr);
+
+
 #endif /* LP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
new file mode 100644
index 0000000000..9a876f404d
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -0,0 +1,1713 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2008 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * Texture sampling
+ *
+ * Authors:
+ *   Brian Paul
+ */
+
+#include "lp_context.h"
+#include "lp_quad.h"
+#include "lp_surface.h"
+#include "lp_texture.h"
+#include "lp_tex_sample.h"
+#include "lp_tex_cache.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+
+
+/*
+ * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes
+ * see 1-pixel bands of improperly weighted linear-filtered textures.
+ * The tests/texwrap.c demo is a good test.
+ * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
+ * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
+ */
+#define FRAC(f)  ((f) - util_ifloor(f))
+
+
+/**
+ * Linear interpolation between v0 (a = 0) and v1 (a = 1).
+ */
+static INLINE float
+lerp(float a, float v0, float v1)
+{
+   return v0 + a * (v1 - v0);
+}
+
+
+/**
+ * Do 2D/bilinear interpolation of float values.
+ * v00, v10, v01 and v11 are typically four texture samples in a square/box.
+ * a and b are the horizontal and vertical interpolants.
+ * It's important that this function is inlined when compiled with
+ * optimization!  If we find that's not true on some systems, convert
+ * to a macro.
+ */
+static INLINE float
+lerp_2d(float a, float b,
+        float v00, float v10, float v01, float v11)
+{
+   const float temp0 = lerp(a, v00, v10);
+   const float temp1 = lerp(a, v01, v11);
+   return lerp(b, temp0, temp1);
+}
+
+
+/**
+ * Like lerp_2d(), but 3D interpolation of 8 values; c blends the two 2D results.
+ */
+static INLINE float
+lerp_3d(float a, float b, float c,
+        float v000, float v100, float v010, float v110,
+        float v001, float v101, float v011, float v111)
+{
+   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
+   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
+   return lerp(c, temp0, temp1);
+}
+
+
+
+/**
+ * If A is a signed integer, A % B doesn't give the right value for A < 0
+ * (in terms of texture repeat).  Just casting to unsigned fixes that.
+ */
+#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
+
+
+/**
+ * Apply texture coord wrapping mode and return integer texture indexes
+ * for a vector of four texcoords (S or T or P).
+ * \param wrapMode  PIPE_TEX_WRAP_x
+ * \param s  the incoming texcoords
+ * \param size  the texture image size
+ * \param icoord  returns the integer texcoords
+ * The border wrap modes may store -1 or size, i.e. indexes outside the image.
+ */
+static INLINE void
+nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+                   int icoord[4])
+{
+   uint ch;
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      /* s limited to [0,1) */
+      /* i limited to [0,size-1] */
+      for (ch = 0; ch < 4; ch++) {
+         int i = util_ifloor(s[ch] * size);
+         icoord[ch] = REMAINDER(i, size);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP:
+      /* s limited to [0,1] */
+      /* i limited to [0,size-1] */
+      for (ch = 0; ch < 4; ch++) {
+         if (s[ch] <= 0.0F)
+            icoord[ch] = 0;
+         else if (s[ch] >= 1.0F)
+            icoord[ch] = size - 1;
+         else
+            icoord[ch] = util_ifloor(s[ch] * size);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      {
+         /* s limited to [min,max] */
+         /* i limited to [0, size-1] */
+         const float min = 1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            if (s[ch] < min)
+               icoord[ch] = 0;
+            else if (s[ch] > max)
+               icoord[ch] = size - 1;
+            else
+               icoord[ch] = util_ifloor(s[ch] * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      {
+         /* s limited to [min,max] */
+         /* i limited to [-1, size] */
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            if (s[ch] <= min)
+               icoord[ch] = -1;
+            else if (s[ch] >= max)
+               icoord[ch] = size;
+            else
+               icoord[ch] = util_ifloor(s[ch] * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      {
+         const float min = 1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            const int flr = util_ifloor(s[ch]);
+            float u;
+            if (flr & 1)
+               u = 1.0F - (s[ch] - (float) flr);
+            else
+               u = s[ch] - (float) flr;
+            if (u < min)
+               icoord[ch] = 0;
+            else if (u > max)
+               icoord[ch] = size - 1;
+            else
+               icoord[ch] = util_ifloor(u * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         /* s limited to [0,1] */
+         /* i limited to [0,size-1] */
+         const float u = fabsf(s[ch]);
+         if (u <= 0.0F)
+            icoord[ch] = 0;
+         else if (u >= 1.0F)
+            icoord[ch] = size - 1;
+         else
+            icoord[ch] = util_ifloor(u * size);
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+      {
+         /* s limited to [min,max] */
+         /* i limited to [0, size-1] */
+         const float min = 1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            const float u = fabsf(s[ch]);
+            if (u < min)
+               icoord[ch] = 0;
+            else if (u > max)
+               icoord[ch] = size - 1;
+            else
+               icoord[ch] = util_ifloor(u * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      {
+         /* s limited to [min,max] */
+         /* i limited to [0, size]; u >= 0 > min, so -1 is never produced */
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            const float u = fabsf(s[ch]);
+            if (u < min)
+               icoord[ch] = -1;
+            else if (u > max)
+               icoord[ch] = size;
+            else
+               icoord[ch] = util_ifloor(u * size);
+         }
+      }
+      return;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * Used to compute texel locations for linear sampling for four texcoords.
+ * \param wrapMode  PIPE_TEX_WRAP_x
+ * \param s  the texcoords
+ * \param size  the texture image size
+ * \param icoord0  returns first texture indexes
+ * \param icoord1  returns second texture indexes (usually icoord0 + 1)
+ * \param w  returns blend factor/weight between texture indexes
+ * The clamp and border wrap modes may return indexes outside [0, size-1].
+ */
+static INLINE void
+linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+                  int icoord0[4], int icoord1[4], float w[4])
+{
+   uint ch;
+
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      for (ch = 0; ch < 4; ch++) {
+         float u = s[ch] * size - 0.5F;
+         icoord0[ch] = REMAINDER(util_ifloor(u), size);
+         icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
+         w[ch] = FRAC(u);
+      }
+      break;
+   case PIPE_TEX_WRAP_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         float u = CLAMP(s[ch], 0.0F, 1.0F);
+         u = u * size - 0.5f;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         w[ch] = FRAC(u);
+      }
+      break;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      for (ch = 0; ch < 4; ch++) {
+         float u = CLAMP(s[ch], 0.0F, 1.0F);
+         u = u * size - 0.5f;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         if (icoord0[ch] < 0)
+            icoord0[ch] = 0;
+         if (icoord1[ch] >= (int) size)
+            icoord1[ch] = size - 1;
+         w[ch] = FRAC(u);
+      }
+      break;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      {
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            float u = CLAMP(s[ch], min, max);
+            u = u * size - 0.5f;
+            icoord0[ch] = util_ifloor(u);
+            icoord1[ch] = icoord0[ch] + 1;
+            w[ch] = FRAC(u);
+         }
+      }
+      break;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      for (ch = 0; ch < 4; ch++) {
+         const int flr = util_ifloor(s[ch]);
+         float u;
+         if (flr & 1)
+            u = 1.0F - (s[ch] - (float) flr);
+         else
+            u = s[ch] - (float) flr;
+         u = u * size - 0.5F;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         if (icoord0[ch] < 0)
+            icoord0[ch] = 0;
+         if (icoord1[ch] >= (int) size)
+            icoord1[ch] = size - 1;
+         w[ch] = FRAC(u);
+      }
+      break;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         float u = fabsf(s[ch]);
+         if (u >= 1.0F)
+            u = (float) size;
+         else
+            u *= size;
+         u -= 0.5F;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         w[ch] = FRAC(u);
+      }
+      break;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+      for (ch = 0; ch < 4; ch++) {
+         float u = fabsf(s[ch]);
+         if (u >= 1.0F)
+            u = (float) size;
+         else
+            u *= size;
+         u -= 0.5F;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         if (icoord0[ch] < 0)
+            icoord0[ch] = 0;
+         if (icoord1[ch] >= (int) size)
+            icoord1[ch] = size - 1;
+         w[ch] = FRAC(u);
+      }
+      break;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      {
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            float u = fabsf(s[ch]);
+            if (u <= min)
+               u = min * size;
+            else if (u >= max)
+               u = max * size;
+            else
+               u *= size;
+            u -= 0.5F;
+            icoord0[ch] = util_ifloor(u);
+            icoord1[ch] = icoord0[ch] + 1;
+            w[ch] = FRAC(u);
+         }
+      }
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * For RECT textures / unnormalized texcoords
+ * Only a subset of wrap modes supported.
+ */
+static INLINE void
+nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+                          int icoord[4])
+{
+   uint ch;
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         int i = util_ifloor(s[ch]);
+         icoord[ch]= CLAMP(i, 0, (int) size-1);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      /* fall-through */
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      for (ch = 0; ch < 4; ch++) {
+         icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
+      }
+      return;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * For RECT textures / unnormalized texcoords.
+ * Only a subset of wrap modes supported.
+ */
+static INLINE void
+linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+                         int icoord0[4], int icoord1[4], float w[4])
+{
+   uint ch;
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         /* Not exactly what the spec says, but it matches NVIDIA output */
+         float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         w[ch] = FRAC(u);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      /* fall-through */
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      for (ch = 0; ch < 4; ch++) {
+         float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
+         u -= 0.5F;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         if (icoord1[ch] > (int) size - 1)
+            icoord1[ch] = size - 1;
+         w[ch] = FRAC(u);
+      }
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+static unsigned
+choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+{
+   /* Pick the face from the largest-magnitude component; ties go to X, then Y.
+      major axis
+      direction     target                             sc     tc    ma
+      ----------    -------------------------------    ---    ---   ---
+       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
+       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
+       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
+       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
+       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
+       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
+   */
+   const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
+   unsigned face;
+   float sc, tc, ma;
+
+   if (arx >= ary && arx >= arz) {
+      if (rx >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_X;
+         sc = -rz;
+         tc = -ry;
+         ma = arx;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_X;
+         sc = rz;
+         tc = -ry;
+         ma = arx;
+      }
+   }
+   else if (ary >= arx && ary >= arz) {
+      if (ry >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_Y;
+         sc = rx;
+         tc = rz;
+         ma = ary;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Y;
+         sc = rx;
+         tc = -rz;
+         ma = ary;
+      }
+   }
+   else {
+      if (rz > 0.0F) {
+         face = PIPE_TEX_FACE_POS_Z;
+         sc = rx;
+         tc = -ry;
+         ma = arz;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Z;
+         sc = -rx;
+         tc = -ry;
+         ma = arz;
+      }
+   }
+
+   *newS = ( sc / ma + 1.0F ) * 0.5F;
+   *newT = ( tc / ma + 1.0F ) * 0.5F;
+
+   return face;
+}
+
+
+/**
+ * Examine the quad's texture coordinates to compute the partial
+ * derivatives w.r.t X and Y, then compute lambda (level of detail).
+ *
+ * This is only done for fragment shaders, not vertex shaders.
+ */
+static float
+compute_lambda(struct tgsi_sampler *tgsi_sampler,
+               const float s[QUAD_SIZE],
+               const float t[QUAD_SIZE],
+               const float p[QUAD_SIZE],
+               float lodbias)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+   float rho, lambda;
+
+   if (samp->processor == TGSI_PROCESSOR_VERTEX)
+      return lodbias;
+
+   assert(sampler->normalized_coords);
+
+   assert(s);
+   {
+      float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
+      float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
+      dsdx = fabsf(dsdx);
+      dsdy = fabsf(dsdy);
+      rho = MAX2(dsdx, dsdy) * texture->width[0];
+   }
+   if (t) {
+      float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
+      float dtdy = t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT];
+      float max;
+      dtdx = fabsf(dtdx);
+      dtdy = fabsf(dtdy);
+      max = MAX2(dtdx, dtdy) * texture->height[0];
+      rho = MAX2(rho, max);
+   }
+   if (p) {
+      float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
+      float dpdy = p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT];
+      float max;
+      dpdx = fabsf(dpdx);
+      dpdy = fabsf(dpdy);
+      max = MAX2(dpdx, dpdy) * texture->depth[0];
+      rho = MAX2(rho, max);
+   }
+
+   lambda = util_fast_log2(rho);
+   lambda += lodbias + sampler->lod_bias;
+   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
+
+   return lambda;
+}
+
+
+/**
+ * Do several things here:
+ * 1. Compute lambda from the texcoords, if needed
+ * 2. Determine if we're minifying or magnifying
+ * 3. If minifying, choose mipmap levels
+ * 4. Return image filter to use within mipmap images
+ * \param level0  Returns first mipmap level to sample from
+ * \param level1  Returns second mipmap level to sample from
+ * \param levelBlend  Returns blend factor between levels, in [0,1]
+ * \param imgFilter  Returns either the min or mag filter, depending on lambda
+ */
+static void
+choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler,
+                     const float s[QUAD_SIZE],
+                     const float t[QUAD_SIZE],
+                     const float p[QUAD_SIZE],
+                     float lodbias,
+                     unsigned *level0, unsigned *level1, float *levelBlend,
+                     unsigned *imgFilter)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+
+   if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+      /* no mipmap selection needed */
+      *level0 = *level1 = CLAMP((int) sampler->min_lod,
+                                0, (int) texture->last_level);
+
+      if (sampler->min_img_filter != sampler->mag_img_filter) {
+         /* non-mipmapped texture, but still need to determine if doing
+          * minification or magnification.
+          */
+         float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
+         if (lambda <= 0.0) {
+            *imgFilter = sampler->mag_img_filter;
+         }
+         else {
+            *imgFilter = sampler->min_img_filter;
+         }
+      }
+      else {
+         *imgFilter = sampler->mag_img_filter;
+      }
+   }
+   else {
+      float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
+
+      if (lambda <= 0.0) { /* XXX threshold depends on the filter */
+         /* magnifying */
+         *imgFilter = sampler->mag_img_filter;
+         *level0 = *level1 = 0;
+      }
+      else {
+         /* minifying */
+         *imgFilter = sampler->min_img_filter;
+
+         /* choose mipmap level(s) and compute the blend factor between them */
+         if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+            /* Nearest mipmap level */
+            const int lvl = (int) (lambda + 0.5);
+            *level0 =
+            *level1 = CLAMP(lvl, 0, (int) texture->last_level);
+         }
+         else {
+            /* Linear interpolation between mipmap levels */
+            const int lvl = (int) lambda;
+            *level0 = CLAMP(lvl,     0, (int) texture->last_level);
+            *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
+            *levelBlend = FRAC(lambda);  /* blending weight between levels */
+         }
+      }
+   }
+}
+
+
+/**
+ * Get pointers to a 2x2 block of texels, using the texture tile cache.
+ *
+ * \param face  the cube face in 0..5
+ * \param level  the mipmap level
+ * \param x  the x coord of the block's first texel within 2D image
+ * \param y  the y coord of the block's first texel within 2D image
+ * \param out  returns pointers to texels (x,y), (x+1,y), (x,y+1), (x+1,y+1)
+ *
+ * All four come from the one cached tile holding (x,y); no bounds check here.
+ *
+ * XXX maybe move this into lp_tile_cache.c and merge with the
+ * lp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
+ */
+static void
+get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
+                  unsigned face, unsigned level, int x, int y, 
+                  const uint8_t *out[4])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+
+   const struct llvmpipe_cached_tex_tile *tile
+      = lp_get_cached_tex_tile(samp->cache,
+                               tex_tile_address(x, y, 0, face, level));
+
+   y %= TEX_TILE_SIZE;
+   x %= TEX_TILE_SIZE;
+      
+   out[0] = &tile->color[y  ][x  ][0];
+   out[1] = &tile->color[y  ][x+1][0];
+   out[2] = &tile->color[y+1][x  ][0];
+   out[3] = &tile->color[y+1][x+1][0];
+}
+
+static INLINE const uint8_t *
+get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
+                 unsigned face, unsigned level, int x, int y)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+
+   const struct llvmpipe_cached_tex_tile *tile
+      = lp_get_cached_tex_tile(samp->cache,
+                               tex_tile_address(x, y, 0, face, level));
+
+   y %= TEX_TILE_SIZE;
+   x %= TEX_TILE_SIZE;
+
+   return &tile->color[y][x][0];
+}
+
+
+static void
+get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
+                     unsigned face, unsigned level,
+                     int x0, int y0,
+                     int x1, int y1,
+                     const uint8_t *out[4])
+{
+   /*
+    * Look up each corner on its own via get_texel_2d_ptr(), so the four
+    * texels need not share a tile.  Order: (x0,y0) (x1,y0) (x0,y1) (x1,y1).
+    */
+   out[0] = get_texel_2d_ptr(tgsi_sampler, face, level, x0, y0);
+   out[1] = get_texel_2d_ptr(tgsi_sampler, face, level, x1, y0);
+   out[2] = get_texel_2d_ptr(tgsi_sampler, face, level, x0, y1);
+   out[3] = get_texel_2d_ptr(tgsi_sampler, face, level, x1, y1);
+}
+
+static void
+get_texel(const struct tgsi_sampler *tgsi_sampler,
+                 unsigned face, unsigned level, int x, int y, int z,
+                 float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+
+   if (x < 0 || x >= (int) texture->width[level] ||
+       y < 0 || y >= (int) texture->height[level] ||
+       z < 0 || z >= (int) texture->depth[level]) {
+      rgba[0][j] = sampler->border_color[0];
+      rgba[1][j] = sampler->border_color[1];
+      rgba[2][j] = sampler->border_color[2];
+      rgba[3][j] = sampler->border_color[3];
+   }
+   else {
+      const unsigned tx = x % TEX_TILE_SIZE;
+      const unsigned ty = y % TEX_TILE_SIZE;
+      const struct llvmpipe_cached_tex_tile *tile;
+
+      tile = lp_get_cached_tex_tile(samp->cache,
+                                    tex_tile_address(x, y, z, face, level));
+
+      rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]);
+      rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]);
+      rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]);
+      rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]);
+      if (0)
+      {
+         debug_printf("Get texel %f %f %f %f from %s\n",
+                      rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
+                      pf_name(texture->format));
+      }
+   }
+}
+
+
+/**
+ * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
+ * When we sampled the depth texture, the depth value was put into all
+ * RGBA channels.  We look at the red channel here.
+ * \param rgba  quad of (depth) texel values
+ * \param p  texture 'P' components for four pixels in quad
+ * \param j  which pixel in the quad to test [0..3]
+ */
+static INLINE void
+shadow_compare(const struct pipe_sampler_state *sampler,
+               float rgba[NUM_CHANNELS][QUAD_SIZE],
+               const float p[QUAD_SIZE],
+               uint j)
+{
+   int k;
+   switch (sampler->compare_func) {
+   case PIPE_FUNC_LESS:
+      k = p[j] < rgba[0][j];
+      break;
+   case PIPE_FUNC_LEQUAL:
+      k = p[j] <= rgba[0][j];
+      break;
+   case PIPE_FUNC_GREATER:
+      k = p[j] > rgba[0][j];
+      break;
+   case PIPE_FUNC_GEQUAL:
+      k = p[j] >= rgba[0][j];
+      break;
+   case PIPE_FUNC_EQUAL:
+      k = p[j] == rgba[0][j];
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      k = p[j] != rgba[0][j];
+      break;
+   case PIPE_FUNC_ALWAYS:
+      k = 1;
+      break;
+   case PIPE_FUNC_NEVER:
+      k = 0;
+      break;
+   default:
+      k = 0;
+      assert(0);
+      break;
+   }
+
+   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+   rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
+   rgba[3][j] = 1.0F;
+}
+
+
+/**
+ * As above, but do four z/texture comparisons.
+ */
+static INLINE void
+shadow_compare4(const struct pipe_sampler_state *sampler,
+                float rgba[NUM_CHANNELS][QUAD_SIZE],
+                const float p[QUAD_SIZE])
+{
+   int j, k0, k1, k2, k3;
+   float val;
+
+   /* compare four texcoords vs. four texture samples */
+   switch (sampler->compare_func) {
+   case PIPE_FUNC_LESS:
+      k0 = p[0] < rgba[0][0];
+      k1 = p[1] < rgba[0][1];
+      k2 = p[2] < rgba[0][2];
+      k3 = p[3] < rgba[0][3];
+      break;
+   case PIPE_FUNC_LEQUAL:
+      k0 = p[0] <= rgba[0][0];
+      k1 = p[1] <= rgba[0][1];
+      k2 = p[2] <= rgba[0][2];
+      k3 = p[3] <= rgba[0][3];
+      break;
+   case PIPE_FUNC_GREATER:
+      k0 = p[0] > rgba[0][0];
+      k1 = p[1] > rgba[0][1];
+      k2 = p[2] > rgba[0][2];
+      k3 = p[3] > rgba[0][3];
+      break;
+   case PIPE_FUNC_GEQUAL:
+      k0 = p[0] >= rgba[0][0];
+      k1 = p[1] >= rgba[0][1];
+      k2 = p[2] >= rgba[0][2];
+      k3 = p[3] >= rgba[0][3];
+      break;
+   case PIPE_FUNC_EQUAL:
+      k0 = p[0] == rgba[0][0];
+      k1 = p[1] == rgba[0][1];
+      k2 = p[2] == rgba[0][2];
+      k3 = p[3] == rgba[0][3];
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      k0 = p[0] != rgba[0][0];
+      k1 = p[1] != rgba[0][1];
+      k2 = p[2] != rgba[0][2];
+      k3 = p[3] != rgba[0][3];
+      break;
+   case PIPE_FUNC_ALWAYS:
+      k0 = k1 = k2 = k3 = 1;
+      break;
+   case PIPE_FUNC_NEVER:
+      k0 = k1 = k2 = k3 = 0;
+      break;
+   default:
+      k0 = k1 = k2 = k3 = 0;
+      assert(0);
+      break;
+   }
+
+   /* convert four pass/fail values to an intensity in [0,1] */
+   val = 0.25F * (k0 + k1 + k2 + k3);
+
+   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+   for (j = 0; j < 4; j++) {
+      rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
+      rgba[3][j] = 1.0F;
+   }
+}
+
+
+
+static void
+lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+                                    const float s[QUAD_SIZE],
+                                    const float t[QUAD_SIZE],
+                                    const float p[QUAD_SIZE],
+                                    float lodbias,
+                                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   unsigned  j;
+   unsigned level = samp->level;
+   unsigned xpot = 1 << (samp->xpot - level);
+   unsigned ypot = 1 << (samp->ypot - level);
+   unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
+   unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
+      
+   for (j = 0; j < QUAD_SIZE; j++) {
+      int c;
+
+      float u = s[j] * xpot - 0.5F;
+      float v = t[j] * ypot - 0.5F;
+
+      int uflr = util_ifloor(u);
+      int vflr = util_ifloor(v);
+
+      float xw = u - (float)uflr;
+      float yw = v - (float)vflr;
+
+      int x0 = uflr & (xpot - 1);
+      int y0 = vflr & (ypot - 1);
+
+      const uint8_t *tx[4];
+      
+
+      /* Can we fetch all four at once:
+       */
+      if (x0 < xmax && y0 < ymax)
+      {
+         get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
+      }
+      else 
+      {
+         unsigned x1 = (x0 + 1) & (xpot - 1);
+         unsigned y1 = (y0 + 1) & (ypot - 1);
+         get_texel_quad_2d_mt(tgsi_sampler, 0, level, 
+                              x0, y0, x1, y1, tx);
+      }
+
+
+      /* interpolate R, G, B, A */
+      for (c = 0; c < 4; c++) {
+         rgba[c][j] = lerp_2d(xw, yw, 
+                              ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]),
+                              ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c]));
+      }
+   }
+}
+
+
+static void
+lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+                                     const float s[QUAD_SIZE],
+                                     const float t[QUAD_SIZE],
+                                     const float p[QUAD_SIZE],
+                                     float lodbias,
+                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   unsigned  j;
+   unsigned level = samp->level;
+   unsigned xpot = 1 << (samp->xpot - level);
+   unsigned ypot = 1 << (samp->ypot - level);
+
+   for (j = 0; j < QUAD_SIZE; j++) {
+      int c;
+
+      float u = s[j] * xpot;
+      float v = t[j] * ypot;
+
+      int uflr = util_ifloor(u);
+      int vflr = util_ifloor(v);
+
+      int x0 = uflr & (xpot - 1);
+      int y0 = vflr & (ypot - 1);
+
+      const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
+
+      for (c = 0; c < 4; c++) {
+         rgba[c][j] = ubyte_to_float(out[c]);
+      }
+   }
+}
+
+
+static void
+lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
+                                     const float s[QUAD_SIZE],
+                                     const float t[QUAD_SIZE],
+                                     const float p[QUAD_SIZE],
+                                     float lodbias,
+                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   /* Nearest-filtered, clamped fetch from samp->level; p/lodbias unused. */
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const unsigned level = samp->level;
+   /* Size of the level, derived from the log2 sizes kept in samp. */
+   const unsigned size_x = 1 << (samp->xpot - level);
+   const unsigned size_y = 1 << (samp->ypot - level);
+   unsigned quad;
+
+   for (quad = 0; quad < QUAD_SIZE; quad++) {
+      const uint8_t *texel;
+      /* Scale the coordinates to texel space and take the floor. */
+      int col = util_ifloor(s[quad] * size_x);
+      int row = util_ifloor(t[quad] * size_y);
+      int chan;
+
+      /* Clamp the column to [0, size_x - 1]. */
+      if (col < 0)
+         col = 0;
+      else if (col > size_x - 1)
+         col = size_x - 1;
+
+      /* Clamp the row to [0, size_y - 1]. */
+      if (row < 0)
+         row = 0;
+      else if (row > size_y - 1)
+         row = size_y - 1;
+
+      texel = get_texel_2d_ptr(tgsi_sampler, 0, level, col, row);
+
+      /* Convert the four fetched bytes to floats. */
+      for (chan = 0; chan < 4; chan++)
+         rgba[chan][quad] = ubyte_to_float(texel[chan]);
+   }
+}
+
+
+static void
+lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+                                               const float s[QUAD_SIZE],
+                                               const float t[QUAD_SIZE],
+                                               const float p[QUAD_SIZE],
+                                               float lodbias,
+                                               float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   /* Mip-linear wrapper around lp_get_samples_2d_linear_repeat_POT():
+    * samples one level when lambda falls outside the mipmap range, otherwise
+    * samples two adjacent levels and blends them by lambda's fraction.
+    */
+   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
+   const int base_level = (int)lambda;
+   float fine[4][4];
+   float coarse[4][4];
+   float weight;
+   int chan, quad;
+
+   if (lambda < 0.0) {
+      /* Below the first level: sample level 0 only. */
+      samp->level = 0;
+      lp_get_samples_2d_linear_repeat_POT(tgsi_sampler, s, t, p, 0, rgba);
+      return;
+   }
+
+   if (base_level >= texture->last_level) {
+      /* At or past the last level: sample the last level only. */
+      samp->level = texture->last_level;
+      lp_get_samples_2d_linear_repeat_POT(tgsi_sampler, s, t, p, 0, rgba);
+      return;
+   }
+
+   /* samp->level is set before each call to select the level sampled. */
+   samp->level = base_level;
+   lp_get_samples_2d_linear_repeat_POT(tgsi_sampler, s, t, p, 0, fine);
+   samp->level = base_level + 1;
+   lp_get_samples_2d_linear_repeat_POT(tgsi_sampler, s, t, p, 0, coarse);
+
+   /* Blend the two levels by the fractional part of lambda. */
+   weight = lambda - base_level;
+   for (quad = 0; quad < QUAD_SIZE; quad++)
+      for (chan = 0; chan < 4; chan++)
+         rgba[chan][quad] = lerp(weight, fine[chan][quad], coarse[chan][quad]);
+}
+
+/**
+ * Common code for sampling 1D/2D/cube textures; 'faces' supplies the face
+ * passed to get_texel() for each quad pixel.  Could probably extend for 3D...
+ */
+static void
+lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
+                         const float s[QUAD_SIZE],
+                         const float t[QUAD_SIZE],
+                         const float p[QUAD_SIZE],
+                         float lodbias,
+                         float rgba[NUM_CHANNELS][QUAD_SIZE],
+                         const unsigned faces[4])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+   unsigned level0, level1, j, imgFilter;
+   int width, height;
+   float levelBlend; /* lerp weight between the level0 and level1 results */
+
+   choose_mipmap_levels(tgsi_sampler, s, t, p, 
+                        lodbias,
+                        &level0, &level1, &levelBlend, &imgFilter);
+
+   assert(sampler->normalized_coords);
+
+   width = texture->width[level0]; /* texcoords are resolved against level0's size */
+   height = texture->height[level0];
+
+   assert(width > 0);
+
+   switch (imgFilter) {
+   case PIPE_TEX_FILTER_NEAREST:
+      {
+         int x[4], y[4];
+         nearest_texcoord_4(sampler->wrap_s, s, width, x);
+         nearest_texcoord_4(sampler->wrap_t, t, height, y);
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+               shadow_compare(sampler, rgba, p, j); /* NOTE(review): the 1D and cube wrappers pass p == NULL -- confirm this is never reached with it */
+            }
+
+            if (level0 != level1) {
+               /* get texels from second mipmap level and blend */
+               float rgba2[4][4];
+               unsigned c;
+               x[j] /= 2; /* level0 coordinates halved, reused for level1 */
+               y[j] /= 2;
+               get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
+                         rgba2, j);
+               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+                  shadow_compare(sampler, rgba2, p, j);
+               }
+
+               for (c = 0; c < NUM_CHANNELS; c++) {
+                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+               }
+            }
+         }
+      }
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+   case PIPE_TEX_FILTER_ANISO: /* handled exactly like LINEAR here */
+      {
+         int x0[4], y0[4], x1[4], y1[4];
+         float xw[4], yw[4]; /* weights */
+
+         linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
+         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            float tx[4][4]; /* texels */
+            int c;
+            get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
+            get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
+            get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
+            get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+               shadow_compare4(sampler, tx, p);
+            }
+
+            /* interpolate R, G, B, A */
+            for (c = 0; c < 4; c++) {
+               rgba[c][j] = lerp_2d(xw[j], yw[j],
+                                    tx[c][0], tx[c][1],
+                                    tx[c][2], tx[c][3]);
+            }
+
+            if (level0 != level1) {
+               /* get texels from second mipmap level and blend */
+               float rgba2[4][4]; /* only column j is written and read */
+
+               /* XXX: This is incorrect -- will often end up with (x0
+                *  == x1 && y0 == y1), meaning that we fetch the same
+                *  texel four times and linearly interpolate between
+                *  identical values.  The correct approach would be to
+                *  call linear_texcoord again for the second level.
+                */
+               x0[j] /= 2;
+               y0[j] /= 2;
+               x1[j] /= 2;
+               y1[j] /= 2;
+               get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
+               get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
+               get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
+               get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
+               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+                  shadow_compare4(sampler, tx, p);
+               }
+
+               /* interpolate R, G, B, A */
+               for (c = 0; c < 4; c++) {
+                  rgba2[c][j] = lerp_2d(xw[j], yw[j],
+                                        tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+               }
+
+               for (c = 0; c < NUM_CHANNELS; c++) {
+                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+               }
+            }
+         }
+      }
+      break;
+   default:
+      assert(0); /* unexpected filter value from choose_mipmap_levels() */
+   }
+}
+
+
+static INLINE void
+lp_get_samples_1d(struct tgsi_sampler *sampler,
+                  const float s[QUAD_SIZE],
+                  const float t[QUAD_SIZE],
+                  const float p[QUAD_SIZE],
+                  float lodbias,
+                  float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   /* 1D lookup: the common 2D path with t forced to zero, face 0, no p. */
+   static const float zero_t[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+   static const unsigned face_zero[4] = {0u, 0u, 0u, 0u};
+
+   lp_get_samples_2d_common(sampler, s, zero_t, NULL, lodbias, rgba, face_zero);
+}
+
+
+static INLINE void
+lp_get_samples_2d(struct tgsi_sampler *sampler,
+                  const float s[QUAD_SIZE],
+                  const float t[QUAD_SIZE],
+                  const float p[QUAD_SIZE],
+                  float lodbias,
+                  float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   /* Plain 2D lookup: every quad pixel samples face 0. */
+   static const unsigned face_zero[4] = {0u, 0u, 0u, 0u};
+   lp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, face_zero);
+}
+
+
+static INLINE void
+lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
+                  const float s[QUAD_SIZE],
+                  const float t[QUAD_SIZE],
+                  const float p[QUAD_SIZE],
+                  float lodbias,
+                  float rgba[NUM_CHANNELS][QUAD_SIZE])
+{ /* Sample a 3D texture at (s, t, p) for each quad pixel, blending two mipmap levels when they differ. */
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+   /* level0/level1, levelBlend and imgFilter are filled in by choose_mipmap_levels() */
+   unsigned level0, level1, j, imgFilter;
+   int width, height, depth;
+   float levelBlend;
+   const uint face = 0; /* 3D textures always fetch from face 0 here */
+
+   choose_mipmap_levels(tgsi_sampler, s, t, p, 
+                        lodbias,
+                        &level0, &level1, &levelBlend, &imgFilter);
+
+   assert(sampler->normalized_coords);
+
+   width = texture->width[level0]; /* texcoords are resolved against level0's size */
+   height = texture->height[level0];
+   depth = texture->depth[level0];
+
+   assert(width > 0);
+   assert(height > 0);
+   assert(depth > 0);
+
+   switch (imgFilter) {
+   case PIPE_TEX_FILTER_NEAREST:
+      {
+         int x[4], y[4], z[4];
+         nearest_texcoord_4(sampler->wrap_s, s, width, x);
+         nearest_texcoord_4(sampler->wrap_t, t, height, y);
+         nearest_texcoord_4(sampler->wrap_r, p, depth, z);
+         for (j = 0; j < QUAD_SIZE; j++) {
+            get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
+            if (level0 != level1) {
+               /* get texels from second mipmap level and blend */
+               float rgba2[4][4];
+               unsigned c;
+               x[j] /= 2; /* level0 coordinates halved, reused for level1 */
+               y[j] /= 2;
+               z[j] /= 2;
+               get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
+               for (c = 0; c < NUM_CHANNELS; c++) {
+                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); /* level0 first, matching the other mip blends */
+               }
+            }
+         }
+      }
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+   case PIPE_TEX_FILTER_ANISO: /* handled exactly like LINEAR here */
+      {
+         int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
+         float xw[4], yw[4], zw[4]; /* interpolation weights */
+         linear_texcoord_4(sampler->wrap_s, s, width,  x0, x1, xw);
+         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+         linear_texcoord_4(sampler->wrap_r, p, depth,  z0, z1, zw);
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int c;
+            float tx0[4][4], tx1[4][4]; /* texels from the z0 and z1 slices */
+            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
+            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
+            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
+            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
+            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
+            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
+            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
+            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
+
+            /* interpolate R, G, B, A */
+            for (c = 0; c < 4; c++) {
+               rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+                                    tx0[c][0], tx0[c][1],
+                                    tx0[c][2], tx0[c][3],
+                                    tx1[c][0], tx1[c][1],
+                                    tx1[c][2], tx1[c][3]);
+            }
+
+            if (level0 != level1) {
+               /* get texels from second mipmap level and blend */
+               float rgba2[4][4]; /* only column j is written and read */
+               x0[j] /= 2;
+               y0[j] /= 2;
+               z0[j] /= 2;
+               x1[j] /= 2;
+               y1[j] /= 2;
+               z1[j] /= 2;
+               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
+               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
+               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
+               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
+               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
+               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
+               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
+               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
+
+               /* interpolate R, G, B, A */
+               for (c = 0; c < 4; c++) {
+                  rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+                                        tx0[c][0], tx0[c][1],
+                                        tx0[c][2], tx0[c][3],
+                                        tx1[c][0], tx1[c][1],
+                                        tx1[c][2], tx1[c][3]);
+               }
+
+               /* blend mipmap levels */
+               for (c = 0; c < NUM_CHANNELS; c++) {
+                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+               }
+            }
+         }
+      }
+      break;
+   default:
+      assert(0); /* unexpected filter value from choose_mipmap_levels() */
+   }
+}
+
+
+static void
+lp_get_samples_cube(struct tgsi_sampler *sampler,
+                    const float s[QUAD_SIZE],
+                    const float t[QUAD_SIZE],
+                    const float p[QUAD_SIZE],
+                    float lodbias,
+                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   /* choose_cube_face() yields a face plus replacement s/t per pixel. */
+   float face_s[4], face_t[4];
+   unsigned face_idx[QUAD_SIZE];
+   unsigned quad;
+   for (quad = 0; quad < QUAD_SIZE; quad++)
+      face_idx[quad] = choose_cube_face(s[quad], t[quad], p[quad], &face_s[quad], &face_t[quad]);
+   lp_get_samples_2d_common(sampler, face_s, face_t, NULL, lodbias, rgba, face_idx);
+}
+
+
+static void
+lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
+                    const float s[QUAD_SIZE],
+                    const float t[QUAD_SIZE],
+                    const float p[QUAD_SIZE],
+                    float lodbias,
+                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{ /* Sampler used for unnormalized texcoords: uses the *_unnorm_4 coordinate helpers and a single mipmap level. */
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+   const uint face = 0;
+   unsigned level0, level1, j, imgFilter;
+   int width, height;
+   float levelBlend; /* filled in by choose_mipmap_levels() but not used below */
+
+   choose_mipmap_levels(tgsi_sampler, s, t, p, 
+                        lodbias,
+                        &level0, &level1, &levelBlend, &imgFilter);
+
+   /* texture RECTS cannot be mipmapped */
+   assert(level0 == level1);
+
+   width = texture->width[level0];
+   height = texture->height[level0];
+
+   assert(width > 0);
+
+   switch (imgFilter) {
+   case PIPE_TEX_FILTER_NEAREST:
+      {
+         int x[4], y[4];
+         nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
+         nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
+         for (j = 0; j < QUAD_SIZE; j++) {
+            get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+               shadow_compare(sampler, rgba, p, j);
+            }
+         }
+      }
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+   case PIPE_TEX_FILTER_ANISO: /* handled exactly like LINEAR here */
+      {
+         int x0[4], y0[4], x1[4], y1[4];
+         float xw[4], yw[4]; /* weights */
+         linear_texcoord_unnorm_4(sampler->wrap_s, s, width,  x0, x1, xw);
+         linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
+         for (j = 0; j < QUAD_SIZE; j++) {
+            float tx[4][4]; /* texels */
+            int c;
+            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
+            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
+            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
+            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+               shadow_compare4(sampler, tx, p);
+            }
+            for (c = 0; c < 4; c++) { /* interpolate R, G, B, A */
+               rgba[c][j] = lerp_2d(xw[j], yw[j],
+                                    tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+            }
+         }
+      }
+      break;
+   default:
+      assert(0); /* unexpected filter value from choose_mipmap_levels() */
+   }
+}
+
+
+/**
+ * Fallback sampler for error conditions: writes 1.0 to every rgba entry.
+ */
+static INLINE void
+lp_get_samples_null(struct tgsi_sampler *tgsi_sampler,
+                    const float s[QUAD_SIZE],
+                    const float t[QUAD_SIZE],
+                    const float p[QUAD_SIZE],
+                    float lodbias,
+                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   int chan = 0;
+
+   for (; chan < 4; chan++) {
+      rgba[chan][0] = rgba[chan][1] = rgba[chan][2] = rgba[chan][3] = 1.0f;
+   }
+}
+
+/**
+ * Called via tgsi_sampler::get_samples() when using a sampler for the
+ * first time.  Determine the actual sampler function (generic per-target
+ * path or a 2D power-of-two fast path), link it in and call it.
+ */
+void
+lp_get_samples(struct tgsi_sampler *tgsi_sampler,
+               const float s[QUAD_SIZE],
+               const float t[QUAD_SIZE],
+               const float p[QUAD_SIZE],
+               float lodbias,
+               float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+
+   /* Default to the 'undefined' case:
+    */
+   tgsi_sampler->get_samples = lp_get_samples_null;
+
+   if (!texture) {
+      assert(0);                /* is this legal?? */
+      goto out;
+   }
+
+   if (!sampler->normalized_coords) { /* unnormalized coords always take the rect path */
+      assert (texture->target == PIPE_TEXTURE_2D);
+      tgsi_sampler->get_samples = lp_get_samples_rect;
+      goto out;
+   }
+
+   switch (texture->target) { /* generic sampler for the texture target */
+   case PIPE_TEXTURE_1D:
+      tgsi_sampler->get_samples = lp_get_samples_1d;
+      break;
+   case PIPE_TEXTURE_2D:
+      tgsi_sampler->get_samples = lp_get_samples_2d;
+      break;
+   case PIPE_TEXTURE_3D:
+      tgsi_sampler->get_samples = lp_get_samples_3d;
+      break;
+   case PIPE_TEXTURE_CUBE:
+      tgsi_sampler->get_samples = lp_get_samples_cube;
+      break;
+   default:
+      assert(0); /* unknown target: the null sampler stays linked in */
+      break;
+   }
+
+   /* Do this elsewhere: */
+   samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
+   samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
+
+   /* Try to hook in a faster sampler.  Ultimately we'll have to
+    * code-generate these.  The *_POT paths derive the level size from
+    * xpot/ypot alone, so only use them for power-of-two textures.
+    */
+   if (texture->target == PIPE_TEXTURE_2D &&
+       texture->width[0] == (1u << samp->xpot) &&
+       texture->height[0] == (1u << samp->ypot) &&
+       sampler->min_img_filter == sampler->mag_img_filter &&
+       sampler->wrap_s == sampler->wrap_t &&
+       sampler->compare_mode == FALSE && sampler->normalized_coords)
+   {
+      if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+         samp->level = CLAMP((int) sampler->min_lod,
+                             0, (int) texture->last_level); /* fixed level read by the non-mipmapped fast paths */
+
+         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
+            switch (sampler->min_img_filter) {
+            case PIPE_TEX_FILTER_NEAREST:
+               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT;
+               break;
+            case PIPE_TEX_FILTER_LINEAR:
+               tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT;
+               break;
+            default:
+               break; /* keep the generic 2D sampler */
+            }
+         } 
+         else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) {
+            switch (sampler->min_img_filter) {
+            case PIPE_TEX_FILTER_NEAREST:
+               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT;
+               break;
+            default:
+               break; /* keep the generic 2D sampler */
+            }
+         }
+      }
+      else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
+            switch (sampler->min_img_filter) {
+            case PIPE_TEX_FILTER_LINEAR:
+               tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT;
+               break;
+            default:
+               break; /* keep the generic 2D sampler */
+            }
+         } 
+      }
+   }
+   else if (0) { /* disabled debug aid: dumps why the fast path was not taken */
+      _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n",
+                    texture->target, PIPE_TEXTURE_2D,
+                    sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
+                    sampler->min_img_filter, sampler->mag_img_filter,
+                    sampler->wrap_s, sampler->wrap_t,
+                    sampler->compare_mode, FALSE,
+                    sampler->normalized_coords, TRUE);
+   }
+
+out:
+   tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); /* run the sampler that was just linked in */
+}
+
+
+void PIPE_CDECL
+lp_fetch_texel_soa( struct tgsi_sampler **samplers,
+                    uint32_t unit,
+                    float *store )
+{ /* Sample through samplers[unit]: coordinates are read from store and the texels are written back over it. */
+   struct tgsi_sampler *sampler = samplers[unit];
+
+#if 0
+   uint j;
+
+   debug_printf("%s sampler: %p (%p) store: %p\n",
+                __FUNCTION__,
+                sampler, *sampler,
+                store );
+
+   debug_printf("lodbias %f\n", store[12]);
+
+   for (j = 0; j < 4; j++)
+      debug_printf("sample %d texcoord %f %f\n",
+                   j,
+                   store[0+j],
+                   store[4+j]);
+#endif
+
+   {
+      float rgba[NUM_CHANNELS][QUAD_SIZE];
+      sampler->get_samples(sampler,
+                           &store[0], /* s */
+                           &store[4], /* t */
+                           &store[8], /* p */
+                           0.0f, /*store[12],  lodbias */
+                           rgba);
+      memcpy(store, rgba, sizeof rgba); /* results overwrite the coordinates */
+   }
+
+#if 0
+   for (j = 0; j < 4; j++)
+      debug_printf("sample %d result %f %f %f %f\n",
+                   j,
+                   store[0+j],
+                   store[4+j],
+                   store[8+j],
+                   store[12+j]);
+#endif
+}
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_tgsi.h"
+
+
+struct lp_c_sampler_soa
+{
+   struct lp_build_sampler_soa base; /* first member: emit_fetch_texel casts the base pointer back to this struct */
+
+   LLVMValueRef context_ptr; /* set by lp_c_sampler_soa_create() */
+
+   LLVMValueRef samplers_ptr; /* built on the first emit_fetch_texel call */
+
+   /** Coords/texels store, allocated on the first emit_fetch_texel call */
+   LLVMValueRef store_ptr;
+};
+
+
+static void
+lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{ /* Installed as base.destroy by lp_c_sampler_soa_create(); releases the object. */
+   FREE(sampler);
+}
+
+
+static void
+lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
+                                  LLVMBuilderRef builder,
+                                  union lp_type type,
+                                  unsigned unit,
+                                  unsigned num_coords,
+                                  const LLVMValueRef *coords,
+                                  LLVMValueRef lodbias,
+                                  LLVMValueRef *texel)
+{ /* Emit a call to the "fetch_texel" intrinsic; coords go in and texels come out through store_ptr. type and lodbias are unused. */
+   struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
+   LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
+   LLVMValueRef args[3];
+   unsigned i;
+
+   if(!sampler->samplers_ptr) /* built once, then reused by later calls */
+      sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
+
+   if(!sampler->store_ptr) /* NOTE(review): the alloca is emitted wherever the builder currently is -- confirm it dominates later uses */
+      sampler->store_ptr = LLVMBuildArrayAlloca(builder,
+                                            vec_type,
+                                            LLVMConstInt(LLVMInt32Type(), 4, 0),
+                                            "texel_store");
+
+   for (i = 0; i < num_coords; i++) { /* NOTE(review): slots >= num_coords are not written here -- confirm the callee ignores them */
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+      LLVMBuildStore(builder, coords[i], coord_ptr);
+   }
+
+   args[0] = sampler->samplers_ptr;
+   args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+   args[2] = sampler->store_ptr;
+
+   lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
+
+   for (i = 0; i < NUM_CHANNELS; ++i) { /* read each channel's vector back from the same store */
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+      texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
+   }
+}
+
+
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr)
+{
+   /* Returns the base interface of a new C-callback sampler, or NULL. */
+   struct lp_c_sampler_soa *c_sampler = CALLOC_STRUCT(lp_c_sampler_soa);
+
+   if (c_sampler != NULL) {
+      c_sampler->context_ptr = context_ptr;
+      c_sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
+      c_sampler->base.destroy = lp_c_sampler_soa_destroy;
+      return &c_sampler->base;
+   }
+
+   return NULL;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
new file mode 100644
index 0000000000..7d31705d01
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
@@ -0,0 +1,196 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ *
+ * This file is nothing more than ugly glue between three largely independent
+ * entities:
+ * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
+ * - texture sampling code generation (i.e., lp_build_sample_soa)
+ * - LLVM pipe driver
+ *
+ * All interesting code is in the functions mentioned above. There is really
+ * nothing to see here.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_sample.h"
+#include "lp_bld_tgsi.h"
+#include "lp_state.h"
+#include "lp_tex_sample.h"
+
+
+/**
+ * This provides the bridge between the sampler state stored in lp_jit_context
+ * and lp_jit_texture and the sampler code generator. It provides the
+ * texture layout information required by the texture sampler code generator
+ * in terms of the state stored in lp_jit_context and lp_jit_texture at run time.
+ */
+struct llvmpipe_sampler_dynamic_state
+{
+   struct lp_sampler_dynamic_state base; /* callbacks filled in by lp_llvm_sampler_soa_create() */
+
+   const struct lp_sampler_static_state *static_state; /* indexed by sampler unit */
+
+   LLVMValueRef context_ptr; /* base pointer for the GEP in lp_llvm_texture_member() */
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ */
+struct lp_llvm_sampler_soa
+{
+   struct lp_build_sampler_soa base; /* interface returned by lp_llvm_sampler_soa_create() */
+
+   struct llvmpipe_sampler_dynamic_state dynamic_state; /* its base member is handed to lp_build_sample_soa() */
+};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure for 'unit'.
+ * Emits a GEP from context_ptr plus a load, and returns the loaded value.
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+lp_llvm_texture_member(struct lp_sampler_dynamic_state *base,
+                       LLVMBuilderRef builder,
+                       unsigned unit,
+                       unsigned member_index,
+                       const char *member_name)
+{
+   struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base;
+   LLVMValueRef indices[4];
+   LLVMValueRef ptr;
+   LLVMValueRef res;
+
+   assert(unit < PIPE_MAX_SAMPLERS);
+
+   /* context[0] */
+   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   /* context[0].textures */
+   indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0);
+   /* context[0].textures[unit] */
+   indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+   /* context[0].textures[unit].member */
+   indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+
+   ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); /* address of the member */
+
+   res = LLVMBuildLoad(builder, ptr, ""); /* the member's value */
+
+   lp_build_name(res, "context.texture%u.%s", unit, member_name); /* label the loaded value with unit and member name */
+
+   return res;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to fetch
+ * the members of lp_jit_texture to fulfill the sampler code generator requests.
+ *
+ * This complexity is the price we have to pay to keep the texture sampler code
+ * generator a reusable module without dependencies on llvmpipe internals.
+ */
+#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \
+   static LLVMValueRef \
+   lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \
+                            LLVMBuilderRef builder, \
+                            unsigned unit) \
+   { \
+      return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \
+   }
+
+
+LP_LLVM_TEXTURE_MEMBER(width,    LP_JIT_TEXTURE_WIDTH) /* defines lp_llvm_texture_width() */
+LP_LLVM_TEXTURE_MEMBER(height,   LP_JIT_TEXTURE_HEIGHT) /* defines lp_llvm_texture_height() */
+LP_LLVM_TEXTURE_MEMBER(stride,   LP_JIT_TEXTURE_STRIDE) /* defines lp_llvm_texture_stride() */
+LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) /* defines lp_llvm_texture_data_ptr() */
+
+
+static void
+lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{ /* Installed as base.destroy by lp_llvm_sampler_soa_create(); releases the object. */
+   FREE(sampler);
+}
+
+
+static void
+lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base,
+                                     LLVMBuilderRef builder,
+                                     union lp_type type,
+                                     unsigned unit,
+                                     unsigned num_coords,
+                                     const LLVMValueRef *coords,
+                                     LLVMValueRef lodbias,
+                                     LLVMValueRef *texel)
+{ /* Forward the request to lp_build_sample_soa() with this unit's static state and the dynamic-state callbacks. */
+   struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
+
+   assert(unit < PIPE_MAX_SAMPLERS);
+
+   lp_build_sample_soa(builder,
+                       &sampler->dynamic_state.static_state[unit], /* static state for this unit */
+                       &sampler->dynamic_state.base, /* callbacks that fetch lp_jit_texture members */
+                       type,
+                       unit,
+                       num_coords,
+                       coords,
+                       lodbias,
+                       texel);
+}
+
+
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+                           LLVMValueRef context_ptr)
+{
+   /* Returns the base interface of a new LLVM sampler, or NULL. */
+   struct lp_llvm_sampler_soa *llvm_sampler = CALLOC_STRUCT(lp_llvm_sampler_soa);
+   struct llvmpipe_sampler_dynamic_state *dyn;
+
+   if (llvm_sampler == NULL)
+      return NULL;
+   dyn = &llvm_sampler->dynamic_state;
+   dyn->context_ptr = context_ptr;
+   dyn->static_state = static_state;
+   dyn->base.data_ptr = lp_llvm_texture_data_ptr;
+   dyn->base.stride = lp_llvm_texture_stride;
+   dyn->base.height = lp_llvm_texture_height;
+   dyn->base.width = lp_llvm_texture_width;
+   llvm_sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel;
+   llvm_sampler->base.destroy = lp_llvm_sampler_soa_destroy;
+   return &llvm_sampler->base;
+}
+
-- 
cgit v1.2.3


From 79f48c9f9e739a1f6b0810072e41bc826f2b789d Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Mon, 7 Sep 2009 15:16:25 +0100
Subject: scons: Don't set LLVM_VERSION if one of the llvm-config calls fails.

Ubuntu 8.10 has llvm-config version 2.2, which doesn't have
nativecodegen. This triggers an exception.
---
 scons/llvm.py                           | 18 ++++++++++--------
 src/gallium/drivers/llvmpipe/SConscript |  2 +-
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe/SConscript')

diff --git a/scons/llvm.py b/scons/llvm.py
index 702f1e354f..46a8d829ca 100644
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -56,15 +56,17 @@ def generate(env):
         env.PrependENVPath('PATH', llvm_bin_dir)
 
     if env.Detect('llvm-config'):
-        try:
-            env['LLVM_VERSION'] = env.backtick('llvm-config --version')
-        except AttributeError:
-            env['LLVM_VERSION'] = 'X.X'
+        version = env.backtick('llvm-config --version').rstrip()
 
-        env.ParseConfig('llvm-config --cppflags')
-        env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
-        env.ParseConfig('llvm-config --ldflags')
-        env['LINK'] = env['CXX']
+        try:
+            env.ParseConfig('llvm-config --cppflags')
+            env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
+            env.ParseConfig('llvm-config --ldflags')
+        except OSError:
+            print 'llvm-config version %s failed' % version
+        else:
+            env['LINK'] = env['CXX']
+            env['LLVM_VERSION'] = version
 
 def exists(env):
     return True
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index ac1b5d6d1d..dea4b703c4 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -3,7 +3,7 @@ Import('*')
 env = env.Clone()
 
 env.Tool('llvm')
-if 'LLVM_VERSION' not in env:
+if env.has_key('LLVM_VERSION') is False:
     print 'warning: LLVM not found: not building llvmpipe'
     Return()
 
-- 
cgit v1.2.3