From 2a45972fb2ba12a6561e5cba84d167f4c30566d4 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 27 Jul 2010 13:08:01 +0100
Subject: gallivm: Add lp_build_select_bitwise() alternative to
 lp_build_select()

When

  (mask & a) | (~mask & b)

is meant instead of

   mask ? a : b
---
 src/gallium/auxiliary/gallivm/lp_bld_logic.c | 67 +++++++++++++++++++---------
 src/gallium/auxiliary/gallivm/lp_bld_logic.h |  5 +++
 2 files changed, 50 insertions(+), 22 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 39854e43b1..ab4ddb81c4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -362,10 +362,53 @@ lp_build_cmp(struct lp_build_context *bld,
 }
 
 
+/**
+ * Return (mask & a) | (~mask & b);
+ */
+LLVMValueRef
+lp_build_select_bitwise(struct lp_build_context *bld,
+                        LLVMValueRef mask,
+                        LLVMValueRef a,
+                        LLVMValueRef b)
+{
+   struct lp_type type = bld->type;
+   LLVMValueRef res;
+
+   if (a == b) {
+      return a;
+   }
+
+   if(type.floating) {
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+   }
+
+   a = LLVMBuildAnd(bld->builder, a, mask, "");
+
+   /* This often gets translated to PANDN, but sometimes the NOT is
+    * pre-computed and stored in another constant. The best strategy depends
+    * on available registers, so it is not a big deal -- hopefully LLVM makes
+    * the right decision given the rest of the program.
+    */
+   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+
+   res = LLVMBuildOr(bld->builder, a, b, "");
+
+   if(type.floating) {
+      LLVMTypeRef vec_type = lp_build_vec_type(type);
+      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+   }
+
+   return res;
+}
+
+
 /**
  * Return mask ? a : b;
  *
- * mask is a bitwise mask, composed of 0 or ~0 for each element.
+ * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
+ * will yield unpredictable results.
  */
 LLVMValueRef
 lp_build_select(struct lp_build_context *bld,
@@ -424,27 +467,7 @@ lp_build_select(struct lp_build_context *bld,
       }
    }
    else {
-      if(type.floating) {
-         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-         b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
-      }
-
-      a = LLVMBuildAnd(bld->builder, a, mask, "");
-
-      /* This often gets translated to PANDN, but sometimes the NOT is
-       * pre-computed and stored in another constant. The best strategy depends
-       * on available registers, so it is not a big deal -- hopefully LLVM does
-       * the right decision attending the rest of the program.
-       */
-      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
-
-      res = LLVMBuildOr(bld->builder, a, b, "");
-
-      if(type.floating) {
-         LLVMTypeRef vec_type = lp_build_vec_type(type);
-         res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
-      }
+      res = lp_build_select_bitwise(bld, mask, a, b);
    }
 
    return res;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index 29f9fc3b20..4e7b4c9938 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -63,6 +63,11 @@ lp_build_cmp(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
 
+LLVMValueRef
+lp_build_select_bitwise(struct lp_build_context *bld,
+                        LLVMValueRef mask,
+                        LLVMValueRef a,
+                        LLVMValueRef b);
 
 LLVMValueRef
 lp_build_select(struct lp_build_context *bld,
-- 
cgit v1.2.3


From e3d2ebac115f7b7899664fefc2652fb829acfa27 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Jul 2010 18:37:46 +0100
Subject: llvmpipe: Avoid corrupting the FPU stack with MMX instructions on
 32bit OSes.

Unfortunately LLVM doesn't emit EMMS itself, and there is no
easy/effective way to disable MMX.

http://llvm.org/bugs/show_bug.cgi?id=3287
---
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 24 ++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_state_fs.c    |  5 +++++
 2 files changed, 29 insertions(+)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 5a9488b5f7..072408b268 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -39,6 +39,7 @@
 #include <llvm/Target/TargetOptions.h>
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
 #include <llvm/ExecutionEngine/JITEventListener.h>
+#include <llvm/Support/CommandLine.h>
 
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
@@ -141,4 +142,27 @@ lp_set_target_options(void)
 #if 0
    llvm::UnsafeFPMath = true;
 #endif
+
+#if 0
+   /*
+    * LLVM will generate MMX instructions for vectors <= 64 bits, leading to
+    * inefficient code, and in 32bit systems, to the corruption of the FPU
+    * stack given that it expects the user to generate the EMMS instructions.
+    *
+    * See also:
+    * - http://llvm.org/bugs/show_bug.cgi?id=3287
+    * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
+    *
+    * XXX: Unfortunately this is not working.
+    */
+   static boolean first = FALSE;
+   if (first) {
+      static const char* options[] = {
+         "prog",
+         "-disable-mmx"
+      };
+      llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
+      first = FALSE;
+   }
+#endif
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 5953d690a4..dbcc286417 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -676,6 +676,11 @@ generate_fragment(struct llvmpipe_context *lp,
 		     color_ptr);
    }
 
+#ifdef PIPE_ARCH_X86
+   /* Avoid corrupting the FPU stack on 32bit OSes. */
+   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
    LLVMBuildRetVoid(builder);
 
    LLVMDisposeBuilder(builder);
-- 
cgit v1.2.3


From 5f90e76c54bbf4456c977b3cbca450d7a570179e Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 29 Jul 2010 19:18:48 +0100
Subject: gallivm: fix lp_build_sample_offset() crash when indexing a 1-D
 texture

If y==NULL and y_stride==NULL it means the texture is 1D.  Return
zero for out_i and the offset instead of garbage.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 0fd014ab9b..655c4fb901 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -179,6 +179,9 @@ lp_build_sample_offset(struct lp_build_context *bld,
       LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
       offset = lp_build_add(bld, offset, y_offset);
    }
+   else {
+      *out_j = bld->zero;
+   }
 
    if (z && z_stride) {
       LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
-- 
cgit v1.2.3


From 02da55676bd483df5e8540e079f53c7f41178025 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 29 Jul 2010 19:38:02 +0100
Subject: Revert "gallivm: fix lp_build_sample_offset() crash when indexing a
 1-D texture"

This reverts commit 5f90e76c54bbf4456c977b3cbca450d7a570179e.

Bad cherry-pick.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 655c4fb901..0fd014ab9b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -179,9 +179,6 @@ lp_build_sample_offset(struct lp_build_context *bld,
       LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
       offset = lp_build_add(bld, offset, y_offset);
    }
-   else {
-      *out_j = bld->zero;
-   }
 
    if (z && z_stride) {
       LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
-- 
cgit v1.2.3


From 8f3fe7e2f0a3ce1a5c45fd204b0105f3b501e641 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 29 Jul 2010 12:03:33 -0600
Subject: gallivm: added lp_build_assert() function to make assertions in LLVM
 code

---
 src/gallium/auxiliary/Makefile                |   1 +
 src/gallium/auxiliary/SConscript              |   1 +
 src/gallium/auxiliary/gallivm/lp_bld_assert.c | 101 ++++++++++++++++++++++++++
 src/gallium/auxiliary/gallivm/lp_bld_assert.h |  41 +++++++++++
 4 files changed, 144 insertions(+)
 create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_assert.c
 create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_assert.h

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index dcebab7c0f..843b72bc38 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -149,6 +149,7 @@ C_SOURCES = \
 
 GALLIVM_SOURCES = \
         gallivm/lp_bld_arit.c \
+        gallivm/lp_bld_assert.c \
         gallivm/lp_bld_const.c \
         gallivm/lp_bld_conv.c \
         gallivm/lp_bld_debug.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 8381ae5b3e..1f09198721 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -198,6 +198,7 @@ source = [
 if env['llvm']:
     source += [
     'gallivm/lp_bld_arit.c',
+    'gallivm/lp_bld_assert.c',
     'gallivm/lp_bld_const.c',
     'gallivm/lp_bld_conv.c',
     'gallivm/lp_bld_debug.c',
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c
new file mode 100644
index 0000000000..f2ebd868a8
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "lp_bld_assert.h"
+#include "lp_bld_init.h"
+#include "lp_bld_printf.h"
+
+
+/**
+ * A call to lp_build_assert() will build a function call to this function.
+ */
+static void
+lp_assert(int condition, const char *msg)
+{
+   if (!condition) {
+      debug_printf("LLVM assertion '%s' failed!\n", msg);
+      assert(condition);
+   }
+}
+
+
+
+/**
+ * lp_build_assert.
+ *
+ * Build an assertion in LLVM IR by building a function call to the
+ * lp_assert() function above.
+ *
+ * \param condition should be an 'i1' or 'i32' value
+ * \param msg  a string to print if the assertion fails.
+ */
+LLVMValueRef
+lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
+                const char *msg)
+{
+   LLVMModuleRef module;
+   LLVMTypeRef arg_types[2];
+   LLVMValueRef msg_string, assert_func, params[2], r;
+
+   module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
+                            LLVMGetInsertBlock(builder)));
+
+   msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1);
+
+   arg_types[0] = LLVMInt32Type();
+   arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+
+   /* lookup the lp_assert function */
+   assert_func = LLVMGetNamedFunction(module, "lp_assert");
+
+   /* Create the assertion function if not found */
+   if (!assert_func) {
+      LLVMTypeRef func_type =
+         LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0);
+
+      assert_func = LLVMAddFunction(module, "lp_assert", func_type);
+      LLVMSetFunctionCallConv(assert_func, LLVMCCallConv);
+      LLVMSetLinkage(assert_func, LLVMExternalLinkage);
+      LLVMAddGlobalMapping(lp_build_engine, assert_func,
+                           func_to_pointer((func_pointer)lp_assert));
+   }
+   assert(assert_func);
+
+   /* build function call param list */
+   params[0] = LLVMBuildZExt(builder, condition, arg_types[0], "");
+   params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], "");
+
+   /* check arg types */
+   assert(LLVMTypeOf(params[0]) == arg_types[0]);
+   assert(LLVMTypeOf(params[1]) == arg_types[1]);
+
+   r = LLVMBuildCall(builder, assert_func, params, 2, "");
+
+   return r;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h
new file mode 100644
index 0000000000..ddd879dc2c
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_ASSERT_H
+#define LP_BLD_ASSERT_H
+
+
+#include "lp_bld.h"
+
+
+LLVMValueRef
+lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
+                const char *msg);
+
+
+#endif
+
-- 
cgit v1.2.3


From 042018a943a403a4d9887b400deb3b3c83ee40c0 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Tue, 27 Jul 2010 12:26:54 -0400
Subject: llvmpipe: delete function bodies after generating machine code

---
 src/gallium/auxiliary/draw/draw_llvm.c        | 2 ++
 src/gallium/auxiliary/gallivm/lp_bld_init.h   | 2 ++
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 8 ++++++++
 src/gallium/drivers/llvmpipe/lp_state_fs.c    | 1 +
 4 files changed, 13 insertions(+)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 48489e5f6f..8022b720b3 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -826,6 +826,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    if (gallivm_debug & GALLIVM_DEBUG_ASM) {
       lp_disassemble(code);
    }
+   lp_func_delete_body(variant->function);
 }
 
 
@@ -1001,6 +1002,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
    if (gallivm_debug & GALLIVM_DEBUG_ASM) {
       lp_disassemble(code);
    }
+   lp_func_delete_body(variant->function_elts);
 }
 
 void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index a32ced9b4c..f26fdac466 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -44,5 +44,7 @@ extern LLVMPassManagerRef lp_build_pass;
 void
 lp_build_init(void);
 
+extern void
+lp_func_delete_body(LLVMValueRef func);
 
 #endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 072408b268..6d5410d970 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -166,3 +166,11 @@ lp_set_target_options(void)
    }
 #endif
 }
+
+
+extern "C" void
+lp_func_delete_body(LLVMValueRef FF)
+{
+   llvm::Function *func = llvm::unwrap<llvm::Function>(FF);
+   func->deleteBody();
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index dbcc286417..5ee5bde184 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -715,6 +715,7 @@ generate_fragment(struct llvmpipe_context *lp,
       if (gallivm_debug & GALLIVM_DEBUG_ASM) {
          lp_disassemble(f);
       }
+      lp_func_delete_body(function);
    }
 }
 
-- 
cgit v1.2.3


From 4bd061b127aedfa7f6cd2c9fb4763927588c7ad1 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Thu, 5 Aug 2010 17:11:46 -0700
Subject: gallivm: Only get debug option once

---
 src/gallium/auxiliary/gallivm/lp_bld_init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 69353dea09..ef0888079c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -45,6 +45,8 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
    { "nopt",   GALLIVM_DEBUG_NO_OPT, NULL },
    DEBUG_NAMED_VALUE_END
 };
+
+DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0);
 #endif
 
 
@@ -89,7 +91,7 @@ void
 lp_build_init(void)
 {
 #ifdef DEBUG
-   gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 );
+   gallivm_debug = debug_get_option_gallivm_debug();
 #endif
 
    lp_set_target_options();
-- 
cgit v1.2.3


From 14e9fbee1cef281c6849a5f2a6d2cc66bfd4b3fd Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 6 Aug 2010 15:09:41 -0600
Subject: gallium: remove stray semicolons

---
 src/gallium/auxiliary/gallivm/lp_bld_init.c | 2 +-
 src/gallium/auxiliary/tgsi/tgsi_sanity.c    | 2 +-
 src/gallium/auxiliary/util/u_cpu_detect.c   | 2 +-
 src/gallium/drivers/llvmpipe/lp_context.c   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index ef0888079c..60d8bcfa55 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -46,7 +46,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
    DEBUG_NAMED_VALUE_END
 };
 
-DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0);
+DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0)
 #endif
 
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 9e02040f6c..287ee006cf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -34,7 +34,7 @@
 #include "tgsi_iterate.h"
 
 
-DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE);
+DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE)
 
 
 typedef struct {
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 6f38d22285..b1a8c75b99 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -73,7 +73,7 @@
 #endif
 
 
-DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE);
+DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE)
 
 
 struct util_cpu_caps util_cpu_caps;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 28793682ed..7543bd7b2b 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -47,7 +47,7 @@
 #include "lp_setup.h"
 
 
-DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE)
 
 
 static void llvmpipe_destroy( struct pipe_context *pipe )
-- 
cgit v1.2.3


From d8279728165eec2da6031cf543820acad322d192 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 8 Aug 2010 21:02:59 +0100
Subject: gallivm: Add type checks for the basic operations.

---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index f5f2623e46..98e8e4916d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -190,6 +190,9 @@ lp_build_add(struct lp_build_context *bld,
    const struct lp_type type = bld->type;
    LLVMValueRef res;
 
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
    if(a == bld->zero)
       return b;
    if(b == bld->zero)
@@ -273,6 +276,9 @@ lp_build_sub(struct lp_build_context *bld,
    const struct lp_type type = bld->type;
    LLVMValueRef res;
 
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
    if(b == bld->zero)
       return a;
    if(a == bld->undef || b == bld->undef)
@@ -395,6 +401,9 @@ lp_build_mul(struct lp_build_context *bld,
    LLVMValueRef shift;
    LLVMValueRef res;
 
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
    if(a == bld->zero)
       return bld->zero;
    if(a == bld->one)
@@ -518,6 +527,9 @@ lp_build_div(struct lp_build_context *bld,
 {
    const struct lp_type type = bld->type;
 
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
    if(a == bld->zero)
       return bld->zero;
    if(a == bld->one)
-- 
cgit v1.2.3


From cd5af8c703d84dd856528554fa615e9787ebe75f Mon Sep 17 00:00:00 2001
From: nobled <nobled@dreamwidth.org>
Date: Sun, 8 Aug 2010 20:17:30 +0000
Subject: gallivm: Use the correct context for integers

See:
http://bugs.freedesktop.org/29407
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 21236839fb..048b29929a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -533,7 +533,7 @@ emit_fetch(
                                             reg->Register.Index * 4 + swizzle);
 
          /* index_vec = index_vec + addr_vec */
-         index_vec = lp_build_add(&bld->base, index_vec, addr_vec);
+         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);
 
          /* Gather values from the constant buffer */
          res = build_gather(bld, bld->consts_ptr, index_vec);
-- 
cgit v1.2.3


From 12f5c0f9ce497e99854e0a3a7f5ff297a2a0a1e3 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 8 Aug 2010 22:18:53 +0100
Subject: gallivm: Fix more integer operations.

---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 048b29929a..42d796cb95 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -489,7 +489,7 @@ get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
                               int_vec_type, "");
 
    /* addr_vec = addr_vec * 4 */
-   addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
+   addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);
 
    return addr_vec;
 }
@@ -773,7 +773,9 @@ emit_store(
       addr = LLVMBuildExtractElement(bld->base.builder,
                                      addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
                                      "");
-      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+      addr = LLVMBuildMul(bld->base.builder,
+                          addr, LLVMConstInt(LLVMInt32Type(), 4, 0),
+                          "");
    }
 
    switch( reg->Register.File ) {
-- 
cgit v1.2.3


From fc9a49b638c26801951c33a570178bbb2b67ec60 Mon Sep 17 00:00:00 2001
From: nobled <nobled@dreamwidth.org>
Date: Sun, 8 Aug 2010 19:44:54 +0000
Subject: gallivm: Always use floating-point operators for floating-point types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the assert added in LLVM 2.8:
assert(getType()->isIntOrIntVectorTy() &&
       "Tried to create an integer operation on a non-integer type!")

But it also fixes some subtle bugs, since we should've been doing this
since LLVM 2.6 anyway.

Includes a modified patch from steckdenis@yahoo.fr for the
FNeg instructions in emit_fetch(); thanks for pointing those out.

http://bugs.freedesktop.org/29404
http://bugs.freedesktop.org/29407

Signed-off-by: José Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c       | 181 +++++++++++++---------
 src/gallium/auxiliary/gallivm/lp_bld_conv.c       |  14 +-
 src/gallium/auxiliary/gallivm/lp_bld_format_aos.c |   4 +-
 src/gallium/auxiliary/gallivm/lp_bld_format_soa.c |   4 +-
 src/gallium/auxiliary/gallivm/lp_bld_quad.c       |   4 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  12 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c   |  10 +-
 7 files changed, 137 insertions(+), 92 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 98e8e4916d..d2dde41e9f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -173,9 +173,15 @@ lp_build_comp(struct lp_build_context *bld,
    }
 
    if(LLVMIsConstant(a))
-      return LLVMConstSub(bld->one, a);
+      if (type.floating)
+          return LLVMConstFSub(bld->one, a);
+      else
+          return LLVMConstSub(bld->one, a);
    else
-      return LLVMBuildSub(bld->builder, bld->one, a, "");
+      if (type.floating)
+         return LLVMBuildFSub(bld->builder, bld->one, a, "");
+      else
+         return LLVMBuildSub(bld->builder, bld->one, a, "");
 }
 
 
@@ -220,9 +226,15 @@ lp_build_add(struct lp_build_context *bld,
    }
 
    if(LLVMIsConstant(a) && LLVMIsConstant(b))
-      res = LLVMConstAdd(a, b);
+      if (type.floating)
+         res = LLVMConstFAdd(a, b);
+      else
+         res = LLVMConstAdd(a, b);
    else
-      res = LLVMBuildAdd(bld->builder, a, b, "");
+      if (type.floating)
+         res = LLVMBuildFAdd(bld->builder, a, b, "");
+      else
+         res = LLVMBuildAdd(bld->builder, a, b, "");
 
    /* clamp to ceiling of 1.0 */
    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
@@ -256,9 +268,16 @@ lp_build_sum_vector(struct lp_build_context *bld,
 
    for (i = 1; i < type.length; i++) {
       index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      res = LLVMBuildAdd(bld->builder, res,
-                         LLVMBuildExtractElement(bld->builder, a, index, ""),
-                         "");
+      if (type.floating)
+         res = LLVMBuildFAdd(bld->builder, res,
+                            LLVMBuildExtractElement(bld->builder,
+                                                    a, index, ""),
+                            "");
+      else
+         res = LLVMBuildAdd(bld->builder, res,
+                            LLVMBuildExtractElement(bld->builder,
+                                                    a, index, ""),
+                            "");
    }
 
    return res;
@@ -306,9 +325,15 @@ lp_build_sub(struct lp_build_context *bld,
    }
 
    if(LLVMIsConstant(a) && LLVMIsConstant(b))
-      res = LLVMConstSub(a, b);
+      if (type.floating)
+         res = LLVMConstFSub(a, b);
+      else
+         res = LLVMConstSub(a, b);
    else
-      res = LLVMBuildSub(bld->builder, a, b, "");
+      if (type.floating)
+         res = LLVMBuildFSub(bld->builder, a, b, "");
+      else
+         res = LLVMBuildSub(bld->builder, a, b, "");
 
    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
       res = lp_build_max_simple(bld, res, bld->zero);
@@ -442,7 +467,10 @@ lp_build_mul(struct lp_build_context *bld,
       shift = NULL;
 
    if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
-      res =  LLVMConstMul(a, b);
+      if (type.floating)
+         res = LLVMConstFMul(a, b);
+      else
+         res = LLVMConstMul(a, b);
       if(shift) {
          if(type.sign)
             res = LLVMConstAShr(res, shift);
@@ -451,7 +479,10 @@ lp_build_mul(struct lp_build_context *bld,
       }
    }
    else {
-      res = LLVMBuildMul(bld->builder, a, b, "");
+      if (type.floating)
+         res = LLVMBuildFMul(bld->builder, a, b, "");
+      else
+         res = LLVMBuildMul(bld->builder, a, b, "");
       if(shift) {
          if(type.sign)
             res = LLVMBuildAShr(bld->builder, res, shift, "");
@@ -481,7 +512,10 @@ lp_build_mul_imm(struct lp_build_context *bld,
       return a;
 
    if(b == -1)
-      return LLVMBuildNeg(bld->builder, a, "");
+      if (bld->type.floating)
+         return LLVMBuildFNeg(bld->builder, a, "");
+      else
+         return LLVMBuildNeg(bld->builder, a, "");
 
    if(b == 2 && bld->type.floating)
       return lp_build_add(bld, a, a);
@@ -714,7 +748,12 @@ LLVMValueRef
 lp_build_negate(struct lp_build_context *bld,
                 LLVMValueRef a)
 {
-   return LLVMBuildNeg(bld->builder, a, "");
+   if (bld->type.floating)
+      a = LLVMBuildFNeg(bld->builder, a, "");
+   else
+      a = LLVMBuildNeg(bld->builder, a, "");
+
+   return a;
 }
 
 
@@ -1033,7 +1072,7 @@ lp_build_iround(struct lp_build_context *bld,
       half = LLVMBuildOr(bld->builder, sign, half, "");
       half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
 
-      res = LLVMBuildAdd(bld->builder, a, half, "");
+      res = LLVMBuildFAdd(bld->builder, a, half, "");
    }
 
    res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
@@ -1082,7 +1121,7 @@ lp_build_ifloor(struct lp_build_context *bld,
       offset = LLVMBuildAnd(bld->builder, offset, sign, "");
       offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
 
-      res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res");
+      res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res");
    }
 
    /* round to nearest (toward zero) */
@@ -1132,7 +1171,7 @@ lp_build_iceil(struct lp_build_context *bld,
       offset = LLVMBuildAnd(bld->builder, offset, sign, "");
       offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
 
-      res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res");
+      res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
    }
 
    /* round to nearest (toward zero) */
@@ -1197,9 +1236,9 @@ lp_build_rcp(struct lp_build_context *bld,
 
       rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
 
-      res = LLVMBuildMul(bld->builder, a, rcp_a, "");
-      res = LLVMBuildSub(bld->builder, two, res, "");
-      res = LLVMBuildMul(bld->builder, res, rcp_a, "");
+      res = LLVMBuildFMul(bld->builder, a, rcp_a, "");
+      res = LLVMBuildFSub(bld->builder, two, res, "");
+      res = LLVMBuildFMul(bld->builder, res, rcp_a, "");
 
       return rcp_a;
 #else
@@ -1282,7 +1321,7 @@ lp_build_sin(struct lp_build_context *bld,
     */
    
    LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
-   LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y");
+   LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
 
    /*
     * store the integer part of y in mm0
@@ -1356,9 +1395,9 @@ lp_build_sin(struct lp_build_context *bld,
     * xmm2 = _mm_mul_ps(y, xmm2);
     * xmm3 = _mm_mul_ps(y, xmm3);
     */
-   LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1");
-   LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2");
-   LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3");
+   LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+   LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+   LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
 
    /*
     * x = _mm_add_ps(x, xmm1);
@@ -1366,16 +1405,16 @@ lp_build_sin(struct lp_build_context *bld,
     * x = _mm_add_ps(x, xmm3);
     */ 
 
-   LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1");
-   LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2");
-   LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3");
+   LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+   LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+   LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
 
    /*
     * Evaluate the first polynom  (0 <= x <= Pi/4)
     *
     * z = _mm_mul_ps(x,x);
     */
-   LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z");
+   LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
 
    /*
     * _PS_CONST(coscof_p0,  2.443315711809948E-005);
@@ -1390,12 +1429,12 @@ lp_build_sin(struct lp_build_context *bld,
     * y = *(v4sf*)_ps_coscof_p0;
     * y = _mm_mul_ps(y, z);
     */
-   LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3");
-   LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4");
-   LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5");
-   LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6");
-   LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7");
-   LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8");
+   LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+   LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+   LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+   LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+   LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+   LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
 
 
    /*
@@ -1404,10 +1443,10 @@ lp_build_sin(struct lp_build_context *bld,
     * y = _mm_add_ps(y, *(v4sf*)_ps_1);
     */ 
    LLVMValueRef half = lp_build_const_v4sf(0.5);
-   LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp");
-   LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8");
+   LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+   LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
    LLVMValueRef one = lp_build_const_v4sf(1.0);
-   LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9");
+   LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
 
    /*
     * _PS_CONST(sincof_p0, -1.9515295891E-4);
@@ -1431,13 +1470,13 @@ lp_build_sin(struct lp_build_context *bld,
     * y2 = _mm_add_ps(y2, x);
     */
 
-   LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3");
-   LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4");
-   LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5");
-   LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6");
-   LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7");
-   LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8");
-   LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9");
+   LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+   LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+   LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+   LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+   LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+   LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+   LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
 
    /*
     * select the correct result from the two polynoms
@@ -1493,7 +1532,7 @@ lp_build_cos(struct lp_build_context *bld,
     */
    
    LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
-   LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y");
+   LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
 
    /*
     * store the integer part of y in mm0
@@ -1573,9 +1612,9 @@ lp_build_cos(struct lp_build_context *bld,
     * xmm2 = _mm_mul_ps(y, xmm2);
     * xmm3 = _mm_mul_ps(y, xmm3);
     */
-   LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1");
-   LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2");
-   LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3");
+   LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+   LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+   LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
 
    /*
     * x = _mm_add_ps(x, xmm1);
@@ -1583,16 +1622,16 @@ lp_build_cos(struct lp_build_context *bld,
     * x = _mm_add_ps(x, xmm3);
     */ 
 
-   LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1");
-   LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2");
-   LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3");
+   LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+   LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+   LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
 
    /*
     * Evaluate the first polynom  (0 <= x <= Pi/4)
     *
     * z = _mm_mul_ps(x,x);
     */
-   LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z");
+   LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
 
    /*
     * _PS_CONST(coscof_p0,  2.443315711809948E-005);
@@ -1607,12 +1646,12 @@ lp_build_cos(struct lp_build_context *bld,
     * y = *(v4sf*)_ps_coscof_p0;
     * y = _mm_mul_ps(y, z);
     */
-   LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3");
-   LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4");
-   LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5");
-   LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6");
-   LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7");
-   LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8");
+   LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+   LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+   LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+   LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+   LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+   LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
 
 
    /*
@@ -1621,10 +1660,10 @@ lp_build_cos(struct lp_build_context *bld,
     * y = _mm_add_ps(y, *(v4sf*)_ps_1);
     */ 
    LLVMValueRef half = lp_build_const_v4sf(0.5);
-   LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp");
-   LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8");
+   LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+   LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
    LLVMValueRef one = lp_build_const_v4sf(1.0);
-   LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9");
+   LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
 
    /*
     * _PS_CONST(sincof_p0, -1.9515295891E-4);
@@ -1648,13 +1687,13 @@ lp_build_cos(struct lp_build_context *bld,
     * y2 = _mm_add_ps(y2, x);
     */
 
-   LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3");
-   LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4");
-   LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5");
-   LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6");
-   LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7");
-   LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8");
-   LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9");
+   LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+   LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+   LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+   LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+   LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+   LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+   LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
 
    /*
     * select the correct result from the two polynoms
@@ -1829,7 +1868,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
       ipart = lp_build_floor(bld, x);
 
       /* fpart = x - ipart */
-      fpart = LLVMBuildSub(bld->builder, x, ipart, "");
+      fpart = LLVMBuildFSub(bld->builder, x, ipart, "");
    }
 
    if(p_exp2_int_part || p_exp2) {
@@ -1844,7 +1883,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
       expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
                                      Elements(lp_build_exp2_polynomial));
 
-      res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
+      res = LLVMBuildFMul(bld->builder, expipart, expfpart, "");
    }
 
    if(p_exp2_int_part)
@@ -1957,9 +1996,9 @@ lp_build_log2_approx(struct lp_build_context *bld,
                                     Elements(lp_build_log2_polynomial));
 
       /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
-      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+      logmant = LLVMBuildFMul(bld->builder, logmant, LLVMBuildFSub(bld->builder, mant, bld->one, ""), "");
 
-      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+      res = LLVMBuildFAdd(bld->builder, logmant, logexp, "");
    }
 
    if(p_exp) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 77012f1fac..8b477313d4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -117,8 +117,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
    scale = (double)mask/ubound;
    bias = (double)((unsigned long long)1 << (mantissa - n));
 
-   res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), "");
-   res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), "");
+   res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+   res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
    res = LLVMBuildBitCast(builder, res, int_vec_type, "");
 
    if(dst_width > n) {
@@ -175,6 +175,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
    double scale;
    double bias;
 
+   assert(dst_type.floating);
+
    mantissa = lp_mantissa(dst_type);
 
    n = MIN2(mantissa, src_width);
@@ -199,8 +201,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
 
    res = LLVMBuildBitCast(builder, res, vec_type, "");
 
-   res = LLVMBuildSub(builder, res, bias_, "");
-   res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), "");
+   res = LLVMBuildFSub(builder, res, bias_, "");
+   res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
 
    return res;
 }
@@ -296,7 +298,7 @@ lp_build_conv(LLVMBuilderRef builder,
          if (dst_scale != 1.0) {
             LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale);
             for(i = 0; i < num_tmps; ++i)
-               tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+               tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
          }
 
          /* Use an equally sized integer for intermediate computations */
@@ -391,7 +393,7 @@ lp_build_conv(LLVMBuilderRef builder,
           if (src_scale != 1.0) {
              LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale);
              for(i = 0; i < num_tmps; ++i)
-                tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+                tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
           }
       }
     }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 0f01fc1d75..247cb83ce6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -240,7 +240,7 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder,
     */
 
    if (normalized)
-      scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
+      scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
    else
       scaled = casted;
 
@@ -322,7 +322,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
    }
 
    if (normalized)
-      scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
+      scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
    else
       scaled = unswizzled;
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 9f405921b0..c724a4453e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -197,7 +197,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
             if (format_desc->channel[chan].normalized) {
                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
                LLVMValueRef scale_val = lp_build_const_vec(type, scale);
-               input = LLVMBuildMul(builder, input, scale_val, "");
+               input = LLVMBuildFMul(builder, input, scale_val, "");
             }
          }
          else {
@@ -227,7 +227,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
             LLVMValueRef scale_val = lp_build_const_vec(type, scale);
             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
-            input = LLVMBuildMul(builder, input, scale_val, "");
+            input = LLVMBuildFMul(builder, input, scale_val, "");
          }
          else {
             /* FIXME */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index ca36046d22..7b1088939b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -85,7 +85,7 @@ lp_build_scalar_ddx(struct lp_build_context *bld,
    LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
    LLVMValueRef a_left  = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
    LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
-   return LLVMBuildSub(bld->builder, a_right, a_left, "");
+   return lp_build_sub(bld, a_right, a_left);
 }
 
 
@@ -97,5 +97,5 @@ lp_build_scalar_ddy(struct lp_build_context *bld,
    LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
    LLVMValueRef a_top    = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
    LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
-   return LLVMBuildSub(bld->builder, a_bottom, a_top, "");
+   return lp_build_sub(bld, a_bottom, a_top);
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 1a20d74cac..955d328953 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -888,17 +888,17 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          /* Compute rho = max of all partial derivatives scaled by texture size.
           * XXX this could be vectorized somewhat
           */
-         rho = LLVMBuildMul(bld->builder,
+         rho = LLVMBuildFMul(bld->builder,
                             lp_build_max(float_bld, dsdx, dsdy),
                             lp_build_int_to_float(float_bld, width), "");
          if (dims > 1) {
             LLVMValueRef max;
-            max = LLVMBuildMul(bld->builder,
+            max = LLVMBuildFMul(bld->builder,
                                lp_build_max(float_bld, dtdx, dtdy),
                                lp_build_int_to_float(float_bld, height), "");
             rho = lp_build_max(float_bld, rho, max);
             if (dims > 2) {
-               max = LLVMBuildMul(bld->builder,
+               max = LLVMBuildFMul(bld->builder,
                                   lp_build_max(float_bld, drdx, drdy),
                                   lp_build_int_to_float(float_bld, depth), "");
                rho = lp_build_max(float_bld, rho, max);
@@ -912,12 +912,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          if (lod_bias) {
             lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
                                                index0, "");
-            lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+            lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
          }
       }
 
       /* add sampler lod bias */
-      lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+      lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
 
       /* clamp lod */
       lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
@@ -2029,6 +2029,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
       debug_printf("Sample from %s\n", util_format_name(fmt));
    }
 
+   assert(type.floating);
+
    /* Setup our build context */
    memset(&bld, 0, sizeof bld);
    bld.builder = builder;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 42d796cb95..becbd3bece 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -506,6 +506,7 @@ emit_fetch(
    const unsigned chan_index )
 {
    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+   const struct lp_type type = bld->base.type;
    const unsigned swizzle =
       tgsi_util_get_full_src_register_swizzle(reg, chan_index);
    LLVMValueRef res;
@@ -612,11 +613,12 @@ emit_fetch(
    case TGSI_UTIL_SIGN_SET:
       /* TODO: Use bitwese OR for floating point */
       res = lp_build_abs( &bld->base, res );
-      res = LLVMBuildNeg( bld->base.builder, res, "" );
-      break;
-
+      /* fall through */
    case TGSI_UTIL_SIGN_TOGGLE:
-      res = LLVMBuildNeg( bld->base.builder, res, "" );
+      if (type.floating)
+         res = LLVMBuildFNeg( bld->base.builder, res, "" );
+      else
+         res = LLVMBuildNeg( bld->base.builder, res, "" );
       break;
 
    case TGSI_UTIL_SIGN_KEEP:
-- 
cgit v1.2.3


From 8a3a971743a90463e65b44f1769a5301a31ce4cd Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 9 Aug 2010 17:26:18 +0100
Subject: gallivm: Don't call LLVMBuildFNeg on llvm-2.6.

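LLVMBuildFNeg didn't exist yet in LLVM 2.6, so only call it when
HAVE_LLVM >= 0x0207 and fall back to LLVMBuildNeg otherwise. Route the
other negation call sites through lp_build_negate() so the version check
lives in one place.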
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c     | 7 +++----
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 6 +-----
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index d2dde41e9f..cecc1858bc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -512,10 +512,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
       return a;
 
    if(b == -1)
-      if (bld->type.floating)
-         return LLVMBuildFNeg(bld->builder, a, "");
-      else
-         return LLVMBuildNeg(bld->builder, a, "");
+      return lp_build_negate(bld, a);
 
    if(b == 2 && bld->type.floating)
       return lp_build_add(bld, a, a);
@@ -748,9 +745,11 @@ LLVMValueRef
 lp_build_negate(struct lp_build_context *bld,
                 LLVMValueRef a)
 {
+#if HAVE_LLVM >= 0x0207
    if (bld->type.floating)
       a = LLVMBuildFNeg(bld->builder, a, "");
    else
+#endif
       a = LLVMBuildNeg(bld->builder, a, "");
 
    return a;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index becbd3bece..0aa64affac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -506,7 +506,6 @@ emit_fetch(
    const unsigned chan_index )
 {
    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
-   const struct lp_type type = bld->base.type;
    const unsigned swizzle =
       tgsi_util_get_full_src_register_swizzle(reg, chan_index);
    LLVMValueRef res;
@@ -615,10 +614,7 @@ emit_fetch(
       res = lp_build_abs( &bld->base, res );
       /* fall through */
    case TGSI_UTIL_SIGN_TOGGLE:
-      if (type.floating)
-         res = LLVMBuildFNeg( bld->base.builder, res, "" );
-      else
-         res = LLVMBuildNeg( bld->base.builder, res, "" );
+      res = lp_build_negate( &bld->base, res );
       break;
 
    case TGSI_UTIL_SIGN_KEEP:
-- 
cgit v1.2.3


From 6e1f9bc8f62baf3854a53bf67bb025790f2cb317 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 9 Aug 2010 17:30:33 +0100
Subject: gallivm: More type checks.

---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 43 +++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index cecc1858bc..ec9b53be80 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -160,6 +160,8 @@ lp_build_comp(struct lp_build_context *bld,
 {
    const struct lp_type type = bld->type;
 
+   assert(lp_check_value(type, a));
+
    if(a == bld->one)
       return bld->zero;
    if(a == bld->zero)
@@ -255,6 +257,8 @@ lp_build_sum_vector(struct lp_build_context *bld,
    LLVMValueRef index, res;
    unsigned i;
 
+   assert(lp_check_value(type, a));
+
    if (a == bld->zero)
       return bld->zero;
    if (a == bld->undef)
@@ -505,6 +509,8 @@ lp_build_mul_imm(struct lp_build_context *bld,
 {
    LLVMValueRef factor;
 
+   assert(lp_check_value(bld->type, a));
+
    if(b == 0)
       return bld->zero;
 
@@ -598,6 +604,10 @@ lp_build_lerp(struct lp_build_context *bld,
    LLVMValueRef delta;
    LLVMValueRef res;
 
+   assert(lp_check_value(bld->type, x));
+   assert(lp_check_value(bld->type, v0));
+   assert(lp_check_value(bld->type, v1));
+
    delta = lp_build_sub(bld, v1, v0);
 
    res = lp_build_mul(bld, x, delta);
@@ -639,6 +649,9 @@ lp_build_min(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b)
 {
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
    if(a == bld->undef || b == bld->undef)
       return bld->undef;
 
@@ -667,6 +680,9 @@ lp_build_max(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b)
 {
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
    if(a == bld->undef || b == bld->undef)
       return bld->undef;
 
@@ -696,6 +712,10 @@ lp_build_clamp(struct lp_build_context *bld,
                LLVMValueRef min,
                LLVMValueRef max)
 {
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, min));
+   assert(lp_check_value(bld->type, max));
+
    a = lp_build_min(bld, a, max);
    a = lp_build_max(bld, a, min);
    return a;
@@ -712,6 +732,8 @@ lp_build_abs(struct lp_build_context *bld,
    const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
 
+   assert(lp_check_value(type, a));
+
    if(!type.sign)
       return a;
 
@@ -745,6 +767,8 @@ LLVMValueRef
 lp_build_negate(struct lp_build_context *bld,
                 LLVMValueRef a)
 {
+   assert(lp_check_value(bld->type, a));
+
 #if HAVE_LLVM >= 0x0207
    if (bld->type.floating)
       a = LLVMBuildFNeg(bld->builder, a, "");
@@ -765,6 +789,8 @@ lp_build_sgn(struct lp_build_context *bld,
    LLVMValueRef cond;
    LLVMValueRef res;
 
+   assert(lp_check_value(type, a));
+
    /* Handle non-zero case */
    if(!type.sign) {
       /* if not zero then sign must be positive */
@@ -822,6 +848,7 @@ lp_build_set_sign(struct lp_build_context *bld,
                              ~((unsigned long long) 1 << (type.width - 1)));
    LLVMValueRef val, res;
 
+   assert(lp_check_value(type, a));
    assert(type.floating);
 
    /* val = reinterpret_cast<int>(a) */
@@ -1188,6 +1215,8 @@ lp_build_sqrt(struct lp_build_context *bld,
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    char intrinsic[32];
 
+   assert(lp_check_value(type, a));
+
    /* TODO: optimize the constant case */
    /* TODO: optimize the constant case */
 
@@ -1204,6 +1233,8 @@ lp_build_rcp(struct lp_build_context *bld,
 {
    const struct lp_type type = bld->type;
 
+   assert(lp_check_value(type, a));
+
    if(a == bld->zero)
       return bld->undef;
    if(a == bld->one)
@@ -1258,6 +1289,8 @@ lp_build_rsqrt(struct lp_build_context *bld,
 {
    const struct lp_type type = bld->type;
 
+   assert(lp_check_value(type, a));
+
    assert(type.floating);
 
    if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
@@ -1745,6 +1778,8 @@ lp_build_exp(struct lp_build_context *bld,
    /* log2(e) = 1/log(2) */
    LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634);
 
+   assert(lp_check_value(bld->type, x));
+
    return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
 }
 
@@ -1759,6 +1794,8 @@ lp_build_log(struct lp_build_context *bld,
    /* log(2) */
    LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529);
 
+   assert(lp_check_value(bld->type, x));
+
    return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
 }
 
@@ -1781,6 +1818,8 @@ lp_build_polynomial(struct lp_build_context *bld,
    LLVMValueRef res = NULL;
    unsigned i;
 
+   assert(lp_check_value(bld->type, x));
+
    /* TODO: optimize the constant case */
    if(LLVMIsConstant(x))
       debug_printf("%s: inefficient/imprecise constant arithmetic\n",
@@ -1852,6 +1891,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
    LLVMValueRef expfpart = NULL;
    LLVMValueRef res = NULL;
 
+   assert(lp_check_value(bld->type, x));
+
    if(p_exp2_int_part || p_frac_part || p_exp2) {
       /* TODO: optimize the constant case */
       if(LLVMIsConstant(x))
@@ -1965,6 +2006,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
    LLVMValueRef logmant = NULL;
    LLVMValueRef res = NULL;
 
+   assert(lp_check_value(bld->type, x));
+
    if(p_exp || p_floor_log2 || p_log2) {
       /* TODO: optimize the constant case */
       if(LLVMIsConstant(x))
-- 
cgit v1.2.3


From a44a6960fab8c0053678fe74ce4c978ef40b06ff Mon Sep 17 00:00:00 2001
From: nobled <nobled@dreamwidth.org>
Date: Mon, 9 Aug 2010 21:15:08 +0000
Subject: gallivm: Even more type checking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

http://bugs.freedesktop.org/29407

Signed-off-by: José Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c  | 12 +++++++++++-
 src/gallium/auxiliary/gallivm/lp_bld_logic.c | 15 +++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index ec9b53be80..860fbd829b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -72,6 +72,9 @@ lp_build_min_simple(struct lp_build_context *bld,
    const char *intrinsic = NULL;
    LLVMValueRef cond;
 
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
    /* TODO: optimize the constant case */
 
    if(type.width * type.length == 128) {
@@ -118,6 +121,9 @@ lp_build_max_simple(struct lp_build_context *bld,
    const char *intrinsic = NULL;
    LLVMValueRef cond;
 
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
    /* TODO: optimize the constant case */
 
    if(type.width * type.length == 128) {
@@ -395,6 +401,10 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
    LLVMValueRef c8;
    LLVMValueRef ab;
 
+   assert(!i16_type.floating);
+   assert(lp_check_value(i16_type, a));
+   assert(lp_check_value(i16_type, b));
+
    c8 = lp_build_const_int_vec(i16_type, 8);
    
 #if 0
@@ -848,8 +858,8 @@ lp_build_set_sign(struct lp_build_context *bld,
                              ~((unsigned long long) 1 << (type.width - 1)));
    LLVMValueRef val, res;
 
-   assert(lp_check_value(type, a));
    assert(type.floating);
+   assert(lp_check_value(type, a));
 
    /* val = reinterpret_cast<int>(a) */
    val = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index ab4ddb81c4..96f8e21fc6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -83,6 +83,8 @@ lp_build_compare(LLVMBuilderRef builder,
 
    assert(func >= PIPE_FUNC_NEVER);
    assert(func <= PIPE_FUNC_ALWAYS);
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
 
    if(func == PIPE_FUNC_NEVER)
       return zeros;
@@ -374,6 +376,9 @@ lp_build_select_bitwise(struct lp_build_context *bld,
    struct lp_type type = bld->type;
    LLVMValueRef res;
 
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
    if (a == b) {
       return a;
    }
@@ -419,6 +424,9 @@ lp_build_select(struct lp_build_context *bld,
    struct lp_type type = bld->type;
    LLVMValueRef res;
 
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
    if(a == b)
       return a;
 
@@ -484,6 +492,9 @@ lp_build_select_aos(struct lp_build_context *bld,
    const unsigned n = type.length;
    unsigned i, j;
 
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
    if(a == b)
       return a;
    if(cond[0] && cond[1] && cond[2] && cond[3])
@@ -539,7 +550,11 @@ lp_build_select_aos(struct lp_build_context *bld,
 LLVMValueRef
 lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
 {
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
    b = LLVMBuildNot(bld->builder, b, "");
    b = LLVMBuildAnd(bld->builder, a, b, "");
+
    return b;
 }
-- 
cgit v1.2.3


From 20b3e40f166c77bd7fa5b7171e5b4169ed035280 Mon Sep 17 00:00:00 2001
From: nobled <nobled@dreamwidth.org>
Date: Mon, 9 Aug 2010 21:25:18 +0000
Subject: gallivm: Fix bitwise operations for floats, division for integers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

http://bugs.freedesktop.org/29407

Signed-off-by: José Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c  | 17 ++++++++++++++---
 src/gallium/auxiliary/gallivm/lp_bld_logic.c | 15 +++++++++++++--
 2 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 860fbd829b..cf2feeb163 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -588,13 +588,24 @@ lp_build_div(struct lp_build_context *bld,
    if(a == bld->undef || b == bld->undef)
       return bld->undef;
 
-   if(LLVMIsConstant(a) && LLVMIsConstant(b))
-      return LLVMConstFDiv(a, b);
+   if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+      if (type.floating)
+         return LLVMConstFDiv(a, b);
+      else if (type.sign)
+         return LLVMConstSDiv(a, b);
+      else
+         return LLVMConstUDiv(a, b);
+   }
 
    if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
       return lp_build_mul(bld, a, lp_build_rcp(bld, b));
 
-   return LLVMBuildFDiv(bld->builder, a, b, "");
+   if (type.floating)
+      return LLVMBuildFDiv(bld->builder, a, b, "");
+   else if (type.sign)
+      return LLVMBuildSDiv(bld->builder, a, b, "");
+   else
+      return LLVMBuildUDiv(bld->builder, a, b, "");
 }
 
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 96f8e21fc6..7d7db3b0d9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -550,11 +550,22 @@ lp_build_select_aos(struct lp_build_context *bld,
 LLVMValueRef
 lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
 {
-   assert(lp_check_value(bld->type, a));
-   assert(lp_check_value(bld->type, b));
+   const struct lp_type type = bld->type;
+
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+
+   /* can't do bitwise ops on floating-point values */
+   if(type.floating) {
+      a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, "");
+      b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, "");
+   }
 
    b = LLVMBuildNot(bld->builder, b, "");
    b = LLVMBuildAnd(bld->builder, a, b, "");
 
+   if(type.floating) {
+      b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, "");
+   }
    return b;
 }
-- 
cgit v1.2.3


From f263fdee8146719b14d9f9b14cf0c224461f35dc Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 10 Aug 2010 08:56:20 -0600
Subject: gallivm: fix non-SSE4.1 case in lp_build_pack2()

Since there's no SSE instruction for this case, fall through to the
generic shuffle code.

Fixes bug fd.o 29468.
---
 src/gallium/auxiliary/gallivm/lp_bld_pack.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 7748f8f099..ecfb13a0d4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -261,13 +261,14 @@ lp_build_pack2(LLVMBuilderRef builder,
 #endif
    LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
    LLVMValueRef shuffle;
-   LLVMValueRef res;
+   LLVMValueRef res = NULL;
 
    assert(!src_type.floating);
    assert(!dst_type.floating);
    assert(src_type.width == dst_type.width * 2);
    assert(src_type.length * 2 == dst_type.length);
 
+   /* Check for special cases first */
    if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
       switch(src_type.width) {
       case 32:
@@ -283,8 +284,8 @@ lp_build_pack2(LLVMBuilderRef builder,
                return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
             }
             else {
-               assert(0);
-               return LLVMGetUndef(dst_vec_type);
+               /* use generic shuffle below */
+               res = NULL;
             }
          }
          break;
@@ -310,10 +311,13 @@ lp_build_pack2(LLVMBuilderRef builder,
          break;
       }
 
-      res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
-      return res;
+      if (res) {
+         res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+         return res;
+      }
    }
 
+   /* generic shuffle */
    lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
    hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
 
-- 
cgit v1.2.3


From f8533482f4a9b5ee7107f4e653d4ebf99ac63e2e Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 11 Aug 2010 15:11:12 +0100
Subject: gallivm: Use unsigned shift in lp_build_minify.

Texture dimensions are unsigned.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 955d328953..665b010ece 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -811,7 +811,7 @@ lp_build_minify(struct lp_build_sample_context *bld,
                 LLVMValueRef base_size,
                 LLVMValueRef level)
 {
-   LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+   LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
    size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
    return size;
 }
-- 
cgit v1.2.3


From 10ce6779e8a64c33add70e440f885c210f3fa6ee Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 11 Aug 2010 15:13:17 +0100
Subject: gallivm: Use lp_build_div instead of lp_build_mul + lp_build_rcp.

Single divide, so let lp_build_div decide how to implement this.

This saves a multiplication on architectures that do not have
an RCP intrinsic.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 665b010ece..307506507d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1219,8 +1219,7 @@ lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
    /* ima = -0.5 / abs(coord); */
    LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
-   LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
-                                   lp_build_rcp(coord_bld, absCoord));
+   LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
    return ima;
 }
 
-- 
cgit v1.2.3


From b481a1237e00e1e1fb68ffca0653df3a96f21788 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 11 Aug 2010 18:44:17 +0100
Subject: gallivm: Fix and enable the extra Newton/Raphson step in
 lp_build_rcp().

Thanks to Michal for spotting this.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index cf2feeb163..816ee70119 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1274,7 +1274,7 @@ lp_build_rcp(struct lp_build_context *bld,
        * when we have a better system in place to track minimum precision.
        */
 
-#if 0
+#if 1
       /*
        * Do one Newton-Raphson step to improve precision:
        *
@@ -1291,7 +1291,7 @@ lp_build_rcp(struct lp_build_context *bld,
       res = LLVMBuildFSub(bld->builder, two, res, "");
       res = LLVMBuildFMul(bld->builder, res, rcp_a, "");
 
-      return rcp_a;
+      return res;
 #else
       return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
 #endif
-- 
cgit v1.2.3


From eacb624a4a11867427955c812e64c00d5c82bcdd Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 14 Aug 2010 18:02:47 +0100
Subject: gallivm: Refactor the Newton-Raphson steps, and disable once again.

It causes a very ugly corruption on the Earth's halo on Google Earth.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 111 +++++++++++++++++++++-------
 1 file changed, 83 insertions(+), 28 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 816ee70119..7b35dd4bb4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -59,6 +59,19 @@
 #include "lp_bld_arit.h"
 
 
+/*
+ * XXX: Increasing the number of Newton steps eliminates some artifacts, but
+ * adds others, most noticeably corruption in the Earth halo in Google Earth.
+ */
+#define RCP_NEWTON_STEPS 0
+
+#define RSQRT_NEWTON_STEPS 0
+
+#define EXP_POLY_DEGREE 3
+
+#define LOG_POLY_DEGREE 5
+
+
 /**
  * Generate min(a, b)
  * No checks for special case values of a or b = 1 or 0 are done.
@@ -1248,6 +1261,31 @@ lp_build_sqrt(struct lp_build_context *bld,
 }
 
 
+/**
+ * Do one Newton-Raphson step to improve reciprocal precision:
+ *
+ *   x_{i+1} = x_i * (2 - a * x_i)
+ *
+ * See also:
+ * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rcp_refine(struct lp_build_context *bld,
+                    LLVMValueRef a,
+                    LLVMValueRef rcp_a)
+{
+   LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+   LLVMValueRef res;
+
+   res = LLVMBuildFMul(bld->builder, a, rcp_a, "");
+   res = LLVMBuildFSub(bld->builder, two, res, "");
+   res = LLVMBuildFMul(bld->builder, rcp_a, res, "");
+
+   return res;
+}
+
+
 LLVMValueRef
 lp_build_rcp(struct lp_build_context *bld,
              LLVMValueRef a)
@@ -1269,38 +1307,49 @@ lp_build_rcp(struct lp_build_context *bld,
       return LLVMConstFDiv(bld->one, a);
 
    if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
-      /*
-       * XXX: Added precision is not always necessary, so only enable this
-       * when we have a better system in place to track minimum precision.
-       */
-
-#if 1
-      /*
-       * Do one Newton-Raphson step to improve precision:
-       *
-       *   x1 = (2 - a * rcp(a)) * rcp(a)
-       */
-
-      LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
-      LLVMValueRef rcp_a;
       LLVMValueRef res;
+      unsigned i;
 
-      rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+      res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
 
-      res = LLVMBuildFMul(bld->builder, a, rcp_a, "");
-      res = LLVMBuildFSub(bld->builder, two, res, "");
-      res = LLVMBuildFMul(bld->builder, res, rcp_a, "");
+      for (i = 0; i < RCP_NEWTON_STEPS; ++i) {
+         res = lp_build_rcp_refine(bld, a, res);
+      }
 
       return res;
-#else
-      return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
-#endif
    }
 
    return LLVMBuildFDiv(bld->builder, bld->one, a, "");
 }
 
 
+/**
+ * Do one Newton-Raphson step to improve rsqrt precision:
+ *
+ *   x_{i+1} = 0.5 * x_i * (3.0 - a * x_i * x_i)
+ *
+ * See also:
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rsqrt_refine(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      LLVMValueRef rsqrt_a)
+{
+   LLVMValueRef half = lp_build_const_vec(bld->type, 0.5);
+   LLVMValueRef three = lp_build_const_vec(bld->type, 3.0);
+   LLVMValueRef res;
+
+   res = LLVMBuildFMul(bld->builder, rsqrt_a, rsqrt_a, "");
+   res = LLVMBuildFMul(bld->builder, a, res, "");
+   res = LLVMBuildFSub(bld->builder, three, res, "");
+   res = LLVMBuildFMul(bld->builder, rsqrt_a, res, "");
+   res = LLVMBuildFMul(bld->builder, half, res, "");
+
+   return res;
+}
+
+
 /**
  * Generate 1/sqrt(a)
  */
@@ -1314,8 +1363,18 @@ lp_build_rsqrt(struct lp_build_context *bld,
 
    assert(type.floating);
 
-   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
-      return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
+   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+      LLVMValueRef res;
+      unsigned i;
+
+      res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
+
+      for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) {
+         res = lp_build_rsqrt_refine(bld, a, res);
+      }
+
+      return res;
+   }
 
    return lp_build_rcp(bld, lp_build_sqrt(bld, a));
 }
@@ -1821,10 +1880,6 @@ lp_build_log(struct lp_build_context *bld,
 }
 
 
-#define EXP_POLY_DEGREE 3
-#define LOG_POLY_DEGREE 5
-
-
 /**
  * Generate polynomial.
  * Ex:  coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
-- 
cgit v1.2.3


From 923256626931c057d1a7c20d8900768b0c1faea9 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Fri, 13 Aug 2010 15:26:29 +0200
Subject: u_cpu_detect: remove arch and little_endian

This logic duplicates the one in p_config.h, so remove it and adjust
the only two places that were using it.
---
 src/gallium/auxiliary/gallivm/lp_bld_pack.c       |  7 +++----
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  6 +++++-
 src/gallium/auxiliary/util/u_cpu_detect.c         | 18 ------------------
 src/gallium/auxiliary/util/u_cpu_detect.h         | 13 +------------
 4 files changed, 9 insertions(+), 35 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index ecfb13a0d4..b7b630f2e8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -171,14 +171,13 @@ lp_build_unpack2(LLVMBuilderRef builder,
       msb = lp_build_zero(src_type);
 
    /* Interleave bits */
-   if(util_cpu_caps.little_endian) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
       *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
       *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
-   }
-   else {
+#else
       *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
       *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
-   }
+#endif
 
    /* Cast the result into the new type (twice as wide) */
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 307506507d..02d43e373a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1840,7 +1840,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
       unsigned i, j;
 
       for(j = 0; j < h16.type.length; j += 4) {
-         unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+         unsigned subindex = 0;
+#else
+         unsigned subindex = 1;
+#endif
          LLVMValueRef index;
 
          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index b1a8c75b99..2bbc554a90 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -391,23 +391,6 @@ util_cpu_detect(void)
 
    memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
 
-   /* Check for arch type */
-#if defined(PIPE_ARCH_MIPS)
-   util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS;
-#elif defined(PIPE_ARCH_ALPHA)
-   util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA;
-#elif defined(PIPE_ARCH_SPARC)
-   util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC;
-#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-   util_cpu_caps.arch = UTIL_CPU_ARCH_X86;
-   util_cpu_caps.little_endian = 1;
-#elif defined(PIPE_ARCH_PPC)
-   util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC;
-   util_cpu_caps.little_endian = 0;
-#else
-   util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN;
-#endif
-
    /* Count the number of CPUs in system */
 #if defined(PIPE_OS_WINDOWS)
    {
@@ -504,7 +487,6 @@ util_cpu_detect(void)
 
 #ifdef DEBUG
    if (debug_get_option_dump_cpu()) {
-      debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch);
       debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
 
       debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index 4b3dc39c34..f3bef0993c 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -36,26 +36,15 @@
 #define _UTIL_CPU_DETECT_H
 
 #include "pipe/p_compiler.h"
-
-enum util_cpu_arch {
-   UTIL_CPU_ARCH_UNKNOWN = 0,
-   UTIL_CPU_ARCH_MIPS,
-   UTIL_CPU_ARCH_ALPHA,
-   UTIL_CPU_ARCH_SPARC,
-   UTIL_CPU_ARCH_X86,
-   UTIL_CPU_ARCH_POWERPC
-};
+#include "pipe/p_config.h"
 
 struct util_cpu_caps {
-   enum util_cpu_arch arch;
    unsigned nr_cpus;
 
    /* Feature flags */
    int x86_cpu_type;
    unsigned cacheline;
 
-   unsigned little_endian:1;
-
    unsigned has_tsc:1;
    unsigned has_mmx:1;
    unsigned has_mmx2:1;
-- 
cgit v1.2.3


From 10d77f3f6b86eeb3ec1d9736c02335831e5c73c2 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 14 Aug 2010 12:54:21 -0700
Subject: gallivm: Remove unnecessary header.

---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 02d43e373a..806c7d56a8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -40,7 +40,6 @@
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_format.h"
-#include "util/u_cpu_detect.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
-- 
cgit v1.2.3