From ec8d9523d465554e3ffaa1aeef46bfff868281d3 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 20 Apr 2010 16:21:08 +0200
Subject: gallivm: Universal format support on lp_build_fetch_rgba_aos via
 util_format_description::fetch_rgba_float

This therefore adds support for half-float vertex buffers.
---
 src/gallium/auxiliary/gallivm/lp_bld_format.h     |  4 +-
 src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 79 ++++++++++++++++++++++-
 src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 62 +++---------------
 3 files changed, 89 insertions(+), 56 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index ecf2cfd62c..085937588f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -59,7 +59,9 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 LLVMValueRef
 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
                         const struct util_format_description *format_desc,
-                        LLVMValueRef ptr);
+                        LLVMValueRef ptr,
+                        LLVMValueRef i,
+                        LLVMValueRef j);
 
 
 /*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 191562d460..5cd5b93bdf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -34,8 +34,11 @@
 
 
 #include "util/u_format.h"
+#include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_string.h"
 
+#include "lp_bld_init.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_swizzle.h"
@@ -295,12 +298,17 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 
 /**
  * Fetch a pixel into a 4 float AoS.
+ *
+ * i and j are the sub-block pixel coordinates.
  */
 LLVMValueRef
 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
                         const struct util_format_description *format_desc,
-                        LLVMValueRef ptr)
+                        LLVMValueRef ptr,
+                        LLVMValueRef i,
+                        LLVMValueRef j)
 {
+
    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
@@ -309,7 +317,9 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
        util_is_pot(format_desc->block.bits) &&
        format_desc->block.bits <= 32 &&
        format_desc->is_bitmask &&
-       !format_desc->is_mixed)
+       !format_desc->is_mixed &&
+       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
    {
       LLVMValueRef packed;
 
@@ -321,6 +331,71 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 
       return lp_build_unpack_rgba_aos(builder, format_desc, packed);
    }
+   else if (format_desc->fetch_rgba_float) {
+      /*
+       * Fallback to calling util_format_description::fetch_rgba_float.
+       *
+       * This is definitely not the most efficient way of fetching pixels, as
+       * we miss the opportunity to do vectorization, but it is
+       * convenient for formats or scenarios for which there was no opportunity
+       * or incentive to optimize.
+       */
+
+      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+      char name[256];
+      LLVMValueRef function;
+      LLVMValueRef tmp;
+      LLVMValueRef args[4];
+
+      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
+                    format_desc->short_name);
+
+      /*
+       * Declare and bind format_desc->fetch_rgba_float().
+       */
+
+      function = LLVMGetNamedFunction(module, name);
+      if (!function) {
+         LLVMTypeRef ret_type;
+         LLVMTypeRef arg_types[4];
+         LLVMTypeRef function_type;
+
+         ret_type = LLVMVoidType();
+         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+         arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+         arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
+         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
+         function = LLVMAddFunction(module, name, function_type);
+
+         LLVMSetFunctionCallConv(function, LLVMCCallConv);
+         LLVMSetLinkage(function, LLVMExternalLinkage);
+
+         assert(LLVMIsDeclaration(function));
+
+         LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
+      }
+
+      /*
+       * XXX: this alloca should ideally go in the function's first block
+       */
+
+      tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+
+      /*
+       * Invoke format_desc->fetch_rgba_float() for the pixel, writing the result
+       * into tmp, which is then loaded and returned as the AoS vector.
+       */
+
+      args[0] = LLVMBuildBitCast(builder, tmp,
+                                 LLVMPointerType(LLVMFloatType(), 0), "");
+      args[1] = ptr;
+      args[2] = i;
+      args[3] = j;
+
+      LLVMBuildCall(builder, function, args, 4, "");
+
+      return LLVMBuildLoad(builder, tmp, "");
+   }
    else {
       assert(0);
       return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 2b66162eb4..c7b20f4201 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -307,70 +307,28 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
    }
    else {
       /*
-       * Fallback to calling util_format_description::fetch_rgba_float for each
-       * pixel.
+       * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
        *
-       * This is definitely not the most efficient way of fetching pixels, as
-       * we miss the opportunity to do vectorization, but this it is a
+       * This is not the most efficient way of fetching pixels, as
+       * we miss some opportunities to do vectorization, but it is
        * convenient for formats or scenarios for which there was no opportunity
        * or incentive to optimize.
        */
 
-      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
-      char name[256];
-      LLVMValueRef function;
-      LLVMValueRef tmp;
       unsigned k, chan;
 
       assert(type.floating);
 
-      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name);
-
-      /*
-       * Declare and bind format_desc->fetch_rgba_float().
-       */
-
-      function = LLVMGetNamedFunction(module, name);
-      if (!function) {
-         LLVMTypeRef ret_type;
-         LLVMTypeRef arg_types[4];
-         LLVMTypeRef function_type;
-
-         ret_type = LLVMVoidType();
-         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
-         arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
-         arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
-         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
-         function = LLVMAddFunction(module, name, function_type);
-
-         LLVMSetFunctionCallConv(function, LLVMCCallConv);
-         LLVMSetLinkage(function, LLVMExternalLinkage);
-
-         assert(LLVMIsDeclaration(function));
-
-         LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
-      }
-
       for (chan = 0; chan < 4; ++chan) {
          rgba[chan] = lp_build_undef(type);
       }
 
-      tmp = LLVMBuildArrayAlloca(builder,
-                                 LLVMFloatType(),
-                                 LLVMConstInt(LLVMInt32Type(), 4, 0),
-                                 "");
-
-      /*
-       * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
-       * in the SoA vectors.
-       */
-
       for(k = 0; k < type.length; ++k) {
          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
          LLVMValueRef offset_elem;
          LLVMValueRef ptr;
          LLVMValueRef i_elem, j_elem;
-         LLVMValueRef args[4];
+         LLVMValueRef tmp;
 
          offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
          ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
@@ -378,17 +336,15 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
          i_elem = LLVMBuildExtractElement(builder, i, index, "");
          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 
-         args[0] = tmp;
-         args[1] = ptr;
-         args[2] = i_elem;
-         args[3] = j_elem;
+         tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem);
 
-         LLVMBuildCall(builder, function, args, 4, "");
+         /*
+          * AoS to SoA
+          */
 
          for (chan = 0; chan < 4; ++chan) {
             LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
-            tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, "");
-            tmp_chan = LLVMBuildLoad(builder, tmp_chan, "");
+            tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
             rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
          }
       }
-- 
cgit v1.2.3