1 files changed, 138 insertions, 126 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index a07f7418f2..6257e9a404 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -33,12 +33,14 @@
  */
 
 
-#include "util/u_cpu_detect.h"
 #include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_string.h"
 
+#include "lp_bld_init.h"
 #include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_swizzle.h"
+#include "lp_bld_flow.h"
 #include "lp_bld_format.h"
 
 
@@ -48,16 +50,12 @@
  * @param packed integer.
  *
  * @return RGBA in a 4 floats vector.
- *
- * XXX: This is mostly for reference and testing -- operating a single pixel at
- * a time is rarely if ever needed.
  */
 LLVMValueRef
 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
                          const struct util_format_description *desc,
                          LLVMValueRef packed)
 {
-   LLVMTypeRef type;
    LLVMValueRef shifted, casted, scaled, masked;
    LLVMValueRef shifts[4];
    LLVMValueRef masks[4];
@@ -66,17 +64,16 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
    LLVMValueRef aux[4];
    bool normalized;
    int empty_channel;
+   bool needs_uitofp;
    unsigned shift;
    unsigned i;
 
-   /* FIXME: Support more formats */
+   /* TODO: Support more formats */
    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
    assert(desc->block.width == 1);
    assert(desc->block.height == 1);
    assert(desc->block.bits <= 32);
 
-   type = LLVMIntType(desc->block.bits);
-
    /* Do the intermediate integer computations with 32bit integers since it
     * matches floating point size */
    if (desc->block.bits < 32)
@@ -96,6 +93,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
 
    /* Initialize vector constants */
    normalized = FALSE;
+   needs_uitofp = FALSE;
    empty_channel = -1;
    shift = 0;
    for (i = 0; i < 4; ++i) {
@@ -108,10 +106,13 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
          empty_channel = i;
       }
       else {
-         unsigned mask = (1 << bits) - 1;
+         unsigned long long mask = (1ULL << bits) - 1;
 
          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
-         assert(bits < 32);
+
+         if (bits == 32) {
+            needs_uitofp = TRUE;
+         }
 
          shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
          masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
@@ -129,8 +130,12 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
 
    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
-   /* UIToFP can't be expressed in SSE2 */
-   casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
+   if (!needs_uitofp) {
+      /* UIToFP can't be expressed in SSE2 */
+      casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
+   } else {
+      casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
+   }
 
    if (normalized)
       scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
@@ -141,7 +146,22 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
       aux[i] = LLVMGetUndef(LLVMFloatType());
 
    for (i = 0; i < 4; ++i) {
-      enum util_format_swizzle swizzle = desc->swizzle[i];
+      enum util_format_swizzle swizzle;
+
+      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+         /*
+          * For ZS formats do RGBA = ZZZ1
+          */
+         if (i == 3) {
+            swizzle = UTIL_FORMAT_SWIZZLE_1;
+         } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
+            swizzle = UTIL_FORMAT_SWIZZLE_0;
+         } else {
+            swizzle = desc->swizzle[0];
+         }
+      } else {
+         swizzle = desc->swizzle[i];
+      }
 
       switch (swizzle) {
       case UTIL_FORMAT_SWIZZLE_X:
@@ -170,117 +190,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
 
 
 /**
- * Take a vector with packed pixels and unpack into a rgba8 vector.
- *
- * Formats with bit depth smaller than 32bits are accepted, but they must be
- * padded to 32bits.
- */
-LLVMValueRef
-lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
-                          const struct util_format_description *desc,
-                          struct lp_type type,
-                          LLVMValueRef packed)
-{
-   struct lp_build_context bld;
-   bool rgba8;
-   LLVMValueRef res;
-   unsigned i;
-
-   lp_build_context_init(&bld, builder, type);
-
-   /* FIXME: Support more formats */
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
-   assert(desc->block.width == 1);
-   assert(desc->block.height == 1);
-   assert(desc->block.bits <= 32);
-
-   assert(!type.floating);
-   assert(!type.fixed);
-   assert(type.norm);
-   assert(type.width == 8);
-   assert(type.length % 4 == 0);
-
-   rgba8 = TRUE;
-   for(i = 0; i < 4; ++i) {
-      assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
-             desc->channel[i].type == UTIL_FORMAT_TYPE_VOID);
-      if(desc->channel[0].size != 8)
-         rgba8 = FALSE;
-   }
-
-   if(rgba8) {
-      /*
-       * The pixel is already in a rgba8 format variant. All it is necessary
-       * is to swizzle the channels.
-       */
-
-      unsigned char swizzles[4];
-      boolean zeros[4]; /* bitwise AND mask */
-      boolean ones[4]; /* bitwise OR mask */
-      boolean swizzles_needed = FALSE;
-      boolean zeros_needed = FALSE;
-      boolean ones_needed = FALSE;
-
-      for(i = 0; i < 4; ++i) {
-         enum util_format_swizzle swizzle = desc->swizzle[i];
-
-         /* Initialize with the no-op case */
-         swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i;
-         zeros[i] = TRUE;
-         ones[i] = FALSE;
-
-         switch (swizzle) {
-         case UTIL_FORMAT_SWIZZLE_X:
-         case UTIL_FORMAT_SWIZZLE_Y:
-         case UTIL_FORMAT_SWIZZLE_Z:
-         case UTIL_FORMAT_SWIZZLE_W:
-            if(swizzle != swizzles[i]) {
-               swizzles[i] = swizzle;
-               swizzles_needed = TRUE;
-            }
-            break;
-         case UTIL_FORMAT_SWIZZLE_0:
-            zeros[i] = FALSE;
-            zeros_needed = TRUE;
-            break;
-         case UTIL_FORMAT_SWIZZLE_1:
-            ones[i] = TRUE;
-            ones_needed = TRUE;
-            break;
-         case UTIL_FORMAT_SWIZZLE_NONE:
-            assert(0);
-            break;
-         }
-      }
-
-      res = packed;
-
-      if(swizzles_needed)
-         res = lp_build_swizzle1_aos(&bld, res, swizzles);
-
-      if(zeros_needed) {
-         /* Mask out zero channels */
-         LLVMValueRef mask = lp_build_const_mask_aos(type, zeros);
-         res = LLVMBuildAnd(builder, res, mask, "");
-      }
-
-      if(ones_needed) {
-         /* Or one channels */
-         LLVMValueRef mask = lp_build_const_mask_aos(type, ones);
-         res = LLVMBuildOr(builder, res, mask, "");
-      }
-   }
-   else {
-      /* FIXME */
-      assert(0);
-      res = lp_build_undef(type);
-   }
-
-   return res;
-}
-
-
-/**
  * Pack a single pixel.
  *
  * @param rgba 4 float vector with the unpacked components.
@@ -381,3 +290,106 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 
    return packed;
 }
+
+
+/**
+ * Fetch a pixel into a 4 float AoS.
+ *
+ * i and j are the sub-block pixel coordinates.
+ */
+LLVMValueRef
+lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
+                        const struct util_format_description *format_desc,
+                        LLVMValueRef ptr,
+                        LLVMValueRef i,
+                        LLVMValueRef j)
+{
+
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
+       format_desc->block.width == 1 &&
+       format_desc->block.height == 1 &&
+       util_is_pot(format_desc->block.bits) &&
+       format_desc->block.bits <= 32 &&
+       format_desc->is_bitmask &&
+       !format_desc->is_mixed &&
+       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
+   {
+      LLVMValueRef packed;
+
+      ptr = LLVMBuildBitCast(builder, ptr,
+                             LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
+                             "");
+
+      packed = LLVMBuildLoad(builder, ptr, "packed");
+
+      return lp_build_unpack_rgba_aos(builder, format_desc, packed);
+   }
+   else if (format_desc->fetch_rgba_float) {
+      /*
+       * Fallback to calling util_format_description::fetch_rgba_float.
+       *
+       * This is definitely not the most efficient way of fetching pixels, as
+       * we miss the opportunity to do vectorization, but this it is a
+       * convenient for formats or scenarios for which there was no opportunity
+       * or incentive to optimize.
+       */
+
+      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+      char name[256];
+      LLVMValueRef function;
+      LLVMValueRef tmp;
+      LLVMValueRef args[4];
+
+      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
+                    format_desc->short_name);
+
+      /*
+       * Declare and bind format_desc->fetch_rgba_float().
+       */
+
+      function = LLVMGetNamedFunction(module, name);
+      if (!function) {
+         LLVMTypeRef ret_type;
+         LLVMTypeRef arg_types[4];
+         LLVMTypeRef function_type;
+
+         ret_type = LLVMVoidType();
+         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+         arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+         arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
+         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
+         function = LLVMAddFunction(module, name, function_type);
+
+         LLVMSetFunctionCallConv(function, LLVMCCallConv);
+         LLVMSetLinkage(function, LLVMExternalLinkage);
+
+         assert(LLVMIsDeclaration(function));
+
+         LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
+      }
+
+      tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+
+      /*
+       * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
+       * in the SoA vectors.
+       */
+
+      args[0] = LLVMBuildBitCast(builder, tmp,
+                                 LLVMPointerType(LLVMFloatType(), 0), "");
+      args[1] = ptr;
+      args[2] = i;
+      args[3] = j;
+
+      LLVMBuildCall(builder, function, args, 4, "");
+
+      return LLVMBuildLoad(builder, tmp, "");
+   }
+   else {
+      assert(0);
+      return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
+   }
+}