From ae00e34e4b0d3be247b0538b60810176397c7915 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 13 Oct 2010 20:25:17 +0100
Subject: llvmpipe: Generalize the x8z24 fast path to all depth formats.

Together with the previous commit, this generalizes the benefits of
d2cf757f44f4ee5554243f3279483a25886d9927 to all depth formats, in
particular:
- simpler float -> 24unorm conversion
- avoid unsigned comparisons (not directly supported on SSE) by aligning
to the least significant bit
- avoid unnecessary/repeated mask ANDing

Verified with trivial/tri-z that the exact same assembly is produced for
X8Z24.
---
 src/gallium/drivers/llvmpipe/lp_bld_depth.c | 193 ++++++++++++----------------
 1 file changed, 82 insertions(+), 111 deletions(-)

(limited to 'src/gallium/drivers/llvmpipe')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 3162f3e1c2..e4cfa97aa3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -304,8 +304,13 @@ lp_depth_type(const struct util_format_description *format_desc,
    }
    else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
       assert(format_desc->block.bits <= 32);
-      if(format_desc->channel[swizzle].normalized)
-         type.norm = TRUE;
+      assert(format_desc->channel[swizzle].normalized);
+      if (format_desc->channel[swizzle].size < format_desc->block.bits) {
+         /* Prefer signed integers when possible, as SSE has less support
+          * for unsigned comparison;
+          */
+         type.sign = TRUE;
+      }
    }
    else
       assert(0);
@@ -325,9 +330,9 @@ lp_depth_type(const struct util_format_description *format_desc,
  * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
  * get by with fewer bit twiddling steps.
  */
-static boolean
+static void
 get_z_shift_and_mask(const struct util_format_description *format_desc,
-                     unsigned *shift, unsigned *mask)
+                     unsigned *shift, unsigned *width, unsigned *mask)
 {
    const unsigned total_bits = format_desc->block.bits;
    unsigned z_swizzle;
@@ -340,15 +345,16 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
 
    z_swizzle = format_desc->swizzle[0];
 
-   if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
-      return FALSE;
+   assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
+
+   *width = format_desc->channel[z_swizzle].size;
 
    padding_right = 0;
    for (chan = 0; chan < z_swizzle; ++chan)
       padding_right += format_desc->channel[chan].size;
 
    padding_left =
-      total_bits - (padding_right + format_desc->channel[z_swizzle].size);
+      total_bits - (padding_right + *width);
 
    if (padding_left || padding_right) {
       unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
@@ -359,9 +365,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
       *mask = 0xffffffff;
    }
 
-   *shift = padding_left;
-
-   return TRUE;
+   *shift = padding_right;
 }
 
 
@@ -462,6 +466,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
    struct lp_build_context z_bld;
    struct lp_build_context s_bld;
    struct lp_type s_type;
+   unsigned z_shift, z_width, z_mask;
    LLVMValueRef zs_dst, z_dst = NULL;
    LLVMValueRef stencil_vals = NULL;
    LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
@@ -469,67 +474,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
    LLVMValueRef orig_mask = lp_build_mask_value(mask);
    LLVMValueRef front_facing = NULL;
 
-   /* Prototype a simpler path:
-    */
-   if (z_src_type.floating &&
-       format_desc->format == PIPE_FORMAT_X8Z24_UNORM &&
-       depth->enabled) 
-   {
-      LLVMValueRef zscaled;
-      LLVMValueRef const_ffffff_float;
-      LLVMValueRef const_8_int;
-      LLVMTypeRef int32_vec_type;
-
-      /* We know the values in z_dst are all >= 0, so allow
-       * lp_build_compare to use signed compare intrinsics:
-       */
-      z_type.floating = 0;
-      z_type.fixed = 0;
-      z_type.sign = 1;
-      z_type.norm = 1;
-      z_type.width = 32;
-      z_type.length = z_src_type.length;
-
-      int32_vec_type = LLVMVectorType(LLVMInt32Type(), z_src_type.length);
-
-      const_8_int = lp_build_const_int_vec(z_type, 8);
-      const_ffffff_float = lp_build_const_vec(z_src_type, (float)0xffffff);
-
-      zscaled = LLVMBuildFMul(builder, z_src, const_ffffff_float, "zscaled");
-      z_src = LLVMBuildFPToSI(builder, zscaled, int32_vec_type, "z_src");
-      
-      /* Load current z/stencil value from z/stencil buffer */
-      zs_dst_ptr = LLVMBuildBitCast(builder,
-                                    zs_dst_ptr,
-                                    LLVMPointerType(int32_vec_type, 0), "");
-      z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
-      z_dst = LLVMBuildLShr(builder, z_dst, const_8_int, "z_dst");
-
-      /* compare src Z to dst Z, returning 'pass' mask */
-      z_pass = lp_build_compare(builder,
-                                z_type,
-                                depth->func, z_src, z_dst);
-
-      lp_build_mask_update(mask, z_pass);
-
-      if (do_branch)
-         lp_build_mask_check(mask);
-
-      /* No need to worry about old stencil contents, just blend the
-       * old and new values and shift into the correct position for
-       * storage.
-       */
-      if (depth->writemask) {
-         z_type.sign = 1;
-         lp_build_context_init(&z_bld, builder, z_type);
-
-         z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), z_src, z_dst);
-         z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst");
-         *zs_value = z_dst;
-      }
-
-      return;
-   }
 
    /*
     * Depths are expected to be between 0 and 1, even if they are stored in
@@ -552,10 +496,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
    assert(z_type.width == z_src_type.width);
    assert(z_type.length == z_src_type.length);
 
-   /* Convert fragment Z from float to integer */
-   lp_build_conv(builder, z_src_type, z_type, &z_src, 1, &z_src, 1);
-
-
    /* Sanity checking */
    {
       const unsigned z_swizzle = format_desc->swizzle[0];
@@ -589,8 +529,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
                 UTIL_FORMAT_TYPE_UNSIGNED);
          assert(format_desc->channel[z_swizzle].normalized);
          assert(!z_type.fixed);
-         assert(!z_type.sign);
-         assert(z_type.norm);
       }
    }
 
@@ -608,34 +546,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
                                  LLVMPointerType(z_bld.vec_type, 0), "");
    zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
 
-   lp_build_name(zs_dst, "zsbufval");
+   lp_build_name(zs_dst, "zs_dst");
 
 
    /* Compute and apply the Z/stencil bitmasks and shifts.
     */
    {
-      unsigned z_shift, z_mask;
       unsigned s_shift, s_mask;
 
-      if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
-         if (z_shift) {
-            LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
-            z_src = LLVMBuildLShr(builder, z_src, shift, "");
-         }
-
-         if (z_mask != 0xffffffff) {
-            LLVMValueRef mask = lp_build_const_int_vec(z_type, z_mask);
-            z_src = LLVMBuildAnd(builder, z_src, mask, "");
-            z_dst = LLVMBuildAnd(builder, zs_dst, mask, "");
-            z_bitmask = mask;  /* used below */
-         }
-         else {
-            z_dst = zs_dst;
-         }
-
-         lp_build_name(z_dst, "zsbuf.z");
-      }
-
       if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
          if (s_shift) {
             LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift);
@@ -651,7 +569,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
             stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
          }
 
-         lp_build_name(stencil_vals, "stencil");
+         lp_build_name(stencil_vals, "s_dst");
       }
    }
 
@@ -687,6 +605,62 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
    }
 
    if (depth->enabled) {
+      get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
+
+      /*
+       * Convert fragment Z to the desired type, aligning the LSB to the right.
+       */
+
+      assert(z_type.width == z_src_type.width);
+      assert(z_type.length == z_src_type.length);
+      assert(lp_check_value(z_src_type, z_src));
+      if (z_src_type.floating) {
+         /*
+          * Convert from floating point values
+          */
+
+         if (!z_type.floating) {
+            z_src = lp_build_clamped_float_to_unsigned_norm(builder,
+                                                            z_src_type,
+                                                            z_width,
+                                                            z_src);
+         }
+      } else {
+         /*
+          * Convert from unsigned normalized values.
+          */
+
+         assert(!z_src_type.sign);
+         assert(!z_src_type.fixed);
+         assert(z_src_type.norm);
+         assert(!z_type.floating);
+         if (z_src_type.width > z_width) {
+            LLVMValueRef shift = lp_build_const_int_vec(z_src_type,
+                                                        z_src_type.width - z_width);
+            z_src = LLVMBuildLShr(builder, z_src, shift, "");
+         }
+      }
+      assert(lp_check_value(z_type, z_src));
+
+      lp_build_name(z_src, "z_src");
+
+      if (z_mask != 0xffffffff) {
+         z_bitmask = lp_build_const_int_vec(z_type, z_mask);
+      }
+
+      /*
+       * Align the framebuffer Z's LSB to the right.
+       */
+      if (z_shift) {
+         LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
+         z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
+      } else if (z_bitmask) {
+         z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
+      } else {
+         z_dst = zs_dst;
+         lp_build_name(z_dst, "z_dst");
+      }
+
       /* compare src Z to dst Z, returning 'pass' mask */
       z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
 
@@ -704,25 +678,20 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
       }
 
       if (depth->writemask) {
-         LLVMValueRef zselectmask = lp_build_mask_value(mask);
+         LLVMValueRef zselectmask;
 
          /* mask off bits that failed Z test */
-         zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, "");
+         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
 
          /* mask off bits that failed stencil test */
          if (s_pass_mask) {
             zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
          }
 
-         /* if combined Z/stencil format, mask off the stencil bits */
-         if (z_bitmask) {
-            zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, "");
-         }
-
          /* Mix the old and new Z buffer values.
-          * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i])
+          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
           */
-         z_dst = lp_build_select_bitwise(&z_bld, zselectmask, z_src, z_dst);
+         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
       }
 
       if (stencil[0].enabled) {
@@ -752,9 +721,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
                                          s_pass_mask, front_facing);
    }
 
-   /* The Z bits are already in the right place but we may need to shift the
-    * stencil bits before ORing Z with Stencil to make the final pixel value.
-    */
+   /* Put Z and stencil bits in the right place */
+   if (z_dst && z_shift) {
+      LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
+      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
+   }
    if (stencil_vals && stencil_shift)
       stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals,
                                   stencil_shift, "");
-- 
cgit v1.2.3