diff options
| author | José Fonseca <jfonseca@vmware.com> | 2010-10-13 20:25:17 +0100 | 
|---|---|---|
| committer | José Fonseca <jfonseca@vmware.com> | 2010-10-13 20:25:57 +0100 | 
| commit | ae00e34e4b0d3be247b0538b60810176397c7915 (patch) | |
| tree | 51ec5a1af3914e89855f2c0ee8b83f0eb6deca30 | |
| parent | 60c5d4735d5fa5642c84f6d7c3847ac213efcb53 (diff) | |
llvmpipe: Generalize the x8z24 fast path to all depth formats.
Together with the previous commit, this generalizes the benefits of
d2cf757f44f4ee5554243f3279483a25886d9927 to all depth formats, in
particular:
- simpler float -> 24unorm conversion
- avoid unsigned comparisons (not directly supported on SSE) by aligning
to the least significant bit
- avoid unnecessary/repeated mask ANDing
Verified with trivial/tri-z that the exact same assembly is produced for
X8Z24.
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.c | 193 | 
1 files changed, 82 insertions, 111 deletions
| diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 3162f3e1c2..e4cfa97aa3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -304,8 +304,13 @@ lp_depth_type(const struct util_format_description *format_desc,     }     else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {        assert(format_desc->block.bits <= 32); -      if(format_desc->channel[swizzle].normalized) -         type.norm = TRUE; +      assert(format_desc->channel[swizzle].normalized); +      if (format_desc->channel[swizzle].size < format_desc->block.bits) { +         /* Prefer signed integers when possible, as SSE has less support +          * for unsigned comparison; +          */ +         type.sign = TRUE; +      }     }     else        assert(0); @@ -325,9 +330,9 @@ lp_depth_type(const struct util_format_description *format_desc,   * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us   * get by with fewer bit twiddling steps.   
*/ -static boolean +static void  get_z_shift_and_mask(const struct util_format_description *format_desc, -                     unsigned *shift, unsigned *mask) +                     unsigned *shift, unsigned *width, unsigned *mask)  {     const unsigned total_bits = format_desc->block.bits;     unsigned z_swizzle; @@ -340,15 +345,16 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,     z_swizzle = format_desc->swizzle[0]; -   if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) -      return FALSE; +   assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + +   *width = format_desc->channel[z_swizzle].size;     padding_right = 0;     for (chan = 0; chan < z_swizzle; ++chan)        padding_right += format_desc->channel[chan].size;     padding_left = -      total_bits - (padding_right + format_desc->channel[z_swizzle].size); +      total_bits - (padding_right + *width);     if (padding_left || padding_right) {        unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; @@ -359,9 +365,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,        *mask = 0xffffffff;     } -   *shift = padding_left; - -   return TRUE; +   *shift = padding_right;  } @@ -462,6 +466,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,     struct lp_build_context z_bld;     struct lp_build_context s_bld;     struct lp_type s_type; +   unsigned z_shift, z_width, z_mask;     LLVMValueRef zs_dst, z_dst = NULL;     LLVMValueRef stencil_vals = NULL;     LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; @@ -469,67 +474,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,     LLVMValueRef orig_mask = lp_build_mask_value(mask);     LLVMValueRef front_facing = NULL; -   /* Prototype a simpler path: -    */ -   if (z_src_type.floating && -       format_desc->format == PIPE_FORMAT_X8Z24_UNORM && -       depth->enabled)  -   { -      LLVMValueRef zscaled; -      LLVMValueRef const_ffffff_float; -      LLVMValueRef const_8_int; -      
LLVMTypeRef int32_vec_type; - -      /* We know the values in z_dst are all >= 0, so allow -       * lp_build_compare to use signed compare intrinsics: -       */ -      z_type.floating = 0; -      z_type.fixed = 0; -      z_type.sign = 1; -      z_type.norm = 1; -      z_type.width = 32; -      z_type.length = z_src_type.length; - -      int32_vec_type = LLVMVectorType(LLVMInt32Type(), z_src_type.length); - -      const_8_int = lp_build_const_int_vec(z_type, 8); -      const_ffffff_float = lp_build_const_vec(z_src_type, (float)0xffffff); - -      zscaled = LLVMBuildFMul(builder, z_src, const_ffffff_float, "zscaled"); -      z_src = LLVMBuildFPToSI(builder, zscaled, int32_vec_type, "z_src"); -       -      /* Load current z/stencil value from z/stencil buffer */ -      zs_dst_ptr = LLVMBuildBitCast(builder, -                                    zs_dst_ptr, -                                    LLVMPointerType(int32_vec_type, 0), ""); -      z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); -      z_dst = LLVMBuildLShr(builder, z_dst, const_8_int, "z_dst"); - -      /* compare src Z to dst Z, returning 'pass' mask */ -      z_pass = lp_build_compare(builder, -                                z_type, -                                depth->func, z_src, z_dst); - -      lp_build_mask_update(mask, z_pass); - -      if (do_branch) -         lp_build_mask_check(mask); - -      /* No need to worry about old stencil contents, just blend the -       * old and new values and shift into the correct position for -       * storage. 
-       */ -      if (depth->writemask) { -         z_type.sign = 1; -         lp_build_context_init(&z_bld, builder, z_type); - -         z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), z_src, z_dst); -         z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst"); -         *zs_value = z_dst; -      } - -      return; -   }     /*      * Depths are expected to be between 0 and 1, even if they are stored in @@ -552,10 +496,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,     assert(z_type.width == z_src_type.width);     assert(z_type.length == z_src_type.length); -   /* Convert fragment Z from float to integer */ -   lp_build_conv(builder, z_src_type, z_type, &z_src, 1, &z_src, 1); - -     /* Sanity checking */     {        const unsigned z_swizzle = format_desc->swizzle[0]; @@ -589,8 +529,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,                  UTIL_FORMAT_TYPE_UNSIGNED);           assert(format_desc->channel[z_swizzle].normalized);           assert(!z_type.fixed); -         assert(!z_type.sign); -         assert(z_type.norm);        }     } @@ -608,34 +546,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,                                   LLVMPointerType(z_bld.vec_type, 0), "");     zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); -   lp_build_name(zs_dst, "zsbufval"); +   lp_build_name(zs_dst, "zs_dst");     /* Compute and apply the Z/stencil bitmasks and shifts.      
*/     { -      unsigned z_shift, z_mask;        unsigned s_shift, s_mask; -      if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { -         if (z_shift) { -            LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); -            z_src = LLVMBuildLShr(builder, z_src, shift, ""); -         } - -         if (z_mask != 0xffffffff) { -            LLVMValueRef mask = lp_build_const_int_vec(z_type, z_mask); -            z_src = LLVMBuildAnd(builder, z_src, mask, ""); -            z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); -            z_bitmask = mask;  /* used below */ -         } -         else { -            z_dst = zs_dst; -         } - -         lp_build_name(z_dst, "zsbuf.z"); -      } -        if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {           if (s_shift) {              LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); @@ -651,7 +569,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,              stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");           } -         lp_build_name(stencil_vals, "stencil"); +         lp_build_name(stencil_vals, "s_dst");        }     } @@ -687,6 +605,62 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,     }     if (depth->enabled) { +      get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); + +      /* +       * Convert fragment Z to the desired type, aligning the LSB to the right. 
+       */ + +      assert(z_type.width == z_src_type.width); +      assert(z_type.length == z_src_type.length); +      assert(lp_check_value(z_src_type, z_src)); +      if (z_src_type.floating) { +         /* +          * Convert from floating point values +          */ + +         if (!z_type.floating) { +            z_src = lp_build_clamped_float_to_unsigned_norm(builder, +                                                            z_src_type, +                                                            z_width, +                                                            z_src); +         } +      } else { +         /* +          * Convert from unsigned normalized values. +          */ + +         assert(!z_src_type.sign); +         assert(!z_src_type.fixed); +         assert(z_src_type.norm); +         assert(!z_type.floating); +         if (z_src_type.width > z_width) { +            LLVMValueRef shift = lp_build_const_int_vec(z_src_type, +                                                        z_src_type.width - z_width); +            z_src = LLVMBuildLShr(builder, z_src, shift, ""); +         } +      } +      assert(lp_check_value(z_type, z_src)); + +      lp_build_name(z_src, "z_src"); + +      if (z_mask != 0xffffffff) { +         z_bitmask = lp_build_const_int_vec(z_type, z_mask); +      } + +      /* +       * Align the framebuffer Z 's LSB to the right. 
+       */ +      if (z_shift) { +         LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); +         z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); +      } else if (z_bitmask) { +         z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); +      } else { +         z_dst = zs_dst; +         lp_build_name(z_dst, "z_dst"); +      } +        /* compare src Z to dst Z, returning 'pass' mask */        z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); @@ -704,25 +678,20 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,        }        if (depth->writemask) { -         LLVMValueRef zselectmask = lp_build_mask_value(mask); +         LLVMValueRef zselectmask;           /* mask off bits that failed Z test */ -         zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); +         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");           /* mask off bits that failed stencil test */           if (s_pass_mask) {              zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");           } -         /* if combined Z/stencil format, mask off the stencil bits */ -         if (z_bitmask) { -            zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); -         } -           /* Mix the old and new Z buffer values. -          * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) +          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]            */ -         z_dst = lp_build_select_bitwise(&z_bld, zselectmask, z_src, z_dst); +         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);        }        if (stencil[0].enabled) { @@ -752,9 +721,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,                                           s_pass_mask, front_facing);     } -   /* The Z bits are already in the right place but we may need to shift the -    * stencil bits before ORing Z with Stencil to make the final pixel value. 
-    */ +   /* Put Z and ztencil bits in the right place */ +   if (z_dst && z_shift) { +      LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); +      z_dst = LLVMBuildShl(builder, z_dst, shift, ""); +   }     if (stencil_vals && stencil_shift)        stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals,                                    stencil_shift, ""); | 
