From 87dd859b342b844add906358810445da21b6b092 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 6 Oct 2010 18:44:51 +0100
Subject: gallivm: Compute lod as integer whenever possible.

More accurate/faster results for PIPE_TEX_MIPFILTER_NEAREST. Less
FP <-> SI conversion overall.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c     | 170 ++++++++++++++--------
 src/gallium/auxiliary/gallivm/lp_bld_sample.h     |  12 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c |  40 ++---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  31 ++--
 4 files changed, 158 insertions(+), 95 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index c1c98bf859..3287cf7c37 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -167,6 +167,73 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
 }
 
 
+/**
+ * Generate code to compute coordinate gradient (rho): for each used texture
+ * axis, max(|d/dx|, |d/dy|) scaled by the texture size; rho is the max of those.
+ * \param ddx  partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
+ * \param width  scalar int texture width
+ * \param height  scalar int texture height (only read if texture_dims() > 1)
+ * \param depth  scalar int texture depth (only read if texture_dims() > 2)
+ * \return  rho. XXX: The resulting rho is scalar, so we ignore all but the
+ * first element of derivatives that are passed by the shader.
+ */
+static LLVMValueRef
+lp_build_rho(struct lp_build_sample_context *bld,
+             const LLVMValueRef ddx[4],
+             const LLVMValueRef ddy[4],
+             LLVMValueRef width,
+             LLVMValueRef height,
+             LLVMValueRef depth)
+{
+   struct lp_build_context *float_bld = &bld->float_bld;
+   const int dims = texture_dims(bld->static_state->target);
+   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   LLVMValueRef dsdx, dsdy;
+   LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
+   LLVMValueRef rho;
+
+   dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
+   dsdx = lp_build_abs(float_bld, dsdx);
+   dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
+   dsdy = lp_build_abs(float_bld, dsdy);
+   if (dims > 1) {
+      dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
+      dtdx = lp_build_abs(float_bld, dtdx);
+      dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
+      dtdy = lp_build_abs(float_bld, dtdy);
+      if (dims > 2) {
+         drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
+         drdx = lp_build_abs(float_bld, drdx);
+         drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
+         drdy = lp_build_abs(float_bld, drdy);
+      }
+   }
+
+   /* Compute rho = max of all partial derivatives scaled by texture size
+    * (s by width, t by height, r by depth). XXX this could be vectorized somewhat
+    */
+   rho = LLVMBuildFMul(bld->builder,
+                      lp_build_max(float_bld, dsdx, dsdy),
+                      lp_build_int_to_float(float_bld, width), "");
+   if (dims > 1) {
+      LLVMValueRef max;
+      max = LLVMBuildFMul(bld->builder,
+                         lp_build_max(float_bld, dtdx, dtdy),
+                         lp_build_int_to_float(float_bld, height), "");
+      rho = lp_build_max(float_bld, rho, max);
+      if (dims > 2) {
+         max = LLVMBuildFMul(bld->builder,
+                            lp_build_max(float_bld, drdx, drdy),
+                            lp_build_int_to_float(float_bld, depth), "");
+         rho = lp_build_max(float_bld, rho, max);
+      }
+   }
+
+   return rho;
+}
+
+
 /**
  * Generate code to compute texture level of detail (lambda).
  * \param ddx  partial derivatives of (s, t, r, q) with respect to X
@@ -180,7 +247,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
  * XXX: The resulting lod is scalar, so ignore all but the first element of
  * derivatives, lod_bias, etc that are passed by the shader.
  */
-LLVMValueRef
+void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                       unsigned unit,
                       const LLVMValueRef ddx[4],
@@ -189,9 +256,18 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                       LLVMValueRef explicit_lod, /* optional */
                       LLVMValueRef width,
                       LLVMValueRef height,
-                      LLVMValueRef depth)
+                      LLVMValueRef depth,
+                      unsigned mip_filter,
+                      LLVMValueRef *out_lod_ipart,
+                      LLVMValueRef *out_lod_fpart)
 
 {
+   struct lp_build_context *float_bld = &bld->float_bld;
+   LLVMValueRef lod;
+
+   *out_lod_ipart = bld->int_bld.zero;
+   *out_lod_fpart = bld->float_bld.zero;
+
    if (bld->static_state->min_max_lod_equal) {
       /* User is forcing sampling from a particular mipmap level.
        * This is hit during mipmap generation.
@@ -199,68 +275,40 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
       LLVMValueRef min_lod =
          bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
 
-      return min_lod;
+      lod = min_lod;
    }
    else {
-      struct lp_build_context *float_bld = &bld->float_bld;
       LLVMValueRef sampler_lod_bias =
          bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
       LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
-      LLVMValueRef lod;
 
       if (explicit_lod) {
          lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
                                        index0, "");
       }
       else {
-         const int dims = texture_dims(bld->static_state->target);
-         LLVMValueRef dsdx, dsdy;
-         LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
          LLVMValueRef rho;
 
-         dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
-         dsdx = lp_build_abs(float_bld, dsdx);
-         dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
-         dsdy = lp_build_abs(float_bld, dsdy);
-         if (dims > 1) {
-            dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
-            dtdx = lp_build_abs(float_bld, dtdx);
-            dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
-            dtdy = lp_build_abs(float_bld, dtdy);
-            if (dims > 2) {
-               drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
-               drdx = lp_build_abs(float_bld, drdx);
-               drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
-               drdy = lp_build_abs(float_bld, drdy);
-            }
-         }
+         rho = lp_build_rho(bld, ddx, ddy, width, height, depth);
 
-         /* Compute rho = max of all partial derivatives scaled by texture size.
-          * XXX this could be vectorized somewhat
-          */
-         rho = LLVMBuildFMul(bld->builder,
-                            lp_build_max(float_bld, dsdx, dsdy),
-                            lp_build_int_to_float(float_bld, width), "");
-         if (dims > 1) {
-            LLVMValueRef max;
-            max = LLVMBuildFMul(bld->builder,
-                               lp_build_max(float_bld, dtdx, dtdy),
-                               lp_build_int_to_float(float_bld, height), "");
-            rho = lp_build_max(float_bld, rho, max);
-            if (dims > 2) {
-               max = LLVMBuildFMul(bld->builder,
-                                  lp_build_max(float_bld, drdx, drdy),
-                                  lp_build_int_to_float(float_bld, depth), "");
-               rho = lp_build_max(float_bld, rho, max);
-            }
+         /* compute lod = log2(rho) */
+         if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
+              mip_filter == PIPE_TEX_MIPFILTER_NEAREST) &&
+             !lod_bias &&
+             !bld->static_state->lod_bias_non_zero &&
+             !bld->static_state->apply_max_lod &&
+             !bld->static_state->apply_min_lod) {
+            *out_lod_ipart = lp_build_ilog2(float_bld, rho);
+            *out_lod_fpart = bld->float_bld.zero;
+            return;
          }
 
-         /* compute lod = log2(rho) */
-#if 0
-         lod = lp_build_log2(float_bld, rho);
-#else
-         lod = lp_build_fast_log2(float_bld, rho);
-#endif
+         if (0) {
+            lod = lp_build_log2(float_bld, rho);
+         }
+         else {
+            lod = lp_build_fast_log2(float_bld, rho);
+         }
 
          /* add shader lod bias */
          if (lod_bias) {
@@ -288,9 +336,20 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 
          lod = lp_build_max(float_bld, lod, min_lod);
       }
+   }
 
-      return lod;
+   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+      LLVMValueRef ipart = lp_build_ifloor(float_bld, lod);
+      lp_build_name(ipart, "lod_ipart");
+      *out_lod_ipart = ipart;
+      ipart = LLVMBuildSIToFP(bld->builder, ipart, float_bld->vec_type, "");
+      *out_lod_fpart = LLVMBuildFSub(bld->builder, lod, ipart, "lod_fpart");
    }
+   else {
+      *out_lod_ipart = lp_build_iround(float_bld, lod);
+   }
+
+   return;
 }
 
 
@@ -304,10 +363,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                            unsigned unit,
-                           LLVMValueRef lod,
+                           LLVMValueRef lod_ipart,
                            LLVMValueRef *level_out)
 {
-   struct lp_build_context *float_bld = &bld->float_bld;
    struct lp_build_context *int_bld = &bld->int_bld;
    LLVMValueRef last_level, level;
 
@@ -317,7 +375,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                                                bld->builder, unit);
 
    /* convert float lod to integer */
-   level = lp_build_iround(float_bld, lod);
+   level = lod_ipart;
 
    /* clamp level to legal range of levels */
    *level_out = lp_build_clamp(int_bld, level, zero, last_level);
@@ -332,12 +390,10 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 void
 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                            unsigned unit,
-                           LLVMValueRef lod,
+                           LLVMValueRef lod_ipart,
                            LLVMValueRef *level0_out,
-                           LLVMValueRef *level1_out,
-                           LLVMValueRef *weight_out)
+                           LLVMValueRef *level1_out)
 {
-   struct lp_build_context *float_bld = &bld->float_bld;
    struct lp_build_context *int_bld = &bld->int_bld;
    LLVMValueRef last_level, level;
 
@@ -345,7 +401,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                                bld->builder, unit);
 
    /* convert float lod to integer */
-   lp_build_ifloor_fract(float_bld, lod, &level, weight_out);
+   level = lod_ipart;
 
    /* compute level 0 and clamp to legal range of levels */
    *level0_out = lp_build_clamp(int_bld, level,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index bb83ede931..b019c3fa5e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -274,7 +274,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
                         const struct pipe_sampler_state *sampler);
 
 
-LLVMValueRef
+void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                       unsigned unit,
                       const LLVMValueRef ddx[4],
@@ -283,7 +283,10 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                       LLVMValueRef explicit_lod, /* optional */
                       LLVMValueRef width,
                       LLVMValueRef height,
-                      LLVMValueRef depth);
+                      LLVMValueRef depth,
+                      unsigned mip_filter,
+                      LLVMValueRef *out_lod_ipart,
+                      LLVMValueRef *out_lod_fpart);
 
 void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
@@ -294,10 +297,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 void
 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                            unsigned unit,
-                           LLVMValueRef lod,
+                           LLVMValueRef lod_ipart,
                            LLVMValueRef *level0_out,
-                           LLVMValueRef *level1_out,
-                           LLVMValueRef *weight_out);
+                           LLVMValueRef *level1_out);
 
 LLVMValueRef
 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 49a6eed615..8a55681166 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -882,13 +882,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                     LLVMValueRef data_array,
                     LLVMValueRef texel_out[4])
 {
-   struct lp_build_context *float_bld = &bld->float_bld;
+   struct lp_build_context *int_bld = &bld->int_bld;
    LLVMBuilderRef builder = bld->builder;
    const unsigned mip_filter = bld->static_state->min_mip_filter;
    const unsigned min_filter = bld->static_state->min_img_filter;
    const unsigned mag_filter = bld->static_state->mag_img_filter;
    const int dims = texture_dims(bld->static_state->target);
-   LLVMValueRef lod = NULL, lod_fpart = NULL;
+   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
    LLVMValueRef ilevel0, ilevel1 = NULL;
    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
@@ -936,7 +936,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
       ddy = face_ddy;
    }
 
-
    /*
     * Compute the level of detail (float).
     */
@@ -945,9 +944,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
       /* Need to compute lod either to choose mipmap levels or to
        * distinguish between minification/magnification with one mipmap level.
        */
-      lod = lp_build_lod_selector(bld, unit, ddx, ddy,
-                                  lod_bias, explicit_lod,
-                                  width, height, depth);
+      lp_build_lod_selector(bld, unit, ddx, ddy,
+                            lod_bias, explicit_lod,
+                            width, height, depth,
+                            mip_filter,
+                            &lod_ipart, &lod_fpart);
+   } else {
+      lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
    }
 
    /*
@@ -966,30 +969,29 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
           * We should be able to set ilevel0 = const(0) but that causes
           * bad x86 code to be emitted.
           */
-         lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
-         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+         assert(lod_ipart);
+         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
       }
       else {
          ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
       }
       break;
    case PIPE_TEX_MIPFILTER_NEAREST:
-      assert(lod);
-      lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+      assert(lod_ipart);
+      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
       break;
    case PIPE_TEX_MIPFILTER_LINEAR:
       {
          LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
-         LLVMValueRef i255 = lp_build_const_int32(255);
+         LLVMTypeRef i32_type = LLVMIntType(32);
          LLVMTypeRef i16_type = LLVMIntType(16);
 
-         assert(lod);
+         assert(lod_fpart);
+
+         lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);
 
-         lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
-                                    &lod_fpart);
          lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
-         lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
-         lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
+         lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "");
          lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
          lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);
 
@@ -1049,9 +1051,9 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
       lp_build_flow_scope_declare(flow_ctx, &packed_lo);
       lp_build_flow_scope_declare(flow_ctx, &packed_hi);
 
-      /* minify = lod > 0.0 */
-      minify = LLVMBuildFCmp(builder, LLVMRealUGE,
-                             lod, float_bld->zero, "");
+      /* minify = lod >= 0.0 */
+      minify = LLVMBuildICmp(builder, LLVMIntSGE,
+                             lod_ipart, int_bld->zero, "");
 
       lp_build_if(&if_ctx, flow_ctx, builder, minify);
       {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index d464147371..4f9bf6763e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -884,12 +884,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
                         LLVMValueRef data_array,
                         LLVMValueRef *colors_out)
 {
-   struct lp_build_context *float_bld = &bld->float_bld;
+   struct lp_build_context *int_bld = &bld->int_bld;
    const unsigned mip_filter = bld->static_state->min_mip_filter;
    const unsigned min_filter = bld->static_state->min_img_filter;
    const unsigned mag_filter = bld->static_state->mag_img_filter;
    const int dims = texture_dims(bld->static_state->target);
-   LLVMValueRef lod = NULL, lod_fpart = NULL;
+   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
    LLVMValueRef ilevel0, ilevel1 = NULL;
    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
@@ -935,9 +935,13 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
       /* Need to compute lod either to choose mipmap levels or to
        * distinguish between minification/magnification with one mipmap level.
        */
-      lod = lp_build_lod_selector(bld, unit, ddx, ddy,
-                                  lod_bias, explicit_lod,
-                                  width, height, depth);
+      lp_build_lod_selector(bld, unit, ddx, ddy,
+                            lod_bias, explicit_lod,
+                            width, height, depth,
+                            mip_filter,
+                            &lod_ipart, &lod_fpart);
+   } else {
+      lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
    }
 
    /*
@@ -950,22 +954,21 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
           * We should be able to set ilevel0 = const(0) but that causes
           * bad x86 code to be emitted.
           */
-         lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
-         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+         assert(lod_ipart);
+         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
       }
       else {
          ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
       }
    }
    else {
-      assert(lod);
+      assert(lod_ipart);
       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
-         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
       }
       else {
          assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
-         lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
-                                    &lod_fpart);
+         lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);
          lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
       }
    }
@@ -1019,9 +1022,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
       lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
       lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
 
-      /* minify = lod > 0.0 */
-      minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
-                             lod, float_bld->zero, "");
+      /* minify = lod >= 0.0 */
+      minify = LLVMBuildICmp(bld->builder, LLVMIntSGE,
+                             lod_ipart, int_bld->zero, "");
 
       lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
       {
-- 
cgit v1.2.3