From 57907e7fd9fc63b9023d0e2b08934c2d0acf2953 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 12 Aug 2009 12:42:06 +0100
Subject: llvmpipe: Translate approximate log2/exp2.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c     | 152 +++++++++++++++++--------
 src/gallium/drivers/llvmpipe/lp_bld_arit.h     |  14 +++
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 103 ++++++++---------
 3 files changed, 169 insertions(+), 100 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 7b3932f522..aec3e297f4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -736,42 +736,70 @@ const double lp_build_exp2_polynomial[] = {
 };
 
 
-LLVMValueRef
-lp_build_exp2(struct lp_build_context *bld,
-              LLVMValueRef x)
+void
+lp_build_exp2_approx(struct lp_build_context *bld,
+                     LLVMValueRef x,
+                     LLVMValueRef *p_exp2_int_part,
+                     LLVMValueRef *p_frac_part,
+                     LLVMValueRef *p_exp2)
 {
    const union lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
    LLVMValueRef ipart;
-   LLVMValueRef fpart, expipart, expfpart;
+   LLVMValueRef fpart, expipart, expfpart, res;
 
-   /* TODO: optimize the constant case */
-   if(LLVMIsConstant(x))
-      debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+   if(p_exp2_int_part || p_frac_part || p_exp2) {
+      /* TODO: optimize the constant case */
+      if(LLVMIsConstant(x))
+         debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__);
 
-   assert(type.floating && type.width == 32);
+      assert(type.floating && type.width == 32);
 
-   x = lp_build_min(bld, x, lp_build_const_uni(type,  129.0));
-   x = lp_build_max(bld, x, lp_build_const_uni(type, -126.99999));
+      x = lp_build_min(bld, x, lp_build_const_uni(type,  129.0));
+      x = lp_build_max(bld, x, lp_build_const_uni(type, -126.99999));
 
-   /* ipart = int(x - 0.5) */
-   ipart = LLVMBuildSub(bld->builder, x, lp_build_const_uni(type, 0.5f), "");
-   ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
+      /* ipart = int(x - 0.5) */
+      ipart = LLVMBuildSub(bld->builder, x, lp_build_const_uni(type, 0.5f), "");
+      ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
 
-   /* fpart = x - ipart */
-   fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
-   fpart = LLVMBuildSub(bld->builder, x, fpart, "");
+      /* fpart = x - ipart */
+      fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
+      fpart = LLVMBuildSub(bld->builder, x, fpart, "");
+   }
+
+   if(p_exp2_int_part || p_exp2) {
+      /* expipart = 2^ipart: biased exponent (ipart + 127) shifted into bit 23 and up, reinterpreted as float */
+      expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_uni(type, 127), "");
+      expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_uni(type, 23), "");
+      expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
+   }
+
+   if(p_exp2) {
+      expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
+                                     Elements(lp_build_exp2_polynomial));
+      /* exp2(x) = 2^ipart * polynomial(fpart) */
+      res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
+   }
 
-   /* expipart = (float) (1 << ipart) */
-   expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_uni(type, 127), "");
-   expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_uni(type, 23), "");
-   expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
+   if(p_exp2_int_part)
+      *p_exp2_int_part = expipart;
 
-   expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
-                                  Elements(lp_build_exp2_polynomial));
+   if(p_frac_part)
+      *p_frac_part = fpart;
 
-   return LLVMBuildMul(bld->builder, expipart, expfpart, "");
+   if(p_exp2)
+      *p_exp2 = res;
+}
+
+
+LLVMValueRef
+lp_build_exp2(struct lp_build_context *bld,
+              LLVMValueRef x)
+{
+   LLVMValueRef exp2_of_x;
+   lp_build_exp2_approx(bld, x, NULL, NULL, &exp2_of_x);
+   return exp2_of_x;
 }
 
 
@@ -798,9 +826,12 @@ const double lp_build_log2_polynomial[] = {
 /**
  * See http://www.devmaster.net/forums/showthread.php?p=43580
  */
-LLVMValueRef
-lp_build_log2(struct lp_build_context *bld,
-              LLVMValueRef x)
+void
+lp_build_log2_approx(struct lp_build_context *bld,
+                     LLVMValueRef x,
+                     LLVMValueRef *p_exp,
+                     LLVMValueRef *p_floor_log2,
+                     LLVMValueRef *p_log2)
 {
    const union lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
@@ -810,34 +841,63 @@ lp_build_log2(struct lp_build_context *bld,
    LLVMValueRef mantmask = lp_build_int_const_uni(type, 0x007fffff);
    LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
 
-   LLVMValueRef i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
-
+   LLVMValueRef i;
    LLVMValueRef exp;
    LLVMValueRef mant;
+   LLVMValueRef logexp;
    LLVMValueRef logmant;
+   LLVMValueRef res;
 
-   /* TODO: optimize the constant case */
-   if(LLVMIsConstant(x))
-      debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+   if(p_exp || p_floor_log2 || p_log2) {
+      /* TODO: optimize the constant case */
+      if(LLVMIsConstant(x))
+         debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__);
 
-   assert(type.floating && type.width == 32);
+      assert(type.floating && type.width == 32);
 
-   /* exp = (float) exponent(x) */
-   exp = LLVMBuildAnd(bld->builder, i, expmask, "");
-   exp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_uni(type, 23), "");
-   exp = LLVMBuildSub(bld->builder, exp, lp_build_int_const_uni(type, 127), "");
-   exp = LLVMBuildSIToFP(bld->builder, exp, vec_type, "");
+      i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
 
-   /* mant = (float) mantissa(x) */
-   mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
-   mant = LLVMBuildOr(bld->builder, mant, one, "");
-   mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
+      /* exp = bits of x selected by expmask, still in place (integer vector, not shifted or unbiased) */
+      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+   }
+   /* logexp = (float) ((exp >> 23) - 127), i.e. the unbiased exponent as a float */
+   if(p_floor_log2 || p_log2) {
+      logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_uni(type, 23), "");
+      logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_uni(type, 127), "");
+      logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
+   }
 
-   logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
-                                 Elements(lp_build_log2_polynomial));
+   if(p_log2) {
+      /* mant = mantissa bits of x with the bit pattern of one OR'ed in, reinterpreted (not converted) as float */
+      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+      mant = LLVMBuildOr(bld->builder, mant, one, "");
+      mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");
 
-   /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
-   logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
+      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+                                    Elements(lp_build_log2_polynomial));
+
+      /* Multiplying by (mant - 1) effectively increases the polynomial degree by one, but ensures that log2(1) == 0 */
+      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+   }
 
-   return LLVMBuildAdd(bld->builder, logmant, exp, "");
+   if(p_exp)
+      *p_exp = LLVMBuildBitCast(bld->builder, exp, vec_type, "");
+
+   if(p_floor_log2)
+      *p_floor_log2 = logexp;
+
+   if(p_log2)
+      *p_log2 = res;
+}
+
+
+LLVMValueRef
+lp_build_log2(struct lp_build_context *bld,
+              LLVMValueRef x)
+{
+   LLVMValueRef log2_of_x;
+   lp_build_log2_approx(bld, x, NULL, NULL, &log2_of_x);
+   return log2_of_x;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 9f8fccb0d4..fc8cb25966 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -126,4 +126,18 @@ LLVMValueRef
 lp_build_log2(struct lp_build_context *bld,
               LLVMValueRef a);
 
+void
+lp_build_exp2_approx(struct lp_build_context *bld,
+                     LLVMValueRef x,
+                     LLVMValueRef *p_exp2_int_part,
+                     LLVMValueRef *p_frac_part,
+                     LLVMValueRef *p_exp2);
+
+void
+lp_build_log2_approx(struct lp_build_context *bld,
+                     LLVMValueRef x,
+                     LLVMValueRef *p_exp,
+                     LLVMValueRef *p_floor_log2,
+                     LLVMValueRef *p_log2);
+
 #endif /* !LP_BLD_ARIT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 48eb771486..c9143ebfe4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -521,10 +521,7 @@ emit_instruction(
          tmp2 = FETCH( bld, *inst, 0, CHAN_W );
          tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
          tmp0 = FETCH( bld, *inst, 0, CHAN_X );
-         sse_xorps(
-            bld,
-            make_xmm( 2 ),
-            make_xmm( 2 ) );
+         tmp2 = bld->base.zero;
          sse_cmpps(
             bld,
             make_xmm( 2 ),
@@ -560,34 +557,31 @@ emit_instruction(
       }
       break;
 
-#if 0
    case TGSI_OPCODE_EXP:
       if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
           IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
           IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         tmp0 = FETCH( bld, *inst, 0, CHAN_X );
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-            tmp1 = tmp0;
-            emit_flr( bld, 2, 1 );
-            /* dst.x = ex2(floor(src.x)) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
-               tmp2 = tmp1;
-               tmp2 = lp_build_exp2( &bld->base, tmp2);
-               STORE( bld, *inst, 0, CHAN_X, tmp2);
-            }
-            /* dst.y = src.x - floor(src.x) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-               tmp2 = tmp0;
-               tmp2 = lp_build_sub( &bld->base, tmp2, tmp1);
-               STORE( bld, *inst, 0, CHAN_Y, tmp2);
-            }
-         }
-         /* dst.z = ex2(src.x) */
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-            tmp0 = lp_build_exp2( &bld->base, tmp0);
-            STORE( bld, *inst, 0, CHAN_Z, tmp0);
-         }
+         LLVMValueRef *p_exp2_int_part = NULL;
+         LLVMValueRef *p_frac_part = NULL;
+         LLVMValueRef *p_exp2 = NULL;
+
+         src0 = FETCH( bld, *inst, 0, CHAN_X );
+         /* Request only the outputs whose destination channel is enabled; the others stay NULL. */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+            p_exp2_int_part = &tmp0;
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
+            p_frac_part = &tmp1;
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+            p_exp2 = &tmp2;
+
+         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
+         /* dst.x = 2^(integer part), dst.y = fractional part, dst.z = approximate ex2(src.x) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+            STORE( bld, *inst, 0, CHAN_X, tmp0);
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
+            STORE( bld, *inst, 0, CHAN_Y, tmp1);
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+            STORE( bld, *inst, 0, CHAN_Z, tmp2);
       }
       /* dst.w = 1.0 */
       if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
@@ -595,44 +589,45 @@ emit_instruction(
          STORE( bld, *inst, 0, CHAN_W, tmp0);
       }
       break;
-#endif
 
-#if 0
    case TGSI_OPCODE_LOG:
       if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
           IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
           IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         tmp0 = FETCH( bld, *inst, 0, CHAN_X );
-         tmp0 = lp_build_abs( &bld->base, tmp0 );
-         tmp1 = tmp0;
-         tmp1 = lp_build_log2( &bld->base, tmp1);
-         /* dst.z = lg2(abs(src.x)) */
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-            STORE( bld, *inst, 0, CHAN_Z, tmp1);
-         }
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-            emit_flr( bld, 2, 1 );
-            /* dst.x = floor(lg2(abs(src.x))) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
-               STORE( bld, *inst, 0, CHAN_X, tmp1);
-            }
-            /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-               tmp1 = lp_build_exp2( &bld->base, tmp1);
-               emit_rcp( bld, 1, 1 );
-               tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
-               STORE( bld, *inst, 0, CHAN_Y, tmp0);
-            }
+         LLVMValueRef *p_floor_log2 = NULL;
+         LLVMValueRef *p_exp = NULL;
+         LLVMValueRef *p_log2 = NULL;
+
+         src0 = FETCH( bld, *inst, 0, CHAN_X );
+         src0 = lp_build_abs( &bld->base, src0 );
+         /* Request only the enabled outputs; the rest must stay NULL so lp_build_log2_approx skips them. */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+            p_floor_log2 = &tmp0;
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
+            p_exp = &tmp1;
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+            p_log2 = &tmp2;
+
+         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
+
+         /* dst.x = floor(lg2(abs(src.x))) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+            STORE( bld, *inst, 0, CHAN_X, tmp0);
+         /* dst.y = abs(src.x)/ex2(floor(lg2(abs(src.x)))) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+            tmp1 = lp_build_div( &bld->base, src0, tmp1);
+            STORE( bld, *inst, 0, CHAN_Y, tmp1);
          }
+         /* dst.z = lg2(abs(src.x)) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+            STORE( bld, *inst, 0, CHAN_Z, tmp2);
       }
       /* dst.w = 1.0 */
       if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
-         emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C );
+         tmp0 = bld->base.one;
          STORE( bld, *inst, 0, CHAN_W, tmp0);
       }
       break;
-#endif
 
    case TGSI_OPCODE_MUL:
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-- 
cgit v1.2.3