diff options
| author | José Fonseca <jfonseca@vmware.com> | 2010-10-06 18:31:36 +0100 | 
|---|---|---|
| committer | José Fonseca <jfonseca@vmware.com> | 2010-10-06 18:47:01 +0100 | 
| commit | af05f6157668b3c5e6fd73c3d743b11e619b9067 (patch) | |
| tree | e381775391a680707298c0668d1792df952401b0 | |
| parent | 012d57737b1b4e4263aa3414abe433195ff8a713 (diff) | |
gallivm: Combined ifloor & fract helper.
The only way to ensure we don't do redundant FP <-> SI conversions.
| -rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 42 | ||||
| -rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.h | 6 | ||||
| -rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.c | 4 | ||||
| -rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 41 | 
4 files changed, 65 insertions, 28 deletions
| diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 15b7441018..64c468c14d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1359,6 +1359,48 @@ lp_build_iceil(struct lp_build_context *bld,  } +/** + * Combined ifloor() & fract(). + * + * Preferred to calling the functions separately, as it will ensure that the + * strategy (floor() vs ifloor()) that results in less redundant work is used. + */ +void +lp_build_ifloor_fract(struct lp_build_context *bld, +                      LLVMValueRef a, +                      LLVMValueRef *out_ipart, +                      LLVMValueRef *out_fpart) +{ + + +   const struct lp_type type = bld->type; +   LLVMValueRef ipart; + +   assert(type.floating); +   assert(lp_check_value(type, a)); + +   if (util_cpu_caps.has_sse4_1 && +       (type.length == 1 || type.width*type.length == 128)) { +      /* +       * floor() is easier. +       */ + +      ipart = lp_build_floor(bld, a); +      *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); +      *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart"); +   } +   else { +      /* +       * ifloor() is easier. 
+       */ + +      *out_ipart = lp_build_ifloor(bld, a); +      ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart"); +      *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); +   } +} + +  LLVMValueRef  lp_build_sqrt(struct lp_build_context *bld,                LLVMValueRef a) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index f36197479f..8424384f8f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -171,6 +171,12 @@ LLVMValueRef  lp_build_itrunc(struct lp_build_context *bld,                  LLVMValueRef a); +void +lp_build_ifloor_fract(struct lp_build_context *bld, +                      LLVMValueRef a, +                      LLVMValueRef *out_ipart, +                      LLVMValueRef *out_fpart); +  LLVMValueRef  lp_build_sqrt(struct lp_build_context *bld,                LLVMValueRef a); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 9dee653eee..acd99741f1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -319,7 +319,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,                                                 bld->builder, unit);     /* convert float lod to integer */ -   level = lp_build_ifloor(float_bld, lod); +   lp_build_ifloor_fract(float_bld, lod, &level, weight_out);     /* compute level 0 and clamp to legal range of levels */     *level0_out = lp_build_clamp(int_bld, level, @@ -330,8 +330,6 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,     *level1_out = lp_build_clamp(int_bld, level,                                  int_bld->zero,                                  last_level); - -   *weight_out = lp_build_fract(float_bld, lod);  } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 36a77d3aff..d464147371 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -253,11 +253,9 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,        /* mul by size and subtract 0.5 */        coord = lp_build_mul(coord_bld, coord, length_f);        coord = lp_build_sub(coord_bld, coord, half); -      /* convert to int */ -      coord0 = lp_build_ifloor(coord_bld, coord); +      /* convert to int, compute lerp weight */ +      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);        coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one); -      /* compute lerp weight */ -      weight = lp_build_fract(coord_bld, coord);        /* repeat wrap */        if (is_pot) {           coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); @@ -284,8 +282,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,        coord = lp_build_sub(coord_bld, coord, half); -      weight = lp_build_fract(coord_bld, coord); -      coord0 = lp_build_ifloor(coord_bld, coord); +      /* convert to int, compute lerp weight */ +      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);        coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);        break; @@ -304,10 +302,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,           max = lp_build_sub(coord_bld, length_f, min);           coord = lp_build_clamp(coord_bld, coord, min, max);        } -      /* compute lerp weight */ -      weight = lp_build_fract(coord_bld, coord); -      /* coord0 = floor(coord); */ -      coord0 = lp_build_ifloor(coord_bld, coord); +      /* convert to int, compute lerp weight */ +      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);        coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);        /* coord0 = max(coord0, 0) */        coord0 = 
lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); @@ -327,10 +323,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,           max = lp_build_sub(coord_bld, length_f, min);           coord = lp_build_clamp(coord_bld, coord, min, max);           coord = lp_build_sub(coord_bld, coord, half); -         /* compute lerp weight */ -         weight = lp_build_fract(coord_bld, coord); -         /* convert to int */ -         coord0 = lp_build_ifloor(coord_bld, coord); +         /* convert to int, compute lerp weight */ +         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);           coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);        }        break; @@ -343,11 +337,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,        coord = lp_build_mul(coord_bld, coord, length_f);        coord = lp_build_sub(coord_bld, coord, half); -      /* compute lerp weight */ -      weight = lp_build_fract(coord_bld, coord); - -      /* convert to int coords */ -      coord0 = lp_build_ifloor(coord_bld, coord); +      /* convert to int, compute lerp weight */ +      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);        coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);        /* coord0 = max(coord0, 0) */ @@ -369,8 +360,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,        coord = lp_build_sub(coord_bld, coord, half); -      weight = lp_build_fract(coord_bld, coord); -      coord0 = lp_build_ifloor(coord_bld, coord); +      /* convert to int, compute lerp weight */ +      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);        coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);        break; @@ -392,8 +383,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,           coord = lp_build_sub(coord_bld, coord, half); -         weight = lp_build_fract(coord_bld, coord); -         coord0 = lp_build_ifloor(coord_bld, coord); +      
   /* convert to int, compute lerp weight */ +         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);           coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);        }        break; @@ -416,8 +407,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,           coord = lp_build_sub(coord_bld, coord, half); -         weight = lp_build_fract(coord_bld, coord); -         coord0 = lp_build_ifloor(coord_bld, coord); +         /* convert to int, compute lerp weight */ +         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);           coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);        }        break; | 
