summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosé Fonseca <jfonseca@vmware.com>2010-10-06 18:31:36 +0100
committerJosé Fonseca <jfonseca@vmware.com>2010-10-06 18:47:01 +0100
commitaf05f6157668b3c5e6fd73c3d743b11e619b9067 (patch)
treee381775391a680707298c0668d1792df952401b0
parent012d57737b1b4e4263aa3414abe433195ff8a713 (diff)
gallivm: Combined ifloor & fract helper.
The only way to ensure we don't do redundant FP <-> SI conversions.
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c42
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.h6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c41
4 files changed, 65 insertions, 28 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 15b7441018..64c468c14d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1359,6 +1359,48 @@ lp_build_iceil(struct lp_build_context *bld,
}
+/**
+ * Combined ifloor() & fract(): stores the integer floor of a in *out_ipart
+ * and a minus that floor (as a float value) in *out_fpart.
+ * Preferred to calling the functions separately, as it will ensure that the
+ * strategy (floor() vs ifloor()) that results in less redundant work is used.
+ */
+void
+lp_build_ifloor_fract(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef *out_ipart,
+ LLVMValueRef *out_fpart)
+{
+
+
+ const struct lp_type type = bld->type;
+ LLVMValueRef ipart;
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
+ /*
+ * floor() is easier: floor in FP, subtract, then a single FP -> SI cast.
+ */
+
+ ipart = lp_build_floor(bld, a);
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart");
+ }
+ else {
+ /*
+ * ifloor() is easier: cast its integer result back to FP to subtract.
+ */
+
+ *out_ipart = lp_build_ifloor(bld, a);
+ ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart");
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ }
+}
+
+
LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index f36197479f..8424384f8f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -171,6 +171,12 @@ LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a);
+void
+lp_build_ifloor_fract(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef *out_ipart,
+ LLVMValueRef *out_fpart);
+
LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 9dee653eee..acd99741f1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -319,7 +319,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
bld->builder, unit);
/* convert float lod to integer */
- level = lp_build_ifloor(float_bld, lod);
+ lp_build_ifloor_fract(float_bld, lod, &level, weight_out);
/* compute level 0 and clamp to legal range of levels */
*level0_out = lp_build_clamp(int_bld, level,
@@ -330,8 +330,6 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
*level1_out = lp_build_clamp(int_bld, level,
int_bld->zero,
last_level);
-
- *weight_out = lp_build_fract(float_bld, lod);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 36a77d3aff..d464147371 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -253,11 +253,9 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
/* mul by size and subtract 0.5 */
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
/* repeat wrap */
if (is_pot) {
coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
@@ -284,8 +282,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
@@ -304,10 +302,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
}
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* coord0 = floor(coord); */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
/* coord0 = max(coord0, 0) */
coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
@@ -327,10 +323,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
coord = lp_build_sub(coord_bld, coord, half);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
@@ -343,11 +337,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
-
- /* convert to int coords */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
/* coord0 = max(coord0, 0) */
@@ -369,8 +360,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
@@ -392,8 +383,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
@@ -416,8 +407,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;