summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/gallivm
diff options
context:
space:
mode:
authorChristoph Bumiller <e0425955@student.tuwien.ac.at>2010-08-18 14:37:47 +0200
committerChristoph Bumiller <e0425955@student.tuwien.ac.at>2010-08-18 14:37:47 +0200
commit3e54d63429fe7ca5db3c75c181abbaf7a7f55724 (patch)
treee129c36aaef712525f0a04fc5b06c445e3cf84df /src/gallium/auxiliary/gallivm
parenteaab76457818fad0926b84c663440e8987e1f19f (diff)
parent85d9bc236d6a8ff8f12cbc2150f8c3740354f573 (diff)
Merge remote branch 'origin/master' into nv50-compiler
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c369
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_assert.c101
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_assert.h41
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c14
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_soa.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c93
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.h5
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp32
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c21
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c24
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c14
15 files changed, 571 insertions, 161 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index f5f2623e46..7b35dd4bb4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -59,6 +59,19 @@
#include "lp_bld_arit.h"
+/*
+ * XXX: Increasing eliminates some artifacts, but adds others, most
+ * noticeably corruption in the Earth halo in Google Earth.
+ */
+#define RCP_NEWTON_STEPS 0
+
+#define RSQRT_NEWTON_STEPS 0
+
+#define EXP_POLY_DEGREE 3
+
+#define LOG_POLY_DEGREE 5
+
+
/**
* Generate min(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
@@ -72,6 +85,9 @@ lp_build_min_simple(struct lp_build_context *bld,
const char *intrinsic = NULL;
LLVMValueRef cond;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
/* TODO: optimize the constant case */
if(type.width * type.length == 128) {
@@ -118,6 +134,9 @@ lp_build_max_simple(struct lp_build_context *bld,
const char *intrinsic = NULL;
LLVMValueRef cond;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
/* TODO: optimize the constant case */
if(type.width * type.length == 128) {
@@ -160,6 +179,8 @@ lp_build_comp(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
if(a == bld->one)
return bld->zero;
if(a == bld->zero)
@@ -173,9 +194,15 @@ lp_build_comp(struct lp_build_context *bld,
}
if(LLVMIsConstant(a))
- return LLVMConstSub(bld->one, a);
+ if (type.floating)
+ return LLVMConstFSub(bld->one, a);
+ else
+ return LLVMConstSub(bld->one, a);
else
- return LLVMBuildSub(bld->builder, bld->one, a, "");
+ if (type.floating)
+ return LLVMBuildFSub(bld->builder, bld->one, a, "");
+ else
+ return LLVMBuildSub(bld->builder, bld->one, a, "");
}
@@ -190,6 +217,9 @@ lp_build_add(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return b;
if(b == bld->zero)
@@ -217,9 +247,15 @@ lp_build_add(struct lp_build_context *bld,
}
if(LLVMIsConstant(a) && LLVMIsConstant(b))
- res = LLVMConstAdd(a, b);
+ if (type.floating)
+ res = LLVMConstFAdd(a, b);
+ else
+ res = LLVMConstAdd(a, b);
else
- res = LLVMBuildAdd(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFAdd(bld->builder, a, b, "");
+ else
+ res = LLVMBuildAdd(bld->builder, a, b, "");
/* clamp to ceiling of 1.0 */
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
@@ -240,6 +276,8 @@ lp_build_sum_vector(struct lp_build_context *bld,
LLVMValueRef index, res;
unsigned i;
+ assert(lp_check_value(type, a));
+
if (a == bld->zero)
return bld->zero;
if (a == bld->undef)
@@ -253,9 +291,16 @@ lp_build_sum_vector(struct lp_build_context *bld,
for (i = 1; i < type.length; i++) {
index = LLVMConstInt(LLVMInt32Type(), i, 0);
- res = LLVMBuildAdd(bld->builder, res,
- LLVMBuildExtractElement(bld->builder, a, index, ""),
- "");
+ if (type.floating)
+ res = LLVMBuildFAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder,
+ a, index, ""),
+ "");
+ else
+ res = LLVMBuildAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder,
+ a, index, ""),
+ "");
}
return res;
@@ -273,6 +318,9 @@ lp_build_sub(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(b == bld->zero)
return a;
if(a == bld->undef || b == bld->undef)
@@ -300,9 +348,15 @@ lp_build_sub(struct lp_build_context *bld,
}
if(LLVMIsConstant(a) && LLVMIsConstant(b))
- res = LLVMConstSub(a, b);
+ if (type.floating)
+ res = LLVMConstFSub(a, b);
+ else
+ res = LLVMConstSub(a, b);
else
- res = LLVMBuildSub(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFSub(bld->builder, a, b, "");
+ else
+ res = LLVMBuildSub(bld->builder, a, b, "");
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
res = lp_build_max_simple(bld, res, bld->zero);
@@ -360,6 +414,10 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
LLVMValueRef c8;
LLVMValueRef ab;
+ assert(!i16_type.floating);
+ assert(lp_check_value(i16_type, a));
+ assert(lp_check_value(i16_type, b));
+
c8 = lp_build_const_int_vec(i16_type, 8);
#if 0
@@ -395,6 +453,9 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef shift;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return bld->zero;
if(a == bld->one)
@@ -433,7 +494,10 @@ lp_build_mul(struct lp_build_context *bld,
shift = NULL;
if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
- res = LLVMConstMul(a, b);
+ if (type.floating)
+ res = LLVMConstFMul(a, b);
+ else
+ res = LLVMConstMul(a, b);
if(shift) {
if(type.sign)
res = LLVMConstAShr(res, shift);
@@ -442,7 +506,10 @@ lp_build_mul(struct lp_build_context *bld,
}
}
else {
- res = LLVMBuildMul(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFMul(bld->builder, a, b, "");
+ else
+ res = LLVMBuildMul(bld->builder, a, b, "");
if(shift) {
if(type.sign)
res = LLVMBuildAShr(bld->builder, res, shift, "");
@@ -465,6 +532,8 @@ lp_build_mul_imm(struct lp_build_context *bld,
{
LLVMValueRef factor;
+ assert(lp_check_value(bld->type, a));
+
if(b == 0)
return bld->zero;
@@ -472,7 +541,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
return a;
if(b == -1)
- return LLVMBuildNeg(bld->builder, a, "");
+ return lp_build_negate(bld, a);
if(b == 2 && bld->type.floating)
return lp_build_add(bld, a, a);
@@ -518,6 +587,9 @@ lp_build_div(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return bld->zero;
if(a == bld->one)
@@ -529,13 +601,24 @@ lp_build_div(struct lp_build_context *bld,
if(a == bld->undef || b == bld->undef)
return bld->undef;
- if(LLVMIsConstant(a) && LLVMIsConstant(b))
- return LLVMConstFDiv(a, b);
+ if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+ if (type.floating)
+ return LLVMConstFDiv(a, b);
+ else if (type.sign)
+ return LLVMConstSDiv(a, b);
+ else
+ return LLVMConstUDiv(a, b);
+ }
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
return lp_build_mul(bld, a, lp_build_rcp(bld, b));
- return LLVMBuildFDiv(bld->builder, a, b, "");
+ if (type.floating)
+ return LLVMBuildFDiv(bld->builder, a, b, "");
+ else if (type.sign)
+ return LLVMBuildSDiv(bld->builder, a, b, "");
+ else
+ return LLVMBuildUDiv(bld->builder, a, b, "");
}
@@ -555,6 +638,10 @@ lp_build_lerp(struct lp_build_context *bld,
LLVMValueRef delta;
LLVMValueRef res;
+ assert(lp_check_value(bld->type, x));
+ assert(lp_check_value(bld->type, v0));
+ assert(lp_check_value(bld->type, v1));
+
delta = lp_build_sub(bld, v1, v0);
res = lp_build_mul(bld, x, delta);
@@ -596,6 +683,9 @@ lp_build_min(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -624,6 +714,9 @@ lp_build_max(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -653,6 +746,10 @@ lp_build_clamp(struct lp_build_context *bld,
LLVMValueRef min,
LLVMValueRef max)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, min));
+ assert(lp_check_value(bld->type, max));
+
a = lp_build_min(bld, a, max);
a = lp_build_max(bld, a, min);
return a;
@@ -669,6 +766,8 @@ lp_build_abs(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
+ assert(lp_check_value(type, a));
+
if(!type.sign)
return a;
@@ -702,7 +801,16 @@ LLVMValueRef
lp_build_negate(struct lp_build_context *bld,
LLVMValueRef a)
{
- return LLVMBuildNeg(bld->builder, a, "");
+ assert(lp_check_value(bld->type, a));
+
+#if HAVE_LLVM >= 0x0207
+ if (bld->type.floating)
+ a = LLVMBuildFNeg(bld->builder, a, "");
+ else
+#endif
+ a = LLVMBuildNeg(bld->builder, a, "");
+
+ return a;
}
@@ -715,6 +823,8 @@ lp_build_sgn(struct lp_build_context *bld,
LLVMValueRef cond;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+
/* Handle non-zero case */
if(!type.sign) {
/* if not zero then sign must be positive */
@@ -773,6 +883,7 @@ lp_build_set_sign(struct lp_build_context *bld,
LLVMValueRef val, res;
assert(type.floating);
+ assert(lp_check_value(type, a));
/* val = reinterpret_cast<int>(a) */
val = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
@@ -1021,7 +1132,7 @@ lp_build_iround(struct lp_build_context *bld,
half = LLVMBuildOr(bld->builder, sign, half, "");
half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
- res = LLVMBuildAdd(bld->builder, a, half, "");
+ res = LLVMBuildFAdd(bld->builder, a, half, "");
}
res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
@@ -1070,7 +1181,7 @@ lp_build_ifloor(struct lp_build_context *bld,
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
- res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res");
+ res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res");
}
/* round to nearest (toward zero) */
@@ -1120,7 +1231,7 @@ lp_build_iceil(struct lp_build_context *bld,
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
- res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res");
+ res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
}
/* round to nearest (toward zero) */
@@ -1138,6 +1249,8 @@ lp_build_sqrt(struct lp_build_context *bld,
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
+ assert(lp_check_value(type, a));
+
/* TODO: optimize the constant case */
/* TODO: optimize the constant case */
@@ -1148,12 +1261,39 @@ lp_build_sqrt(struct lp_build_context *bld,
}
+/**
+ * Do one Newton-Raphson step to improve reciprocate precision:
+ *
+ * x_{i+1} = x_i * (2 - a * x_i)
+ *
+ * See also:
+ * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rcp_refine(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef rcp_a)
+{
+ LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+ LLVMValueRef res;
+
+ res = LLVMBuildFMul(bld->builder, a, rcp_a, "");
+ res = LLVMBuildFSub(bld->builder, two, res, "");
+ res = LLVMBuildFMul(bld->builder, rcp_a, res, "");
+
+ return res;
+}
+
+
LLVMValueRef
lp_build_rcp(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
if(a == bld->zero)
return bld->undef;
if(a == bld->one)
@@ -1167,32 +1307,16 @@ lp_build_rcp(struct lp_build_context *bld,
return LLVMConstFDiv(bld->one, a);
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
- /*
- * XXX: Added precision is not always necessary, so only enable this
- * when we have a better system in place to track minimum precision.
- */
-
-#if 0
- /*
- * Do one Newton-Raphson step to improve precision:
- *
- * x1 = (2 - a * rcp(a)) * rcp(a)
- */
-
- LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
- LLVMValueRef rcp_a;
LLVMValueRef res;
+ unsigned i;
- rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+ res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
- res = LLVMBuildMul(bld->builder, a, rcp_a, "");
- res = LLVMBuildSub(bld->builder, two, res, "");
- res = LLVMBuildMul(bld->builder, res, rcp_a, "");
+ for (i = 0; i < RCP_NEWTON_STEPS; ++i) {
+ res = lp_build_rcp_refine(bld, a, res);
+ }
- return rcp_a;
-#else
- return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
-#endif
+ return res;
}
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
@@ -1200,6 +1324,33 @@ lp_build_rcp(struct lp_build_context *bld,
/**
+ * Do one Newton-Raphson step to improve rsqrt precision:
+ *
+ * x_{i+1} = 0.5 * x_i * (3.0 - a * x_i * x_i)
+ *
+ * See also:
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rsqrt_refine(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef rsqrt_a)
+{
+ LLVMValueRef half = lp_build_const_vec(bld->type, 0.5);
+ LLVMValueRef three = lp_build_const_vec(bld->type, 3.0);
+ LLVMValueRef res;
+
+ res = LLVMBuildFMul(bld->builder, rsqrt_a, rsqrt_a, "");
+ res = LLVMBuildFMul(bld->builder, a, res, "");
+ res = LLVMBuildFSub(bld->builder, three, res, "");
+ res = LLVMBuildFMul(bld->builder, rsqrt_a, res, "");
+ res = LLVMBuildFMul(bld->builder, half, res, "");
+
+ return res;
+}
+
+
+/**
* Generate 1/sqrt(a)
*/
LLVMValueRef
@@ -1208,10 +1359,22 @@ lp_build_rsqrt(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
assert(type.floating);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
- return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ LLVMValueRef res;
+ unsigned i;
+
+ res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
+
+ for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) {
+ res = lp_build_rsqrt_refine(bld, a, res);
+ }
+
+ return res;
+ }
return lp_build_rcp(bld, lp_build_sqrt(bld, a));
}
@@ -1270,7 +1433,7 @@ lp_build_sin(struct lp_build_context *bld,
*/
LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
- LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y");
+ LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
/*
* store the integer part of y in mm0
@@ -1344,9 +1507,9 @@ lp_build_sin(struct lp_build_context *bld,
* xmm2 = _mm_mul_ps(y, xmm2);
* xmm3 = _mm_mul_ps(y, xmm3);
*/
- LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1");
- LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2");
- LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3");
+ LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+ LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+ LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
/*
* x = _mm_add_ps(x, xmm1);
@@ -1354,16 +1517,16 @@ lp_build_sin(struct lp_build_context *bld,
* x = _mm_add_ps(x, xmm3);
*/
- LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1");
- LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2");
- LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3");
+ LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+ LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+ LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
/*
* Evaluate the first polynom (0 <= x <= Pi/4)
*
* z = _mm_mul_ps(x,x);
*/
- LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z");
+ LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
/*
* _PS_CONST(coscof_p0, 2.443315711809948E-005);
@@ -1378,12 +1541,12 @@ lp_build_sin(struct lp_build_context *bld,
* y = *(v4sf*)_ps_coscof_p0;
* y = _mm_mul_ps(y, z);
*/
- LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3");
- LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4");
- LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5");
- LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6");
- LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7");
- LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8");
+ LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+ LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+ LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+ LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+ LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+ LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
/*
@@ -1392,10 +1555,10 @@ lp_build_sin(struct lp_build_context *bld,
* y = _mm_add_ps(y, *(v4sf*)_ps_1);
*/
LLVMValueRef half = lp_build_const_v4sf(0.5);
- LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp");
- LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8");
+ LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+ LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
LLVMValueRef one = lp_build_const_v4sf(1.0);
- LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9");
+ LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
/*
* _PS_CONST(sincof_p0, -1.9515295891E-4);
@@ -1419,13 +1582,13 @@ lp_build_sin(struct lp_build_context *bld,
* y2 = _mm_add_ps(y2, x);
*/
- LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3");
- LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4");
- LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5");
- LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6");
- LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7");
- LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8");
- LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9");
+ LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+ LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+ LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+ LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+ LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+ LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+ LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
/*
* select the correct result from the two polynoms
@@ -1481,7 +1644,7 @@ lp_build_cos(struct lp_build_context *bld,
*/
LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
- LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y");
+ LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
/*
* store the integer part of y in mm0
@@ -1561,9 +1724,9 @@ lp_build_cos(struct lp_build_context *bld,
* xmm2 = _mm_mul_ps(y, xmm2);
* xmm3 = _mm_mul_ps(y, xmm3);
*/
- LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1");
- LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2");
- LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3");
+ LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+ LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+ LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
/*
* x = _mm_add_ps(x, xmm1);
@@ -1571,16 +1734,16 @@ lp_build_cos(struct lp_build_context *bld,
* x = _mm_add_ps(x, xmm3);
*/
- LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1");
- LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2");
- LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3");
+ LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+ LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+ LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
/*
* Evaluate the first polynom (0 <= x <= Pi/4)
*
* z = _mm_mul_ps(x,x);
*/
- LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z");
+ LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
/*
* _PS_CONST(coscof_p0, 2.443315711809948E-005);
@@ -1595,12 +1758,12 @@ lp_build_cos(struct lp_build_context *bld,
* y = *(v4sf*)_ps_coscof_p0;
* y = _mm_mul_ps(y, z);
*/
- LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3");
- LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4");
- LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5");
- LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6");
- LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7");
- LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8");
+ LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+ LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+ LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+ LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+ LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+ LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
/*
@@ -1609,10 +1772,10 @@ lp_build_cos(struct lp_build_context *bld,
* y = _mm_add_ps(y, *(v4sf*)_ps_1);
*/
LLVMValueRef half = lp_build_const_v4sf(0.5);
- LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp");
- LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8");
+ LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+ LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
LLVMValueRef one = lp_build_const_v4sf(1.0);
- LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9");
+ LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
/*
* _PS_CONST(sincof_p0, -1.9515295891E-4);
@@ -1636,13 +1799,13 @@ lp_build_cos(struct lp_build_context *bld,
* y2 = _mm_add_ps(y2, x);
*/
- LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3");
- LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4");
- LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5");
- LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6");
- LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7");
- LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8");
- LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9");
+ LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+ LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+ LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+ LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+ LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+ LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+ LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
/*
* select the correct result from the two polynoms
@@ -1695,6 +1858,8 @@ lp_build_exp(struct lp_build_context *bld,
/* log2(e) = 1/log(2) */
LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634);
+ assert(lp_check_value(bld->type, x));
+
return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
}
@@ -1709,14 +1874,12 @@ lp_build_log(struct lp_build_context *bld,
/* log(2) */
LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529);
+ assert(lp_check_value(bld->type, x));
+
return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
}
-#define EXP_POLY_DEGREE 3
-#define LOG_POLY_DEGREE 5
-
-
/**
* Generate polynomial.
* Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
@@ -1731,6 +1894,8 @@ lp_build_polynomial(struct lp_build_context *bld,
LLVMValueRef res = NULL;
unsigned i;
+ assert(lp_check_value(bld->type, x));
+
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
@@ -1802,6 +1967,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef expfpart = NULL;
LLVMValueRef res = NULL;
+ assert(lp_check_value(bld->type, x));
+
if(p_exp2_int_part || p_frac_part || p_exp2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
@@ -1817,7 +1984,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
ipart = lp_build_floor(bld, x);
/* fpart = x - ipart */
- fpart = LLVMBuildSub(bld->builder, x, ipart, "");
+ fpart = LLVMBuildFSub(bld->builder, x, ipart, "");
}
if(p_exp2_int_part || p_exp2) {
@@ -1832,7 +1999,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
Elements(lp_build_exp2_polynomial));
- res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
+ res = LLVMBuildFMul(bld->builder, expipart, expfpart, "");
}
if(p_exp2_int_part)
@@ -1915,6 +2082,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
LLVMValueRef logmant = NULL;
LLVMValueRef res = NULL;
+ assert(lp_check_value(bld->type, x));
+
if(p_exp || p_floor_log2 || p_log2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
@@ -1945,9 +2114,9 @@ lp_build_log2_approx(struct lp_build_context *bld,
Elements(lp_build_log2_polynomial));
/* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
- logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+ logmant = LLVMBuildFMul(bld->builder, logmant, LLVMBuildFSub(bld->builder, mant, bld->one, ""), "");
- res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+ res = LLVMBuildFAdd(bld->builder, logmant, logexp, "");
}
if(p_exp) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c
new file mode 100644
index 0000000000..f2ebd868a8
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "lp_bld_assert.h"
+#include "lp_bld_init.h"
+#include "lp_bld_printf.h"
+
+
+/**
+ * A call to lp_build_assert() will build a function call to this function.
+ */
+static void
+lp_assert(int condition, const char *msg)
+{
+ if (!condition) {
+ debug_printf("LLVM assertion '%s' failed!\n", msg);
+ assert(condition);
+ }
+}
+
+
+
+/**
+ * lp_build_assert.
+ *
+ * Build an assertion in LLVM IR by building a function call to the
+ * lp_assert() function above.
+ *
+ * \param condition should be an 'i1' or 'i32' value
+ * \param msg a string to print if the assertion fails.
+ */
+LLVMValueRef
+lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
+ const char *msg)
+{
+ LLVMModuleRef module;
+ LLVMTypeRef arg_types[2];
+ LLVMValueRef msg_string, assert_func, params[2], r;
+
+ module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
+ LLVMGetInsertBlock(builder)));
+
+ msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1);
+
+ arg_types[0] = LLVMInt32Type();
+ arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+
+ /* lookup the lp_assert function */
+ assert_func = LLVMGetNamedFunction(module, "lp_assert");
+
+ /* Create the assertion function if not found */
+ if (!assert_func) {
+ LLVMTypeRef func_type =
+ LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0);
+
+ assert_func = LLVMAddFunction(module, "lp_assert", func_type);
+ LLVMSetFunctionCallConv(assert_func, LLVMCCallConv);
+ LLVMSetLinkage(assert_func, LLVMExternalLinkage);
+ LLVMAddGlobalMapping(lp_build_engine, assert_func,
+ func_to_pointer((func_pointer)lp_assert));
+ }
+ assert(assert_func);
+
+ /* build function call param list */
+ params[0] = LLVMBuildZExt(builder, condition, arg_types[0], "");
+ params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], "");
+
+ /* check arg types */
+ assert(LLVMTypeOf(params[0]) == arg_types[0]);
+ assert(LLVMTypeOf(params[1]) == arg_types[1]);
+
+ r = LLVMBuildCall(builder, assert_func, params, 2, "");
+
+ return r;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h
new file mode 100644
index 0000000000..ddd879dc2c
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_ASSERT_H
+#define LP_BLD_ASSERT_H
+
+
+#include "lp_bld.h"
+
+
+LLVMValueRef
+lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
+ const char *msg);
+
+
+#endif
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 77012f1fac..8b477313d4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -117,8 +117,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
scale = (double)mask/ubound;
bias = (double)((unsigned long long)1 << (mantissa - n));
- res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), "");
- res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), "");
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
if(dst_width > n) {
@@ -175,6 +175,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
double scale;
double bias;
+ assert(dst_type.floating);
+
mantissa = lp_mantissa(dst_type);
n = MIN2(mantissa, src_width);
@@ -199,8 +201,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
res = LLVMBuildBitCast(builder, res, vec_type, "");
- res = LLVMBuildSub(builder, res, bias_, "");
- res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), "");
+ res = LLVMBuildFSub(builder, res, bias_, "");
+ res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
return res;
}
@@ -296,7 +298,7 @@ lp_build_conv(LLVMBuilderRef builder,
if (dst_scale != 1.0) {
LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale);
for(i = 0; i < num_tmps; ++i)
- tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+ tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
/* Use an equally sized integer for intermediate computations */
@@ -391,7 +393,7 @@ lp_build_conv(LLVMBuilderRef builder,
if (src_scale != 1.0) {
LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale);
for(i = 0; i < num_tmps; ++i)
- tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+ tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
}
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 0f01fc1d75..247cb83ce6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -240,7 +240,7 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder,
*/
if (normalized)
- scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
+ scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
else
scaled = casted;
@@ -322,7 +322,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
}
if (normalized)
- scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
+ scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
else
scaled = unswizzled;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 9f405921b0..c724a4453e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -197,7 +197,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
if (format_desc->channel[chan].normalized) {
double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
- input = LLVMBuildMul(builder, input, scale_val, "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
}
else {
@@ -227,7 +227,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
- input = LLVMBuildMul(builder, input, scale_val, "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
else {
/* FIXME */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 69353dea09..60d8bcfa55 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -45,6 +45,8 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
{ "nopt", GALLIVM_DEBUG_NO_OPT, NULL },
DEBUG_NAMED_VALUE_END
};
+
+DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0)
#endif
@@ -89,7 +91,7 @@ void
lp_build_init(void)
{
#ifdef DEBUG
- gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 );
+ gallivm_debug = debug_get_option_gallivm_debug();
#endif
lp_set_target_options();
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index a32ced9b4c..f26fdac466 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -44,5 +44,7 @@ extern LLVMPassManagerRef lp_build_pass;
void
lp_build_init(void);
+extern void
+lp_func_delete_body(LLVMValueRef func);
#endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 39854e43b1..7d7db3b0d9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -83,6 +83,8 @@ lp_build_compare(LLVMBuilderRef builder,
assert(func >= PIPE_FUNC_NEVER);
assert(func <= PIPE_FUNC_ALWAYS);
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
if(func == PIPE_FUNC_NEVER)
return zeros;
@@ -363,9 +365,55 @@ lp_build_cmp(struct lp_build_context *bld,
/**
+ * Return (mask & a) | (~mask & b);
+ */
+LLVMValueRef
+lp_build_select_bitwise(struct lp_build_context *bld,
+ LLVMValueRef mask,
+ LLVMValueRef a,
+ LLVMValueRef b)
+{
+ struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ if (a == b) {
+ return a;
+ }
+
+ if(type.floating) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ }
+
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+
+ /* This often gets translated to PANDN, but sometimes the NOT is
+ * pre-computed and stored in another constant. The best strategy depends
+ * on available registers, so it is not a big deal -- hopefully LLVM does
+ * the right decision attending the rest of the program.
+ */
+ b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+
+ res = LLVMBuildOr(bld->builder, a, b, "");
+
+ if(type.floating) {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ }
+
+ return res;
+}
+
+
+/**
* Return mask ? a : b;
*
- * mask is a bitwise mask, composed of 0 or ~0 for each element.
+ * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
+ * will yield unpredictable results.
*/
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
@@ -376,6 +424,9 @@ lp_build_select(struct lp_build_context *bld,
struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == b)
return a;
@@ -424,27 +475,7 @@ lp_build_select(struct lp_build_context *bld,
}
}
else {
- if(type.floating) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
- }
-
- a = LLVMBuildAnd(bld->builder, a, mask, "");
-
- /* This often gets translated to PANDN, but sometimes the NOT is
- * pre-computed and stored in another constant. The best strategy depends
- * on available registers, so it is not a big deal -- hopefully LLVM does
- * the right decision attending the rest of the program.
- */
- b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
-
- res = LLVMBuildOr(bld->builder, a, b, "");
-
- if(type.floating) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
- }
+ res = lp_build_select_bitwise(bld, mask, a, b);
}
return res;
@@ -461,6 +492,9 @@ lp_build_select_aos(struct lp_build_context *bld,
const unsigned n = type.length;
unsigned i, j;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == b)
return a;
if(cond[0] && cond[1] && cond[2] && cond[3])
@@ -516,7 +550,22 @@ lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
{
+ const struct lp_type type = bld->type;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ /* can't do bitwise ops on floating-point values */
+ if(type.floating) {
+ a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, "");
+ }
+
b = LLVMBuildNot(bld->builder, b, "");
b = LLVMBuildAnd(bld->builder, a, b, "");
+
+ if(type.floating) {
+ b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, "");
+ }
return b;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index 29f9fc3b20..4e7b4c9938 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -63,6 +63,11 @@ lp_build_cmp(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
+LLVMValueRef
+lp_build_select_bitwise(struct lp_build_context *bld,
+ LLVMValueRef mask,
+ LLVMValueRef a,
+ LLVMValueRef b);
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 5a9488b5f7..6d5410d970 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -39,6 +39,7 @@
#include <llvm/Target/TargetOptions.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITEventListener.h>
+#include <llvm/Support/CommandLine.h>
#include "pipe/p_config.h"
#include "util/u_debug.h"
@@ -141,4 +142,35 @@ lp_set_target_options(void)
#if 0
llvm::UnsafeFPMath = true;
#endif
+
+#if 0
+ /*
+ * LLVM will generate MMX instructions for vectors <= 64 bits, leading to
+ * innefficient code, and in 32bit systems, to the corruption of the FPU
+ * stack given that it expects the user to generate the EMMS instructions.
+ *
+ * See also:
+ * - http://llvm.org/bugs/show_bug.cgi?id=3287
+ * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
+ *
+ * XXX: Unfortunately this is not working.
+ */
+ static boolean first = FALSE;
+ if (first) {
+ static const char* options[] = {
+ "prog",
+ "-disable-mmx"
+ };
+ llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
+ first = FALSE;
+ }
+#endif
+}
+
+
+extern "C" void
+lp_func_delete_body(LLVMValueRef FF)
+{
+ llvm::Function *func = llvm::unwrap<llvm::Function>(FF);
+ func->deleteBody();
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 7748f8f099..b7b630f2e8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -171,14 +171,13 @@ lp_build_unpack2(LLVMBuilderRef builder,
msb = lp_build_zero(src_type);
/* Interleave bits */
- if(util_cpu_caps.little_endian) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
*dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
*dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
- }
- else {
+#else
*dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
*dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
- }
+#endif
/* Cast the result into the new type (twice as wide) */
@@ -261,13 +260,14 @@ lp_build_pack2(LLVMBuilderRef builder,
#endif
LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
LLVMValueRef shuffle;
- LLVMValueRef res;
+ LLVMValueRef res = NULL;
assert(!src_type.floating);
assert(!dst_type.floating);
assert(src_type.width == dst_type.width * 2);
assert(src_type.length * 2 == dst_type.length);
+ /* Check for special cases first */
if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
switch(src_type.width) {
case 32:
@@ -283,8 +283,8 @@ lp_build_pack2(LLVMBuilderRef builder,
return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
}
else {
- assert(0);
- return LLVMGetUndef(dst_vec_type);
+ /* use generic shuffle below */
+ res = NULL;
}
}
break;
@@ -310,10 +310,13 @@ lp_build_pack2(LLVMBuilderRef builder,
break;
}
- res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
- return res;
+ if (res) {
+ res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+ return res;
+ }
}
+ /* generic shuffle */
lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index ca36046d22..7b1088939b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -85,7 +85,7 @@ lp_build_scalar_ddx(struct lp_build_context *bld,
LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
- return LLVMBuildSub(bld->builder, a_right, a_left, "");
+ return lp_build_sub(bld, a_right, a_left);
}
@@ -97,5 +97,5 @@ lp_build_scalar_ddy(struct lp_build_context *bld,
LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
- return LLVMBuildSub(bld->builder, a_bottom, a_top, "");
+ return lp_build_sub(bld, a_bottom, a_top);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 1a20d74cac..806c7d56a8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -40,7 +40,6 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
-#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
@@ -811,7 +810,7 @@ lp_build_minify(struct lp_build_sample_context *bld,
LLVMValueRef base_size,
LLVMValueRef level)
{
- LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+ LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
return size;
}
@@ -888,17 +887,17 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
/* Compute rho = max of all partial derivatives scaled by texture size.
* XXX this could be vectorized somewhat
*/
- rho = LLVMBuildMul(bld->builder,
+ rho = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dsdx, dsdy),
lp_build_int_to_float(float_bld, width), "");
if (dims > 1) {
LLVMValueRef max;
- max = LLVMBuildMul(bld->builder,
+ max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, dtdx, dtdy),
lp_build_int_to_float(float_bld, height), "");
rho = lp_build_max(float_bld, rho, max);
if (dims > 2) {
- max = LLVMBuildMul(bld->builder,
+ max = LLVMBuildFMul(bld->builder,
lp_build_max(float_bld, drdx, drdy),
lp_build_int_to_float(float_bld, depth), "");
rho = lp_build_max(float_bld, rho, max);
@@ -912,12 +911,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
if (lod_bias) {
lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
index0, "");
- lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+ lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
}
}
/* add sampler lod bias */
- lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+ lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
/* clamp lod */
lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
@@ -1219,8 +1218,7 @@ lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
/* ima = -0.5 / abs(coord); */
LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
- LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
- lp_build_rcp(coord_bld, absCoord));
+ LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
return ima;
}
@@ -1841,7 +1839,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
unsigned i, j;
for(j = 0; j < h16.type.length; j += 4) {
- unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ unsigned subindex = 0;
+#else
+ unsigned subindex = 1;
+#endif
LLVMValueRef index;
index = LLVMConstInt(elem_type, j/2 + subindex, 0);
@@ -2029,6 +2031,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
debug_printf("Sample from %s\n", util_format_name(fmt));
}
+ assert(type.floating);
+
/* Setup our build context */
memset(&bld, 0, sizeof bld);
bld.builder = builder;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 21236839fb..0aa64affac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -489,7 +489,7 @@ get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
int_vec_type, "");
/* addr_vec = addr_vec * 4 */
- addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
+ addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);
return addr_vec;
}
@@ -533,7 +533,7 @@ emit_fetch(
reg->Register.Index * 4 + swizzle);
/* index_vec = index_vec + addr_vec */
- index_vec = lp_build_add(&bld->base, index_vec, addr_vec);
+ index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);
/* Gather values from the constant buffer */
res = build_gather(bld, bld->consts_ptr, index_vec);
@@ -612,11 +612,9 @@ emit_fetch(
case TGSI_UTIL_SIGN_SET:
/* TODO: Use bitwese OR for floating point */
res = lp_build_abs( &bld->base, res );
- res = LLVMBuildNeg( bld->base.builder, res, "" );
- break;
-
+ /* fall through */
case TGSI_UTIL_SIGN_TOGGLE:
- res = LLVMBuildNeg( bld->base.builder, res, "" );
+ res = lp_build_negate( &bld->base, res );
break;
case TGSI_UTIL_SIGN_KEEP:
@@ -773,7 +771,9 @@ emit_store(
addr = LLVMBuildExtractElement(bld->base.builder,
addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
"");
- addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ addr = LLVMBuildMul(bld->base.builder,
+ addr, LLVMConstInt(LLVMInt32Type(), 4, 0),
+ "");
}
switch( reg->Register.File ) {