summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/gallivm
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_alpha.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c314
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.h28
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_blend.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.c20
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_depth.c2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_depth.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.c28
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.h6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_soa.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.cpp (renamed from src/gallium/auxiliary/gallivm/lp_bld_misc.cpp)46
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.h (renamed from src/gallium/auxiliary/gallivm/lp_bld_misc.h)17
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_interp.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_intr.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c156
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c15
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c59
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h25
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c1721
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_struct.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c263
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.c33
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.h56
32 files changed, 2513 insertions, 314 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h
index 634575670d..fe3cedcc48 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h
@@ -35,7 +35,7 @@
#define LP_BLD_ALPHA_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
struct pipe_alpha_state;
struct lp_type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 54b31befe6..233a36669d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -232,6 +232,37 @@ lp_build_add(struct lp_build_context *bld,
}
+/** Return the sum of the elements of a */
+LLVMValueRef
+lp_build_sum_vector(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef index, res;
+ int i;
+
+ if (a == bld->zero)
+ return bld->zero;
+ if (a == bld->undef)
+ return bld->undef;
+ assert(type.length > 1);
+
+ assert(!bld->type.norm);
+
+ index = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ res = LLVMBuildExtractElement(bld->builder, a, index, "");
+
+ for (i = 1; i < type.length; i++) {
+ index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ res = LLVMBuildAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder, a, index, ""),
+ "");
+ }
+
+ return res;
+}
+
+
/**
* Generate a - b
*/
@@ -614,6 +645,22 @@ lp_build_max(struct lp_build_context *bld,
/**
+ * Generate clamp(a, min, max)
+ * Do checks for special cases.
+ */
+LLVMValueRef
+lp_build_clamp(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef min,
+ LLVMValueRef max)
+{
+ a = lp_build_min(bld, a, max);
+ a = lp_build_max(bld, a, min);
+ return a;
+}
+
+
+/**
* Generate abs(a)
*/
LLVMValueRef
@@ -628,13 +675,26 @@ lp_build_abs(struct lp_build_context *bld,
if(type.floating) {
/* Mask out the sign bit */
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- unsigned long long absMask = ~(1ULL << (type.width - 1));
- LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- a = LLVMBuildAnd(bld->builder, a, mask, "");
- a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
- return a;
+ if (type.length == 1) {
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ LLVMTypeRef float_type = LLVMFloatType();
+ unsigned long long absMask = ~(1ULL << (type.width - 1));
+ LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0);
+ a = LLVMBuildBitCast(bld->builder, a, int_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, float_type, "");
+ return a;
+ }
+ else {
+ /* vector of floats */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ unsigned long long absMask = ~(1ULL << (type.width - 1));
+ LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+ return a;
+ }
}
if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -653,11 +713,19 @@ lp_build_abs(struct lp_build_context *bld,
LLVMValueRef
+lp_build_negate(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ return LLVMBuildNeg(bld->builder, a, "");
+}
+
+
+/** Return -1, 0 or +1 depending on the sign of a */
+LLVMValueRef
lp_build_sgn(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef cond;
LLVMValueRef res;
@@ -667,14 +735,29 @@ lp_build_sgn(struct lp_build_context *bld,
res = bld->one;
}
else if(type.floating) {
- /* Take the sign bit and add it to 1 constant */
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMTypeRef vec_type;
+ LLVMTypeRef int_type;
+ LLVMValueRef mask;
LLVMValueRef sign;
LLVMValueRef one;
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ unsigned long long maskBit = (unsigned long long)1 << (type.width - 1);
+
+ if (type.length == 1) {
+ int_type = lp_build_int_elem_type(type);
+ vec_type = lp_build_elem_type(type);
+ mask = LLVMConstInt(int_type, maskBit, 0);
+ }
+ else {
+ /* vector */
+ int_type = lp_build_int_vec_type(type);
+ vec_type = lp_build_vec_type(type);
+ mask = lp_build_int_const_scalar(type, maskBit);
+ }
+
+ /* Take the sign bit and add it to 1 constant */
+ sign = LLVMBuildBitCast(bld->builder, a, int_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- one = LLVMConstBitCast(bld->one, int_vec_type);
+ one = LLVMConstBitCast(bld->one, int_type);
res = LLVMBuildOr(bld->builder, sign, one, "");
res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
}
@@ -687,12 +770,74 @@ lp_build_sgn(struct lp_build_context *bld,
/* Handle zero */
cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
- res = lp_build_select(bld, cond, bld->zero, bld->one);
+ res = lp_build_select(bld, cond, bld->zero, res);
return res;
}
+/**
+ * Set the sign of float vector 'a' according to 'sign'.
+ * If sign==0, return abs(a).
+ * If sign==1, return -abs(a);
+ * Other values for sign produce undefined results.
+ */
+LLVMValueRef
+lp_build_set_sign(struct lp_build_context *bld,
+ LLVMValueRef a, LLVMValueRef sign)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1);
+ LLVMValueRef mask = lp_build_int_const_scalar(type,
+ ~((unsigned long long) 1 << (type.width - 1)));
+ LLVMValueRef val, res;
+
+ assert(type.floating);
+
+ /* val = reinterpret_cast<int>(a) */
+ val = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ /* val = val & mask */
+ val = LLVMBuildAnd(bld->builder, val, mask, "");
+ /* sign = sign << shift */
+ sign = LLVMBuildShl(bld->builder, sign, shift, "");
+ /* res = val | sign */
+ res = LLVMBuildOr(bld->builder, val, sign, "");
+ /* res = reinterpret_cast<float>(res) */
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+
+ return res;
+}
+
+
+/**
+ * Convert vector of (or scalar) int to vector of (or scalar) float.
+ */
+LLVMValueRef
+lp_build_int_to_float(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+ /*assert(lp_check_value(type, a));*/
+
+ if (type.length == 1) {
+ LLVMTypeRef float_type = LLVMFloatType();
+ return LLVMBuildSIToFP(bld->builder, a, float_type, "");
+ }
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/
+ LLVMValueRef res;
+ res = LLVMBuildSIToFP(bld->builder, a, vec_type, "");
+ return res;
+ }
+}
+
+
+
enum lp_build_round_sse41_mode
{
LP_BUILD_ROUND_SSE41_NEAREST = 0,
@@ -784,6 +929,13 @@ lp_build_floor(struct lp_build_context *bld,
assert(type.floating);
+ if (type.length == 1) {
+ LLVMValueRef res;
+ res = lp_build_ifloor(bld, a);
+ res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), "");
+ return res;
+ }
+
if(util_cpu_caps.has_sse4_1)
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
else {
@@ -818,23 +970,45 @@ lp_build_ceil(struct lp_build_context *bld,
/**
+ * Return fractional part of 'a' computed as a - floor(f)
+ * Typically used in texture coord arithmetic.
+ */
+LLVMValueRef
+lp_build_fract(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ assert(bld->type.floating);
+ return lp_build_sub(bld, a, lp_build_floor(bld, a));
+}
+
+
+/**
* Convert to integer, through whichever rounding method that's fastest,
- * typically truncating to zero.
+ * typically truncating toward zero.
*/
LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
assert(type.floating);
- assert(lp_check_value(type, a));
- return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ if (type.length == 1) {
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ return LLVMBuildFPToSI(bld->builder, a, int_type, "");
+ }
+ else {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ assert(lp_check_value(type, a));
+ return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ }
}
+/**
+ * Convert float[] to int[] with round().
+ */
LLVMValueRef
lp_build_iround(struct lp_build_context *bld,
LLVMValueRef a)
@@ -844,6 +1018,15 @@ lp_build_iround(struct lp_build_context *bld,
LLVMValueRef res;
assert(type.floating);
+
+ if (type.length == 1) {
+ /* scalar float to int */
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ /* XXX we want rounding here! */
+ res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+ return res;
+ }
+
assert(lp_check_value(type, a));
if(util_cpu_caps.has_sse4_1) {
@@ -886,6 +1069,14 @@ lp_build_ifloor(struct lp_build_context *bld,
LLVMValueRef res;
assert(type.floating);
+
+ if (type.length == 1) {
+ /* scalar float to int */
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+ return res;
+ }
+
assert(lp_check_value(type, a));
if(util_cpu_caps.has_sse4_1) {
@@ -1112,6 +1303,7 @@ lp_build_polynomial(struct lp_build_context *bld,
unsigned num_coeffs)
{
const struct lp_type type = bld->type;
+ LLVMTypeRef float_type = LLVMFloatType();
LLVMValueRef res = NULL;
unsigned i;
@@ -1121,7 +1313,13 @@ lp_build_polynomial(struct lp_build_context *bld,
__FUNCTION__);
for (i = num_coeffs; i--; ) {
- LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
+ LLVMValueRef coeff;
+
+ if (type.length == 1)
+ coeff = LLVMConstReal(float_type, coeffs[i]);
+ else
+ coeff = lp_build_const_scalar(type, coeffs[i]);
+
if(res)
res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
else
@@ -1315,11 +1513,87 @@ lp_build_log2_approx(struct lp_build_context *bld,
}
+/** scalar version of above function */
+static void
+lp_build_float_log2_approx(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef *p_exp,
+ LLVMValueRef *p_floor_log2,
+ LLVMValueRef *p_log2)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef float_type = LLVMFloatType();
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+
+ LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0);
+ LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0);
+ LLVMValueRef one = LLVMConstBitCast(bld->one, int_type);
+
+ LLVMValueRef i = NULL;
+ LLVMValueRef exp = NULL;
+ LLVMValueRef mant = NULL;
+ LLVMValueRef logexp = NULL;
+ LLVMValueRef logmant = NULL;
+ LLVMValueRef res = NULL;
+
+ if(p_exp || p_floor_log2 || p_log2) {
+ /* TODO: optimize the constant case */
+ if(LLVMIsConstant(x))
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
+
+ assert(type.floating && type.width == 32);
+
+ i = LLVMBuildBitCast(bld->builder, x, int_type, "");
+
+ /* exp = (float) exponent(x) */
+ exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+ }
+
+ if(p_floor_log2 || p_log2) {
+ LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0);
+ LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0);
+ logexp = LLVMBuildLShr(bld->builder, exp, c23, "");
+ logexp = LLVMBuildSub(bld->builder, logexp, c127, "");
+ logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, "");
+ }
+
+ if(p_log2) {
+ /* mant = (float) mantissa(x) */
+ mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+ mant = LLVMBuildOr(bld->builder, mant, one, "");
+ mant = LLVMBuildBitCast(bld->builder, mant, float_type, "");
+
+ logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+ Elements(lp_build_log2_polynomial));
+
+ /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+ logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+ res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+ }
+
+ if(p_exp)
+ *p_exp = exp;
+
+ if(p_floor_log2)
+ *p_floor_log2 = logexp;
+
+ if(p_log2)
+ *p_log2 = res;
+}
+
+
LLVMValueRef
lp_build_log2(struct lp_build_context *bld,
LLVMValueRef x)
{
LLVMValueRef res;
- lp_build_log2_approx(bld, x, NULL, NULL, &res);
+ if (bld->type.length == 1) {
+ lp_build_float_log2_approx(bld, x, NULL, NULL, &res);
+ }
+ else {
+ lp_build_log2_approx(bld, x, NULL, NULL, &res);
+ }
return res;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 62be4b9aee..7a10fe1220 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -37,7 +37,7 @@
#define LP_BLD_ARIT_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
struct lp_type;
@@ -57,6 +57,10 @@ lp_build_add(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_sum_vector(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
lp_build_sub(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
@@ -107,14 +111,32 @@ lp_build_max(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_clamp(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef min,
+ LLVMValueRef max);
+
+LLVMValueRef
lp_build_abs(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
+lp_build_negate(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
lp_build_sgn(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
+lp_build_set_sign(struct lp_build_context *bld,
+ LLVMValueRef a, LLVMValueRef sign);
+
+LLVMValueRef
+lp_build_int_to_float(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
lp_build_round(struct lp_build_context *bld,
LLVMValueRef a);
@@ -131,6 +153,10 @@ lp_build_trunc(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
+lp_build_fract(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h
index da272e549f..5a9e1c1fb2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_blend.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h
@@ -40,7 +40,7 @@
* for a standalone example.
*/
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
#include "pipe/p_format.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
index c8eaa8c394..8a275fa72f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -221,8 +221,16 @@ lp_build_undef(struct lp_type type)
LLVMValueRef
lp_build_zero(struct lp_type type)
{
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- return LLVMConstNull(vec_type);
+ if (type.length == 1) {
+ if (type.floating)
+ return LLVMConstReal(LLVMFloatType(), 0.0);
+ else
+ return LLVMConstInt(LLVMIntType(type.width), 0, 0);
+ }
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ return LLVMConstNull(vec_type);
+ }
}
@@ -264,10 +272,16 @@ lp_build_one(struct lp_type type)
for(i = 1; i < type.length; ++i)
elems[i] = elems[0];
- return LLVMConstVector(elems, type.length);
+ if (type.length == 1)
+ return elems[0];
+ else
+ return LLVMConstVector(elems, type.length);
}
+/**
+ * Build constant-valued vector from a scalar value.
+ */
LLVMValueRef
lp_build_const_scalar(struct lp_type type,
double val)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
index cb8e1c7b00..4078636103 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -37,7 +37,7 @@
#define LP_BLD_CONST_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
#include <pipe/p_compiler.h>
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
index 948e68fae4..78e8155ff7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
@@ -37,7 +37,7 @@
#define LP_BLD_CONV_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
struct lp_type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index 583e6132b4..441ad94786 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -30,7 +30,7 @@
#define LP_BLD_DEBUG_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
#include "pipe/p_compiler.h"
#include "util/u_string.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c
index d438c0e63d..f08f8eb6d8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_depth.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.c
@@ -171,7 +171,7 @@ lp_build_depth_test(LLVMBuilderRef builder,
unsigned padding_right;
unsigned chan;
- assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits);
assert(format_desc->channel[z_swizzle].normalized);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h
index 79d6981bb5..8be80024ae 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_depth.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h
@@ -36,7 +36,7 @@
#define LP_BLD_DEPTH_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
struct pipe_depth_state;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index bc83138908..106fc03e46 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -308,7 +308,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow)
* Note: this function has no dependencies on the flow code and could
* be used elsewhere.
*/
-static LLVMBasicBlockRef
+LLVMBasicBlockRef
lp_build_insert_new_block(LLVMBuilderRef builder, const char *name)
{
LLVMBasicBlockRef current_block;
@@ -648,7 +648,9 @@ lp_build_if(struct lp_build_if_state *ctx,
ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
/* add add the initial value of the var from the entry block */
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1);
+ if (!LLVMIsUndef(*flow->variables[i]))
+ LLVMAddIncoming(ifthen->phi[i], flow->variables[i],
+ &ifthen->entry_block, 1);
}
/* create/insert true_block before merge_block */
@@ -695,18 +697,21 @@ lp_build_endif(struct lp_build_if_state *ctx)
{
struct lp_build_flow_context *flow = ctx->flow;
struct lp_build_flow_if *ifthen;
+ LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder);
unsigned i;
ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen;
assert(ifthen);
+ /* Insert branch to the merge block from current block */
+ LLVMBuildBr(ctx->builder, ifthen->merge_block);
+
if (ifthen->false_block) {
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
/* for each variable, update the Phi node with a (variable, block) pair */
for (i = 0; i < flow->num_variables; i++) {
assert(*flow->variables[i]);
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1);
-
+ LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1);
/* replace the variable ref with the phi function */
*flow->variables[i] = ifthen->phi[i];
}
@@ -742,15 +747,18 @@ lp_build_endif(struct lp_build_if_state *ctx)
ifthen->true_block, ifthen->merge_block);
}
- /* Append an unconditional Br(anch) instruction on the true_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
- LLVMBuildBr(ctx->builder, ifthen->merge_block);
+ /* Insert branch from end of true_block to merge_block */
if (ifthen->false_block) {
- /* Append an unconditional Br(anch) instruction on the false_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block);
+ /* Append an unconditional Br(anch) instruction on the true_block */
+ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
LLVMBuildBr(ctx->builder, ifthen->merge_block);
}
-
+ else {
+ /* No else clause.
+ * Note that we've already inserted the branch at the end of
+ * true_block. See the very first LLVMBuildBr() call in this function.
+ */
+ }
/* Resume building code at end of the ifthen->merge_block */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
index 4c225a0d4f..e158836549 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -35,7 +35,7 @@
#define LP_BLD_FLOW_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
struct lp_type;
@@ -145,7 +145,9 @@ lp_build_else(struct lp_build_if_state *ctx);
void
lp_build_endif(struct lp_build_if_state *ctx);
-
+
+LLVMBasicBlockRef
+lp_build_insert_new_block(LLVMBuilderRef builder, const char *name);
#endif /* !LP_BLD_FLOW_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index 970bee379f..8972c0dc17 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -34,7 +34,7 @@
* Pixel format helpers.
*/
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
#include "pipe/p_format.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index dfa080b853..a07f7418f2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -70,7 +70,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
unsigned i;
/* FIXME: Support more formats */
- assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
assert(desc->block.width == 1);
assert(desc->block.height == 1);
assert(desc->block.bits <= 32);
@@ -189,7 +189,7 @@ lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
lp_build_context_init(&bld, builder, type);
/* FIXME: Support more formats */
- assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
assert(desc->block.width == 1);
assert(desc->block.height == 1);
assert(desc->block.bits <= 32);
@@ -303,7 +303,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
unsigned shift;
unsigned i, j;
- assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
assert(desc->block.width == 1);
assert(desc->block.height == 1);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 64151d169d..abb27e4c32 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -92,9 +92,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
unsigned chan;
/* FIXME: Support more formats */
- assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH ||
- (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY &&
- format_desc->block.bits == format_desc->channel[0].size));
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
assert(format_desc->block.width == 1);
assert(format_desc->block.height == 1);
assert(format_desc->block.bits <= 32);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_init.cpp
index 6e79438ead..067397a520 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.cpp
@@ -26,39 +26,34 @@
**************************************************************************/
-#include "pipe/p_config.h"
+#include <llvm/Config/config.h>
+#include <llvm/Target/TargetSelect.h>
+#include <llvm/Target/TargetOptions.h>
-#include "lp_bld_misc.h"
+#include "pipe/p_config.h"
+#include "lp_bld_init.h"
-#ifndef LLVM_NATIVE_ARCH
-namespace llvm {
- extern void LinkInJIT();
-}
+extern "C" void LLVMLinkInJIT();
-void
-LLVMLinkInJIT(void)
+extern "C" void
+lp_build_init(void)
{
- llvm::LinkInJIT();
-}
-
-
-extern "C" int X86TargetMachineModule;
-
-
-int
-LLVMInitializeNativeTarget(void)
-{
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- X86TargetMachineModule = 1;
+#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86)
+ /*
+ * This is mis-detected on some hardware / software combinations.
+ */
+ llvm::StackAlignment = 4;
+ llvm::RealignStack = true;
#endif
- return 0;
-}
+ /* Same as LLVMInitializeNativeTarget(); */
+ llvm::InitializeNativeTarget();
-#endif
+ LLVMLinkInJIT();
+}
/*
@@ -69,7 +64,6 @@ LLVMInitializeNativeTarget(void)
*/
#if defined(_MSC_VER) && defined(_DEBUG)
#include <crtdefs.h>
-extern "C" {
- _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {}
-}
+extern "C" _CRTIMP void __cdecl
+_invalid_parameter_noinfo(void) {}
#endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index 0e787e0b9c..07f50d1c43 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -26,26 +26,17 @@
**************************************************************************/
-#ifndef LP_BLD_MISC_H
-#define LP_BLD_MISC_H
+#ifndef LP_BLD_INIT_H
+#define LP_BLD_INIT_H
-#include "llvm/Config/config.h"
-
#ifdef __cplusplus
extern "C" {
#endif
-#ifndef LLVM_NATIVE_ARCH
-
void
-LLVMLinkInJIT(void);
-
-int
-LLVMInitializeNativeTarget(void);
-
-#endif /* !LLVM_NATIVE_ARCH */
+lp_build_init(void);
#ifdef __cplusplus
@@ -53,4 +44,4 @@ LLVMInitializeNativeTarget(void);
#endif
-#endif /* !LP_BLD_MISC_H */
+#endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h
index ca958cdf34..177b5e943e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_interp.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h
@@ -41,7 +41,7 @@
#define LP_BLD_INTERP_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
#include "tgsi/tgsi_exec.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index f813f27074..7d5506c733 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -37,7 +37,7 @@
#define LP_BLD_INTR_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
/**
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 41ac81b744..f3df3dd138 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -42,9 +42,30 @@
#include "lp_bld_logic.h"
+/*
+ * XXX
+ *
+ * Selection with vector conditional like
+ *
+ * select <4 x i1> %C, %A, %B
+ *
+ * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
+ * supported on any backend.
+ *
+ * Expanding the boolean vector to full SIMD register width, as in
+ *
+ * sext <4 x i1> %C to <4 x i32>
+ *
+ * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
+ * it causes assertion failures in LLVM 2.6. It appears to work correctly on
+ * LLVM 2.7.
+ */
+
+
/**
* Build code to compare two values 'a' and 'b' of 'type' using the given func.
* \param func one of PIPE_FUNC_x
+ * The result values will be 0 for false or ~0 for true.
*/
LLVMValueRef
lp_build_compare(LLVMBuilderRef builder,
@@ -53,13 +74,11 @@ lp_build_compare(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b)
{
- LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
LLVMValueRef cond;
LLVMValueRef res;
- unsigned i;
assert(func >= PIPE_FUNC_NEVER);
assert(func <= PIPE_FUNC_ALWAYS);
@@ -73,10 +92,12 @@ lp_build_compare(LLVMBuilderRef builder,
/* XXX: It is not clear if we should use the ordered or unordered operators */
+#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
if(type.floating && util_cpu_caps.has_sse) {
/* float[4] comparison */
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef args[3];
unsigned cc;
boolean swap;
@@ -146,6 +167,7 @@ lp_build_compare(LLVMBuilderRef builder,
const char *pcmpgt;
LLVMValueRef args[2];
LLVMValueRef res;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
switch (type.width) {
case 8:
@@ -197,8 +219,9 @@ lp_build_compare(LLVMBuilderRef builder,
return res;
}
- }
+ } /* if (type.width * type.length == 128) */
#endif
+#endif /* HAVE_LLVM < 0x0207 */
if(type.floating) {
LLVMRealPredicate op;
@@ -232,25 +255,33 @@ lp_build_compare(LLVMBuilderRef builder,
return lp_build_undef(type);
}
-#if 0
- /* XXX: Although valid IR, no LLVM target currently support this */
+#if HAVE_LLVM >= 0x0207
cond = LLVMBuildFCmp(builder, op, a, b, "");
- res = LLVMBuildSelect(builder, cond, ones, zeros, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
- debug_printf("%s: warning: using slow element-wise vector comparison\n",
- __FUNCTION__);
- res = LLVMGetUndef(int_vec_type);
- for(i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- cond = LLVMBuildFCmp(builder, op,
- LLVMBuildExtractElement(builder, a, index, ""),
- LLVMBuildExtractElement(builder, b, index, ""),
- "");
- cond = LLVMBuildSelect(builder, cond,
- LLVMConstExtractElement(ones, index),
- LLVMConstExtractElement(zeros, index),
- "");
- res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ if (type.length == 1) {
+ cond = LLVMBuildFCmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+ }
+ else {
+ unsigned i;
+
+ res = LLVMGetUndef(int_vec_type);
+
+ debug_printf("%s: warning: using slow element-wise float"
+ " vector comparison\n", __FUNCTION__);
+ for (i = 0; i < type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ cond = LLVMBuildFCmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond,
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
}
#endif
}
@@ -280,25 +311,34 @@ lp_build_compare(LLVMBuilderRef builder,
return lp_build_undef(type);
}
-#if 0
- /* XXX: Although valid IR, no LLVM target currently support this */
+#if HAVE_LLVM >= 0x0207
cond = LLVMBuildICmp(builder, op, a, b, "");
- res = LLVMBuildSelect(builder, cond, ones, zeros, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
- debug_printf("%s: warning: using slow element-wise int vector comparison\n",
- __FUNCTION__);
- res = LLVMGetUndef(int_vec_type);
- for(i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- cond = LLVMBuildICmp(builder, op,
- LLVMBuildExtractElement(builder, a, index, ""),
- LLVMBuildExtractElement(builder, b, index, ""),
- "");
- cond = LLVMBuildSelect(builder, cond,
- LLVMConstExtractElement(ones, index),
- LLVMConstExtractElement(zeros, index),
- "");
- res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ if (type.length == 1) {
+ cond = LLVMBuildICmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+ }
+ else {
+ unsigned i;
+
+ res = LLVMGetUndef(int_vec_type);
+
+ debug_printf("%s: warning: using slow element-wise int"
+ " vector comparison\n", __FUNCTION__);
+
+ for(i = 0; i < type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ cond = LLVMBuildICmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond,
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
}
#endif
}
@@ -311,6 +351,7 @@ lp_build_compare(LLVMBuilderRef builder,
/**
* Build code to compare two values 'a' and 'b' using the given func.
* \param func one of PIPE_FUNC_x
+ * The result values will be 0 for false or ~0 for true.
*/
LLVMValueRef
lp_build_cmp(struct lp_build_context *bld,
@@ -322,6 +363,11 @@ lp_build_cmp(struct lp_build_context *bld,
}
+/**
+ * Return mask ? a : b;
+ *
+ * mask is a bitwise mask, composed of 0 or ~0 for each element.
+ */
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
LLVMValueRef mask,
@@ -334,26 +380,32 @@ lp_build_select(struct lp_build_context *bld,
if(a == b)
return a;
- if(type.floating) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ if (type.length == 1) {
+ mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
+ res = LLVMBuildSelect(bld->builder, mask, a, b, "");
}
+ else {
+ if(type.floating) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ }
- a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
- /* This often gets translated to PANDN, but sometimes the NOT is
- * pre-computed and stored in another constant. The best strategy depends
- * on available registers, so it is not a big deal -- hopefully LLVM does
- * the right decision attending the rest of the program.
- */
- b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+ /* This often gets translated to PANDN, but sometimes the NOT is
+ * pre-computed and stored in another constant. The best strategy depends
+ * on available registers, so it is not a big deal -- hopefully LLVM does
+ * the right decision attending the rest of the program.
+ */
+ b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
- res = LLVMBuildOr(bld->builder, a, b, "");
+ res = LLVMBuildOr(bld->builder, a, b, "");
- if(type.floating) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ if(type.floating) {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ }
}
return res;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index a399ebf39e..b54ec13b70 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -37,7 +37,7 @@
#define LP_BLD_LOGIC_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index bc360ad77a..23398f41f9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -256,7 +256,9 @@ lp_build_pack2(LLVMBuilderRef builder,
LLVMValueRef lo,
LLVMValueRef hi)
{
+#if HAVE_LLVM < 0x0207
LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
+#endif
LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
LLVMValueRef shuffle;
LLVMValueRef res;
@@ -272,11 +274,14 @@ lp_build_pack2(LLVMBuilderRef builder,
switch(src_type.width) {
case 32:
if(dst_type.sign) {
+#if HAVE_LLVM >= 0x0207
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", dst_vec_type, lo, hi);
+#else
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
+#endif
}
else {
if (util_cpu_caps.has_sse4_1) {
- /* PACKUSDW is the only instrinsic with a consistent signature */
return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
}
else {
@@ -288,9 +293,17 @@ lp_build_pack2(LLVMBuilderRef builder,
case 16:
if(dst_type.sign)
+#if HAVE_LLVM >= 0x0207
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", dst_vec_type, lo, hi);
+#else
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
+#endif
else
+#if HAVE_LLVM >= 0x0207
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", dst_vec_type, lo, hi);
+#else
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
+#endif
break;
default:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index fb2a34984a..346a17d580 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -37,7 +37,7 @@
#define LP_BLD_PACK_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
struct lp_type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index a133b56ac5..2f74aa5e00 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -44,6 +44,11 @@
#include "lp_bld_sample.h"
+/**
+ * Initialize lp_sampler_static_state object with the gallium sampler
+ * and texture state.
+ * The former is considered to be static and the later dynamic.
+ */
void
lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_texture *texture,
@@ -57,6 +62,18 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
if(!sampler)
return;
+ /*
+ * We don't copy sampler state over unless it is actually enabled, to avoid
+ * spurious recompiles, as the sampler static state is part of the shader
+ * key.
+ *
+ * Ideally the state tracker or cso_cache module would make all state
+ * canonical, but until that happens it's better to be safe than sorry here.
+ *
+ * XXX: Actually there's much more than can be done here, especially
+ * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
+ */
+
state->format = texture->format;
state->target = texture->target;
state->pot_width = util_is_pot(texture->width0);
@@ -67,13 +84,26 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->wrap_t = sampler->wrap_t;
state->wrap_r = sampler->wrap_r;
state->min_img_filter = sampler->min_img_filter;
- state->min_mip_filter = sampler->min_mip_filter;
state->mag_img_filter = sampler->mag_img_filter;
+ if (texture->last_level) {
+ state->min_mip_filter = sampler->min_mip_filter;
+ } else {
+ state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ }
+
state->compare_mode = sampler->compare_mode;
- if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
- state->compare_func = sampler->compare_func;
+ if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
+ state->compare_func = sampler->compare_func;
}
+
state->normalized_coords = sampler->normalized_coords;
+ state->lod_bias = sampler->lod_bias;
+ state->min_lod = sampler->min_lod;
+ state->max_lod = sampler->max_lod;
+ state->border_color[0] = sampler->border_color[0];
+ state->border_color[1] = sampler->border_color[1];
+ state->border_color[2] = sampler->border_color[2];
+ state->border_color[3] = sampler->border_color[3];
}
@@ -129,15 +159,16 @@ lp_build_gather(LLVMBuilderRef builder,
/**
* Compute the offset of a pixel.
*
- * x, y, y_stride are vectors
+ * x, y, z, y_stride, z_stride are vectors
*/
LLVMValueRef
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
+ LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef data_ptr)
+ LLVMValueRef z_stride)
{
LLVMValueRef x_stride;
LLVMValueRef offset;
@@ -153,6 +184,10 @@ lp_build_sample_offset(struct lp_build_context *bld,
LLVMValueRef y_offset_lo, y_offset_hi;
LLVMValueRef offset_lo, offset_hi;
+ /* XXX 1D & 3D addressing not done yet */
+ assert(!z);
+ assert(!z_stride);
+
x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
@@ -176,13 +211,17 @@ lp_build_sample_offset(struct lp_build_context *bld,
offset = lp_build_add(bld, offset_hi, offset_lo);
}
else {
- LLVMValueRef x_offset;
- LLVMValueRef y_offset;
+ offset = lp_build_mul(bld, x, x_stride);
- x_offset = lp_build_mul(bld, x, x_stride);
- y_offset = lp_build_mul(bld, y, y_stride);
+ if (y && y_stride) {
+ LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
+ offset = lp_build_add(bld, offset, y_offset);
+ }
- offset = lp_build_add(bld, x_offset, y_offset);
+ if (z && z_stride) {
+ LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
+ offset = lp_build_add(bld, offset, z_offset);
+ }
}
return offset;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 39edcf13d1..7f08bfaac1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -36,7 +36,7 @@
#define LP_BLD_SAMPLE_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
struct pipe_texture;
struct pipe_sampler_state;
@@ -70,6 +70,8 @@ struct lp_sampler_static_state
unsigned compare_mode:1;
unsigned compare_func:3;
unsigned normalized_coords:1;
+ float lod_bias, min_lod, max_lod;
+ float border_color[4];
};
@@ -98,10 +100,22 @@ struct lp_sampler_dynamic_state
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain the base texture depth. */
LLVMValueRef
- (*stride)( struct lp_sampler_dynamic_state *state,
- LLVMBuilderRef builder,
- unsigned unit);
+ (*depth)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ /** Obtain the number of mipmap levels (minus one). */
+ LLVMValueRef
+ (*last_level)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ LLVMValueRef
+ (*row_stride)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
LLVMValueRef
(*data_ptr)( struct lp_sampler_dynamic_state *state,
@@ -134,8 +148,9 @@ lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
+ LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef data_ptr);
+ LLVMValueRef z_stride);
void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index e268862282..9741dbb389 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -48,6 +48,7 @@
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
+#include "lp_bld_flow.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
@@ -65,11 +66,23 @@ struct lp_build_sample_context
const struct util_format_description *format_desc;
+ /** regular scalar float type */
+ struct lp_type float_type;
+ struct lp_build_context float_bld;
+
+ /** regular scalar float type */
+ struct lp_type int_type;
+ struct lp_build_context int_bld;
+
/** Incoming coordinates type and build context */
struct lp_type coord_type;
struct lp_build_context coord_bld;
- /** Integer coordinates */
+ /** Unsigned integer coordinates */
+ struct lp_type uint_coord_type;
+ struct lp_build_context uint_coord_bld;
+
+ /** Signed integer coordinates */
struct lp_type int_coord_type;
struct lp_build_context int_coord_bld;
@@ -79,36 +92,212 @@ struct lp_build_sample_context
};
+/**
+ * Does the given texture wrap mode allow sampling the texture border color?
+ * XXX maybe move this into gallium util code.
+ */
+static boolean
+wrap_mode_uses_border_color(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return FALSE;
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return TRUE;
+ default:
+ assert(0 && "unexpected wrap mode");
+ return FALSE;
+ }
+}
+
+
+static LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, LLVMValueRef level)
+{
+ LLVMValueRef indexes[2], data_ptr;
+ indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indexes[1] = level;
+ data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+ data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
+ return data_ptr;
+}
+
+
+static LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, int level)
+{
+ LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+ return lp_build_get_mipmap_level(bld, data_array, lvl);
+}
+
+
+/**
+ * Dereference stride_array[mipmap_level] array to get a stride.
+ * Return stride as a vector.
+ */
+static LLVMValueRef
+lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
+ LLVMValueRef stride_array, LLVMValueRef level)
+{
+ LLVMValueRef indexes[2], stride;
+ indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indexes[1] = level;
+ stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
+ stride = LLVMBuildLoad(bld->builder, stride, "");
+ stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
+ return stride;
+}
+
+
+/** Dereference stride_array[0] array to get a stride (as vector). */
+static LLVMValueRef
+lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
+ LLVMValueRef stride_array, int level)
+{
+ LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+ return lp_build_get_level_stride_vec(bld, stride_array, lvl);
+}
+
+
+static int
+texture_dims(enum pipe_texture_target tex)
+{
+ switch (tex) {
+ case PIPE_TEXTURE_1D:
+ return 1;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_CUBE:
+ return 2;
+ case PIPE_TEXTURE_3D:
+ return 3;
+ default:
+ assert(0 && "bad texture target in texture_dims()");
+ return 2;
+ }
+}
+
+
+
+/**
+ * Generate code to fetch a texel from a texture at int coords (x, y, z).
+ * The computation depends on whether the texture is 1D, 2D or 3D.
+ * The result, texel, will be:
+ * texel[0] = red values
+ * texel[1] = green values
+ * texel[2] = blue values
+ * texel[3] = alpha values
+ */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth,
LLVMValueRef x,
LLVMValueRef y,
+ LLVMValueRef z,
LLVMValueRef y_stride,
+ LLVMValueRef z_stride,
LLVMValueRef data_ptr,
LLVMValueRef *texel)
{
+ const int dims = texture_dims(bld->static_state->target);
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef offset;
LLVMValueRef packed;
+ LLVMValueRef use_border = NULL;
+
+ /* use_border = x < 0 || x >= width || y < 0 || y >= height */
+ if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
+ LLVMValueRef b1, b2;
+ b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
+ b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
+ use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
+ }
+
+ if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
+ LLVMValueRef b1, b2;
+ b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
+ b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
+ if (use_border) {
+ use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
+ use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
+ }
+ else {
+ use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
+ }
+ }
+
+ if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
+ LLVMValueRef b1, b2;
+ b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
+ b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
+ if (use_border) {
+ use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
+ use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
+ }
+ else {
+ use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
+ }
+ }
- offset = lp_build_sample_offset(&bld->int_coord_bld,
+ /*
+ * Note: if we find an app which frequently samples the texture border
+ * we might want to implement a true conditional here to avoid sampling
+ * the texture whenever possible (since that's quite a bit of code).
+ * Ex:
+ * if (use_border) {
+ * texel = border_color;
+ * }
+ * else {
+ * texel = sample_texture(coord);
+ * }
+ * As it is now, we always sample the texture, then selectively replace
+ * the texel color results with the border color.
+ */
+
+ /* convert x,y,z coords to linear offset from start of texture, in bytes */
+ offset = lp_build_sample_offset(&bld->uint_coord_bld,
bld->format_desc,
- x, y, y_stride,
- data_ptr);
+ x, y, z, y_stride, z_stride);
assert(bld->format_desc->block.width == 1);
assert(bld->format_desc->block.height == 1);
assert(bld->format_desc->block.bits <= bld->texel_type.width);
+ /* gather the texels from the texture */
packed = lp_build_gather(bld->builder,
bld->texel_type.length,
bld->format_desc->block.bits,
bld->texel_type.width,
data_ptr, offset);
+ texel[0] = texel[1] = texel[2] = texel[3] = NULL;
+
+ /* convert texels to float rgba */
lp_build_unpack_rgba_soa(bld->builder,
bld->format_desc,
bld->texel_type,
packed, texel);
+
+ if (use_border) {
+ /* select texel color or border color depending on use_border */
+ int chan;
+ for (chan = 0; chan < 4; chan++) {
+ LLVMValueRef border_chan =
+ lp_build_const_scalar(bld->texel_type,
+ bld->static_state->border_color[chan]);
+ texel[chan] = lp_build_select(&bld->texel_bld, use_border,
+ border_chan, texel[chan]);
+ }
+ }
}
@@ -117,19 +306,22 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef y_stride,
- LLVMValueRef data_ptr)
+ LLVMValueRef data_array)
{
LLVMValueRef offset;
+ LLVMValueRef data_ptr;
- offset = lp_build_sample_offset(&bld->int_coord_bld,
+ offset = lp_build_sample_offset(&bld->uint_coord_bld,
bld->format_desc,
- x, y, y_stride,
- data_ptr);
+ x, y, NULL, y_stride, NULL);
assert(bld->format_desc->block.width == 1);
assert(bld->format_desc->block.height == 1);
assert(bld->format_desc->block.bits <= bld->texel_type.width);
+ /* get pointer to mipmap level 0 data */
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
return lp_build_gather(bld->builder,
bld->texel_type.length,
bld->format_desc->block.bits,
@@ -138,17 +330,77 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
}
+/**
+ * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
+ */
static LLVMValueRef
-lp_build_sample_wrap(struct lp_build_sample_context *bld,
- LLVMValueRef coord,
- LLVMValueRef length,
- boolean is_pot,
- unsigned wrap_mode)
+lp_build_coord_mirror(struct lp_build_sample_context *bld,
+ LLVMValueRef coord)
{
+ struct lp_build_context *coord_bld = &bld->coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef fract, flr, isOdd;
+
+ /* fract = coord - floor(coord) */
+ fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
+
+ /* flr = ifloor(coord); */
+ flr = lp_build_ifloor(coord_bld, coord);
+
+ /* isOdd = flr & 1 */
+ isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
+
+ /* make coord positive or negative depending on isOdd */
+ coord = lp_build_set_sign(coord_bld, fract, isOdd);
+
+ /* convert isOdd to float */
+ isOdd = lp_build_int_to_float(coord_bld, isOdd);
+
+ /* add isOdd to coord */
+ coord = lp_build_add(coord_bld, coord, isOdd);
+
+ return coord;
+}
+
+
+/**
+ * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
+ * Return whether the given mode is supported by that function.
+ */
+static boolean
+is_simple_wrap_mode(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return TRUE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ default:
+ return FALSE;
+ }
+}
+
+
+/**
+ * Build LLVM code for texture wrap mode, for scaled integer texcoords.
+ * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length the texture size along one dimension
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ */
+static LLVMValueRef
+lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ boolean is_pot,
+ unsigned wrap_mode)
+{
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef length_minus_one;
- length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+ length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
switch(wrap_mode) {
case PIPE_TEX_WRAP_REPEAT:
@@ -161,12 +413,12 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
break;
case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
coord = lp_build_min(int_coord_bld, coord, length_minus_one);
break;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
case PIPE_TEX_WRAP_MIRROR_REPEAT:
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
@@ -174,8 +426,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
/* FIXME */
_debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
util_dump_tex_wrap(wrap_mode, TRUE));
- coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
- coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+ coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
+ coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
break;
default:
@@ -186,92 +438,1270 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
}
+/**
+ * Build LLVM code for texture wrap mode for linear filtering.
+ * \param x0_out returns first integer texcoord
+ * \param x1_out returns second integer texcoord
+ * \param weight_out returns linear interpolation weight
+ */
static void
-lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef stride,
- LLVMValueRef data_ptr,
- LLVMValueRef *texel)
+lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *x0_out,
+ LLVMValueRef *x1_out,
+ LLVMValueRef *weight_out)
{
- LLVMValueRef x;
- LLVMValueRef y;
+ struct lp_build_context *coord_bld = &bld->coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
+ LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
+ LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
+ LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+ LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
+ LLVMValueRef coord0, coord1, weight;
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ /* mul by size and subtract 0.5 */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_sub(coord_bld, coord, half);
+ /* convert to int */
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
+ /* compute lerp weight */
+ weight = lp_build_fract(coord_bld, coord);
+ /* repeat wrap */
+ if (is_pot) {
+ coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
+ coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
+ }
+ else {
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
+ coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
+ }
+ break;
- x = lp_build_ifloor(&bld->coord_bld, s);
- y = lp_build_ifloor(&bld->coord_bld, t);
- lp_build_name(x, "tex.x.floor");
- lp_build_name(y, "tex.y.floor");
+ case PIPE_TEX_WRAP_CLAMP:
+ if (bld->static_state->normalized_coords) {
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+ weight = lp_build_fract(coord_bld, coord);
+ coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
+ length_f_minus_one);
+ coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
+ coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
+ length_f_minus_one);
+ coord0 = lp_build_ifloor(coord_bld, coord0);
+ coord1 = lp_build_ifloor(coord_bld, coord1);
+ break;
- x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s);
- y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
- lp_build_name(x, "tex.x.wrapped");
- lp_build_name(y, "tex.y.wrapped");
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ if (bld->static_state->normalized_coords) {
+ /* clamp to [0,1] */
+ coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
+ /* mul by tex size and subtract 0.5 */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_sub(coord_bld, coord, half);
+ }
+ else {
+ LLVMValueRef min, max;
+ /* clamp to [0.5, length - 0.5] */
+ min = lp_build_const_scalar(coord_bld->type, 0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ }
+ /* compute lerp weight */
+ weight = lp_build_fract(coord_bld, coord);
+ /* coord0 = floor(coord); */
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ /* coord0 = max(coord0, 0) */
+ coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
+ /* coord1 = min(coord1, length-1) */
+ coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ {
+ LLVMValueRef min, max;
+ if (bld->static_state->normalized_coords) {
+ /* min = -1.0 / (2 * length) = -0.5 / length */
+ min = lp_build_mul(coord_bld,
+ lp_build_const_scalar(coord_bld->type, -0.5F),
+ lp_build_rcp(coord_bld, length_f));
+ /* max = 1.0 - min */
+ max = lp_build_sub(coord_bld, coord_bld->one, min);
+ /* coord = clamp(coord, min, max) */
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ /* scale coord to length (and sub 0.5?) */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_sub(coord_bld, coord, half);
+ }
+ else {
+ /* clamp to [-0.5, length + 0.5] */
+ min = lp_build_const_scalar(coord_bld->type, -0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ coord = lp_build_sub(coord_bld, coord, half);
+ }
+ /* compute lerp weight */
+ weight = lp_build_fract(coord_bld, coord);
+ /* convert to int */
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ }
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ /* compute mirror function */
+ coord = lp_build_coord_mirror(bld, coord);
+
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_sub(coord_bld, coord, half);
+
+ /* compute lerp weight */
+ weight = lp_build_fract(coord_bld, coord);
+
+ /* convert to int coords */
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
- lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel);
+ /* coord0 = max(coord0, 0) */
+ coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
+ /* coord1 = min(coord1, length-1) */
+ coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ {
+ LLVMValueRef min, max;
+ /* min = 1.0 / (2 * length) */
+ min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+ /* max = 1.0 - min */
+ max = lp_build_sub(coord_bld, coord_bld->one, min);
+
+ coord = lp_build_abs(coord_bld, coord);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ if(0)coord = lp_build_sub(coord_bld, coord, half);
+ weight = lp_build_fract(coord_bld, coord);
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ }
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ {
+ LLVMValueRef min, max;
+ /* min = 1.0 / (2 * length) */
+ min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+ /* max = 1.0 - min */
+ max = lp_build_sub(coord_bld, coord_bld->one, min);
+
+ coord = lp_build_abs(coord_bld, coord);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_sub(coord_bld, coord, half);
+ weight = lp_build_fract(coord_bld, coord);
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ }
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ {
+ LLVMValueRef min, max;
+ /* min = -1.0 / (2 * length) = -0.5 / length */
+ min = lp_build_mul(coord_bld,
+ lp_build_const_scalar(coord_bld->type, -0.5F),
+ lp_build_rcp(coord_bld, length_f));
+ /* max = 1.0 - min */
+ max = lp_build_sub(coord_bld, coord_bld->one, min);
+
+ coord = lp_build_abs(coord_bld, coord);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_sub(coord_bld, coord, half);
+ weight = lp_build_fract(coord_bld, coord);
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ }
+ break;
+
+ default:
+ assert(0);
+ coord0 = NULL;
+ coord1 = NULL;
+ weight = NULL;
+ }
+
+ *x0_out = coord0;
+ *x1_out = coord1;
+ *weight_out = weight;
}
+/**
+ * Build LLVM code for texture wrap mode for nearest filtering.
+ * \param coord the incoming texcoord (nominally in [0,1])
+ * \param length the texture size along one dimension, as int
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ */
+static LLVMValueRef
+lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ boolean is_pot,
+ unsigned wrap_mode)
+{
+ struct lp_build_context *coord_bld = &bld->coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
+ LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
+ LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+ LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
+ LLVMValueRef icoord;
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ icoord = lp_build_ifloor(coord_bld, coord);
+ if (is_pot)
+ icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
+ else
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ icoord = LLVMBuildURem(bld->builder, icoord, length, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ /* mul by size */
+ if (bld->static_state->normalized_coords) {
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+ /* floor */
+ icoord = lp_build_ifloor(coord_bld, coord);
+ /* clamp to [0, size-1]. Note: int coord builder type */
+ icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
+ length_minus_one);
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ {
+ LLVMValueRef min, max;
+ if (bld->static_state->normalized_coords) {
+ /* min = 1.0 / (2 * length) */
+ min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+ /* max = length - min */
+ max = lp_build_sub(coord_bld, length_f, min);
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+ else {
+ /* clamp to [0.5, length - 0.5] */
+ min = lp_build_const_scalar(coord_bld->type, 0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ }
+ /* coord = clamp(coord, min, max) */
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ icoord = lp_build_ifloor(coord_bld, coord);
+ }
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
+ {
+ LLVMValueRef min, max;
+ if (bld->static_state->normalized_coords) {
+ /* min = -1.0 / (2 * length) = -0.5 / length */
+ min = lp_build_mul(coord_bld,
+ lp_build_const_scalar(coord_bld->type, -0.5F),
+ lp_build_rcp(coord_bld, length_f));
+ /* max = length - min */
+ max = lp_build_sub(coord_bld, length_f, min);
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+ else {
+ /* clamp to [-0.5, length + 0.5] */
+ min = lp_build_const_scalar(coord_bld->type, -0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ }
+ /* coord = clamp(coord, min, max) */
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ icoord = lp_build_ifloor(coord_bld, coord);
+ }
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ {
+ LLVMValueRef min, max;
+ /* min = 1.0 / (2 * length) */
+ min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+ /* max = length - min */
+ max = lp_build_sub(coord_bld, length_f, min);
+
+ /* compute mirror function */
+ coord = lp_build_coord_mirror(bld, coord);
+
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+
+ /* coord = clamp(coord, min, max) */
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ icoord = lp_build_ifloor(coord_bld, coord);
+ }
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ coord = lp_build_abs(coord_bld, coord);
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
+ icoord = lp_build_ifloor(coord_bld, coord);
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ {
+ LLVMValueRef min, max;
+ /* min = 1.0 / (2 * length) */
+ min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+ /* max = length - min */
+ max = lp_build_sub(coord_bld, length_f, min);
+
+ coord = lp_build_abs(coord_bld, coord);
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ icoord = lp_build_ifloor(coord_bld, coord);
+ }
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ {
+ LLVMValueRef min, max;
+ /* min = 1.0 / (2 * length) */
+ min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+ min = lp_build_negate(coord_bld, min);
+ /* max = length - min */
+ max = lp_build_sub(coord_bld, length_f, min);
+
+ coord = lp_build_abs(coord_bld, coord);
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ icoord = lp_build_ifloor(coord_bld, coord);
+ }
+ break;
+
+ default:
+ assert(0);
+ icoord = NULL;
+ }
+
+ return icoord;
+}
+
+
+/**
+ * Codegen equivalent for u_minify().
+ * Return max(1, base_size >> level);
+ */
+static LLVMValueRef
+lp_build_minify(struct lp_build_sample_context *bld,
+ LLVMValueRef base_size,
+ LLVMValueRef level)
+{
+ LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+ size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
+ return size;
+}
+
+
+/**
+ * Generate code to compute texture level of detail (lambda).
+ * \param s vector of texcoord s values
+ * \param t vector of texcoord t values
+ * \param r vector of texcoord r values
+ * \param width scalar int texture width
+ * \param height scalar int texture height
+ * \param depth scalar int texture depth
+ */
+static LLVMValueRef
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth)
+
+{
+ if (bld->static_state->min_lod == bld->static_state->max_lod) {
+ /* User is forcing sampling from a particular mipmap level.
+ * This is hit during mipmap generation.
+ */
+ return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
+ }
+ else {
+ const int dims = texture_dims(bld->static_state->target);
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->lod_bias);
+ LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->min_lod);
+ LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->max_lod);
+
+ LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+ LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+
+ LLVMValueRef s0, s1, s2;
+ LLVMValueRef t0, t1, t2;
+ LLVMValueRef r0, r1, r2;
+ LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+ LLVMValueRef rho, lod;
+
+ /*
+ * dsdx = abs(s[1] - s[0]);
+ * dsdy = abs(s[2] - s[0]);
+ * dtdx = abs(t[1] - t[0]);
+ * dtdy = abs(t[2] - t[0]);
+ * drdx = abs(r[1] - r[0]);
+ * drdy = abs(r[2] - r[0]);
+ * XXX we're assuming a four-element quad in 2x2 layout here.
+ */
+ s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
+ s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
+ s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
+ dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
+ dsdx = lp_build_abs(float_bld, dsdx);
+ dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
+ dsdy = lp_build_abs(float_bld, dsdy);
+ if (dims > 1) {
+ t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
+ t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
+ t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
+ dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
+ dtdx = lp_build_abs(float_bld, dtdx);
+ dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
+ dtdy = lp_build_abs(float_bld, dtdy);
+ if (dims > 2) {
+ r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
+ r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
+ r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
+ drdx = LLVMBuildSub(bld->builder, r1, r0, "");
+ drdx = lp_build_abs(float_bld, drdx);
+ drdy = LLVMBuildSub(bld->builder, r2, r0, "");
+ drdy = lp_build_abs(float_bld, drdy);
+ }
+ }
+
+ /* Compute rho = max of all partial derivatives scaled by texture size.
+ * XXX this could be vectorized somewhat
+ */
+ rho = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, dsdx, dsdy),
+ lp_build_int_to_float(float_bld, width), "");
+ if (dims > 1) {
+ LLVMValueRef max;
+ max = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, dtdx, dtdy),
+ lp_build_int_to_float(float_bld, height), "");
+ rho = lp_build_max(float_bld, rho, max);
+ if (dims > 2) {
+ max = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, drdx, drdy),
+ lp_build_int_to_float(float_bld, depth), "");
+ rho = lp_build_max(float_bld, rho, max);
+ }
+ }
+
+ /* compute lod = log2(rho) */
+ lod = lp_build_log2(float_bld, rho);
+
+ /* add lod bias */
+ lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+
+ /* clamp lod */
+ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+
+ return lod;
+ }
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
+ * mipmap level index.
+ * Note: this is all scalar code.
+ * \param lod scalar float texture level of detail
+ * \param level_out returns integer
+ */
static void
-lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level_out)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMValueRef last_level, level;
+
+ LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
+
+ /* convert float lod to integer */
+ level = lp_build_iround(float_bld, lod);
+
+ /* clamp level to legal range of levels */
+ *level_out = lp_build_clamp(int_bld, level, zero, last_level);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
+ * two (adjacent) mipmap level indexes. Later, we'll sample from those
+ * two mipmap levels and interpolate between them.
+ */
+static void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level0_out,
+ LLVMValueRef *level1_out,
+ LLVMValueRef *weight_out)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMValueRef last_level, level;
+
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
+
+ /* convert float lod to integer */
+ level = lp_build_ifloor(float_bld, lod);
+
+ /* compute level 0 and clamp to legal range of levels */
+ *level0_out = lp_build_clamp(int_bld, level,
+ int_bld->zero,
+ last_level);
+ /* compute level 1 and clamp to legal range of levels */
+ *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
+ *level1_out = lp_build_min(int_bld, *level1_out, last_level);
+
+ *weight_out = lp_build_fract(float_bld, lod);
+}
+
+
+/**
+ * Generate code to sample a mipmap level with nearest filtering.
+ * If sampling a cube texture, r = cube face in [0,5].
+ */
+static void
+lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_vec,
+ LLVMValueRef img_stride_vec,
+ LLVMValueRef data_ptr,
LLVMValueRef s,
LLVMValueRef t,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef stride,
- LLVMValueRef data_ptr,
- LLVMValueRef *texel)
+ LLVMValueRef r,
+ LLVMValueRef colors_out[4])
{
- LLVMValueRef half;
- LLVMValueRef s_ipart;
- LLVMValueRef t_ipart;
- LLVMValueRef s_fpart;
- LLVMValueRef t_fpart;
- LLVMValueRef x0, x1;
- LLVMValueRef y0, y1;
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef x, y, z;
+
+ /*
+ * Compute integer texcoords.
+ */
+ x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s);
+ lp_build_name(x, "tex.x.wrapped");
+
+ if (dims >= 2) {
+ y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t);
+ lp_build_name(y, "tex.y.wrapped");
+
+ if (dims == 3) {
+ z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_r);
+ lp_build_name(z, "tex.z.wrapped");
+ }
+ else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ z = r;
+ }
+ else {
+ z = NULL;
+ }
+ }
+ else {
+ y = z = NULL;
+ }
+
+ /*
+ * Get texture colors.
+ */
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x, y, z,
+ row_stride_vec, img_stride_vec,
+ data_ptr, colors_out);
+}
+
+
+/**
+ * Generate code to sample a mipmap level with linear filtering.
+ * If sampling a cube texture, r = cube face in [0,5].
+ */
+static void
+lp_build_sample_image_linear(struct lp_build_sample_context *bld,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_vec,
+ LLVMValueRef img_stride_vec,
+ LLVMValueRef data_ptr,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef colors_out[4])
+{
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef x0, y0, z0, x1, y1, z1;
+ LLVMValueRef s_fpart, t_fpart, r_fpart;
LLVMValueRef neighbors[2][2][4];
- unsigned chan;
+ int chan;
+
+ /*
+ * Compute integer texcoords.
+ */
+ lp_build_sample_wrap_linear(bld, s, width_vec,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x0, &x1, &s_fpart);
+ lp_build_name(x0, "tex.x0.wrapped");
+ lp_build_name(x1, "tex.x1.wrapped");
+
+ if (dims >= 2) {
+ lp_build_sample_wrap_linear(bld, t, height_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y0, &y1, &t_fpart);
+ lp_build_name(y0, "tex.y0.wrapped");
+ lp_build_name(y1, "tex.y1.wrapped");
+
+ if (dims == 3) {
+ lp_build_sample_wrap_linear(bld, r, depth_vec,
+ bld->static_state->pot_depth,
+ bld->static_state->wrap_r,
+ &z0, &z1, &r_fpart);
+ lp_build_name(z0, "tex.z0.wrapped");
+ lp_build_name(z1, "tex.z1.wrapped");
+ }
+ else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ z0 = z1 = r; /* cube face */
+ r_fpart = NULL;
+ }
+ else {
+ z0 = z1 = NULL;
+ r_fpart = NULL;
+ }
+ }
+ else {
+ y0 = y1 = t_fpart = NULL;
+ z0 = z1 = r_fpart = NULL;
+ }
+
+ /*
+ * Get texture colors.
+ */
+ /* get x0/x1 texels */
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x0, y0, z0,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors[0][0]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x1, y0, z0,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors[0][1]);
+
+ if (dims == 1) {
+ /* Interpolate two samples from 1D image to produce one color */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan]);
+ }
+ }
+ else {
+ /* 2D/3D texture */
+ LLVMValueRef colors0[4];
+
+ /* get x0/x1 texels at y1 */
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x0, y1, z0,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors[1][0]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x1, y1, z0,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors[1][1]);
+
+ /* Bilinear interpolate the four samples from the 2D image / 3D slice */
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
+ s_fpart, t_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan],
+ neighbors[1][0][chan],
+ neighbors[1][1][chan]);
+ }
+
+ if (dims == 3) {
+ LLVMValueRef neighbors1[2][2][4];
+ LLVMValueRef colors1[4];
+
+ /* get x0/x1/y0/y1 texels at z1 */
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x0, y0, z1,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors1[0][0]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x1, y0, z1,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors1[0][1]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x0, y1, z1,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors1[1][0]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x1, y1, z1,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors1[1][1]);
+
+ /* Bilinear interpolate the four samples from the second Z slice */
+ for (chan = 0; chan < 4; chan++) {
+ colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
+ s_fpart, t_fpart,
+ neighbors1[0][0][chan],
+ neighbors1[0][1][chan],
+ neighbors1[1][0][chan],
+ neighbors1[1][1][chan]);
+ }
+
+ /* Linearly interpolate the two samples from the two 3D slices */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = lp_build_lerp(&bld->texel_bld,
+ r_fpart,
+ colors0[chan], colors1[chan]);
+ }
+ }
+ else {
+ /* 2D tex */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = colors0[chan];
+ }
+ }
+ }
+}
- half = lp_build_const_scalar(bld->coord_type, 0.5);
- s = lp_build_sub(&bld->coord_bld, s, half);
- t = lp_build_sub(&bld->coord_bld, t, half);
- s_ipart = lp_build_floor(&bld->coord_bld, s);
- t_ipart = lp_build_floor(&bld->coord_bld, t);
+/** Helper used by lp_build_cube_lookup() */
+static LLVMValueRef
+lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
+{
+ /* ima = -0.5 / abs(coord); */
+ LLVMValueRef negHalf = lp_build_const_scalar(coord_bld->type, -0.5);
+ LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
+ LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
+ lp_build_rcp(coord_bld, absCoord));
+ return ima;
+}
- s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
- t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
- x0 = lp_build_itrunc(&bld->coord_bld, s_ipart);
- y0 = lp_build_itrunc(&bld->coord_bld, t_ipart);
+/**
+ * Helper used by lp_build_cube_lookup()
+ * \param sign scalar +1 or -1
+ * \param coord float vector
+ * \param ima float vector
+ */
+static LLVMValueRef
+lp_build_cube_coord(struct lp_build_context *coord_bld,
+ LLVMValueRef sign, int negate_coord,
+ LLVMValueRef coord, LLVMValueRef ima)
+{
+ /* return negate(coord) * ima * sign + 0.5; */
+ LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
+ LLVMValueRef res;
- x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
- y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+ assert(negate_coord == +1 || negate_coord == -1);
- x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
- y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+ if (negate_coord == -1) {
+ coord = lp_build_negate(coord_bld, coord);
+ }
- x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
- y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+ res = lp_build_mul(coord_bld, coord, ima);
+ if (sign) {
+ sign = lp_build_broadcast_scalar(coord_bld, sign);
+ res = lp_build_mul(coord_bld, res, sign);
+ }
+ res = lp_build_add(coord_bld, res, half);
+
+ return res;
+}
- lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
- lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
- lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
- lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
- /* TODO: Don't interpolate missing channels */
- for(chan = 0; chan < 4; ++chan) {
- texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
- s_fpart, t_fpart,
- neighbors[0][0][chan],
- neighbors[0][1][chan],
- neighbors[1][0][chan],
- neighbors[1][1][chan]);
+/** Helper used by lp_build_cube_lookup()
+ * Return (major_coord >= 0) ? pos_face : neg_face;
+ */
+static LLVMValueRef
+lp_build_cube_face(struct lp_build_sample_context *bld,
+ LLVMValueRef major_coord,
+ unsigned pos_face, unsigned neg_face)
+{
+ LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
+ major_coord,
+ bld->float_bld.zero, "");
+ LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
+ LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
+ LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
+ return res;
+}
+
+
+
+/**
+ * Generate code to do cube face selection and per-face texcoords.
+ */
+static void
+lp_build_cube_lookup(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *face,
+ LLVMValueRef *face_s,
+ LLVMValueRef *face_t)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *coord_bld = &bld->coord_bld;
+ LLVMValueRef rx, ry, rz;
+ LLVMValueRef arx, ary, arz;
+ LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
+ LLVMValueRef arx_ge_ary, arx_ge_arz;
+ LLVMValueRef ary_ge_arx, ary_ge_arz;
+ LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
+ LLVMValueRef rx_pos, ry_pos, rz_pos;
+
+ assert(bld->coord_bld.type.length == 4);
+
+ /*
+ * Use the average of the four pixel's texcoords to choose the face.
+ */
+ rx = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, s));
+ ry = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, t));
+ rz = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, r));
+
+ arx = lp_build_abs(float_bld, rx);
+ ary = lp_build_abs(float_bld, ry);
+ arz = lp_build_abs(float_bld, rz);
+
+ /*
+ * Compare sign/magnitude of rx,ry,rz to determine face
+ */
+ arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
+ arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
+ ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
+ ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
+
+ arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
+ ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
+
+ rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
+ ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
+ rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
+
+ {
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+
+ flow_ctx = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx);
+
+ *face_s = bld->coord_bld.undef;
+ *face_t = bld->coord_bld.undef;
+ *face = bld->int_bld.undef;
+
+ lp_build_name(*face_s, "face_s");
+ lp_build_name(*face_t, "face_t");
+ lp_build_name(*face, "face");
+
+ lp_build_flow_scope_declare(flow_ctx, face_s);
+ lp_build_flow_scope_declare(flow_ctx, face_t);
+ lp_build_flow_scope_declare(flow_ctx, face);
+
+ lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
+ {
+ /* +/- X face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, rx);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
+ *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
+ *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ *face = lp_build_cube_face(bld, rx,
+ PIPE_TEX_FACE_POS_X,
+ PIPE_TEX_FACE_NEG_X);
+ }
+ lp_build_else(&if_ctx);
+ {
+ struct lp_build_flow_context *flow_ctx2;
+ struct lp_build_if_state if_ctx2;
+
+ LLVMValueRef face_s2 = bld->coord_bld.undef;
+ LLVMValueRef face_t2 = bld->coord_bld.undef;
+ LLVMValueRef face2 = bld->int_bld.undef;
+
+ flow_ctx2 = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx2);
+ lp_build_flow_scope_declare(flow_ctx2, &face_s2);
+ lp_build_flow_scope_declare(flow_ctx2, &face_t2);
+ lp_build_flow_scope_declare(flow_ctx2, &face2);
+
+ ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
+
+ lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
+ {
+ /* +/- Y face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, ry);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
+ face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
+ face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
+ face2 = lp_build_cube_face(bld, ry,
+ PIPE_TEX_FACE_POS_Y,
+ PIPE_TEX_FACE_NEG_Y);
+ }
+ lp_build_else(&if_ctx2);
+ {
+ /* +/- Z face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, rz);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
+ face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
+ face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ face2 = lp_build_cube_face(bld, rz,
+ PIPE_TEX_FACE_POS_Z,
+ PIPE_TEX_FACE_NEG_Z);
+ }
+ lp_build_endif(&if_ctx2);
+ lp_build_flow_scope_end(flow_ctx2);
+ lp_build_flow_destroy(flow_ctx2);
+
+ *face_s = face_s2;
+ *face_t = face_t2;
+ *face = face2;
+ }
+
+ lp_build_endif(&if_ctx);
+ lp_build_flow_scope_end(flow_ctx);
+ lp_build_flow_destroy(flow_ctx);
}
}
+
+/**
+ * Sample the texture/mipmap using given image filter and mip filter.
+ * data0_ptr and data1_ptr point to the two mipmap levels to sample
+ * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
+ * If we're using nearest miplevel sampling the '1' values will be null/unused.
+ */
+static void
+lp_build_sample_mipmap(struct lp_build_sample_context *bld,
+ unsigned img_filter,
+ unsigned mip_filter,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef lod_fpart,
+ LLVMValueRef width0_vec,
+ LLVMValueRef width1_vec,
+ LLVMValueRef height0_vec,
+ LLVMValueRef height1_vec,
+ LLVMValueRef depth0_vec,
+ LLVMValueRef depth1_vec,
+ LLVMValueRef row_stride0_vec,
+ LLVMValueRef row_stride1_vec,
+ LLVMValueRef img_stride0_vec,
+ LLVMValueRef img_stride1_vec,
+ LLVMValueRef data_ptr0,
+ LLVMValueRef data_ptr1,
+ LLVMValueRef *colors_out)
+{
+ LLVMValueRef colors0[4], colors1[4];
+ int chan;
+
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ lp_build_sample_image_nearest(bld,
+ width0_vec, height0_vec, depth0_vec,
+ row_stride0_vec, img_stride0_vec,
+ data_ptr0, s, t, r, colors0);
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* sample the second mipmap level, and interp */
+ lp_build_sample_image_nearest(bld,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r, colors1);
+ }
+ }
+ else {
+ assert(img_filter == PIPE_TEX_FILTER_LINEAR);
+
+ lp_build_sample_image_linear(bld,
+ width0_vec, height0_vec, depth0_vec,
+ row_stride0_vec, img_stride0_vec,
+ data_ptr0, s, t, r, colors0);
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* sample the second mipmap level, and interp */
+ lp_build_sample_image_linear(bld,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r, colors1);
+ }
+ }
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* interpolate samples from the two mipmap levels */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
+ colors0[chan], colors1[chan]);
+ }
+ }
+ else {
+ /* use first/only level's colors */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = colors0[chan];
+ }
+ }
+}
+
+
+
+/**
+ * General texture sampling codegen.
+ * This function handles texture sampling for all texture targets (1D,
+ * 2D, 3D, cube) and all filtering modes.
+ */
+static void
+lp_build_sample_general(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_array,
+ LLVMValueRef img_stride_vec,
+ LLVMValueRef data_array,
+ LLVMValueRef *colors_out)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ const unsigned mip_filter = bld->static_state->min_mip_filter;
+ const unsigned min_filter = bld->static_state->min_img_filter;
+ const unsigned mag_filter = bld->static_state->mag_img_filter;
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef lod, lod_fpart;
+ LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec;
+ LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
+ LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
+ LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
+ LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
+ LLVMValueRef data_ptr0, data_ptr1;
+
+ /*
+ printf("%s mip %d min %d mag %d\n", __FUNCTION__,
+ mip_filter, min_filter, mag_filter);
+ */
+
+ /*
+ * Compute the level of detail (float).
+ */
+ if (min_filter != mag_filter ||
+ mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ /* Need to compute lod either to choose mipmap levels or to
+ * distinguish between minification/magnification with one mipmap level.
+ */
+ lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
+ }
+
+ /*
+ * Compute integer mipmap level(s) to fetch texels from.
+ */
+ if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ /* always use mip level 0 */
+ ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ }
+ else {
+ if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+ lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ }
+ else {
+ assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
+ lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
+ &lod_fpart);
+ lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
+ }
+ }
+
+ /*
+ * Convert scalar integer mipmap levels into vectors.
+ */
+ ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+ ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
+
+ /*
+ * Compute width, height at mipmap level 'ilevel0'
+ */
+ width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
+ if (dims >= 2) {
+ height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
+ row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
+ ilevel0);
+ if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ img_stride0_vec = lp_build_mul(&bld->int_coord_bld,
+ row_stride0_vec, height0_vec);
+ if (dims == 3) {
+ depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
+ }
+ }
+ }
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* compute width, height, depth for second mipmap level at 'ilevel1' */
+ width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
+ if (dims >= 2) {
+ height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
+ row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
+ ilevel1);
+ if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ img_stride1_vec = lp_build_mul(&bld->int_coord_bld,
+ row_stride1_vec, height1_vec);
+ if (dims ==3) {
+ depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
+ }
+ }
+ }
+ }
+
+ /*
+ * Choose cube face, recompute per-face texcoords.
+ */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef face, face_s, face_t;
+ lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+ s = face_s; /* vec */
+ t = face_t; /* vec */
+ /* use 'r' to indicate cube face */
+ r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
+ }
+
+ /*
+ * Get pointer(s) to image data for mipmap level(s).
+ */
+ data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
+ }
+
+ /*
+ * Get/interpolate texture colors.
+ */
+ if (min_filter == mag_filter) {
+ /* no need to distinquish between minification and magnification */
+ lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
+ width0_vec, width1_vec,
+ height0_vec, height1_vec,
+ depth0_vec, depth1_vec,
+ row_stride0_vec, row_stride1_vec,
+ img_stride0_vec, img_stride1_vec,
+ data_ptr0, data_ptr1,
+ colors_out);
+ }
+ else {
+ /* Emit conditional to choose min image filter or mag image filter
+ * depending on the lod being >0 or <= 0, respectively.
+ */
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef minify;
+
+ flow_ctx = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx);
+
+ lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
+ lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
+ lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
+ lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
+
+ /* minify = lod > 0.0 */
+ minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
+ lod, float_bld->zero, "");
+
+ lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
+ {
+ /* Use the minification filter */
+ lp_build_sample_mipmap(bld, min_filter, mip_filter,
+ s, t, r, lod_fpart,
+ width0_vec, width1_vec,
+ height0_vec, height1_vec,
+ depth0_vec, depth1_vec,
+ row_stride0_vec, row_stride1_vec,
+ img_stride0_vec, img_stride1_vec,
+ data_ptr0, data_ptr1,
+ colors_out);
+ }
+ lp_build_else(&if_ctx);
+ {
+ /* Use the magnification filter */
+ lp_build_sample_mipmap(bld, mag_filter, mip_filter,
+ s, t, r, lod_fpart,
+ width0_vec, width1_vec,
+ height0_vec, height1_vec,
+ depth0_vec, depth1_vec,
+ row_stride0_vec, row_stride1_vec,
+ img_stride0_vec, img_stride1_vec,
+ data_ptr0, data_ptr1,
+ colors_out);
+ }
+ lp_build_endif(&if_ctx);
+
+ lp_build_flow_scope_end(flow_ctx);
+ lp_build_flow_destroy(flow_ctx);
+ }
+}
+
+
+
static void
lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
struct lp_type dst_type,
@@ -308,8 +1738,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef t,
LLVMValueRef width,
LLVMValueRef height,
- LLVMValueRef stride,
- LLVMValueRef data_ptr,
+ LLVMValueRef stride_array,
+ LLVMValueRef data_array,
LLVMValueRef *texel)
{
LLVMBuilderRef builder = bld->builder;
@@ -325,8 +1755,9 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef neighbors_hi[2][2];
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
+ LLVMValueRef stride;
- lp_build_context_init(&i32, builder, lp_type_int(32));
+ lp_build_context_init(&i32, builder, lp_type_int_vec(32));
lp_build_context_init(&h16, builder, lp_type_ufixed(16));
lp_build_context_init(&u8n, builder, lp_type_unorm(8));
@@ -334,20 +1765,33 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
h16_vec_type = lp_build_vec_type(h16.type);
u8n_vec_type = lp_build_vec_type(u8n.type);
+ if (bld->static_state->normalized_coords) {
+ LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
+ LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
+ LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
+ s = lp_build_mul(&bld->coord_bld, s, fp_width);
+ t = lp_build_mul(&bld->coord_bld, t, fp_height);
+ }
+
+ /* scale coords by 256 (8 fractional bits) */
s = lp_build_mul_imm(&bld->coord_bld, s, 256);
t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+ /* convert float to int */
s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+ /* subtract 0.5 (add -128) */
i32_c128 = lp_build_int_const_scalar(i32.type, -128);
s = LLVMBuildAdd(builder, s, i32_c128, "");
t = LLVMBuildAdd(builder, t, i32_c128, "");
+ /* compute floor (shift right 8) */
i32_c8 = lp_build_int_const_scalar(i32.type, 8);
s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+ /* compute fractional part (AND with 0xff) */
i32_c255 = lp_build_int_const_scalar(i32.type, 255);
s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
@@ -355,14 +1799,18 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
x0 = s_ipart;
y0 = t_ipart;
- x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
- y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
-
x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
- x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
- y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+ x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
+ bld->static_state->wrap_s);
+ y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
+ bld->static_state->wrap_t);
+
+ x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
+ bld->static_state->wrap_s);
+ y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
+ bld->static_state->wrap_t);
/*
* Transform 4 x i32 in
@@ -415,6 +1863,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
}
+ stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
+
/*
* Fetch the pixels as 4 x 32bit (rgba order might differ):
*
@@ -432,10 +1882,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
* The higher 8 bits of the resulting elements will be zero.
*/
- neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
- neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
- neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
- neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
+ neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
+ neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
+ neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
+ neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
@@ -518,6 +1968,12 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
}
+/**
+ * Build texture sampling code.
+ * 'texel' will return a vector of four LLVMValueRefs corresponding to
+ * R, G, B, A.
+ * \param type vector float type to use for coords, etc.
+ */
void
lp_build_sample_soa(LLVMBuilderRef builder,
const struct lp_sampler_static_state *static_state,
@@ -530,13 +1986,19 @@ lp_build_sample_soa(LLVMBuilderRef builder,
LLVMValueRef *texel)
{
struct lp_build_sample_context bld;
- LLVMValueRef width;
- LLVMValueRef height;
- LLVMValueRef stride;
- LLVMValueRef data_ptr;
+ LLVMValueRef width, width_vec;
+ LLVMValueRef height, height_vec;
+ LLVMValueRef depth, depth_vec;
+ LLVMValueRef stride_array;
+ LLVMValueRef data_array;
LLVMValueRef s;
LLVMValueRef t;
- LLVMValueRef p;
+ LLVMValueRef r;
+
+ (void) lp_build_lod_selector; /* temporary to silence warning */
+ (void) lp_build_nearest_mip_level;
+ (void) lp_build_linear_mip_levels;
+ (void) lp_build_minify;
/* Setup our build context */
memset(&bld, 0, sizeof bld);
@@ -544,54 +2006,55 @@ lp_build_sample_soa(LLVMBuilderRef builder,
bld.static_state = static_state;
bld.dynamic_state = dynamic_state;
bld.format_desc = util_format_description(static_state->format);
+
+ bld.float_type = lp_type_float(32);
+ bld.int_type = lp_type_int(32);
bld.coord_type = type;
+ bld.uint_coord_type = lp_uint_type(type);
bld.int_coord_type = lp_int_type(type);
bld.texel_type = type;
+
+ lp_build_context_init(&bld.float_bld, builder, bld.float_type);
+ lp_build_context_init(&bld.int_bld, builder, bld.int_type);
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
+ lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
/* Get the dynamic state */
width = dynamic_state->width(dynamic_state, builder, unit);
height = dynamic_state->height(dynamic_state, builder, unit);
- stride = dynamic_state->stride(dynamic_state, builder, unit);
- data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
+ depth = dynamic_state->depth(dynamic_state, builder, unit);
+ stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+ data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
+ /* Note that data_array is an array[level] of pointers to texture images */
s = coords[0];
t = coords[1];
- p = coords[2];
-
- width = lp_build_broadcast_scalar(&bld.int_coord_bld, width);
- height = lp_build_broadcast_scalar(&bld.int_coord_bld, height);
- stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride);
-
- if(static_state->target == PIPE_TEXTURE_1D)
- t = bld.coord_bld.zero;
-
- if(static_state->normalized_coords) {
- LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type);
- LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, "");
- LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, "");
- s = lp_build_mul(&bld.coord_bld, s, fp_width);
- t = lp_build_mul(&bld.coord_bld, t, fp_height);
+ r = coords[2];
+
+ width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
+ height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
+ depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
+
+ if (lp_format_is_rgba8(bld.format_desc) &&
+ static_state->target == PIPE_TEXTURE_2D &&
+ static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
+ static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
+ static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
+ is_simple_wrap_mode(static_state->wrap_s) &&
+ is_simple_wrap_mode(static_state->wrap_t)) {
+ /* special case */
+ lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
+ stride_array, data_array, texel);
}
-
- switch (static_state->min_img_filter) {
- case PIPE_TEX_FILTER_NEAREST:
- lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
- break;
- case PIPE_TEX_FILTER_LINEAR:
- if(lp_format_is_rgba8(bld.format_desc))
- lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
- else
- lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
- break;
- default:
- assert(0);
+ else {
+ lp_build_sample_general(&bld, unit, s, t, r,
+ width, height, depth,
+ width_vec, height_vec, depth_vec,
+ stride_array, NULL, data_array,
+ texel);
}
- /* FIXME: respect static_state->min_mip_filter */;
- /* FIXME: respect static_state->mag_img_filter */;
-
- lp_build_sample_compare(&bld, p, texel);
+ lp_build_sample_compare(&bld, r, texel);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
index 740392f561..34478c10f5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
@@ -37,7 +37,7 @@
#define LP_BLD_STRUCT_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
#include <llvm-c/Target.h>
#include "util/u_debug.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
index b9472127a6..57b5cc079f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -37,7 +37,7 @@
#define LP_BLD_SWIZZLE_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
struct lp_type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index eddb7a83fa..0f2f8a65b1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -35,7 +35,7 @@
#ifndef LP_BLD_TGSI_H
#define LP_BLD_TGSI_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
struct tgsi_token;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 4cf28a9f93..5ec59d636c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -41,6 +41,7 @@
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
@@ -52,6 +53,7 @@
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_tgsi.h"
+#include "lp_bld_debug.h"
#define LP_MAX_TEMPS 256
@@ -81,6 +83,34 @@
#define QUAD_BOTTOM_LEFT 2
#define QUAD_BOTTOM_RIGHT 3
+#define LP_TGSI_MAX_NESTING 16
+
+struct lp_exec_mask {
+ struct lp_build_context *bld;
+
+ boolean has_mask;
+
+ LLVMTypeRef int_vec_type;
+
+ LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
+ int cond_stack_size;
+ LLVMValueRef cond_mask;
+
+ LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
+ int break_stack_size;
+ LLVMValueRef break_mask;
+
+ LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
+ int cont_stack_size;
+ LLVMValueRef cont_mask;
+
+ LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
+ int loop_stack_size;
+ LLVMBasicBlockRef loop_block;
+
+
+ LLVMValueRef exec_mask;
+};
struct lp_build_tgsi_soa_context
{
@@ -97,9 +127,9 @@ struct lp_build_tgsi_soa_context
LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
struct lp_build_mask_context *mask;
+ struct lp_exec_mask exec_mask;
};
-
static const unsigned char
swizzle_left[4] = {
QUAD_TOP_LEFT, QUAD_TOP_LEFT,
@@ -124,6 +154,174 @@ swizzle_bottom[4] = {
QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
};
+static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
+{
+ mask->bld = bld;
+ mask->has_mask = FALSE;
+ mask->cond_stack_size = 0;
+ mask->loop_stack_size = 0;
+ mask->break_stack_size = 0;
+ mask->cont_stack_size = 0;
+
+ mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
+}
+
+static void lp_exec_mask_update(struct lp_exec_mask *mask)
+{
+ if (mask->loop_stack_size) {
+ /*for loops we need to update the entire mask at
+ * runtime */
+ LLVMValueRef tmp;
+ tmp = LLVMBuildAnd(mask->bld->builder,
+ mask->cont_mask,
+ mask->break_mask,
+ "maskcb");
+ mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cond_mask,
+ tmp,
+ "maskfull");
+ } else
+ mask->exec_mask = mask->cond_mask;
+
+
+ mask->has_mask = (mask->cond_stack_size > 0 ||
+ mask->loop_stack_size > 0);
+}
+
+static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
+ LLVMValueRef val)
+{
+ mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
+ mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
+ mask->int_vec_type, "");
+
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
+{
+ LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
+ LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
+ mask->cond_mask, "");
+
+ /* means that we didn't have any mask before and that
+ * we were fully enabled */
+ if (mask->cond_stack_size <= 1) {
+ prev_mask = LLVMConstAllOnes(mask->int_vec_type);
+ }
+
+ mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
+ inv_mask,
+ prev_mask, "");
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
+{
+ mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_bgnloop(struct lp_exec_mask *mask)
+{
+
+ if (mask->cont_stack_size == 0)
+ mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
+ if (mask->cont_stack_size == 0)
+ mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
+ if (mask->cond_stack_size == 0)
+ mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
+ mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
+ mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
+ LLVMBuildBr(mask->bld->builder, mask->loop_block);
+ LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
+
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_break(struct lp_exec_mask *mask)
+{
+ LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
+ mask->exec_mask,
+ "break");
+
+ mask->break_stack[mask->break_stack_size++] = mask->break_mask;
+ if (mask->break_stack_size > 1) {
+ mask->break_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->break_mask,
+ exec_mask, "break_full");
+ } else
+ mask->break_mask = exec_mask;
+
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_continue(struct lp_exec_mask *mask)
+{
+ LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
+ mask->exec_mask,
+ "");
+
+ mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
+ if (mask->cont_stack_size > 1) {
+ mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cont_mask,
+ exec_mask, "");
+ } else
+ mask->cont_mask = exec_mask;
+
+ lp_exec_mask_update(mask);
+}
+
+
+static void lp_exec_endloop(struct lp_exec_mask *mask)
+{
+ LLVMBasicBlockRef endloop;
+ LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
+ mask->bld->type.length);
+ /* i1cond = (mask == 0) */
+ LLVMValueRef i1cond = LLVMBuildICmp(
+ mask->bld->builder,
+ LLVMIntNE,
+ LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
+ LLVMConstNull(reg_type), "");
+
+ endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
+
+ LLVMBuildCondBr(mask->bld->builder,
+ i1cond, mask->loop_block, endloop);
+
+ LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
+
+ mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
+ /* pop the break mask */
+ if (mask->cont_stack_size) {
+ mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
+ }
+ if (mask->break_stack_size) {
+ mask->break_mask = mask->cont_stack[--mask->break_stack_size];
+ }
+
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_mask_store(struct lp_exec_mask *mask,
+ LLVMValueRef val,
+ LLVMValueRef dst)
+{
+ if (mask->has_mask) {
+ LLVMValueRef real_val, dst_val;
+
+ dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
+ real_val = lp_build_select(mask->bld,
+ mask->exec_mask,
+ val, dst_val);
+
+ LLVMBuildStore(mask->bld->builder, real_val, dst);
+ } else
+ LLVMBuildStore(mask->bld->builder, val, dst);
+}
+
static LLVMValueRef
emit_ddx(struct lp_build_tgsi_soa_context *bld,
@@ -287,13 +485,13 @@ emit_store(
switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
- LLVMBuildStore(bld->base.builder, value,
- bld->outputs[reg->Register.Index][chan_index]);
+ lp_exec_mask_store(&bld->exec_mask, value,
+ bld->outputs[reg->Register.Index][chan_index]);
break;
case TGSI_FILE_TEMPORARY:
- LLVMBuildStore(bld->base.builder, value,
- bld->temps[reg->Register.Index][chan_index]);
+ lp_exec_mask_store(&bld->exec_mask, value,
+ bld->temps[reg->Register.Index][chan_index]);
break;
case TGSI_FILE_ADDRESS:
@@ -301,6 +499,11 @@ emit_store(
assert(0);
break;
+ case TGSI_FILE_PREDICATE:
+ /* FIXME */
+ assert(0);
+ break;
+
default:
assert( 0 );
}
@@ -498,6 +701,17 @@ emit_instruction(
if (indirect_temp_reference(inst))
return FALSE;
+ /*
+ * Stores and write masks are handled in a general fashion after the long
+ * instruction opcode switch statement.
+ *
+ * Although not stricitly necessary, we avoid generating instructions for
+ * channels which won't be stored, in cases where's that easy. For some
+ * complex instructions, like texture sampling, it is more convenient to
+ * assume a full writemask and then let LLVM optimization passes eliminate
+ * redundant code.
+ */
+
assert(info->num_dst <= 1);
if(info->num_dst) {
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
@@ -1265,15 +1479,16 @@ emit_instruction(
case TGSI_OPCODE_TXP:
emit_tex( bld, inst, FALSE, TRUE, dst0 );
break;
-
+
case TGSI_OPCODE_BRK:
- /* FIXME */
- return 0;
+ lp_exec_break(&bld->exec_mask);
break;
case TGSI_OPCODE_IF:
- /* FIXME */
- return 0;
+ tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
+ tmp0, bld->base.zero);
+ lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
break;
case TGSI_OPCODE_BGNFOR:
@@ -1282,6 +1497,10 @@ emit_instruction(
return 0;
break;
+ case TGSI_OPCODE_BGNLOOP:
+ lp_exec_bgnloop(&bld->exec_mask);
+ break;
+
case TGSI_OPCODE_REP:
/* deprecated */
assert(0);
@@ -1289,13 +1508,11 @@ emit_instruction(
break;
case TGSI_OPCODE_ELSE:
- /* FIXME */
- return 0;
+ lp_exec_mask_cond_invert(&bld->exec_mask);
break;
case TGSI_OPCODE_ENDIF:
- /* FIXME */
- return 0;
+ lp_exec_mask_cond_pop(&bld->exec_mask);
break;
case TGSI_OPCODE_ENDFOR:
@@ -1304,6 +1521,10 @@ emit_instruction(
return 0;
break;
+ case TGSI_OPCODE_ENDLOOP:
+ lp_exec_endloop(&bld->exec_mask);
+ break;
+
case TGSI_OPCODE_ENDREP:
/* deprecated */
assert(0);
@@ -1403,8 +1624,7 @@ emit_instruction(
break;
case TGSI_OPCODE_CONT:
- /* FIXME */
- return 0;
+ lp_exec_continue(&bld->exec_mask);
break;
case TGSI_OPCODE_EMIT:
@@ -1458,6 +1678,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
bld.consts_ptr = consts_ptr;
bld.sampler = sampler;
+ lp_exec_mask_init(&bld.exec_mask, &bld.base);
+
tgsi_parse_init( &parse, tokens );
while( !tgsi_parse_end_of_tokens( &parse ) ) {
@@ -1505,7 +1727,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
assert( 0 );
}
}
-
+ if (0) {
+ LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(block);
+ debug_printf("11111111111111111111111111111 \n");
+ tgsi_dump(tokens, 0);
+ LLVMDumpValue(function);
+ debug_printf("2222222222222222222222222222 \n");
+ }
tgsi_parse_free( &parse );
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c
index 8270cd057f..796af88caa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c
@@ -58,7 +58,10 @@ LLVMTypeRef
lp_build_vec_type(struct lp_type type)
{
LLVMTypeRef elem_type = lp_build_elem_type(type);
- return LLVMVectorType(elem_type, type.length);
+ if (type.length == 1)
+ return elem_type;
+ else
+ return LLVMVectorType(elem_type, type.length);
}
@@ -115,6 +118,9 @@ lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type)
if(!vec_type)
return FALSE;
+ if (type.length == 1)
+ return lp_check_elem_type(type, vec_type);
+
if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind)
return FALSE;
@@ -153,7 +159,10 @@ LLVMTypeRef
lp_build_int_vec_type(struct lp_type type)
{
LLVMTypeRef elem_type = lp_build_int_elem_type(type);
- return LLVMVectorType(elem_type, type.length);
+ if (type.length == 1)
+ return elem_type;
+ else
+ return LLVMVectorType(elem_type, type.length);
}
@@ -178,6 +187,25 @@ lp_build_int32_vec4_type(void)
}
+/**
+ * Create unsigned integer type variation of given type.
+ */
+struct lp_type
+lp_uint_type(struct lp_type type)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = type.width;
+ res_type.length = type.length;
+
+ return res_type;
+}
+
+
+/**
+ * Create signed integer type variation of given type.
+ */
struct lp_type
lp_int_type(struct lp_type type)
{
@@ -186,6 +214,7 @@ lp_int_type(struct lp_type type)
memset(&res_type, 0, sizeof res_type);
res_type.width = type.width;
res_type.length = type.length;
+ res_type.sign = 1;
return res_type;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 62ee05be4d..5b351476ac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -37,7 +37,7 @@
#define LP_BLD_TYPE_H
-#include <llvm-c/Core.h>
+#include "os/os_llvm.h"
#include <pipe/p_compiler.h>
@@ -103,7 +103,7 @@ struct lp_type {
unsigned width:14;
/**
- * Vector length.
+ * Vector length. If length==1, this is a scalar (float/int) type.
*
* width*length should be a power of two greater or equal to eight.
*
@@ -139,6 +139,7 @@ struct lp_build_context
};
+/** Create scalar float type */
static INLINE struct lp_type
lp_type_float(unsigned width)
{
@@ -148,12 +149,29 @@ lp_type_float(unsigned width)
res_type.floating = TRUE;
res_type.sign = TRUE;
res_type.width = width;
+ res_type.length = 1;
+
+ return res_type;
+}
+
+
+/** Create vector of float type */
+static INLINE struct lp_type
+lp_type_float_vec(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.floating = TRUE;
+ res_type.sign = TRUE;
+ res_type.width = width;
res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
return res_type;
}
+/** Create scalar int type */
static INLINE struct lp_type
lp_type_int(unsigned width)
{
@@ -162,12 +180,28 @@ lp_type_int(unsigned width)
memset(&res_type, 0, sizeof res_type);
res_type.sign = TRUE;
res_type.width = width;
+ res_type.length = 1;
+
+ return res_type;
+}
+
+
+/** Create vector int type */
+static INLINE struct lp_type
+lp_type_int_vec(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.sign = TRUE;
+ res_type.width = width;
res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
return res_type;
}
+/** Create scalar uint type */
static INLINE struct lp_type
lp_type_uint(unsigned width)
{
@@ -175,6 +209,20 @@ lp_type_uint(unsigned width)
memset(&res_type, 0, sizeof res_type);
res_type.width = width;
+ res_type.length = 1;
+
+ return res_type;
+}
+
+
+/** Create vector uint type */
+static INLINE struct lp_type
+lp_type_uint_vec(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = width;
res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
return res_type;
@@ -257,6 +305,10 @@ lp_build_int32_vec4_type(void);
struct lp_type
+lp_uint_type(struct lp_type type);
+
+
+struct lp_type
lp_int_type(struct lp_type type);