diff options
author | Zack Rusin <zackr@vmware.com> | 2010-03-15 15:24:38 -0400 |
---|---|---|
committer | Zack Rusin <zackr@vmware.com> | 2010-03-15 15:24:38 -0400 |
commit | 275c4bd3643d773210780cb8d578ca84f2604684 (patch) | |
tree | 8266edc39d4253ac0f2a0ecd41f560f3d815bb5c /src/gallium/auxiliary/gallivm | |
parent | c5c5cd7132e18f4aad8e73d8ee879f8823c4c1e7 (diff) | |
parent | d0b35352ed27b1e66785c45ee95a352ed06b47ce (diff) |
Merge remote branch 'origin/master' into gallium_draw_llvm
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
32 files changed, 2513 insertions, 314 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h index 634575670d..fe3cedcc48 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h @@ -35,7 +35,7 @@ #define LP_BLD_ALPHA_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct pipe_alpha_state; struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 54b31befe6..233a36669d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -232,6 +232,37 @@ lp_build_add(struct lp_build_context *bld, } +/** Return the sum of the elements of a */ +LLVMValueRef +lp_build_sum_vector(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMValueRef index, res; + int i; + + if (a == bld->zero) + return bld->zero; + if (a == bld->undef) + return bld->undef; + assert(type.length > 1); + + assert(!bld->type.norm); + + index = LLVMConstInt(LLVMInt32Type(), 0, 0); + res = LLVMBuildExtractElement(bld->builder, a, index, ""); + + for (i = 1; i < type.length; i++) { + index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, a, index, ""), + ""); + } + + return res; +} + + /** * Generate a - b */ @@ -614,6 +645,22 @@ lp_build_max(struct lp_build_context *bld, /** + * Generate clamp(a, min, max) + * Do checks for special cases. + */ +LLVMValueRef +lp_build_clamp(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef min, + LLVMValueRef max) +{ + a = lp_build_min(bld, a, max); + a = lp_build_max(bld, a, min); + return a; +} + + +/** * Generate abs(a) */ LLVMValueRef @@ -628,13 +675,26 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - a = LLVMBuildAnd(bld->builder, a, mask, ""); - a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); - return a; + if (type.length == 1) { + LLVMTypeRef int_type = LLVMIntType(type.width); + LLVMTypeRef float_type = LLVMFloatType(); + unsigned long long absMask = ~(1ULL << (type.width - 1)); + LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0); + a = LLVMBuildBitCast(bld->builder, a, int_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, float_type, ""); + return a; + } + else { + /* vector of floats */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + unsigned long long absMask = ~(1ULL << (type.width - 1)); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } } if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { @@ -653,11 +713,19 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef +lp_build_negate(struct lp_build_context *bld, + LLVMValueRef a) +{ + return LLVMBuildNeg(bld->builder, a, ""); +} + + +/** Return -1, 0 or +1 depending on the sign of a */ +LLVMValueRef lp_build_sgn(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef cond; LLVMValueRef res; @@ -667,14 +735,29 @@ lp_build_sgn(struct lp_build_context *bld, res = bld->one; } else if(type.floating) { - /* Take the sign bit and add it to 1 constant */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMTypeRef vec_type; + LLVMTypeRef int_type; + LLVMValueRef mask; LLVMValueRef sign; LLVMValueRef one; - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + unsigned long long maskBit = (unsigned long long)1 << (type.width - 1); + + if (type.length == 1) { + int_type = lp_build_int_elem_type(type); + vec_type = lp_build_elem_type(type); + mask = LLVMConstInt(int_type, maskBit, 0); + } + else { + /* vector */ + int_type = lp_build_int_vec_type(type); + vec_type = lp_build_vec_type(type); + mask = lp_build_int_const_scalar(type, maskBit); + } + + /* Take the sign bit and add it to 1 constant */ + sign = LLVMBuildBitCast(bld->builder, a, int_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - one = LLVMConstBitCast(bld->one, int_vec_type); + one = LLVMConstBitCast(bld->one, int_type); res = LLVMBuildOr(bld->builder, sign, one, ""); res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); } @@ -687,12 +770,74 @@ lp_build_sgn(struct lp_build_context *bld, /* Handle zero */ cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); - res = lp_build_select(bld, cond, bld->zero, bld->one); + res = lp_build_select(bld, cond, bld->zero, res); return res; } +/** + * Set the sign of float vector 'a' according to 'sign'. + * If sign==0, return abs(a). + * If sign==1, return -abs(a); + * Other values for sign produce undefined results. + */ +LLVMValueRef +lp_build_set_sign(struct lp_build_context *bld, + LLVMValueRef a, LLVMValueRef sign) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1); + LLVMValueRef mask = lp_build_int_const_scalar(type, + ~((unsigned long long) 1 << (type.width - 1))); + LLVMValueRef val, res; + + assert(type.floating); + + /* val = reinterpret_cast<int>(a) */ + val = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + /* val = val & mask */ + val = LLVMBuildAnd(bld->builder, val, mask, ""); + /* sign = sign << shift */ + sign = LLVMBuildShl(bld->builder, sign, shift, ""); + /* res = val | sign */ + res = LLVMBuildOr(bld->builder, val, sign, ""); + /* res = reinterpret_cast<float>(res) */ + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + + return res; +} + + +/** + * Convert vector of (or scalar) int to vector of (or scalar) float. + */ +LLVMValueRef +lp_build_int_to_float(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + /*assert(lp_check_value(type, a));*/ + + if (type.length == 1) { + LLVMTypeRef float_type = LLVMFloatType(); + return LLVMBuildSIToFP(bld->builder, a, float_type, ""); + } + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/ + LLVMValueRef res; + res = LLVMBuildSIToFP(bld->builder, a, vec_type, ""); + return res; + } +} + + + enum lp_build_round_sse41_mode { LP_BUILD_ROUND_SSE41_NEAREST = 0, @@ -784,6 +929,13 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); + if (type.length == 1) { + LLVMValueRef res; + res = lp_build_ifloor(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), ""); + return res; + } + if(util_cpu_caps.has_sse4_1) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); else { @@ -818,23 +970,45 @@ lp_build_ceil(struct lp_build_context *bld, /** + * Return fractional part of 'a' computed as a - floor(f) + * Typically used in texture coord arithmetic. + */ +LLVMValueRef +lp_build_fract(struct lp_build_context *bld, + LLVMValueRef a) +{ + assert(bld->type.floating); + return lp_build_sub(bld, a, lp_build_floor(bld, a)); +} + + +/** * Convert to integer, through whichever rounding method that's fastest, - * typically truncating to zero. + * typically truncating toward zero. */ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); assert(type.floating); - assert(lp_check_value(type, a)); - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + if (type.length == 1) { + LLVMTypeRef int_type = LLVMIntType(type.width); + return LLVMBuildFPToSI(bld->builder, a, int_type, ""); + } + else { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + assert(lp_check_value(type, a)); + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + } } +/** + * Convert float[] to int[] with round(). + */ LLVMValueRef lp_build_iround(struct lp_build_context *bld, LLVMValueRef a) @@ -844,6 +1018,15 @@ lp_build_iround(struct lp_build_context *bld, LLVMValueRef res; assert(type.floating); + + if (type.length == 1) { + /* scalar float to int */ + LLVMTypeRef int_type = LLVMIntType(type.width); + /* XXX we want rounding here! */ + res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); + return res; + } + assert(lp_check_value(type, a)); if(util_cpu_caps.has_sse4_1) { @@ -886,6 +1069,14 @@ lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef res; assert(type.floating); + + if (type.length == 1) { + /* scalar float to int */ + LLVMTypeRef int_type = LLVMIntType(type.width); + res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); + return res; + } + assert(lp_check_value(type, a)); if(util_cpu_caps.has_sse4_1) { @@ -1112,6 +1303,7 @@ lp_build_polynomial(struct lp_build_context *bld, unsigned num_coeffs) { const struct lp_type type = bld->type; + LLVMTypeRef float_type = LLVMFloatType(); LLVMValueRef res = NULL; unsigned i; @@ -1121,7 +1313,13 @@ lp_build_polynomial(struct lp_build_context *bld, __FUNCTION__); for (i = num_coeffs; i--; ) { - LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]); + LLVMValueRef coeff; + + if (type.length == 1) + coeff = LLVMConstReal(float_type, coeffs[i]); + else + coeff = lp_build_const_scalar(type, coeffs[i]); + if(res) res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); else @@ -1315,11 +1513,87 @@ lp_build_log2_approx(struct lp_build_context *bld, } +/** scalar version of above function */ +static void +lp_build_float_log2_approx(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef *p_exp, + LLVMValueRef *p_floor_log2, + LLVMValueRef *p_log2) +{ + const struct lp_type type = bld->type; + LLVMTypeRef float_type = LLVMFloatType(); + LLVMTypeRef int_type = LLVMIntType(type.width); + + LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0); + LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0); + LLVMValueRef one = LLVMConstBitCast(bld->one, int_type); + + LLVMValueRef i = NULL; + LLVMValueRef exp = NULL; + LLVMValueRef mant = NULL; + LLVMValueRef logexp = NULL; + LLVMValueRef logmant = NULL; + LLVMValueRef res = NULL; + + if(p_exp || p_floor_log2 || p_log2) { + /* TODO: optimize the constant case */ + if(LLVMIsConstant(x)) + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); + + assert(type.floating && type.width == 32); + + i = LLVMBuildBitCast(bld->builder, x, int_type, ""); + + /* exp = (float) exponent(x) */ + exp = LLVMBuildAnd(bld->builder, i, expmask, ""); + } + + if(p_floor_log2 || p_log2) { + LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0); + LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0); + logexp = LLVMBuildLShr(bld->builder, exp, c23, ""); + logexp = LLVMBuildSub(bld->builder, logexp, c127, ""); + logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, ""); + } + + if(p_log2) { + /* mant = (float) mantissa(x) */ + mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); + mant = LLVMBuildOr(bld->builder, mant, one, ""); + mant = LLVMBuildBitCast(bld->builder, mant, float_type, ""); + + logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, + Elements(lp_build_log2_polynomial)); + + /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ + logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); + + res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); + } + + if(p_exp) + *p_exp = exp; + + if(p_floor_log2) + *p_floor_log2 = logexp; + + if(p_log2) + *p_log2 = res; +} + + LLVMValueRef lp_build_log2(struct lp_build_context *bld, LLVMValueRef x) { LLVMValueRef res; - lp_build_log2_approx(bld, x, NULL, NULL, &res); + if (bld->type.length == 1) { + lp_build_float_log2_approx(bld, x, NULL, NULL, &res); + } + else { + lp_build_log2_approx(bld, x, NULL, NULL, &res); + } return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 62be4b9aee..7a10fe1220 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -37,7 +37,7 @@ #define LP_BLD_ARIT_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; @@ -57,6 +57,10 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_sum_vector(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); @@ -107,14 +111,32 @@ lp_build_max(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_clamp(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef min, + LLVMValueRef max); + +LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_negate(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sgn(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_set_sign(struct lp_build_context *bld, + LLVMValueRef a, LLVMValueRef sign); + +LLVMValueRef +lp_build_int_to_float(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_round(struct lp_build_context *bld, LLVMValueRef a); @@ -131,6 +153,10 @@ lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_fract(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h index da272e549f..5a9e1c1fb2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_blend.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h @@ -40,7 +40,7 @@ * for a standalone example. */ -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index c8eaa8c394..8a275fa72f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -221,8 +221,16 @@ lp_build_undef(struct lp_type type) LLVMValueRef lp_build_zero(struct lp_type type) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - return LLVMConstNull(vec_type); + if (type.length == 1) { + if (type.floating) + return LLVMConstReal(LLVMFloatType(), 0.0); + else + return LLVMConstInt(LLVMIntType(type.width), 0, 0); + } + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + return LLVMConstNull(vec_type); + } } @@ -264,10 +272,16 @@ lp_build_one(struct lp_type type) for(i = 1; i < type.length; ++i) elems[i] = elems[0]; - return LLVMConstVector(elems, type.length); + if (type.length == 1) + return elems[0]; + else + return LLVMConstVector(elems, type.length); } +/** + * Build constant-valued vector from a scalar value. + */ LLVMValueRef lp_build_const_scalar(struct lp_type type, double val) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index cb8e1c7b00..4078636103 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -37,7 +37,7 @@ #define LP_BLD_CONST_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <pipe/p_compiler.h> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h index 948e68fae4..78e8155ff7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h @@ -37,7 +37,7 @@ #define LP_BLD_CONV_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 583e6132b4..441ad94786 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -30,7 +30,7 @@ #define LP_BLD_DEBUG_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_compiler.h" #include "util/u_string.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c index d438c0e63d..f08f8eb6d8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.c @@ -171,7 +171,7 @@ lp_build_depth_test(LLVMBuilderRef builder, unsigned padding_right; unsigned chan; - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits); assert(format_desc->channel[z_swizzle].normalized); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h index 79d6981bb5..8be80024ae 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h @@ -36,7 +36,7 @@ #define LP_BLD_DEPTH_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct pipe_depth_state; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index bc83138908..106fc03e46 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -308,7 +308,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) * Note: this function has no dependencies on the flow code and could * be used elsewhere. */ -static LLVMBasicBlockRef +LLVMBasicBlockRef lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) { LLVMBasicBlockRef current_block; @@ -648,7 +648,9 @@ lp_build_if(struct lp_build_if_state *ctx, ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); /* add add the initial value of the var from the entry block */ - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1); + if (!LLVMIsUndef(*flow->variables[i])) + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], + &ifthen->entry_block, 1); } /* create/insert true_block before merge_block */ @@ -695,18 +697,21 @@ lp_build_endif(struct lp_build_if_state *ctx) { struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; + LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder); unsigned i; ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); + /* Insert branch to the merge block from current block */ + LLVMBuildBr(ctx->builder, ifthen->merge_block); + if (ifthen->false_block) { LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); /* for each variable, update the Phi node with a (variable, block) pair */ for (i = 0; i < flow->num_variables; i++) { assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); - + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1); /* replace the variable ref with the phi function */ *flow->variables[i] = ifthen->phi[i]; } @@ -742,15 +747,18 @@ lp_build_endif(struct lp_build_if_state *ctx) ifthen->true_block, ifthen->merge_block); } - /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); - LLVMBuildBr(ctx->builder, ifthen->merge_block); + /* Insert branch from end of true_block to merge_block */ if (ifthen->false_block) { - /* Append an unconditional Br(anch) instruction on the false_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); LLVMBuildBr(ctx->builder, ifthen->merge_block); } - + else { + /* No else clause. + * Note that we've already inserted the branch at the end of + * true_block. See the very first LLVMBuildBr() call in this function. + */ + } /* Resume building code at end of the ifthen->merge_block */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 4c225a0d4f..e158836549 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -35,7 +35,7 @@ #define LP_BLD_FLOW_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; @@ -145,7 +145,9 @@ lp_build_else(struct lp_build_if_state *ctx); void lp_build_endif(struct lp_build_if_state *ctx); - + +LLVMBasicBlockRef +lp_build_insert_new_block(LLVMBuilderRef builder, const char *name); #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 970bee379f..8972c0dc17 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -34,7 +34,7 @@ * Pixel format helpers. */ -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index dfa080b853..a07f7418f2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -70,7 +70,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, unsigned i; /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); assert(desc->block.bits <= 32); @@ -189,7 +189,7 @@ lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, lp_build_context_init(&bld, builder, type); /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); assert(desc->block.bits <= 32); @@ -303,7 +303,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i, j; - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 64151d169d..abb27e4c32 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -92,9 +92,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, unsigned chan; /* FIXME: Support more formats */ - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH || - (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY && - format_desc->block.bits == format_desc->channel[0].size)); + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); assert(format_desc->block.bits <= 32); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_init.cpp index 6e79438ead..067397a520 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.cpp @@ -26,39 +26,34 @@ **************************************************************************/ -#include "pipe/p_config.h" +#include <llvm/Config/config.h> +#include <llvm/Target/TargetSelect.h> +#include <llvm/Target/TargetOptions.h> -#include "lp_bld_misc.h" +#include "pipe/p_config.h" +#include "lp_bld_init.h" -#ifndef LLVM_NATIVE_ARCH -namespace llvm { - extern void LinkInJIT(); -} +extern "C" void LLVMLinkInJIT(); -void -LLVMLinkInJIT(void) +extern "C" void +lp_build_init(void) { - llvm::LinkInJIT(); -} - - -extern "C" int X86TargetMachineModule; - - -int -LLVMInitializeNativeTarget(void) -{ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - X86TargetMachineModule = 1; +#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86) + /* + * This is mis-detected on some hardware / software combinations. + */ + llvm::StackAlignment = 4; + llvm::RealignStack = true; #endif - return 0; -} + /* Same as LLVMInitializeNativeTarget(); */ + llvm::InitializeNativeTarget(); -#endif + LLVMLinkInJIT(); +} /* @@ -69,7 +64,6 @@ LLVMInitializeNativeTarget(void) */ #if defined(_MSC_VER) && defined(_DEBUG) #include <crtdefs.h> -extern "C" { - _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} -} +extern "C" _CRTIMP void __cdecl +_invalid_parameter_noinfo(void) {} #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index 0e787e0b9c..07f50d1c43 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -26,26 +26,17 @@ **************************************************************************/ -#ifndef LP_BLD_MISC_H -#define LP_BLD_MISC_H +#ifndef LP_BLD_INIT_H +#define LP_BLD_INIT_H -#include "llvm/Config/config.h" - #ifdef __cplusplus extern "C" { #endif -#ifndef LLVM_NATIVE_ARCH - void -LLVMLinkInJIT(void); - -int -LLVMInitializeNativeTarget(void); - -#endif /* !LLVM_NATIVE_ARCH */ +lp_build_init(void); #ifdef __cplusplus @@ -53,4 +44,4 @@ LLVMInitializeNativeTarget(void); #endif -#endif /* !LP_BLD_MISC_H */ +#endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h index ca958cdf34..177b5e943e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h @@ -41,7 +41,7 @@ #define LP_BLD_INTERP_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index f813f27074..7d5506c733 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -37,7 +37,7 @@ #define LP_BLD_INTR_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" /** diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 41ac81b744..f3df3dd138 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -42,9 +42,30 @@ #include "lp_bld_logic.h" +/* + * XXX + * + * Selection with vector conditional like + * + * select <4 x i1> %C, %A, %B + * + * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not + * supported on any backend. + * + * Expanding the boolean vector to full SIMD register width, as in + * + * sext <4 x i1> %C to <4 x i32> + * + * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but + * it causes assertion failures in LLVM 2.6. It appears to work correctly on + * LLVM 2.7. + */ + + /** * Build code to compare two values 'a' and 'b' of 'type' using the given func. * \param func one of PIPE_FUNC_x + * The result values will be 0 for false or ~0 for true. */ LLVMValueRef lp_build_compare(LLVMBuilderRef builder, @@ -53,13 +74,11 @@ lp_build_compare(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b) { - LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); LLVMValueRef cond; LLVMValueRef res; - unsigned i; assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); @@ -73,10 +92,12 @@ lp_build_compare(LLVMBuilderRef builder, /* XXX: It is not clear if we should use the ordered or unordered operators */ +#if HAVE_LLVM < 0x0207 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating && util_cpu_caps.has_sse) { /* float[4] comparison */ + LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -146,6 +167,7 @@ lp_build_compare(LLVMBuilderRef builder, const char *pcmpgt; LLVMValueRef args[2]; LLVMValueRef res; + LLVMTypeRef vec_type = lp_build_vec_type(type); switch (type.width) { case 8: @@ -197,8 +219,9 @@ lp_build_compare(LLVMBuilderRef builder, return res; } - } + } /* if (type.width * type.length == 128) */ #endif +#endif /* HAVE_LLVM < 0x0207 */ if(type.floating) { LLVMRealPredicate op; @@ -232,25 +255,33 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ +#if HAVE_LLVM >= 0x0207 cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); #else - debug_printf("%s: warning: using slow element-wise vector comparison\n", - __FUNCTION__); - res = LLVMGetUndef(int_vec_type); - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildFCmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); + if (type.length == 1) { + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); + } + else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + + debug_printf("%s: warning: using slow element-wise float" + " vector comparison\n", __FUNCTION__); + for (i = 0; i < type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + cond = LLVMBuildFCmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } } #endif } @@ -280,25 +311,34 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ +#if HAVE_LLVM >= 0x0207 cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); #else - debug_printf("%s: warning: using slow element-wise int vector comparison\n", - __FUNCTION__); - res = LLVMGetUndef(int_vec_type); - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildICmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); + if (type.length == 1) { + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); + } + else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + + debug_printf("%s: warning: using slow element-wise int" + " vector comparison\n", __FUNCTION__); + + for(i = 0; i < type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + cond = LLVMBuildICmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } } #endif } @@ -311,6 +351,7 @@ lp_build_compare(LLVMBuilderRef builder, /** * Build code to compare two values 'a' and 'b' using the given func. * \param func one of PIPE_FUNC_x + * The result values will be 0 for false or ~0 for true. */ LLVMValueRef lp_build_cmp(struct lp_build_context *bld, @@ -322,6 +363,11 @@ lp_build_cmp(struct lp_build_context *bld, } +/** + * Return mask ? a : b; + * + * mask is a bitwise mask, composed of 0 or ~0 for each element. + */ LLVMValueRef lp_build_select(struct lp_build_context *bld, LLVMValueRef mask, @@ -334,26 +380,32 @@ lp_build_select(struct lp_build_context *bld, if(a == b) return a; - if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + if (type.length == 1) { + mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); + res = LLVMBuildSelect(bld->builder, mask, a, b, ""); } + else { + if(type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + } - a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); - /* This often gets translated to PANDN, but sometimes the NOT is - * pre-computed and stored in another constant. The best strategy depends - * on available registers, so it is not a big deal -- hopefully LLVM does - * the right decision attending the rest of the program. - */ - b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); + /* This often gets translated to PANDN, but sometimes the NOT is + * pre-computed and stored in another constant. The best strategy depends + * on available registers, so it is not a big deal -- hopefully LLVM does + * the right decision attending the rest of the program. + */ + b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); - res = LLVMBuildOr(bld->builder, a, b, ""); + res = LLVMBuildOr(bld->builder, a, b, ""); - if(type.floating) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + if(type.floating) { + LLVMTypeRef vec_type = lp_build_vec_type(type); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } } return res; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index a399ebf39e..b54ec13b70 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -37,7 +37,7 @@ #define LP_BLD_LOGIC_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index bc360ad77a..23398f41f9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -256,7 +256,9 @@ lp_build_pack2(LLVMBuilderRef builder, LLVMValueRef lo, LLVMValueRef hi) { +#if HAVE_LLVM < 0x0207 LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); +#endif LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); LLVMValueRef shuffle; LLVMValueRef res; @@ -272,11 +274,14 @@ lp_build_pack2(LLVMBuilderRef builder, switch(src_type.width) { case 32: if(dst_type.sign) { +#if HAVE_LLVM >= 0x0207 + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", dst_vec_type, lo, hi); +#else res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); +#endif } else { if (util_cpu_caps.has_sse4_1) { - /* PACKUSDW is the only instrinsic with a consistent signature */ return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); } else { @@ -288,9 +293,17 @@ lp_build_pack2(LLVMBuilderRef builder, case 16: if(dst_type.sign) +#if HAVE_LLVM >= 0x0207 + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", dst_vec_type, lo, hi); +#else res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); +#endif else +#if HAVE_LLVM >= 0x0207 + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", dst_vec_type, lo, hi); +#else res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); +#endif break; default: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index fb2a34984a..346a17d580 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -37,7 +37,7 @@ #define LP_BLD_PACK_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index a133b56ac5..2f74aa5e00 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -44,6 +44,11 @@ #include "lp_bld_sample.h" +/** + * Initialize lp_sampler_static_state object with the gallium sampler + * and texture state. + * The former is considered to be static and the later dynamic. + */ void lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_texture *texture, @@ -57,6 +62,18 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, if(!sampler) return; + /* + * We don't copy sampler state over unless it is actually enabled, to avoid + * spurious recompiles, as the sampler static state is part of the shader + * key. + * + * Ideally the state tracker or cso_cache module would make all state + * canonical, but until that happens it's better to be safe than sorry here. + * + * XXX: Actually there's much more than can be done here, especially + * regarding 1D/2D/3D/CUBE textures, wrap modes, etc. + */ + state->format = texture->format; state->target = texture->target; state->pot_width = util_is_pot(texture->width0); @@ -67,13 +84,26 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->wrap_t = sampler->wrap_t; state->wrap_r = sampler->wrap_r; state->min_img_filter = sampler->min_img_filter; - state->min_mip_filter = sampler->min_mip_filter; state->mag_img_filter = sampler->mag_img_filter; + if (texture->last_level) { + state->min_mip_filter = sampler->min_mip_filter; + } else { + state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + } + state->compare_mode = sampler->compare_mode; - if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { - state->compare_func = sampler->compare_func; + if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { + state->compare_func = sampler->compare_func; } + state->normalized_coords = sampler->normalized_coords; + state->lod_bias = sampler->lod_bias; + state->min_lod = sampler->min_lod; + state->max_lod = sampler->max_lod; + state->border_color[0] = sampler->border_color[0]; + state->border_color[1] = sampler->border_color[1]; + state->border_color[2] = sampler->border_color[2]; + state->border_color[3] = sampler->border_color[3]; } @@ -129,15 +159,16 @@ lp_build_gather(LLVMBuilderRef builder, /** * Compute the offset of a pixel. * - * x, y, y_stride are vectors + * x, y, z, y_stride, z_stride are vectors */ LLVMValueRef lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef data_ptr) + LLVMValueRef z_stride) { LLVMValueRef x_stride; LLVMValueRef offset; @@ -153,6 +184,10 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef y_offset_lo, y_offset_hi; LLVMValueRef offset_lo, offset_hi; + /* XXX 1D & 3D addressing not done yet */ + assert(!z); + assert(!z_stride); + x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); @@ -176,13 +211,17 @@ lp_build_sample_offset(struct lp_build_context *bld, offset = lp_build_add(bld, offset_hi, offset_lo); } else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; + offset = lp_build_mul(bld, x, x_stride); - x_offset = lp_build_mul(bld, x, x_stride); - y_offset = lp_build_mul(bld, y, y_stride); + if (y && y_stride) { + LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); + offset = lp_build_add(bld, offset, y_offset); + } - offset = lp_build_add(bld, x_offset, y_offset); + if (z && z_stride) { + LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); + offset = lp_build_add(bld, offset, z_offset); + } } return offset; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 39edcf13d1..7f08bfaac1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -36,7 +36,7 @@ #define LP_BLD_SAMPLE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct pipe_texture; struct pipe_sampler_state; @@ -70,6 +70,8 @@ struct lp_sampler_static_state unsigned compare_mode:1; unsigned compare_func:3; unsigned normalized_coords:1; + float lod_bias, min_lod, max_lod; + float border_color[4]; }; @@ -98,10 +100,22 @@ struct lp_sampler_dynamic_state LLVMBuilderRef builder, unsigned unit); + /** Obtain the base texture depth. */ LLVMValueRef - (*stride)( struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, - unsigned unit); + (*depth)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + /** Obtain the number of mipmap levels (minus one). */ + LLVMValueRef + (*last_level)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*row_stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); LLVMValueRef (*data_ptr)( struct lp_sampler_dynamic_state *state, @@ -134,8 +148,9 @@ lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef data_ptr); + LLVMValueRef z_stride); void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index e268862282..9741dbb389 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -48,6 +48,7 @@ #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" +#include "lp_bld_flow.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" @@ -65,11 +66,23 @@ struct lp_build_sample_context const struct util_format_description *format_desc; + /** regular scalar float type */ + struct lp_type float_type; + struct lp_build_context float_bld; + + /** regular scalar float type */ + struct lp_type int_type; + struct lp_build_context int_bld; + /** Incoming coordinates type and build context */ struct lp_type coord_type; struct lp_build_context coord_bld; - /** Integer coordinates */ + /** Unsigned integer coordinates */ + struct lp_type uint_coord_type; + struct lp_build_context uint_coord_bld; + + /** Signed integer coordinates */ struct lp_type int_coord_type; struct lp_build_context int_coord_bld; @@ -79,36 +92,212 @@ struct lp_build_sample_context }; +/** + * Does the given texture wrap mode allow sampling the texture border color? + * XXX maybe move this into gallium util code. + */ +static boolean +wrap_mode_uses_border_color(unsigned mode) +{ + switch (mode) { + case PIPE_TEX_WRAP_REPEAT: + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return FALSE; + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return TRUE; + default: + assert(0 && "unexpected wrap mode"); + return FALSE; + } +} + + +static LLVMValueRef +lp_build_get_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, LLVMValueRef level) +{ + LLVMValueRef indexes[2], data_ptr; + indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[1] = level; + data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); + data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); + return data_ptr; +} + + +static LLVMValueRef +lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, int level) +{ + LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); + return lp_build_get_mipmap_level(bld, data_array, lvl); +} + + +/** + * Dereference stride_array[mipmap_level] array to get a stride. + * Return stride as a vector. + */ +static LLVMValueRef +lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, + LLVMValueRef stride_array, LLVMValueRef level) +{ + LLVMValueRef indexes[2], stride; + indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[1] = level; + stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, ""); + stride = LLVMBuildLoad(bld->builder, stride, ""); + stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); + return stride; +} + + +/** Dereference stride_array[0] array to get a stride (as vector). */ +static LLVMValueRef +lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld, + LLVMValueRef stride_array, int level) +{ + LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); + return lp_build_get_level_stride_vec(bld, stride_array, lvl); +} + + +static int +texture_dims(enum pipe_texture_target tex) +{ + switch (tex) { + case PIPE_TEXTURE_1D: + return 1; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + return 2; + case PIPE_TEXTURE_3D: + return 3; + default: + assert(0 && "bad texture target in texture_dims()"); + return 2; + } +} + + + +/** + * Generate code to fetch a texel from a texture at int coords (x, y, z). + * The computation depends on whether the texture is 1D, 2D or 3D. + * The result, texel, will be: + * texel[0] = red values + * texel[1] = green values + * texel[2] = blue values + * texel[3] = alpha values + */ static void lp_build_sample_texel_soa(struct lp_build_sample_context *bld, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, + LLVMValueRef z_stride, LLVMValueRef data_ptr, LLVMValueRef *texel) { + const int dims = texture_dims(bld->static_state->target); + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef offset; LLVMValueRef packed; + LLVMValueRef use_border = NULL; + + /* use_border = x < 0 || x >= width || y < 0 || y >= height */ + if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) { + LLVMValueRef b1, b2; + b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); + b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); + use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); + } + + if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) { + LLVMValueRef b1, b2; + b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); + b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); + if (use_border) { + use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); + use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2"); + } + else { + use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); + } + } + + if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) { + LLVMValueRef b1, b2; + b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); + b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); + if (use_border) { + use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); + use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2"); + } + else { + use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); + } + } - offset = lp_build_sample_offset(&bld->int_coord_bld, + /* + * Note: if we find an app which frequently samples the texture border + * we might want to implement a true conditional here to avoid sampling + * the texture whenever possible (since that's quite a bit of code). + * Ex: + * if (use_border) { + * texel = border_color; + * } + * else { + * texel = sample_texture(coord); + * } + * As it is now, we always sample the texture, then selectively replace + * the texel color results with the border color. + */ + + /* convert x,y,z coords to linear offset from start of texture, in bytes */ + offset = lp_build_sample_offset(&bld->uint_coord_bld, bld->format_desc, - x, y, y_stride, - data_ptr); + x, y, z, y_stride, z_stride); assert(bld->format_desc->block.width == 1); assert(bld->format_desc->block.height == 1); assert(bld->format_desc->block.bits <= bld->texel_type.width); + /* gather the texels from the texture */ packed = lp_build_gather(bld->builder, bld->texel_type.length, bld->format_desc->block.bits, bld->texel_type.width, data_ptr, offset); + texel[0] = texel[1] = texel[2] = texel[3] = NULL; + + /* convert texels to float rgba */ lp_build_unpack_rgba_soa(bld->builder, bld->format_desc, bld->texel_type, packed, texel); + + if (use_border) { + /* select texel color or border color depending on use_border */ + int chan; + for (chan = 0; chan < 4; chan++) { + LLVMValueRef border_chan = + lp_build_const_scalar(bld->texel_type, + bld->static_state->border_color[chan]); + texel[chan] = lp_build_select(&bld->texel_bld, use_border, + border_chan, texel[chan]); + } + } } @@ -117,19 +306,22 @@ lp_build_sample_packed(struct lp_build_sample_context *bld, LLVMValueRef x, LLVMValueRef y, LLVMValueRef y_stride, - LLVMValueRef data_ptr) + LLVMValueRef data_array) { LLVMValueRef offset; + LLVMValueRef data_ptr; - offset = lp_build_sample_offset(&bld->int_coord_bld, + offset = lp_build_sample_offset(&bld->uint_coord_bld, bld->format_desc, - x, y, y_stride, - data_ptr); + x, y, NULL, y_stride, NULL); assert(bld->format_desc->block.width == 1); assert(bld->format_desc->block.height == 1); assert(bld->format_desc->block.bits <= bld->texel_type.width); + /* get pointer to mipmap level 0 data */ + data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); + return lp_build_gather(bld->builder, bld->texel_type.length, bld->format_desc->block.bits, @@ -138,17 +330,77 @@ lp_build_sample_packed(struct lp_build_sample_context *bld, } +/** + * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes. + */ static LLVMValueRef -lp_build_sample_wrap(struct lp_build_sample_context *bld, - LLVMValueRef coord, - LLVMValueRef length, - boolean is_pot, - unsigned wrap_mode) +lp_build_coord_mirror(struct lp_build_sample_context *bld, + LLVMValueRef coord) { + struct lp_build_context *coord_bld = &bld->coord_bld; + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef fract, flr, isOdd; + + /* fract = coord - floor(coord) */ + fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord)); + + /* flr = ifloor(coord); */ + flr = lp_build_ifloor(coord_bld, coord); + + /* isOdd = flr & 1 */ + isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, ""); + + /* make coord positive or negative depending on isOdd */ + coord = lp_build_set_sign(coord_bld, fract, isOdd); + + /* convert isOdd to float */ + isOdd = lp_build_int_to_float(coord_bld, isOdd); + + /* add isOdd to coord */ + coord = lp_build_add(coord_bld, coord, isOdd); + + return coord; +} + + +/** + * We only support a few wrap modes in lp_build_sample_wrap_int() at this time. + * Return whether the given mode is supported by that function. + */ +static boolean +is_simple_wrap_mode(unsigned mode) +{ + switch (mode) { + case PIPE_TEX_WRAP_REPEAT: + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return TRUE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + default: + return FALSE; + } +} + + +/** + * Build LLVM code for texture wrap mode, for scaled integer texcoords. + * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size + * \param length the texture size along one dimension + * \param is_pot if TRUE, length is a power of two + * \param wrap_mode one of PIPE_TEX_WRAP_x + */ +static LLVMValueRef +lp_build_sample_wrap_int(struct lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode) +{ + struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef length_minus_one; - length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: @@ -161,12 +413,12 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, break; case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_REPEAT: case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: @@ -174,8 +426,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, /* FIXME */ _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n", util_dump_tex_wrap(wrap_mode, TRUE)); - coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); - coord = lp_build_min(int_coord_bld, coord, length_minus_one); + coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero); + coord = lp_build_min(uint_coord_bld, coord, length_minus_one); break; default: @@ -186,92 +438,1270 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, } +/** + * Build LLVM code for texture wrap mode for linear filtering. + * \param x0_out returns first integer texcoord + * \param x1_out returns second integer texcoord + * \param weight_out returns linear interpolation weight + */ static void -lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) +lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode, + LLVMValueRef *x0_out, + LLVMValueRef *x1_out, + LLVMValueRef *weight_out) { - LLVMValueRef x; - LLVMValueRef y; + struct lp_build_context *coord_bld = &bld->coord_bld; + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; + LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); + LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5); + LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); + LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); + LLVMValueRef coord0, coord1, weight; + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + /* mul by size and subtract 0.5 */ + coord = lp_build_mul(coord_bld, coord, length_f); + coord = lp_build_sub(coord_bld, coord, half); + /* convert to int */ + coord0 = lp_build_ifloor(coord_bld, coord); + coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one); + /* compute lerp weight */ + weight = lp_build_fract(coord_bld, coord); + /* repeat wrap */ + if (is_pot) { + coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); + coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, ""); + } + else { + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); + coord1 = LLVMBuildURem(bld->builder, coord1, length, ""); + } + break; - x = lp_build_ifloor(&bld->coord_bld, s); - y = lp_build_ifloor(&bld->coord_bld, t); - lp_build_name(x, "tex.x.floor"); - lp_build_name(y, "tex.y.floor"); + case PIPE_TEX_WRAP_CLAMP: + if (bld->static_state->normalized_coords) { + coord = lp_build_mul(coord_bld, coord, length_f); + } + weight = lp_build_fract(coord_bld, coord); + coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero, + length_f_minus_one); + coord1 = lp_build_add(coord_bld, coord, coord_bld->one); + coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero, + length_f_minus_one); + coord0 = lp_build_ifloor(coord_bld, coord0); + coord1 = lp_build_ifloor(coord_bld, coord1); + break; - x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_name(x, "tex.x.wrapped"); - lp_build_name(y, "tex.y.wrapped"); + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + if (bld->static_state->normalized_coords) { + /* clamp to [0,1] */ + coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one); + /* mul by tex size and subtract 0.5 */ + coord = lp_build_mul(coord_bld, coord, length_f); + coord = lp_build_sub(coord_bld, coord, half); + } + else { + LLVMValueRef min, max; + /* clamp to [0.5, length - 0.5] */ + min = lp_build_const_scalar(coord_bld->type, 0.5F); + max = lp_build_sub(coord_bld, length_f, min); + coord = lp_build_clamp(coord_bld, coord, min, max); + } + /* compute lerp weight */ + weight = lp_build_fract(coord_bld, coord); + /* coord0 = floor(coord); */ + coord0 = lp_build_ifloor(coord_bld, coord); + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + /* coord0 = max(coord0, 0) */ + coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); + /* coord1 = min(coord1, length-1) */ + coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + LLVMValueRef min, max; + if (bld->static_state->normalized_coords) { + /* min = -1.0 / (2 * length) = -0.5 / length */ + min = lp_build_mul(coord_bld, + lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_rcp(coord_bld, length_f)); + /* max = 1.0 - min */ + max = lp_build_sub(coord_bld, coord_bld->one, min); + /* coord = clamp(coord, min, max) */ + coord = lp_build_clamp(coord_bld, coord, min, max); + /* scale coord to length (and sub 0.5?) */ + coord = lp_build_mul(coord_bld, coord, length_f); + coord = lp_build_sub(coord_bld, coord, half); + } + else { + /* clamp to [-0.5, length + 0.5] */ + min = lp_build_const_scalar(coord_bld->type, -0.5F); + max = lp_build_sub(coord_bld, length_f, min); + coord = lp_build_clamp(coord_bld, coord, min, max); + coord = lp_build_sub(coord_bld, coord, half); + } + /* compute lerp weight */ + weight = lp_build_fract(coord_bld, coord); + /* convert to int */ + coord0 = lp_build_ifloor(coord_bld, coord); + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + } + break; + + case PIPE_TEX_WRAP_MIRROR_REPEAT: + /* compute mirror function */ + coord = lp_build_coord_mirror(bld, coord); + + /* scale coord to length */ + coord = lp_build_mul(coord_bld, coord, length_f); + coord = lp_build_sub(coord_bld, coord, half); + + /* compute lerp weight */ + weight = lp_build_fract(coord_bld, coord); + + /* convert to int coords */ + coord0 = lp_build_ifloor(coord_bld, coord); + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); - lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); + /* coord0 = max(coord0, 0) */ + coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); + /* coord1 = min(coord1, length-1) */ + coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); + break; + + case PIPE_TEX_WRAP_MIRROR_CLAMP: + { + LLVMValueRef min, max; + /* min = 1.0 / (2 * length) */ + min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); + /* max = 1.0 - min */ + max = lp_build_sub(coord_bld, coord_bld->one, min); + + coord = lp_build_abs(coord_bld, coord); + coord = lp_build_clamp(coord_bld, coord, min, max); + coord = lp_build_mul(coord_bld, coord, length_f); + if(0)coord = lp_build_sub(coord_bld, coord, half); + weight = lp_build_fract(coord_bld, coord); + coord0 = lp_build_ifloor(coord_bld, coord); + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + } + break; + + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + { + LLVMValueRef min, max; + /* min = 1.0 / (2 * length) */ + min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); + /* max = 1.0 - min */ + max = lp_build_sub(coord_bld, coord_bld->one, min); + + coord = lp_build_abs(coord_bld, coord); + coord = lp_build_clamp(coord_bld, coord, min, max); + coord = lp_build_mul(coord_bld, coord, length_f); + coord = lp_build_sub(coord_bld, coord, half); + weight = lp_build_fract(coord_bld, coord); + coord0 = lp_build_ifloor(coord_bld, coord); + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + } + break; + + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + LLVMValueRef min, max; + /* min = -1.0 / (2 * length) = -0.5 / length */ + min = lp_build_mul(coord_bld, + lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_rcp(coord_bld, length_f)); + /* max = 1.0 - min */ + max = lp_build_sub(coord_bld, coord_bld->one, min); + + coord = lp_build_abs(coord_bld, coord); + coord = lp_build_clamp(coord_bld, coord, min, max); + coord = lp_build_mul(coord_bld, coord, length_f); + coord = lp_build_sub(coord_bld, coord, half); + weight = lp_build_fract(coord_bld, coord); + coord0 = lp_build_ifloor(coord_bld, coord); + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + } + break; + + default: + assert(0); + coord0 = NULL; + coord1 = NULL; + weight = NULL; + } + + *x0_out = coord0; + *x1_out = coord1; + *weight_out = weight; } +/** + * Build LLVM code for texture wrap mode for nearest filtering. + * \param coord the incoming texcoord (nominally in [0,1]) + * \param length the texture size along one dimension, as int + * \param is_pot if TRUE, length is a power of two + * \param wrap_mode one of PIPE_TEX_WRAP_x + */ +static LLVMValueRef +lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode) +{ + struct lp_build_context *coord_bld = &bld->coord_bld; + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; + LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); + LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); + LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); + LLVMValueRef icoord; + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + coord = lp_build_mul(coord_bld, coord, length_f); + icoord = lp_build_ifloor(coord_bld, coord); + if (is_pot) + icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, ""); + else + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + icoord = LLVMBuildURem(bld->builder, icoord, length, ""); + break; + + case PIPE_TEX_WRAP_CLAMP: + /* mul by size */ + if (bld->static_state->normalized_coords) { + coord = lp_build_mul(coord_bld, coord, length_f); + } + /* floor */ + icoord = lp_build_ifloor(coord_bld, coord); + /* clamp to [0, size-1]. Note: int coord builder type */ + icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, + length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + { + LLVMValueRef min, max; + if (bld->static_state->normalized_coords) { + /* min = 1.0 / (2 * length) */ + min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); + /* max = length - min */ + max = lp_build_sub(coord_bld, length_f, min); + /* scale coord to length */ + coord = lp_build_mul(coord_bld, coord, length_f); + } + else { + /* clamp to [0.5, length - 0.5] */ + min = lp_build_const_scalar(coord_bld->type, 0.5F); + max = lp_build_sub(coord_bld, length_f, min); + } + /* coord = clamp(coord, min, max) */ + coord = lp_build_clamp(coord_bld, coord, min, max); + icoord = lp_build_ifloor(coord_bld, coord); + } + break; + + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */ + { + LLVMValueRef min, max; + if (bld->static_state->normalized_coords) { + /* min = -1.0 / (2 * length) = -0.5 / length */ + min = lp_build_mul(coord_bld, + lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_rcp(coord_bld, length_f)); + /* max = length - min */ + max = lp_build_sub(coord_bld, length_f, min); + /* scale coord to length */ + coord = lp_build_mul(coord_bld, coord, length_f); + } + else { + /* clamp to [-0.5, length + 0.5] */ + min = lp_build_const_scalar(coord_bld->type, -0.5F); + max = lp_build_sub(coord_bld, length_f, min); + } + /* coord = clamp(coord, min, max) */ + coord = lp_build_clamp(coord_bld, coord, min, max); + icoord = lp_build_ifloor(coord_bld, coord); + } + break; + + case PIPE_TEX_WRAP_MIRROR_REPEAT: + { + LLVMValueRef min, max; + /* min = 1.0 / (2 * length) */ + min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); + /* max = length - min */ + max = lp_build_sub(coord_bld, length_f, min); + + /* compute mirror function */ + coord = lp_build_coord_mirror(bld, coord); + + /* scale coord to length */ + coord = lp_build_mul(coord_bld, coord, length_f); + + /* coord = clamp(coord, min, max) */ + coord = lp_build_clamp(coord_bld, coord, min, max); + icoord = lp_build_ifloor(coord_bld, coord); + } + break; + + case PIPE_TEX_WRAP_MIRROR_CLAMP: + coord = lp_build_abs(coord_bld, coord); + coord = lp_build_mul(coord_bld, coord, length_f); + coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one); + icoord = lp_build_ifloor(coord_bld, coord); + break; + + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + { + LLVMValueRef min, max; + /* min = 1.0 / (2 * length) */ + min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); + /* max = length - min */ + max = lp_build_sub(coord_bld, length_f, min); + + coord = lp_build_abs(coord_bld, coord); + coord = lp_build_mul(coord_bld, coord, length_f); + coord = lp_build_clamp(coord_bld, coord, min, max); + icoord = lp_build_ifloor(coord_bld, coord); + } + break; + + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + LLVMValueRef min, max; + /* min = 1.0 / (2 * length) */ + min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); + min = lp_build_negate(coord_bld, min); + /* max = length - min */ + max = lp_build_sub(coord_bld, length_f, min); + + coord = lp_build_abs(coord_bld, coord); + coord = lp_build_mul(coord_bld, coord, length_f); + coord = lp_build_clamp(coord_bld, coord, min, max); + icoord = lp_build_ifloor(coord_bld, coord); + } + break; + + default: + assert(0); + icoord = NULL; + } + + return icoord; +} + + +/** + * Codegen equivalent for u_minify(). + * Return max(1, base_size >> level); + */ +static LLVMValueRef +lp_build_minify(struct lp_build_sample_context *bld, + LLVMValueRef base_size, + LLVMValueRef level) +{ + LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify"); + size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); + return size; +} + + +/** + * Generate code to compute texture level of detail (lambda). + * \param s vector of texcoord s values + * \param t vector of texcoord t values + * \param r vector of texcoord r values + * \param width scalar int texture width + * \param height scalar int texture height + * \param depth scalar int texture depth + */ +static LLVMValueRef +lp_build_lod_selector(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth) + +{ + if (bld->static_state->min_lod == bld->static_state->max_lod) { + /* User is forcing sampling from a particular mipmap level. + * This is hit during mipmap generation. + */ + return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); + } + else { + const int dims = texture_dims(bld->static_state->target); + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), + bld->static_state->lod_bias); + LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->min_lod); + LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->max_lod); + + LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + + LLVMValueRef s0, s1, s2; + LLVMValueRef t0, t1, t2; + LLVMValueRef r0, r1, r2; + LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; + LLVMValueRef rho, lod; + + /* + * dsdx = abs(s[1] - s[0]); + * dsdy = abs(s[2] - s[0]); + * dtdx = abs(t[1] - t[0]); + * dtdy = abs(t[2] - t[0]); + * drdx = abs(r[1] - r[0]); + * drdy = abs(r[2] - r[0]); + * XXX we're assuming a four-element quad in 2x2 layout here. + */ + s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); + s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); + s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); + dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); + dsdx = lp_build_abs(float_bld, dsdx); + dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); + dsdy = lp_build_abs(float_bld, dsdy); + if (dims > 1) { + t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); + t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); + t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); + dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); + dtdx = lp_build_abs(float_bld, dtdx); + dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); + dtdy = lp_build_abs(float_bld, dtdy); + if (dims > 2) { + r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); + r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); + r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); + drdx = LLVMBuildSub(bld->builder, r1, r0, ""); + drdx = lp_build_abs(float_bld, drdx); + drdy = LLVMBuildSub(bld->builder, r2, r0, ""); + drdy = lp_build_abs(float_bld, drdy); + } + } + + /* Compute rho = max of all partial derivatives scaled by texture size. + * XXX this could be vectorized somewhat + */ + rho = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dsdx, dsdy), + lp_build_int_to_float(float_bld, width), ""); + if (dims > 1) { + LLVMValueRef max; + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dtdx, dtdy), + lp_build_int_to_float(float_bld, height), ""); + rho = lp_build_max(float_bld, rho, max); + if (dims > 2) { + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, drdx, drdy), + lp_build_int_to_float(float_bld, depth), ""); + rho = lp_build_max(float_bld, rho, max); + } + } + + /* compute lod = log2(rho) */ + lod = lp_build_log2(float_bld, rho); + + /* add lod bias */ + lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); + + /* clamp lod */ + lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); + + return lod; + } +} + + +/** + * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer + * mipmap level index. + * Note: this is all scalar code. + * \param lod scalar float texture level of detail + * \param level_out returns integer + */ static void -lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, +lp_build_nearest_mip_level(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level_out) +{ + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMValueRef last_level, level; + + LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + + last_level = bld->dynamic_state->last_level(bld->dynamic_state, + bld->builder, unit); + + /* convert float lod to integer */ + level = lp_build_iround(float_bld, lod); + + /* clamp level to legal range of levels */ + *level_out = lp_build_clamp(int_bld, level, zero, last_level); +} + + +/** + * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to + * two (adjacent) mipmap level indexes. Later, we'll sample from those + * two mipmap levels and interpolate between them. + */ +static void +lp_build_linear_mip_levels(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level0_out, + LLVMValueRef *level1_out, + LLVMValueRef *weight_out) +{ + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMValueRef last_level, level; + + last_level = bld->dynamic_state->last_level(bld->dynamic_state, + bld->builder, unit); + + /* convert float lod to integer */ + level = lp_build_ifloor(float_bld, lod); + + /* compute level 0 and clamp to legal range of levels */ + *level0_out = lp_build_clamp(int_bld, level, + int_bld->zero, + last_level); + /* compute level 1 and clamp to legal range of levels */ + *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one); + *level1_out = lp_build_min(int_bld, *level1_out, last_level); + + *weight_out = lp_build_fract(float_bld, lod); +} + + +/** + * Generate code to sample a mipmap level with nearest filtering. + * If sampling a cube texture, r = cube face in [0,5]. + */ +static void +lp_build_sample_image_nearest(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, LLVMValueRef s, LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) + LLVMValueRef r, + LLVMValueRef colors_out[4]) { - LLVMValueRef half; - LLVMValueRef s_ipart; - LLVMValueRef t_ipart; - LLVMValueRef s_fpart; - LLVMValueRef t_fpart; - LLVMValueRef x0, x1; - LLVMValueRef y0, y1; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef x, y, z; + + /* + * Compute integer texcoords. + */ + x = lp_build_sample_wrap_nearest(bld, s, width_vec, + bld->static_state->pot_width, + bld->static_state->wrap_s); + lp_build_name(x, "tex.x.wrapped"); + + if (dims >= 2) { + y = lp_build_sample_wrap_nearest(bld, t, height_vec, + bld->static_state->pot_height, + bld->static_state->wrap_t); + lp_build_name(y, "tex.y.wrapped"); + + if (dims == 3) { + z = lp_build_sample_wrap_nearest(bld, r, depth_vec, + bld->static_state->pot_height, + bld->static_state->wrap_r); + lp_build_name(z, "tex.z.wrapped"); + } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + z = r; + } + else { + z = NULL; + } + } + else { + y = z = NULL; + } + + /* + * Get texture colors. + */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x, y, z, + row_stride_vec, img_stride_vec, + data_ptr, colors_out); +} + + +/** + * Generate code to sample a mipmap level with linear filtering. + * If sampling a cube texture, r = cube face in [0,5]. + */ +static void +lp_build_sample_image_linear(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef colors_out[4]) +{ + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef x0, y0, z0, x1, y1, z1; + LLVMValueRef s_fpart, t_fpart, r_fpart; LLVMValueRef neighbors[2][2][4]; - unsigned chan; + int chan; + + /* + * Compute integer texcoords. + */ + lp_build_sample_wrap_linear(bld, s, width_vec, + bld->static_state->pot_width, + bld->static_state->wrap_s, + &x0, &x1, &s_fpart); + lp_build_name(x0, "tex.x0.wrapped"); + lp_build_name(x1, "tex.x1.wrapped"); + + if (dims >= 2) { + lp_build_sample_wrap_linear(bld, t, height_vec, + bld->static_state->pot_height, + bld->static_state->wrap_t, + &y0, &y1, &t_fpart); + lp_build_name(y0, "tex.y0.wrapped"); + lp_build_name(y1, "tex.y1.wrapped"); + + if (dims == 3) { + lp_build_sample_wrap_linear(bld, r, depth_vec, + bld->static_state->pot_depth, + bld->static_state->wrap_r, + &z0, &z1, &r_fpart); + lp_build_name(z0, "tex.z0.wrapped"); + lp_build_name(z1, "tex.z1.wrapped"); + } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + z0 = z1 = r; /* cube face */ + r_fpart = NULL; + } + else { + z0 = z1 = NULL; + r_fpart = NULL; + } + } + else { + y0 = y1 = t_fpart = NULL; + z0 = z1 = r_fpart = NULL; + } + + /* + * Get texture colors. + */ + /* get x0/x1 texels */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y0, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y0, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[0][1]); + + if (dims == 1) { + /* Interpolate two samples from 1D image to produce one color */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan]); + } + } + else { + /* 2D/3D texture */ + LLVMValueRef colors0[4]; + + /* get x0/x1 texels at y1 */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y1, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y1, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[1][1]); + + /* Bilinear interpolate the four samples from the 2D image / 3D slice */ + for (chan = 0; chan < 4; chan++) { + colors0[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } + + if (dims == 3) { + LLVMValueRef neighbors1[2][2][4]; + LLVMValueRef colors1[4]; + + /* get x0/x1/y0/y1 texels at z1 */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y0, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[0][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y0, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[0][1]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y1, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[1][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y1, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[1][1]); + + /* Bilinear interpolate the four samples from the second Z slice */ + for (chan = 0; chan < 4; chan++) { + colors1[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors1[0][0][chan], + neighbors1[0][1][chan], + neighbors1[1][0][chan], + neighbors1[1][1][chan]); + } + + /* Linearly interpolate the two samples from the two 3D slices */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, + r_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* 2D tex */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } + } + } +} - half = lp_build_const_scalar(bld->coord_type, 0.5); - s = lp_build_sub(&bld->coord_bld, s, half); - t = lp_build_sub(&bld->coord_bld, t, half); - s_ipart = lp_build_floor(&bld->coord_bld, s); - t_ipart = lp_build_floor(&bld->coord_bld, t); +/** Helper used by lp_build_cube_lookup() */ +static LLVMValueRef +lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) +{ + /* ima = -0.5 / abs(coord); */ + LLVMValueRef negHalf = lp_build_const_scalar(coord_bld->type, -0.5); + LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); + LLVMValueRef ima = lp_build_mul(coord_bld, negHalf, + lp_build_rcp(coord_bld, absCoord)); + return ima; +} - s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); - t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); - x0 = lp_build_itrunc(&bld->coord_bld, s_ipart); - y0 = lp_build_itrunc(&bld->coord_bld, t_ipart); +/** + * Helper used by lp_build_cube_lookup() + * \param sign scalar +1 or -1 + * \param coord float vector + * \param ima float vector + */ +static LLVMValueRef +lp_build_cube_coord(struct lp_build_context *coord_bld, + LLVMValueRef sign, int negate_coord, + LLVMValueRef coord, LLVMValueRef ima) +{ + /* return negate(coord) * ima * sign + 0.5; */ + LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5); + LLVMValueRef res; - x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + assert(negate_coord == +1 || negate_coord == -1); - x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); - y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + if (negate_coord == -1) { + coord = lp_build_negate(coord_bld, coord); + } - x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + res = lp_build_mul(coord_bld, coord, ima); + if (sign) { + sign = lp_build_broadcast_scalar(coord_bld, sign); + res = lp_build_mul(coord_bld, res, sign); + } + res = lp_build_add(coord_bld, res, half); + + return res; +} - lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]); - /* TODO: Don't interpolate missing channels */ - for(chan = 0; chan < 4; ++chan) { - texel[chan] = lp_build_lerp_2d(&bld->texel_bld, - s_fpart, t_fpart, - neighbors[0][0][chan], - neighbors[0][1][chan], - neighbors[1][0][chan], - neighbors[1][1][chan]); +/** Helper used by lp_build_cube_lookup() + * Return (major_coord >= 0) ? pos_face : neg_face; + */ +static LLVMValueRef +lp_build_cube_face(struct lp_build_sample_context *bld, + LLVMValueRef major_coord, + unsigned pos_face, unsigned neg_face) +{ + LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + major_coord, + bld->float_bld.zero, ""); + LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0); + LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0); + LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, ""); + return res; +} + + + +/** + * Generate code to do cube face selection and per-face texcoords. + */ +static void +lp_build_cube_lookup(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef *face, + LLVMValueRef *face_s, + LLVMValueRef *face_t) +{ + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *coord_bld = &bld->coord_bld; + LLVMValueRef rx, ry, rz; + LLVMValueRef arx, ary, arz; + LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); + LLVMValueRef arx_ge_ary, arx_ge_arz; + LLVMValueRef ary_ge_arx, ary_ge_arz; + LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; + LLVMValueRef rx_pos, ry_pos, rz_pos; + + assert(bld->coord_bld.type.length == 4); + + /* + * Use the average of the four pixel's texcoords to choose the face. + */ + rx = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, s)); + ry = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, t)); + rz = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, r)); + + arx = lp_build_abs(float_bld, rx); + ary = lp_build_abs(float_bld, ry); + arz = lp_build_abs(float_bld, rz); + + /* + * Compare sign/magnitude of rx,ry,rz to determine face + */ + arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, ""); + arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, ""); + ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, ""); + ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, ""); + + arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, ""); + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, ""); + ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, ""); + rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); + + { + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + *face_s = bld->coord_bld.undef; + *face_t = bld->coord_bld.undef; + *face = bld->int_bld.undef; + + lp_build_name(*face_s, "face_s"); + lp_build_name(*face_t, "face_t"); + lp_build_name(*face, "face"); + + lp_build_flow_scope_declare(flow_ctx, face_s); + lp_build_flow_scope_declare(flow_ctx, face_t); + lp_build_flow_scope_declare(flow_ctx, face); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); + { + /* +/- X face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rx); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, s); + *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima); + *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + *face = lp_build_cube_face(bld, rx, + PIPE_TEX_FACE_POS_X, + PIPE_TEX_FACE_NEG_X); + } + lp_build_else(&if_ctx); + { + struct lp_build_flow_context *flow_ctx2; + struct lp_build_if_state if_ctx2; + + LLVMValueRef face_s2 = bld->coord_bld.undef; + LLVMValueRef face_t2 = bld->coord_bld.undef; + LLVMValueRef face2 = bld->int_bld.undef; + + flow_ctx2 = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx2); + lp_build_flow_scope_declare(flow_ctx2, &face_s2); + lp_build_flow_scope_declare(flow_ctx2, &face_t2); + lp_build_flow_scope_declare(flow_ctx2, &face2); + + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); + { + /* +/- Y face */ + LLVMValueRef sign = lp_build_sgn(float_bld, ry); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); + face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); + face2 = lp_build_cube_face(bld, ry, + PIPE_TEX_FACE_POS_Y, + PIPE_TEX_FACE_NEG_Y); + } + lp_build_else(&if_ctx2); + { + /* +/- Z face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rz); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); + face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + face2 = lp_build_cube_face(bld, rz, + PIPE_TEX_FACE_POS_Z, + PIPE_TEX_FACE_NEG_Z); + } + lp_build_endif(&if_ctx2); + lp_build_flow_scope_end(flow_ctx2); + lp_build_flow_destroy(flow_ctx2); + + *face_s = face_s2; + *face_t = face_t2; + *face = face2; + } + + lp_build_endif(&if_ctx); + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); } } + +/** + * Sample the texture/mipmap using given image filter and mip filter. + * data0_ptr and data1_ptr point to the two mipmap levels to sample + * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. + * If we're using nearest miplevel sampling the '1' values will be null/unused. + */ +static void +lp_build_sample_mipmap(struct lp_build_sample_context *bld, + unsigned img_filter, + unsigned mip_filter, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef lod_fpart, + LLVMValueRef width0_vec, + LLVMValueRef width1_vec, + LLVMValueRef height0_vec, + LLVMValueRef height1_vec, + LLVMValueRef depth0_vec, + LLVMValueRef depth1_vec, + LLVMValueRef row_stride0_vec, + LLVMValueRef row_stride1_vec, + LLVMValueRef img_stride0_vec, + LLVMValueRef img_stride1_vec, + LLVMValueRef data_ptr0, + LLVMValueRef data_ptr1, + LLVMValueRef *colors_out) +{ + LLVMValueRef colors0[4], colors1[4]; + int chan; + + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_nearest(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + else { + assert(img_filter == PIPE_TEX_FILTER_LINEAR); + + lp_build_sample_image_linear(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_linear(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* interpolate samples from the two mipmap levels */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* use first/only level's colors */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } + } +} + + + +/** + * General texture sampling codegen. + * This function handles texture sampling for all texture targets (1D, + * 2D, 3D, cube) and all filtering modes. + */ +static void +lp_build_sample_general(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_array, + LLVMValueRef img_stride_vec, + LLVMValueRef data_array, + LLVMValueRef *colors_out) +{ + struct lp_build_context *float_bld = &bld->float_bld; + const unsigned mip_filter = bld->static_state->min_mip_filter; + const unsigned min_filter = bld->static_state->min_img_filter; + const unsigned mag_filter = bld->static_state->mag_img_filter; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef lod, lod_fpart; + LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec; + LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; + LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; + LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; + LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; + LLVMValueRef data_ptr0, data_ptr1; + + /* + printf("%s mip %d min %d mag %d\n", __FUNCTION__, + mip_filter, min_filter, mag_filter); + */ + + /* + * Compute the level of detail (float). + */ + if (min_filter != mag_filter || + mip_filter != PIPE_TEX_MIPFILTER_NONE) { + /* Need to compute lod either to choose mipmap levels or to + * distinguish between minification/magnification with one mipmap level. + */ + lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); + } + + /* + * Compute integer mipmap level(s) to fetch texels from. + */ + if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* always use mip level 0 */ + ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + } + else { + if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + } + else { + assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR); + lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, + &lod_fpart); + lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart); + } + } + + /* + * Convert scalar integer mipmap levels into vectors. + */ + ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) + ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); + + /* + * Compute width, height at mipmap level 'ilevel0' + */ + width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); + if (dims >= 2) { + height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); + row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array, + ilevel0); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { + img_stride0_vec = lp_build_mul(&bld->int_coord_bld, + row_stride0_vec, height0_vec); + if (dims == 3) { + depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); + } + } + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* compute width, height, depth for second mipmap level at 'ilevel1' */ + width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); + if (dims >= 2) { + height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); + row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array, + ilevel1); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { + img_stride1_vec = lp_build_mul(&bld->int_coord_bld, + row_stride1_vec, height1_vec); + if (dims ==3) { + depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); + } + } + } + } + + /* + * Choose cube face, recompute per-face texcoords. + */ + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + LLVMValueRef face, face_s, face_t; + lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); + s = face_s; /* vec */ + t = face_t; /* vec */ + /* use 'r' to indicate cube face */ + r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ + } + + /* + * Get pointer(s) to image data for mipmap level(s). + */ + data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); + } + + /* + * Get/interpolate texture colors. + */ + if (min_filter == mag_filter) { + /* no need to distinquish between minification and magnification */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); + } + else { + /* Emit conditional to choose min image filter or mag image filter + * depending on the lod being >0 or <= 0, respectively. + */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef minify; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + lp_build_flow_scope_declare(flow_ctx, &colors_out[0]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[1]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[2]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[3]); + + /* minify = lod > 0.0 */ + minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + lod, float_bld->zero, ""); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, minify); + { + /* Use the minification filter */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); + } + lp_build_else(&if_ctx); + { + /* Use the magnification filter */ + lp_build_sample_mipmap(bld, mag_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); + } + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + } +} + + + static void lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, struct lp_type dst_type, @@ -308,8 +1738,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef t, LLVMValueRef width, LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, + LLVMValueRef stride_array, + LLVMValueRef data_array, LLVMValueRef *texel) { LLVMBuilderRef builder = bld->builder; @@ -325,8 +1755,9 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef neighbors_hi[2][2]; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; + LLVMValueRef stride; - lp_build_context_init(&i32, builder, lp_type_int(32)); + lp_build_context_init(&i32, builder, lp_type_int_vec(32)); lp_build_context_init(&h16, builder, lp_type_ufixed(16)); lp_build_context_init(&u8n, builder, lp_type_unorm(8)); @@ -334,20 +1765,33 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + if (bld->static_state->normalized_coords) { + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, ""); + LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, ""); + s = lp_build_mul(&bld->coord_bld, s, fp_width); + t = lp_build_mul(&bld->coord_bld, t, fp_height); + } + + /* scale coords by 256 (8 fractional bits) */ s = lp_build_mul_imm(&bld->coord_bld, s, 256); t = lp_build_mul_imm(&bld->coord_bld, t, 256); + /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); + /* subtract 0.5 (add -128) */ i32_c128 = lp_build_int_const_scalar(i32.type, -128); s = LLVMBuildAdd(builder, s, i32_c128, ""); t = LLVMBuildAdd(builder, t, i32_c128, ""); + /* compute floor (shift right 8) */ i32_c8 = lp_build_int_const_scalar(i32.type, 8); s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); + /* compute fractional part (AND with 0xff) */ i32_c255 = lp_build_int_const_scalar(i32.type, 255); s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); @@ -355,14 +1799,18 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, x0 = s_ipart; y0 = t_ipart; - x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); - x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); - x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width, + bld->static_state->wrap_s); + y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height, + bld->static_state->wrap_t); + + x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width, + bld->static_state->wrap_s); + y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height, + bld->static_state->wrap_t); /* * Transform 4 x i32 in @@ -415,6 +1863,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); } + stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0); + /* * Fetch the pixels as 4 x 32bit (rgba order might differ): * @@ -432,10 +1882,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, * The higher 8 bits of the resulting elements will be zero. */ - neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); - neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); - neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); - neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); + neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array); + neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array); + neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array); + neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array); neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); @@ -518,6 +1968,12 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, } +/** + * Build texture sampling code. + * 'texel' will return a vector of four LLVMValueRefs corresponding to + * R, G, B, A. + * \param type vector float type to use for coords, etc. + */ void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, @@ -530,13 +1986,19 @@ lp_build_sample_soa(LLVMBuilderRef builder, LLVMValueRef *texel) { struct lp_build_sample_context bld; - LLVMValueRef width; - LLVMValueRef height; - LLVMValueRef stride; - LLVMValueRef data_ptr; + LLVMValueRef width, width_vec; + LLVMValueRef height, height_vec; + LLVMValueRef depth, depth_vec; + LLVMValueRef stride_array; + LLVMValueRef data_array; LLVMValueRef s; LLVMValueRef t; - LLVMValueRef p; + LLVMValueRef r; + + (void) lp_build_lod_selector; /* temporary to silence warning */ + (void) lp_build_nearest_mip_level; + (void) lp_build_linear_mip_levels; + (void) lp_build_minify; /* Setup our build context */ memset(&bld, 0, sizeof bld); @@ -544,54 +2006,55 @@ lp_build_sample_soa(LLVMBuilderRef builder, bld.static_state = static_state; bld.dynamic_state = dynamic_state; bld.format_desc = util_format_description(static_state->format); + + bld.float_type = lp_type_float(32); + bld.int_type = lp_type_int(32); bld.coord_type = type; + bld.uint_coord_type = lp_uint_type(type); bld.int_coord_type = lp_int_type(type); bld.texel_type = type; + + lp_build_context_init(&bld.float_bld, builder, bld.float_type); + lp_build_context_init(&bld.int_bld, builder, bld.int_type); lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); + lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type); lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); /* Get the dynamic state */ width = dynamic_state->width(dynamic_state, builder, unit); height = dynamic_state->height(dynamic_state, builder, unit); - stride = dynamic_state->stride(dynamic_state, builder, unit); - data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + depth = dynamic_state->depth(dynamic_state, builder, unit); + stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); + data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); + /* Note that data_array is an array[level] of pointers to texture images */ s = coords[0]; t = coords[1]; - p = coords[2]; - - width = lp_build_broadcast_scalar(&bld.int_coord_bld, width); - height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); - stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); - - if(static_state->target == PIPE_TEXTURE_1D) - t = bld.coord_bld.zero; - - if(static_state->normalized_coords) { - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); - LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, ""); - s = lp_build_mul(&bld.coord_bld, s, fp_width); - t = lp_build_mul(&bld.coord_bld, t, fp_height); + r = coords[2]; + + width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); + height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); + depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); + + if (lp_format_is_rgba8(bld.format_desc) && + static_state->target == PIPE_TEXTURE_2D && + static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && + is_simple_wrap_mode(static_state->wrap_s) && + is_simple_wrap_mode(static_state->wrap_t)) { + /* special case */ + lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, + stride_array, data_array, texel); } - - switch (static_state->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); - break; - case PIPE_TEX_FILTER_LINEAR: - if(lp_format_is_rgba8(bld.format_desc)) - lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); - else - lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); - break; - default: - assert(0); + else { + lp_build_sample_general(&bld, unit, s, t, r, + width, height, depth, + width_vec, height_vec, depth_vec, + stride_array, NULL, data_array, + texel); } - /* FIXME: respect static_state->min_mip_filter */; - /* FIXME: respect static_state->mag_img_filter */; - - lp_build_sample_compare(&bld, p, texel); + lp_build_sample_compare(&bld, r, texel); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index 740392f561..34478c10f5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -37,7 +37,7 @@ #define LP_BLD_STRUCT_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <llvm-c/Target.h> #include "util/u_debug.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index b9472127a6..57b5cc079f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -37,7 +37,7 @@ #define LP_BLD_SWIZZLE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index eddb7a83fa..0f2f8a65b1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -35,7 +35,7 @@ #ifndef LP_BLD_TGSI_H #define LP_BLD_TGSI_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct tgsi_token; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 4cf28a9f93..5ec59d636c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -41,6 +41,7 @@ #include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -52,6 +53,7 @@ #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_tgsi.h" +#include "lp_bld_debug.h" #define LP_MAX_TEMPS 256 @@ -81,6 +83,34 @@ #define QUAD_BOTTOM_LEFT 2 #define QUAD_BOTTOM_RIGHT 3 +#define LP_TGSI_MAX_NESTING 16 + +struct lp_exec_mask { + struct lp_build_context *bld; + + boolean has_mask; + + LLVMTypeRef int_vec_type; + + LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING]; + int cond_stack_size; + LLVMValueRef cond_mask; + + LLVMValueRef break_stack[LP_TGSI_MAX_NESTING]; + int break_stack_size; + LLVMValueRef break_mask; + + LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING]; + int cont_stack_size; + LLVMValueRef cont_mask; + + LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING]; + int loop_stack_size; + LLVMBasicBlockRef loop_block; + + + LLVMValueRef exec_mask; +}; struct lp_build_tgsi_soa_context { @@ -97,9 +127,9 @@ struct lp_build_tgsi_soa_context LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; struct lp_build_mask_context *mask; + struct lp_exec_mask exec_mask; }; - static const unsigned char swizzle_left[4] = { QUAD_TOP_LEFT, QUAD_TOP_LEFT, @@ -124,6 +154,174 @@ swizzle_bottom[4] = { QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT }; +static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) +{ + mask->bld = bld; + mask->has_mask = FALSE; + mask->cond_stack_size = 0; + mask->loop_stack_size = 0; + mask->break_stack_size = 0; + mask->cont_stack_size = 0; + + mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); +} + +static void lp_exec_mask_update(struct lp_exec_mask *mask) +{ + if (mask->loop_stack_size) { + /*for loops we need to update the entire mask at + * runtime */ + LLVMValueRef tmp; + tmp = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + mask->break_mask, + "maskcb"); + mask->exec_mask = LLVMBuildAnd(mask->bld->builder, + mask->cond_mask, + tmp, + "maskfull"); + } else + mask->exec_mask = mask->cond_mask; + + + mask->has_mask = (mask->cond_stack_size > 0 || + mask->loop_stack_size > 0); +} + +static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, + LLVMValueRef val) +{ + mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; + mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val, + mask->int_vec_type, ""); + + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) +{ + LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; + LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder, + mask->cond_mask, ""); + + /* means that we didn't have any mask before and that + * we were fully enabled */ + if (mask->cond_stack_size <= 1) { + prev_mask = LLVMConstAllOnes(mask->int_vec_type); + } + + mask->cond_mask = LLVMBuildAnd(mask->bld->builder, + inv_mask, + prev_mask, ""); + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) +{ + mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; + lp_exec_mask_update(mask); +} + +static void lp_exec_bgnloop(struct lp_exec_mask *mask) +{ + + if (mask->cont_stack_size == 0) + mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type); + if (mask->cont_stack_size == 0) + mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); + if (mask->cond_stack_size == 0) + mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); + mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; + mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); + LLVMBuildBr(mask->bld->builder, mask->loop_block); + LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); + + lp_exec_mask_update(mask); +} + +static void lp_exec_break(struct lp_exec_mask *mask) +{ + LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + "break"); + + mask->break_stack[mask->break_stack_size++] = mask->break_mask; + if (mask->break_stack_size > 1) { + mask->break_mask = LLVMBuildAnd(mask->bld->builder, + mask->break_mask, + exec_mask, "break_full"); + } else + mask->break_mask = exec_mask; + + lp_exec_mask_update(mask); +} + +static void lp_exec_continue(struct lp_exec_mask *mask) +{ + LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + ""); + + mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; + if (mask->cont_stack_size > 1) { + mask->cont_mask = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + exec_mask, ""); + } else + mask->cont_mask = exec_mask; + + lp_exec_mask_update(mask); +} + + +static void lp_exec_endloop(struct lp_exec_mask *mask) +{ + LLVMBasicBlockRef endloop; + LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* + mask->bld->type.length); + /* i1cond = (mask == 0) */ + LLVMValueRef i1cond = LLVMBuildICmp( + mask->bld->builder, + LLVMIntNE, + LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""), + LLVMConstNull(reg_type), ""); + + endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); + + LLVMBuildCondBr(mask->bld->builder, + i1cond, mask->loop_block, endloop); + + LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); + + mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; + /* pop the break mask */ + if (mask->cont_stack_size) { + mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; + } + if (mask->break_stack_size) { + mask->break_mask = mask->cont_stack[--mask->break_stack_size]; + } + + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_store(struct lp_exec_mask *mask, + LLVMValueRef val, + LLVMValueRef dst) +{ + if (mask->has_mask) { + LLVMValueRef real_val, dst_val; + + dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); + real_val = lp_build_select(mask->bld, + mask->exec_mask, + val, dst_val); + + LLVMBuildStore(mask->bld->builder, real_val, dst); + } else + LLVMBuildStore(mask->bld->builder, val, dst); +} + static LLVMValueRef emit_ddx(struct lp_build_tgsi_soa_context *bld, @@ -287,13 +485,13 @@ emit_store( switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - LLVMBuildStore(bld->base.builder, value, - bld->outputs[reg->Register.Index][chan_index]); + lp_exec_mask_store(&bld->exec_mask, value, + bld->outputs[reg->Register.Index][chan_index]); break; case TGSI_FILE_TEMPORARY: - LLVMBuildStore(bld->base.builder, value, - bld->temps[reg->Register.Index][chan_index]); + lp_exec_mask_store(&bld->exec_mask, value, + bld->temps[reg->Register.Index][chan_index]); break; case TGSI_FILE_ADDRESS: @@ -301,6 +499,11 @@ emit_store( assert(0); break; + case TGSI_FILE_PREDICATE: + /* FIXME */ + assert(0); + break; + default: assert( 0 ); } @@ -498,6 +701,17 @@ emit_instruction( if (indirect_temp_reference(inst)) return FALSE; + /* + * Stores and write masks are handled in a general fashion after the long + * instruction opcode switch statement. + * + * Although not stricitly necessary, we avoid generating instructions for + * channels which won't be stored, in cases where's that easy. For some + * complex instructions, like texture sampling, it is more convenient to + * assume a full writemask and then let LLVM optimization passes eliminate + * redundant code. + */ + assert(info->num_dst <= 1); if(info->num_dst) { FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1265,15 +1479,16 @@ emit_instruction( case TGSI_OPCODE_TXP: emit_tex( bld, inst, FALSE, TRUE, dst0 ); break; - + case TGSI_OPCODE_BRK: - /* FIXME */ - return 0; + lp_exec_break(&bld->exec_mask); break; case TGSI_OPCODE_IF: - /* FIXME */ - return 0; + tmp0 = emit_fetch(bld, inst, 0, CHAN_X); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, + tmp0, bld->base.zero); + lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; case TGSI_OPCODE_BGNFOR: @@ -1282,6 +1497,10 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_BGNLOOP: + lp_exec_bgnloop(&bld->exec_mask); + break; + case TGSI_OPCODE_REP: /* deprecated */ assert(0); @@ -1289,13 +1508,11 @@ emit_instruction( break; case TGSI_OPCODE_ELSE: - /* FIXME */ - return 0; + lp_exec_mask_cond_invert(&bld->exec_mask); break; case TGSI_OPCODE_ENDIF: - /* FIXME */ - return 0; + lp_exec_mask_cond_pop(&bld->exec_mask); break; case TGSI_OPCODE_ENDFOR: @@ -1304,6 +1521,10 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_ENDLOOP: + lp_exec_endloop(&bld->exec_mask); + break; + case TGSI_OPCODE_ENDREP: /* deprecated */ assert(0); @@ -1403,8 +1624,7 @@ emit_instruction( break; case TGSI_OPCODE_CONT: - /* FIXME */ - return 0; + lp_exec_continue(&bld->exec_mask); break; case TGSI_OPCODE_EMIT: @@ -1458,6 +1678,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.consts_ptr = consts_ptr; bld.sampler = sampler; + lp_exec_mask_init(&bld.exec_mask, &bld.base); + tgsi_parse_init( &parse, tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { @@ -1505,7 +1727,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert( 0 ); } } - + if (0) { + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + debug_printf("11111111111111111111111111111 \n"); + tgsi_dump(tokens, 0); + LLVMDumpValue(function); + debug_printf("2222222222222222222222222222 \n"); + } tgsi_parse_free( &parse ); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index 8270cd057f..796af88caa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -58,7 +58,10 @@ LLVMTypeRef lp_build_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_elem_type(type); - return LLVMVectorType(elem_type, type.length); + if (type.length == 1) + return elem_type; + else + return LLVMVectorType(elem_type, type.length); } @@ -115,6 +118,9 @@ lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) if(!vec_type) return FALSE; + if (type.length == 1) + return lp_check_elem_type(type, vec_type); + if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) return FALSE; @@ -153,7 +159,10 @@ LLVMTypeRef lp_build_int_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); - return LLVMVectorType(elem_type, type.length); + if (type.length == 1) + return elem_type; + else + return LLVMVectorType(elem_type, type.length); } @@ -178,6 +187,25 @@ lp_build_int32_vec4_type(void) } +/** + * Create unsigned integer type variation of given type. + */ +struct lp_type +lp_uint_type(struct lp_type type) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = type.width; + res_type.length = type.length; + + return res_type; +} + + +/** + * Create signed integer type variation of given type. + */ struct lp_type lp_int_type(struct lp_type type) { @@ -186,6 +214,7 @@ lp_int_type(struct lp_type type) memset(&res_type, 0, sizeof res_type); res_type.width = type.width; res_type.length = type.length; + res_type.sign = 1; return res_type; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index 62ee05be4d..5b351476ac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -37,7 +37,7 @@ #define LP_BLD_TYPE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <pipe/p_compiler.h> @@ -103,7 +103,7 @@ struct lp_type { unsigned width:14; /** - * Vector length. + * Vector length. If length==1, this is a scalar (float/int) type. * * width*length should be a power of two greater or equal to eight. * @@ -139,6 +139,7 @@ struct lp_build_context }; +/** Create scalar float type */ static INLINE struct lp_type lp_type_float(unsigned width) { @@ -148,12 +149,29 @@ lp_type_float(unsigned width) res_type.floating = TRUE; res_type.sign = TRUE; res_type.width = width; + res_type.length = 1; + + return res_type; +} + + +/** Create vector of float type */ +static INLINE struct lp_type +lp_type_float_vec(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.floating = TRUE; + res_type.sign = TRUE; + res_type.width = width; res_type.length = LP_NATIVE_VECTOR_WIDTH / width; return res_type; } +/** Create scalar int type */ static INLINE struct lp_type lp_type_int(unsigned width) { @@ -162,12 +180,28 @@ lp_type_int(unsigned width) memset(&res_type, 0, sizeof res_type); res_type.sign = TRUE; res_type.width = width; + res_type.length = 1; + + return res_type; +} + + +/** Create vector int type */ +static INLINE struct lp_type +lp_type_int_vec(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.width = width; res_type.length = LP_NATIVE_VECTOR_WIDTH / width; return res_type; } +/** Create scalar uint type */ static INLINE struct lp_type lp_type_uint(unsigned width) { @@ -175,6 +209,20 @@ lp_type_uint(unsigned width) memset(&res_type, 0, sizeof res_type); res_type.width = width; + res_type.length = 1; + + return res_type; +} + + +/** Create vector uint type */ +static INLINE struct lp_type +lp_type_uint_vec(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = width; res_type.length = LP_NATIVE_VECTOR_WIDTH / width; return res_type; @@ -257,6 +305,10 @@ lp_build_int32_vec4_type(void); struct lp_type +lp_uint_type(struct lp_type type); + + +struct lp_type lp_int_type(struct lp_type type); |