diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
27 files changed, 1011 insertions, 250 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h index 634575670d..fe3cedcc48 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h @@ -35,7 +35,7 @@ #define LP_BLD_ALPHA_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct pipe_alpha_state; struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index e2c6788397..aa47338b32 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -232,6 +232,37 @@ lp_build_add(struct lp_build_context *bld, } +/** Return the sum of the elements of a */ +LLVMValueRef +lp_build_sum_vector(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMValueRef index, res; + int i; + + if (a == bld->zero) + return bld->zero; + if (a == bld->undef) + return bld->undef; + assert(type.length > 1); + + assert(!bld->type.norm); + + index = LLVMConstInt(LLVMInt32Type(), 0, 0); + res = LLVMBuildExtractElement(bld->builder, a, index, ""); + + for (i = 1; i < type.length; i++) { + index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, a, index, ""), + ""); + } + + return res; +} + + /** * Generate a - b */ @@ -689,12 +720,12 @@ lp_build_negate(struct lp_build_context *bld, } +/** Return -1, 0 or +1 depending on the sign of a */ LLVMValueRef lp_build_sgn(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef cond; LLVMValueRef res; @@ -704,14 +735,29 @@ lp_build_sgn(struct lp_build_context *bld, res = bld->one; } else if(type.floating) { - /* Take the sign bit and add it to 1 constant */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMTypeRef vec_type; + LLVMTypeRef int_type; + LLVMValueRef mask; LLVMValueRef sign; LLVMValueRef one; - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + unsigned long long maskBit = (unsigned long long)1 << (type.width - 1); + + if (type.length == 1) { + int_type = lp_build_int_elem_type(type); + vec_type = lp_build_elem_type(type); + mask = LLVMConstInt(int_type, maskBit, 0); + } + else { + /* vector */ + int_type = lp_build_int_vec_type(type); + vec_type = lp_build_vec_type(type); + mask = lp_build_int_const_scalar(type, maskBit); + } + + /* Take the sign bit and add it to 1 constant */ + sign = LLVMBuildBitCast(bld->builder, a, int_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - one = LLVMConstBitCast(bld->one, int_vec_type); + one = LLVMConstBitCast(bld->one, int_type); res = LLVMBuildOr(bld->builder, sign, one, ""); res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); } @@ -883,6 +929,10 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); + if (type.length == 1) { + return LLVMBuildFPTrunc(bld->builder, a, LLVMFloatType(), ""); + } + if(util_cpu_caps.has_sse4_1) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); else { @@ -953,6 +1003,9 @@ lp_build_itrunc(struct lp_build_context *bld, } +/** + * Convert float[] to int[] with round(). + */ LLVMValueRef lp_build_iround(struct lp_build_context *bld, LLVMValueRef a) @@ -1013,6 +1066,14 @@ lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef res; assert(type.floating); + + if (type.length == 1) { + /* scalar float to int */ + LLVMTypeRef int_type = LLVMIntType(type.width); + res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); + return res; + } + assert(lp_check_value(type, a)); if(util_cpu_caps.has_sse4_1) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 55385e3a66..7a10fe1220 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -37,7 +37,7 @@ #define LP_BLD_ARIT_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; @@ -57,6 +57,10 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_sum_vector(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h index da272e549f..5a9e1c1fb2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_blend.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h @@ -40,7 +40,7 @@ * for a standalone example. */ -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index 53447757e8..8a275fa72f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -221,8 +221,16 @@ lp_build_undef(struct lp_type type) LLVMValueRef lp_build_zero(struct lp_type type) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - return LLVMConstNull(vec_type); + if (type.length == 1) { + if (type.floating) + return LLVMConstReal(LLVMFloatType(), 0.0); + else + return LLVMConstInt(LLVMIntType(type.width), 0, 0); + } + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + return LLVMConstNull(vec_type); + } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index cb8e1c7b00..4078636103 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -37,7 +37,7 @@ #define LP_BLD_CONST_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <pipe/p_compiler.h> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h index 948e68fae4..78e8155ff7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h @@ -37,7 +37,7 @@ #define LP_BLD_CONV_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 583e6132b4..441ad94786 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -30,7 +30,7 @@ #define LP_BLD_DEBUG_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_compiler.h" #include "util/u_string.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h index 79d6981bb5..8be80024ae 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h @@ -36,7 +36,7 @@ #define LP_BLD_DEPTH_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct pipe_depth_state; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index bc83138908..106fc03e46 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -308,7 +308,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) * Note: this function has no dependencies on the flow code and could * be used elsewhere. */ -static LLVMBasicBlockRef +LLVMBasicBlockRef lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) { LLVMBasicBlockRef current_block; @@ -648,7 +648,9 @@ lp_build_if(struct lp_build_if_state *ctx, ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); /* add add the initial value of the var from the entry block */ - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1); + if (!LLVMIsUndef(*flow->variables[i])) + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], + &ifthen->entry_block, 1); } /* create/insert true_block before merge_block */ @@ -695,18 +697,21 @@ lp_build_endif(struct lp_build_if_state *ctx) { struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; + LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder); unsigned i; ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); + /* Insert branch to the merge block from current block */ + LLVMBuildBr(ctx->builder, ifthen->merge_block); + if (ifthen->false_block) { LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); /* for each variable, update the Phi node with a (variable, block) pair */ for (i = 0; i < flow->num_variables; i++) { assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); - + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1); /* replace the variable ref with the phi function */ *flow->variables[i] = ifthen->phi[i]; } @@ -742,15 +747,18 @@ lp_build_endif(struct lp_build_if_state *ctx) ifthen->true_block, ifthen->merge_block); } - /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); - LLVMBuildBr(ctx->builder, ifthen->merge_block); + /* Insert branch from end of true_block to merge_block */ if (ifthen->false_block) { - /* Append an unconditional Br(anch) instruction on the false_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); LLVMBuildBr(ctx->builder, ifthen->merge_block); } - + else { + /* No else clause. + * Note that we've already inserted the branch at the end of + * true_block. See the very first LLVMBuildBr() call in this function. + */ + } /* Resume building code at end of the ifthen->merge_block */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 4c225a0d4f..e158836549 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -35,7 +35,7 @@ #define LP_BLD_FLOW_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; @@ -145,7 +145,9 @@ lp_build_else(struct lp_build_if_state *ctx); void lp_build_endif(struct lp_build_if_state *ctx); - + +LLVMBasicBlockRef +lp_build_insert_new_block(LLVMBuilderRef builder, const char *name); #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 970bee379f..8972c0dc17 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -34,7 +34,7 @@ * Pixel format helpers. */ -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h index ca958cdf34..177b5e943e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h @@ -41,7 +41,7 @@ #define LP_BLD_INTERP_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index f813f27074..7d5506c733 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -37,7 +37,7 @@ #define LP_BLD_INTR_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" /** diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 7c585fda78..f3df3dd138 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -42,6 +42,26 @@ #include "lp_bld_logic.h" +/* + * XXX + * + * Selection with vector conditional like + * + * select <4 x i1> %C, %A, %B + * + * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not + * supported on any backend. + * + * Expanding the boolean vector to full SIMD register width, as in + * + * sext <4 x i1> %C to <4 x i32> + * + * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but + * it causes assertion failures in LLVM 2.6. It appears to work correctly on + * LLVM 2.7. + */ + + /** * Build code to compare two values 'a' and 'b' of 'type' using the given func. * \param func one of PIPE_FUNC_x @@ -54,13 +74,11 @@ lp_build_compare(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b) { - LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); LLVMValueRef cond; LLVMValueRef res; - unsigned i; assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); @@ -74,10 +92,12 @@ lp_build_compare(LLVMBuilderRef builder, /* XXX: It is not clear if we should use the ordered or unordered operators */ +#if HAVE_LLVM < 0x0207 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating && util_cpu_caps.has_sse) { /* float[4] comparison */ + LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -147,6 +167,7 @@ lp_build_compare(LLVMBuilderRef builder, const char *pcmpgt; LLVMValueRef args[2]; LLVMValueRef res; + LLVMTypeRef vec_type = lp_build_vec_type(type); switch (type.width) { case 8: @@ -200,6 +221,7 @@ lp_build_compare(LLVMBuilderRef builder, } } /* if (type.width * type.length == 128) */ #endif +#endif /* HAVE_LLVM < 0x0207 */ if(type.floating) { LLVMRealPredicate op; @@ -233,16 +255,19 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ +#if HAVE_LLVM >= 0x0207 cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); #else - res = LLVMGetUndef(int_vec_type); if (type.length == 1) { - res = LLVMBuildFCmp(builder, op, a, b, ""); + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); } else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + debug_printf("%s: warning: using slow element-wise float" " vector comparison\n", __FUNCTION__); for (i = 0; i < type.length; ++i) { @@ -286,16 +311,19 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ +#if HAVE_LLVM >= 0x0207 cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); #else - res = LLVMGetUndef(int_vec_type); if (type.length == 1) { - res = LLVMBuildICmp(builder, op, a, b, ""); + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); } else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + debug_printf("%s: warning: using slow element-wise int" " vector comparison\n", __FUNCTION__); @@ -337,6 +365,8 @@ lp_build_cmp(struct lp_build_context *bld, /** * Return mask ? a : b; + * + * mask is a bitwise mask, composed of 0 or ~0 for each element. */ LLVMValueRef lp_build_select(struct lp_build_context *bld, @@ -351,6 +381,7 @@ lp_build_select(struct lp_build_context *bld, return a; if (type.length == 1) { + mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); res = LLVMBuildSelect(bld->builder, mask, a, b, ""); } else { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index a399ebf39e..b54ec13b70 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -37,7 +37,7 @@ #define LP_BLD_LOGIC_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 4c61d10749..23398f41f9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -256,7 +256,7 @@ lp_build_pack2(LLVMBuilderRef builder, LLVMValueRef lo, LLVMValueRef hi) { -#if !(HAVE_LLVM >= 0x0207) +#if HAVE_LLVM < 0x0207 LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); #endif LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index fb2a34984a..346a17d580 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -37,7 +37,7 @@ #define LP_BLD_PACK_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 311c9f1b9e..543fd5fea3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -155,14 +155,16 @@ lp_build_gather(LLVMBuilderRef builder, /** * Compute the offset of a pixel. * - * x, y, y_stride are vectors + * x, y, z, y_stride, z_stride are vectors */ LLVMValueRef lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, - LLVMValueRef y_stride) + LLVMValueRef z, + LLVMValueRef y_stride, + LLVMValueRef z_stride) { LLVMValueRef x_stride; LLVMValueRef offset; @@ -178,6 +180,10 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef y_offset_lo, y_offset_hi; LLVMValueRef offset_lo, offset_hi; + /* XXX 1D & 3D addressing not done yet */ + assert(!z); + assert(!z_stride); + x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); @@ -201,13 +207,17 @@ lp_build_sample_offset(struct lp_build_context *bld, offset = lp_build_add(bld, offset_hi, offset_lo); } else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; + offset = lp_build_mul(bld, x, x_stride); - x_offset = lp_build_mul(bld, x, x_stride); - y_offset = lp_build_mul(bld, y, y_stride); + if (y && y_stride) { + LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); + offset = lp_build_add(bld, offset, y_offset); + } - offset = lp_build_add(bld, x_offset, y_offset); + if (z && z_stride) { + LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); + offset = lp_build_add(bld, offset, z_offset); + } } return offset; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 68db91d6fd..7f08bfaac1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -36,7 +36,7 @@ #define LP_BLD_SAMPLE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct pipe_texture; struct pipe_sampler_state; @@ -113,9 +113,9 @@ struct lp_sampler_dynamic_state unsigned unit); LLVMValueRef - (*stride)( struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, - unsigned unit); + (*row_stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); LLVMValueRef (*data_ptr)( struct lp_sampler_dynamic_state *state, @@ -148,7 +148,9 @@ lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, - LLVMValueRef y_stride); + LLVMValueRef z, + LLVMValueRef y_stride, + LLVMValueRef z_stride); void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index a965d394f4..72018737a8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -48,6 +48,7 @@ #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" +#include "lp_bld_flow.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" @@ -139,7 +140,55 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, /** - * Gen code to fetch a texel from a texture at int coords (x, y). + * Dereference stride_array[mipmap_level] array to get a stride. + * Return stride as a vector. + */ +static LLVMValueRef +lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, + LLVMValueRef stride_array, LLVMValueRef level) +{ + LLVMValueRef indexes[2], stride; + indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[1] = level; + stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, ""); + stride = LLVMBuildLoad(bld->builder, stride, ""); + stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); + return stride; +} + + +/** Dereference stride_array[0] array to get a stride (as vector). */ +static LLVMValueRef +lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld, + LLVMValueRef stride_array, int level) +{ + LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); + return lp_build_get_level_stride_vec(bld, stride_array, lvl); +} + + +static int +texture_dims(enum pipe_texture_target tex) +{ + switch (tex) { + case PIPE_TEXTURE_1D: + return 1; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + return 2; + case PIPE_TEXTURE_3D: + return 3; + default: + assert(0 && "bad texture target in texture_dims()"); + return 2; + } +} + + + +/** + * Generate code to fetch a texel from a texture at int coords (x, y, z). + * The computation depends on whether the texture is 1D, 2D or 3D. * The result, texel, will be: * texel[0] = red values * texel[1] = green values @@ -150,12 +199,16 @@ static void lp_build_sample_texel_soa(struct lp_build_sample_context *bld, LLVMValueRef width, LLVMValueRef height, + LLVMValueRef depth, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, + LLVMValueRef z_stride, LLVMValueRef data_ptr, LLVMValueRef *texel) { + const int dims = texture_dims(bld->static_state->target); struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef offset; LLVMValueRef packed; @@ -169,7 +222,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); } - if (wrap_mode_uses_border_color(bld->static_state->wrap_t)) { + if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) { LLVMValueRef b1, b2; b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); @@ -182,6 +235,19 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } } + if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) { + LLVMValueRef b1, b2; + b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); + b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); + if (use_border) { + use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); + use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2"); + } + else { + use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); + } + } + /* * Note: if we find an app which frequently samples the texture border * we might want to implement a true conditional here to avoid sampling @@ -197,10 +263,10 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * the texel color results with the border color. */ - /* convert x,y coords to linear offset from start of texture, in bytes */ + /* convert x,y,z coords to linear offset from start of texture, in bytes */ offset = lp_build_sample_offset(&bld->uint_coord_bld, bld->format_desc, - x, y, y_stride); + x, y, z, y_stride, z_stride); assert(bld->format_desc->block.width == 1); assert(bld->format_desc->block.height == 1); @@ -213,6 +279,8 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, bld->texel_type.width, data_ptr, offset); + texel[0] = texel[1] = texel[2] = texel[3] = NULL; + /* convert texels to float rgba */ lp_build_unpack_rgba_soa(bld->builder, bld->format_desc, @@ -245,7 +313,7 @@ lp_build_sample_packed(struct lp_build_sample_context *bld, offset = lp_build_sample_offset(&bld->uint_coord_bld, bld->format_desc, - x, y, y_stride); + x, y, NULL, y_stride, NULL); assert(bld->format_desc->block.width == 1); assert(bld->format_desc->block.height == 1); @@ -756,24 +824,6 @@ lp_build_minify(struct lp_build_sample_context *bld, } -static int -texture_dims(enum pipe_texture_target tex) -{ - switch (tex) { - case PIPE_TEXTURE_1D: - return 1; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_CUBE: - return 2; - case PIPE_TEXTURE_3D: - return 3; - default: - assert(0 && "bad texture target in texture_dims()"); - return 2; - } -} - - /** * Generate code to compute texture level of detail (lambda). * \param s vector of texcoord s values @@ -794,7 +844,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, { const int dims = texture_dims(bld->static_state->target); - struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias); LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); @@ -921,161 +970,635 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, LLVMValueRef *level1_out, LLVMValueRef *weight_out) { - struct lp_build_context *coord_bld = &bld->coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; LLVMValueRef last_level, level; last_level = bld->dynamic_state->last_level(bld->dynamic_state, bld->builder, unit); /* convert float lod to integer */ - level = lp_build_ifloor(coord_bld, lod); + level = lp_build_ifloor(float_bld, lod); /* compute level 0 and clamp to legal range of levels */ - *level0_out = lp_build_clamp(int_coord_bld, level, - int_coord_bld->zero, + *level0_out = lp_build_clamp(int_bld, level, + int_bld->zero, last_level); /* compute level 1 and clamp to legal range of levels */ - *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one); - *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero); + *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one); + *level1_out = lp_build_min(int_bld, *level1_out, int_bld->zero); - *weight_out = lp_build_fract(coord_bld, lod); + *weight_out = lp_build_fract(float_bld, lod); } - /** - * Sample 2D texture with nearest filtering, no mipmapping. + * Generate code to sample a mipmap level with nearest filtering. + * If sampling a cube texture, r = cube face in [0,5]. */ static void -lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_array, - LLVMValueRef *texel) +lp_build_sample_image_nearest(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef colors_out[4]) { - LLVMValueRef x, y; - LLVMValueRef data_ptr; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef x, y, z; - x = lp_build_sample_wrap_nearest(bld, s, width, + /* + * Compute integer texcoords. + */ + x = lp_build_sample_wrap_nearest(bld, s, width_vec, bld->static_state->pot_width, bld->static_state->wrap_s); - y = lp_build_sample_wrap_nearest(bld, t, height, - bld->static_state->pot_height, - bld->static_state->wrap_t); - lp_build_name(x, "tex.x.wrapped"); - lp_build_name(y, "tex.y.wrapped"); - /* get pointer to mipmap level 0 data */ - data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); + if (dims >= 2) { + y = lp_build_sample_wrap_nearest(bld, t, height_vec, + bld->static_state->pot_height, + bld->static_state->wrap_t); + lp_build_name(y, "tex.y.wrapped"); + + if (dims == 3) { + z = lp_build_sample_wrap_nearest(bld, r, depth_vec, + bld->static_state->pot_height, + bld->static_state->wrap_r); + lp_build_name(z, "tex.z.wrapped"); + } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + z = r; + } + else { + z = NULL; + } + } + else { + y = z = NULL; + } - lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel); + /* + * Get texture colors. + */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x, y, z, + row_stride_vec, img_stride_vec, + data_ptr, colors_out); } /** - * Sample 2D texture with nearest filtering, nearest mipmap. + * Generate code to sample a mipmap level with linear filtering. + * If sampling a cube texture, r = cube face in [0,5]. */ static void -lp_build_sample_2d_nearest_mip_nearest_soa(struct lp_build_sample_context *bld, - unsigned unit, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef stride, - LLVMValueRef data_array, - LLVMValueRef *texel) +lp_build_sample_image_linear(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef colors_out[4]) { - LLVMValueRef x, y; - LLVMValueRef lod, ilevel, ilevel_vec; - LLVMValueRef data_ptr; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef x0, y0, z0, x1, y1, z1; + LLVMValueRef s_fpart, t_fpart, r_fpart; + LLVMValueRef neighbors[2][2][4]; + int chan; - /* compute float LOD */ - lod = lp_build_lod_selector(bld, s, t, NULL, width, height, NULL); + /* + * Compute integer texcoords. + */ + lp_build_sample_wrap_linear(bld, s, width_vec, + bld->static_state->pot_width, + bld->static_state->wrap_s, + &x0, &x1, &s_fpart); + lp_build_name(x0, "tex.x0.wrapped"); + lp_build_name(x1, "tex.x1.wrapped"); + + if (dims >= 2) { + lp_build_sample_wrap_linear(bld, t, height_vec, + bld->static_state->pot_height, + bld->static_state->wrap_t, + &y0, &y1, &t_fpart); + lp_build_name(y0, "tex.y0.wrapped"); + lp_build_name(y1, "tex.y1.wrapped"); + + if (dims == 3) { + lp_build_sample_wrap_linear(bld, r, depth_vec, + bld->static_state->pot_depth, + bld->static_state->wrap_r, + &z0, &z1, &r_fpart); + lp_build_name(z0, "tex.z0.wrapped"); + lp_build_name(z1, "tex.z1.wrapped"); + } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + z0 = z1 = r; /* cube face */ + r_fpart = NULL; + } + else { + z0 = z1 = NULL; + r_fpart = NULL; + } + } + else { + y0 = y1 = t_fpart = NULL; + z0 = z1 = r_fpart = NULL; + } - /* convert LOD to int */ - lp_build_nearest_mip_level(bld, unit, lod, &ilevel); + /* + * Get texture colors. + */ + /* get x0/x1 texels */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y0, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y0, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[0][1]); + + if (dims == 1) { + /* Interpolate two samples from 1D image to produce one color */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan]); + } + } + else { + /* 2D/3D texture */ + LLVMValueRef colors0[4]; + + /* get x0/x1 texels at y1 */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y1, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y1, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[1][1]); + + /* Bilinear interpolate the four samples from the 2D image / 3D slice */ + for (chan = 0; chan < 4; chan++) { + colors0[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } - ilevel_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel); + if (dims == 3) { + LLVMValueRef neighbors1[2][2][4]; + LLVMValueRef colors1[4]; + + /* get x0/x1/y0/y1 texels at z1 */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y0, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[0][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y0, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[0][1]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y1, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[1][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y1, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[1][1]); + + /* Bilinear interpolate the four samples from the second Z slice */ + for (chan = 0; chan < 4; chan++) { + colors1[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors1[0][0][chan], + neighbors1[0][1][chan], + neighbors1[1][0][chan], + neighbors1[1][1][chan]); + } - /* compute width_vec, height at mipmap level 'ilevel' */ - width_vec = lp_build_minify(bld, width_vec, ilevel_vec); - height_vec = lp_build_minify(bld, height_vec, ilevel_vec); - stride = lp_build_minify(bld, stride, ilevel_vec); + /* Linearly interpolate the two samples from the two 3D slices */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, + r_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* 2D tex */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } + } + } +} - x = lp_build_sample_wrap_nearest(bld, s, width_vec, - bld->static_state->pot_width, - bld->static_state->wrap_s); - y = lp_build_sample_wrap_nearest(bld, t, height_vec, - bld->static_state->pot_height, - bld->static_state->wrap_t); - lp_build_name(x, "tex.x.wrapped"); - lp_build_name(y, "tex.y.wrapped"); +/** Helper used by lp_build_cube_lookup() */ +static LLVMValueRef +lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) +{ + /* ima = -0.5 / abs(coord); */ + LLVMValueRef negHalf = lp_build_const_scalar(coord_bld->type, -0.5); + LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); + LLVMValueRef ima = lp_build_mul(coord_bld, negHalf, + lp_build_rcp(coord_bld, absCoord)); + return ima; +} - /* get pointer to mipmap level [ilevel] data */ - if (0) - data_ptr = lp_build_get_mipmap_level(bld, data_array, ilevel); - else - data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); - lp_build_sample_texel_soa(bld, width_vec, height_vec, x, y, stride, data_ptr, texel); +/** + * Helper used by lp_build_cube_lookup() + * \param sign scalar +1 or -1 + * \param coord float vector + * \param ima float vector + */ +static LLVMValueRef +lp_build_cube_coord(struct lp_build_context *coord_bld, + LLVMValueRef sign, int negate_coord, + LLVMValueRef coord, LLVMValueRef ima) +{ + /* return negate(coord) * ima * sign + 0.5; */ + LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5); + LLVMValueRef res; + + assert(negate_coord == +1 || negate_coord == -1); + + if (negate_coord == -1) { + coord = lp_build_negate(coord_bld, coord); + } + + res = lp_build_mul(coord_bld, coord, ima); + if (sign) { + sign = lp_build_broadcast_scalar(coord_bld, sign); + res = lp_build_mul(coord_bld, res, sign); + } + res = lp_build_add(coord_bld, res, half); + + return res; } +/** Helper used by lp_build_cube_lookup() + * Return (major_coord >= 0) ? pos_face : neg_face; + */ +static LLVMValueRef +lp_build_cube_face(struct lp_build_sample_context *bld, + LLVMValueRef major_coord, + unsigned pos_face, unsigned neg_face) +{ + LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + major_coord, + bld->float_bld.zero, ""); + LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0); + LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0); + LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, ""); + return res; +} + + + /** - * Sample 2D texture with bilinear filtering. + * Generate code to do cube face selection and per-face texcoords. */ static void -lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_array, - LLVMValueRef *texel) +lp_build_cube_lookup(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef *face, + LLVMValueRef *face_s, + LLVMValueRef *face_t) { - LLVMValueRef s_fpart; - LLVMValueRef t_fpart; - LLVMValueRef x0, x1; - LLVMValueRef y0, y1; - LLVMValueRef neighbors[2][2][4]; - LLVMValueRef data_ptr; - unsigned chan; + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *coord_bld = &bld->coord_bld; + LLVMValueRef rx, ry, rz; + LLVMValueRef arx, ary, arz; + LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); + LLVMValueRef arx_ge_ary, arx_ge_arz; + LLVMValueRef ary_ge_arx, ary_ge_arz; + LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; + LLVMValueRef rx_pos, ry_pos, rz_pos; - lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width, - bld->static_state->wrap_s, &x0, &x1, &s_fpart); - lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height, - bld->static_state->wrap_t, &y0, &y1, &t_fpart); + assert(bld->coord_bld.type.length == 4); - /* get pointer to mipmap level 0 data */ - data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); + /* + * Use the average of the four pixel's texcoords to choose the face. + */ + rx = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, s)); + ry = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, t)); + rz = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, r)); - lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]); + arx = lp_build_abs(float_bld, rx); + ary = lp_build_abs(float_bld, ry); + arz = lp_build_abs(float_bld, rz); - /* TODO: Don't interpolate missing channels */ - for(chan = 0; chan < 4; ++chan) { - texel[chan] = lp_build_lerp_2d(&bld->texel_bld, - s_fpart, t_fpart, - neighbors[0][0][chan], - neighbors[0][1][chan], - neighbors[1][0][chan], - neighbors[1][1][chan]); + /* + * Compare sign/magnitude of rx,ry,rz to determine face + */ + arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, ""); + arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, ""); + ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, ""); + ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, ""); + + arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, ""); + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, ""); + ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, ""); + rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); + + { + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + *face_s = bld->coord_bld.undef; + *face_t = bld->coord_bld.undef; + *face = bld->int_bld.undef; + + lp_build_name(*face_s, "face_s"); + lp_build_name(*face_t, "face_t"); + lp_build_name(*face, "face"); + + lp_build_flow_scope_declare(flow_ctx, face_s); + lp_build_flow_scope_declare(flow_ctx, face_t); + lp_build_flow_scope_declare(flow_ctx, face); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); + { + /* +/- X face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rx); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, s); + *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima); + *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + *face = lp_build_cube_face(bld, rx, + PIPE_TEX_FACE_POS_X, + PIPE_TEX_FACE_NEG_X); + } + lp_build_else(&if_ctx); + { + struct lp_build_flow_context *flow_ctx2; + struct lp_build_if_state if_ctx2; + + LLVMValueRef face_s2 = bld->coord_bld.undef; + LLVMValueRef face_t2 = bld->coord_bld.undef; + LLVMValueRef face2 = bld->int_bld.undef; + + flow_ctx2 = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx2); + lp_build_flow_scope_declare(flow_ctx2, &face_s2); + lp_build_flow_scope_declare(flow_ctx2, &face_t2); + lp_build_flow_scope_declare(flow_ctx2, &face2); + + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); + { + /* +/- Y face */ + LLVMValueRef sign = lp_build_sgn(float_bld, ry); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); + face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); + face2 = lp_build_cube_face(bld, ry, + PIPE_TEX_FACE_POS_Y, + PIPE_TEX_FACE_NEG_Y); + } + lp_build_else(&if_ctx2); + { + /* +/- Z face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rz); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); + face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + face2 = lp_build_cube_face(bld, rz, + PIPE_TEX_FACE_POS_Z, + PIPE_TEX_FACE_NEG_Z); + } + lp_build_endif(&if_ctx2); + lp_build_flow_scope_end(flow_ctx2); + lp_build_flow_destroy(flow_ctx2); + + *face_s = face_s2; + *face_t = face_t2; + *face = face2; + } + + lp_build_endif(&if_ctx); + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + } +} + + + +/** + * General texture sampling codegen. + * This function handles texture sampling for all texture targets (1D, + * 2D, 3D, cube) and all filtering modes. + */ +static void +lp_build_sample_general(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_array, + LLVMValueRef img_stride_vec, + LLVMValueRef data_array, + LLVMValueRef *colors_out) +{ + const unsigned mip_filter = bld->static_state->min_mip_filter; + const unsigned min_filter = bld->static_state->min_img_filter; + const unsigned mag_filter = bld->static_state->mag_img_filter; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef lod, lod_fpart; + LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec; + LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; + LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; + LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; + LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; + LLVMValueRef data_ptr0, data_ptr1; + int chan; + + /* + printf("%s mip %d min %d mag %d\n", __FUNCTION__, + mip_filter, min_filter, mag_filter); + */ + + /* + * Compute the level of detail (mipmap level index(es)). + */ + if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* always use mip level 0 */ + ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + } + else { + /* compute float LOD */ + lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); + + if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + } + else { + assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR); + lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, + &lod_fpart); + lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart); + } + } + + /* + * Convert scalar integer mipmap levels into vectors. + */ + ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) + ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); + + /* + * Compute width, height at mipmap level 'ilevel0' + */ + width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); + if (dims >= 2) { + height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); + row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array, + ilevel0); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { + img_stride0_vec = lp_build_mul(&bld->int_coord_bld, + row_stride0_vec, height0_vec); + if (dims == 3) { + depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); + } + } + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* compute width, height, depth for second mipmap level at ilevel1 */ + width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); + if (dims >= 2) { + height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); + row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array, + ilevel1); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { + img_stride1_vec = lp_build_mul(&bld->int_coord_bld, + row_stride1_vec, height1_vec); + if (dims ==3) { + depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); + } + } + } + } + + /* + * Choose cube face, recompute texcoords. + */ + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + LLVMValueRef face, face_s, face_t; + lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); + s = face_s; /* vec */ + t = face_t; /* vec */ + /* use 'r' to indicate cube face */ + r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ + } + + /* + * Get pointer(s) to image data for mipmap level(s). + */ + data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); + } + + /* + * Get/interpolate texture colors. + */ + /* XXX temporarily force this path: */ + if (1 /*min_filter == mag_filter*/) { + /* same filter for minification or magnification */ + LLVMValueRef colors0[4], colors1[4]; + + if (min_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_nearest(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + else { + assert(min_filter == PIPE_TEX_FILTER_LINEAR); + + lp_build_sample_image_linear(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_linear(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* interpolate samples from the two mipmap levels */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* use first/only level's colors */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } + } + } + else { + /* emit conditional to choose min image filter or mag image filter + * depending on the lod being >0 or <= 0, respectively. + */ + abort(); } } + static void lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, struct lp_type dst_type, @@ -1112,7 +1635,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef t, LLVMValueRef width, LLVMValueRef height, - LLVMValueRef stride, + LLVMValueRef stride_array, LLVMValueRef data_array, LLVMValueRef *texel) { @@ -1129,6 +1652,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef neighbors_hi[2][2]; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; + LLVMValueRef stride; lp_build_context_init(&i32, builder, lp_type_int_vec(32)); lp_build_context_init(&h16, builder, lp_type_ufixed(16)); @@ -1236,6 +1760,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); } + stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0); + /* * Fetch the pixels as 4 x 32bit (rgba order might differ): * @@ -1359,12 +1885,12 @@ lp_build_sample_soa(LLVMBuilderRef builder, struct lp_build_sample_context bld; LLVMValueRef width, width_vec; LLVMValueRef height, height_vec; - LLVMValueRef stride, stride_vec; + LLVMValueRef depth, depth_vec; + LLVMValueRef stride_array; LLVMValueRef data_array; LLVMValueRef s; LLVMValueRef t; LLVMValueRef r; - boolean done = FALSE; (void) lp_build_lod_selector; /* temporary to silence warning */ (void) lp_build_nearest_mip_level; @@ -1395,7 +1921,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, /* Get the dynamic state */ width = dynamic_state->width(dynamic_state, builder, unit); height = dynamic_state->height(dynamic_state, builder, unit); - stride = dynamic_state->stride(dynamic_state, builder, unit); + depth = dynamic_state->depth(dynamic_state, builder, unit); + stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); /* Note that data_array is an array[level] of pointers to texture images */ @@ -1405,58 +1932,26 @@ lp_build_sample_soa(LLVMBuilderRef builder, width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); - stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride); - - if(static_state->target == PIPE_TEXTURE_1D) - t = bld.coord_bld.zero; - - switch (static_state->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NONE: - break; - case PIPE_TEX_MIPFILTER_NEAREST: - - switch (static_state->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - lp_build_sample_2d_nearest_mip_nearest_soa(&bld, unit, - s, t, - width, height, - width_vec, height_vec, - stride_vec, - data_array, texel); - done = TRUE; - break; - } - - break; - case PIPE_TEX_MIPFILTER_LINEAR: - break; - default: - assert(0 && "invalid mip filter"); + depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); + + if (lp_format_is_rgba8(bld.format_desc) && + static_state->target == PIPE_TEXTURE_2D && + static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && + is_simple_wrap_mode(static_state->wrap_s) && + is_simple_wrap_mode(static_state->wrap_t)) { + /* special case */ + lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, + stride_array, data_array, texel); } - - if (!done) { - switch (static_state->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - lp_build_sample_2d_nearest_soa(&bld, s, t, width_vec, height_vec, - stride_vec, data_array, texel); - break; - case PIPE_TEX_FILTER_LINEAR: - if(lp_format_is_rgba8(bld.format_desc) && - is_simple_wrap_mode(static_state->wrap_s) && - is_simple_wrap_mode(static_state->wrap_t)) - lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, - stride_vec, data_array, texel); - else - lp_build_sample_2d_linear_soa(&bld, s, t, width_vec, height_vec, - stride_vec, data_array, texel); - break; - default: - assert(0); - } + else { + lp_build_sample_general(&bld, unit, s, t, r, + width, height, depth, + width_vec, height_vec, depth_vec, + stride_array, NULL, data_array, + texel); } - /* FIXME: respect static_state->min_mip_filter */; - /* FIXME: respect static_state->mag_img_filter */; - lp_build_sample_compare(&bld, r, texel); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index 740392f561..34478c10f5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -37,7 +37,7 @@ #define LP_BLD_STRUCT_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <llvm-c/Target.h> #include "util/u_debug.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index b9472127a6..57b5cc079f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -37,7 +37,7 @@ #define LP_BLD_SWIZZLE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index eddb7a83fa..0f2f8a65b1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -35,7 +35,7 @@ #ifndef LP_BLD_TGSI_H #define LP_BLD_TGSI_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct tgsi_token; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index fbb664d43a..5ec59d636c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -41,6 +41,7 @@ #include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -95,6 +96,19 @@ struct lp_exec_mask { int cond_stack_size; LLVMValueRef cond_mask; + LLVMValueRef break_stack[LP_TGSI_MAX_NESTING]; + int break_stack_size; + LLVMValueRef break_mask; + + LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING]; + int cont_stack_size; + LLVMValueRef cont_mask; + + LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING]; + int loop_stack_size; + LLVMBasicBlockRef loop_block; + + LLVMValueRef exec_mask; }; @@ -145,14 +159,33 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context mask->bld = bld; mask->has_mask = FALSE; mask->cond_stack_size = 0; + mask->loop_stack_size = 0; + mask->break_stack_size = 0; + mask->cont_stack_size = 0; mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); } static void lp_exec_mask_update(struct lp_exec_mask *mask) { - mask->exec_mask = mask->cond_mask; - mask->has_mask = (mask->cond_stack_size > 0); + if (mask->loop_stack_size) { + /*for loops we need to update the entire mask at + * runtime */ + LLVMValueRef tmp; + tmp = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + mask->break_mask, + "maskcb"); + mask->exec_mask = LLVMBuildAnd(mask->bld->builder, + mask->cond_mask, + tmp, + "maskfull"); + } else + mask->exec_mask = mask->cond_mask; + + + mask->has_mask = (mask->cond_stack_size > 0 || + mask->loop_stack_size > 0); } static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, @@ -189,6 +222,89 @@ static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) lp_exec_mask_update(mask); } +static void lp_exec_bgnloop(struct lp_exec_mask *mask) +{ + + if (mask->cont_stack_size == 0) + mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type); + if (mask->cont_stack_size == 0) + mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); + if (mask->cond_stack_size == 0) + mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); + mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; + mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); + LLVMBuildBr(mask->bld->builder, mask->loop_block); + LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); + + lp_exec_mask_update(mask); +} + +static void lp_exec_break(struct lp_exec_mask *mask) +{ + LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + "break"); + + mask->break_stack[mask->break_stack_size++] = mask->break_mask; + if (mask->break_stack_size > 1) { + mask->break_mask = LLVMBuildAnd(mask->bld->builder, + mask->break_mask, + exec_mask, "break_full"); + } else + mask->break_mask = exec_mask; + + lp_exec_mask_update(mask); +} + +static void lp_exec_continue(struct lp_exec_mask *mask) +{ + LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + ""); + + mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; + if (mask->cont_stack_size > 1) { + mask->cont_mask = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + exec_mask, ""); + } else + mask->cont_mask = exec_mask; + + lp_exec_mask_update(mask); +} + + +static void lp_exec_endloop(struct lp_exec_mask *mask) +{ + LLVMBasicBlockRef endloop; + LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* + mask->bld->type.length); + /* i1cond = (mask == 0) */ + LLVMValueRef i1cond = LLVMBuildICmp( + mask->bld->builder, + LLVMIntNE, + LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""), + LLVMConstNull(reg_type), ""); + + endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); + + LLVMBuildCondBr(mask->bld->builder, + i1cond, mask->loop_block, endloop); + + LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); + + mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; + /* pop the break mask */ + if (mask->cont_stack_size) { + mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; + } + if (mask->break_stack_size) { + mask->break_mask = mask->cont_stack[--mask->break_stack_size]; + } + + lp_exec_mask_update(mask); +} + static void lp_exec_mask_store(struct lp_exec_mask *mask, LLVMValueRef val, LLVMValueRef dst) @@ -1363,14 +1479,15 @@ emit_instruction( case TGSI_OPCODE_TXP: emit_tex( bld, inst, FALSE, TRUE, dst0 ); break; - + case TGSI_OPCODE_BRK: - /* FIXME */ - return 0; + lp_exec_break(&bld->exec_mask); break; case TGSI_OPCODE_IF: tmp0 = emit_fetch(bld, inst, 0, CHAN_X); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, + tmp0, bld->base.zero); lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; @@ -1380,6 +1497,10 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_BGNLOOP: + lp_exec_bgnloop(&bld->exec_mask); + break; + case TGSI_OPCODE_REP: /* deprecated */ assert(0); @@ -1400,6 +1521,10 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_ENDLOOP: + lp_exec_endloop(&bld->exec_mask); + break; + case TGSI_OPCODE_ENDREP: /* deprecated */ assert(0); @@ -1499,8 +1624,7 @@ emit_instruction( break; case TGSI_OPCODE_CONT: - /* FIXME */ - return 0; + lp_exec_continue(&bld->exec_mask); break; case TGSI_OPCODE_EMIT: @@ -1603,7 +1727,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert( 0 ); } } - + if (0) { + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + debug_printf("11111111111111111111111111111 \n"); + tgsi_dump(tokens, 0); + LLVMDumpValue(function); + debug_printf("2222222222222222222222222222 \n"); + } tgsi_parse_free( &parse ); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index c327ba045a..796af88caa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -58,7 +58,10 @@ LLVMTypeRef lp_build_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_elem_type(type); - return LLVMVectorType(elem_type, type.length); + if (type.length == 1) + return elem_type; + else + return LLVMVectorType(elem_type, type.length); } @@ -115,6 +118,9 @@ lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) if(!vec_type) return FALSE; + if (type.length == 1) + return lp_check_elem_type(type, vec_type); + if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) return FALSE; @@ -153,7 +159,10 @@ LLVMTypeRef lp_build_int_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); - return LLVMVectorType(elem_type, type.length); + if (type.length == 1) + return elem_type; + else + return LLVMVectorType(elem_type, type.length); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index 4daa904e63..5b351476ac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -37,7 +37,7 @@ #define LP_BLD_TYPE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <pipe/p_compiler.h> |