diff options
Diffstat (limited to 'src/gallium/auxiliary')
39 files changed, 1654 insertions, 106 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index eb86d83d2a..5388f4ecd5 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -171,6 +171,7 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_sample_soa.c \ gallivm/lp_bld_struct.c \ gallivm/lp_bld_swizzle.c \ + gallivm/lp_bld_tgsi_aos.c \ gallivm/lp_bld_tgsi_soa.c \ gallivm/lp_bld_type.c \ draw/draw_llvm.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 6210ada990..ba8be2efd1 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -222,6 +222,7 @@ if env['llvm']: 'gallivm/lp_bld_sample_soa.c', 'gallivm/lp_bld_struct.c', 'gallivm/lp_bld_swizzle.c', + 'gallivm/lp_bld_tgsi_aos.c', 'gallivm/lp_bld_tgsi_soa.c', 'gallivm/lp_bld_type.c', 'draw/draw_llvm.c', diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 4c780e4dcb..4f0d30123a 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -39,6 +39,7 @@ #include "pipe/p_state.h" +#include "tgsi/tgsi_exec.h" struct pipe_context; struct draw_context; @@ -225,4 +226,16 @@ boolean draw_need_pipeline(const struct draw_context *draw, const struct pipe_rasterizer_state *rasterizer, unsigned prim ); +static INLINE int +draw_get_shader_param(unsigned shader, enum pipe_cap param) +{ + switch(shader) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_GEOMETRY: + return tgsi_exec_get_shader_param(param); + default: + return 0; + } +} + #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index c0135f5bb7..eac21110be 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -373,8 +373,7 @@ generate_aaline_fs(struct aaline_stage *aaline) aaline->fs->sampler_unit = transform.freeSampler; - aaline->fs->aaline_fs - = aaline->driver_create_fs_state(pipe, &aaline_fs); + aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs); if (aaline->fs->aaline_fs == NULL) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 1911242f82..68e8295b5e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -32,6 +32,8 @@ #define DRAW_VS_AOS_H #include "pipe/p_config.h" +#include "tgsi/tgsi_exec.h" +#include "draw_vs.h" #ifdef PIPE_ARCH_X86 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index e0d30be98d..dce3c3745b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -56,6 +56,7 @@ #include "lp_bld_intr.h" #include "lp_bld_logic.h" #include "lp_bld_pack.h" +#include "lp_bld_debug.h" #include "lp_bld_arit.h" @@ -1850,9 +1851,11 @@ lp_build_pow(struct lp_build_context *bld, LLVMValueRef y) { /* TODO: optimize the constant case */ - if(LLVMIsConstant(x) && LLVMIsConstant(y)) + if (gallivm_debug & GALLIVM_DEBUG_PERF && + LLVMIsConstant(x) && LLVMIsConstant(y)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); + } return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y)); } @@ -1907,9 +1910,11 @@ lp_build_polynomial(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); /* TODO: optimize the constant case */ - if(LLVMIsConstant(x)) + if (gallivm_debug & GALLIVM_DEBUG_PERF && + LLVMIsConstant(x)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); + } for (i = num_coeffs; i--; ) { LLVMValueRef coeff; @@ -1981,9 +1986,11 @@ lp_build_exp2_approx(struct lp_build_context *bld, if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ - if(LLVMIsConstant(x)) + if (gallivm_debug & GALLIVM_DEBUG_PERF && + LLVMIsConstant(x)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); + } assert(type.floating && type.width == 32); @@ -2096,9 +2103,11 @@ lp_build_log2_approx(struct lp_build_context *bld, if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ - if(LLVMIsConstant(x)) + if (gallivm_debug & GALLIVM_DEBUG_PERF && + LLVMIsConstant(x)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); + } assert(type.floating && type.width == 32); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index e42ff31ac7..dd839c0bea 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -382,9 +382,12 @@ lp_build_const_aos(struct lp_type type, } +/** + * @param mask TGSI_WRITEMASK_xxx + */ LLVMValueRef lp_build_const_mask_aos(struct lp_type type, - const boolean cond[4]) + unsigned mask) { LLVMTypeRef elem_type = LLVMIntType(type.width); LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; @@ -392,9 +395,13 @@ lp_build_const_mask_aos(struct lp_type type, assert(type.length <= LP_MAX_VECTOR_LENGTH); - for(j = 0; j < type.length; j += 4) - for(i = 0; i < 4; ++i) - masks[j + i] = LLVMConstInt(elem_type, cond[i] ? ~0 : 0, 0); + for (j = 0; j < type.length; j += 4) { + for( i = 0; i < 4; ++i) { + masks[j + i] = LLVMConstInt(elem_type, + mask & (1 << i) ? ~0ULL : 0, + 1); + } + } return LLVMConstVector(masks, type.length); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index 7ee8fff140..6b1fc590c1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -104,7 +104,7 @@ lp_build_const_aos(struct lp_type type, LLVMValueRef lp_build_const_mask_aos(struct lp_type type, - const boolean cond[4]); + unsigned mask); static INLINE LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 858002b34f..369c1bbf09 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -40,6 +40,7 @@ #define GALLIVM_DEBUG_IR 0x2 #define GALLIVM_DEBUG_ASM 0x4 #define GALLIVM_DEBUG_NO_OPT 0x8 +#define GALLIVM_DEBUG_PERF 0x10 #ifdef DEBUG diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 92123e09d3..6b9189e1da 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -46,6 +46,7 @@ #include "lp_bld_conv.h" #include "lp_bld_swizzle.h" #include "lp_bld_gather.h" +#include "lp_bld_debug.h" #include "lp_bld_format.h" @@ -449,6 +450,11 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, * integer conversions. */ + if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { + debug_printf("%s: unpacking %s with floating point\n", + __FUNCTION__, format_desc->short_name); + } + lp_build_conv(builder, lp_float32_vec4_type(), type, @@ -513,6 +519,10 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", format_desc->short_name); + if (gallivm_debug & GALLIVM_DEBUG_PERF) { + debug_printf("%s: falling back to %s\n", __FUNCTION__, name); + } + /* * Declare and bind format_desc->fetch_rgba_8unorm(). */ @@ -612,6 +622,10 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); + if (gallivm_debug & GALLIVM_DEBUG_PERF) { + debug_printf("%s: falling back to %s\n", __FUNCTION__, name); + } + /* * Declare and bind format_desc->fetch_rgba_float(). */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index c724a4453e..ce7e54afc7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -37,6 +37,7 @@ #include "lp_bld_conv.h" #include "lp_bld_swizzle.h" #include "lp_bld_gather.h" +#include "lp_bld_debug.h" #include "lp_bld_format.h" @@ -387,6 +388,11 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, unsigned k, chan; struct lp_type tmp_type; + if (gallivm_debug & GALLIVM_DEBUG_PERF) { + debug_printf("%s: scalar unpacking of %s\n", + __FUNCTION__, format_desc->short_name); + } + tmp_type = type; tmp_type.length = 4; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 60d8bcfa55..761f33b578 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -43,6 +43,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = { { "ir", GALLIVM_DEBUG_IR, NULL }, { "asm", GALLIVM_DEBUG_ASM, NULL }, { "nopt", GALLIVM_DEBUG_NO_OPT, NULL }, + { "perf", GALLIVM_DEBUG_PERF, NULL }, DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 7d7db3b0d9..a959bd4ad4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -40,6 +40,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_intr.h" +#include "lp_bld_debug.h" #include "lp_bld_logic.h" @@ -325,8 +326,10 @@ lp_build_compare(LLVMBuilderRef builder, res = LLVMGetUndef(int_vec_type); - debug_printf("%s: warning: using slow element-wise int" - " vector comparison\n", __FUNCTION__); + if (gallivm_debug & GALLIVM_DEBUG_PERF) { + debug_printf("%s: using slow element-wise int" + " vector comparison\n", __FUNCTION__); + } for(i = 0; i < type.length; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -482,24 +485,30 @@ lp_build_select(struct lp_build_context *bld, } +/** + * Return mask ? a : b; + * + * mask is a TGSI_WRITEMASK_xxx. + */ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, + unsigned mask, LLVMValueRef a, - LLVMValueRef b, - const boolean cond[4]) + LLVMValueRef b) { const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; + assert((mask & ~0xf) == 0); assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); if(a == b) return a; - if(cond[0] && cond[1] && cond[2] && cond[3]) + if((mask & 0xf) == 0xf) return a; - if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) + if((mask & 0xf) == 0x0) return b; if(a == bld->undef || b == bld->undef) return bld->undef; @@ -522,7 +531,9 @@ lp_build_select_aos(struct lp_build_context *bld, for(j = 0; j < n; j += 4) for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); + shuffles[j + i] = LLVMConstInt(elem_type, + (mask & (1 << i) ? 0 : n) + j + i, + 0); return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); } @@ -531,16 +542,17 @@ lp_build_select_aos(struct lp_build_context *bld, /* XXX: Unfortunately select of vectors do not work */ /* Use a select */ LLVMTypeRef elem_type = LLVMInt1Type(); - LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH]; for(j = 0; j < n; j += 4) for(i = 0; i < 4; ++i) - cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); + cond_vec[j + i] = LLVMConstInt(elem_type, + mask & (1 << i) ? 1 : 0, 0); - return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); + return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, ""); #else - LLVMValueRef mask = lp_build_const_mask_aos(type, cond); - return lp_build_select(bld, mask, a, b); + LLVMValueRef mask_vec = lp_build_const_mask_aos(type, mask); + return lp_build_select(bld, mask_vec, a, b); #endif } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 4e7b4c9938..111daad971 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -77,9 +77,9 @@ lp_build_select(struct lp_build_context *bld, LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, + unsigned mask, LLVMValueRef a, - LLVMValueRef b, - const boolean cond[4]); + LLVMValueRef b); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index b7b630f2e8..f7eb7148ab 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -111,8 +111,6 @@ lp_build_const_pack_shuffle(unsigned n) assert(n <= LP_MAX_VECTOR_LENGTH); - /* TODO: cache results in a static table */ - for(i = 0; i < n; ++i) elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 1f39d9c98b..baf0402f56 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -2190,9 +2190,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_nop(&bld, texel_out); } else if (util_format_fits_8unorm(bld.format_desc) && - bld.format_desc->nr_channels > 1 && (static_state->target == PIPE_TEXTURE_2D || - static_state->target == PIPE_TEXTURE_RECT) && + static_state->target == PIPE_TEXTURE_RECT) && static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && @@ -2203,6 +2202,15 @@ lp_build_sample_soa(LLVMBuilderRef builder, row_stride_array, data_array, texel_out); } else { + if (gallivm_debug & GALLIVM_DEBUG_PERF && + (static_state->min_img_filter != PIPE_TEX_FILTER_NEAREST || + static_state->mag_img_filter != PIPE_TEX_FILTER_NEAREST || + static_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) && + util_format_fits_8unorm(bld.format_desc)) { + debug_printf("%s: using floating point linear filtering for %s\n", + __FUNCTION__, bld.format_desc->short_name); + } + lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, lod_bias, explicit_lod, width, height, depth, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.c b/src/gallium/auxiliary/gallivm/lp_bld_struct.c index 3998ac374f..4693c2de6f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.c @@ -49,6 +49,8 @@ lp_build_struct_get_ptr(LLVMBuilderRef builder, { LLVMValueRef indices[2]; LLVMValueRef member_ptr; + assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); + assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind); indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); @@ -65,8 +67,91 @@ lp_build_struct_get(LLVMBuilderRef builder, { LLVMValueRef member_ptr; LLVMValueRef res; + assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); + assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind); member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); res = LLVMBuildLoad(builder, member_ptr, ""); lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); return res; } + + +LLVMValueRef +lp_build_array_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMValueRef indices[2]; + LLVMValueRef element_ptr; + assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); + assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind); + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = index; + element_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); +#ifdef DEBUG + lp_build_name(element_ptr, "&%s[%s]", + LLVMGetValueName(ptr), LLVMGetValueName(index)); +#endif + return element_ptr; +} + + +LLVMValueRef +lp_build_array_get(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMValueRef element_ptr; + LLVMValueRef res; + assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); + assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind); + element_ptr = lp_build_array_get_ptr(builder, ptr, index); + res = LLVMBuildLoad(builder, element_ptr, ""); +#ifdef DEBUG + lp_build_name(res, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index)); +#endif + return res; +} + + +void +lp_build_array_set(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index, + LLVMValueRef value) +{ + LLVMValueRef element_ptr; + assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); + assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind); + element_ptr = lp_build_array_get_ptr(builder, ptr, index); + LLVMBuildStore(builder, value, element_ptr); +} + + +LLVMValueRef +lp_build_pointer_get(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMValueRef element_ptr; + LLVMValueRef res; + assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); + element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); + res = LLVMBuildLoad(builder, element_ptr, ""); +#ifdef DEBUG + lp_build_name(res, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index)); +#endif + return res; +} + + +void +lp_build_pointer_set(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index, + LLVMValueRef value) +{ + LLVMValueRef element_ptr; + element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); + LLVMBuildStore(builder, value, element_ptr); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index 147336edb4..eb87a8eee9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -71,5 +71,46 @@ lp_build_struct_get(LLVMBuilderRef builder, unsigned member, const char *name); +/** + * Get value pointer to an array element. + */ +LLVMValueRef +lp_build_array_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index); + +/** + * Get the value of an array element. + */ +LLVMValueRef +lp_build_array_get(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index); + +/** + * Set the value of an array element. + */ +void +lp_build_array_set(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index, + LLVMValueRef value); + +/** + * Get the value of an array element. + */ +LLVMValueRef +lp_build_pointer_get(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index); + +/** + * Set the value of an array element. + */ +void +lp_build_pointer_set(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index, + LLVMValueRef value); #endif /* !LP_BLD_STRUCT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 20cf96ca66..d4d2f2b307 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -95,10 +95,13 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, } +/** + * Swizzle one channel into all other three channels. + */ LLVMValueRef -lp_build_broadcast_aos(struct lp_build_context *bld, - LLVMValueRef a, - unsigned channel) +lp_build_swizzle_scalar_aos(struct lp_build_context *bld, + LLVMValueRef a, + unsigned channel) { const struct lp_type type = bld->type; const unsigned n = type.length; @@ -139,13 +142,10 @@ lp_build_broadcast_aos(struct lp_build_context *bld, { 1, -2}, {-1, -2} }; - boolean cond[4]; unsigned i; - memset(cond, 0, sizeof cond); - cond[channel] = 1; - - a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), ""); + a = LLVMBuildAnd(bld->builder, a, + lp_build_const_mask_aos(type, 1 << channel), ""); /* * Build a type where each element is an integer that cover the four @@ -206,7 +206,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld, case PIPE_SWIZZLE_GREEN: case PIPE_SWIZZLE_BLUE: case PIPE_SWIZZLE_ALPHA: - return lp_build_broadcast_aos(bld, a, swizzles[0]); + return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]); case PIPE_SWIZZLE_ZERO: return bld->zero; case PIPE_SWIZZLE_ONE: @@ -282,7 +282,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld, */ LLVMValueRef res; struct lp_type type4; - boolean cond[4]; + unsigned cond = 0; unsigned chan; int shift; @@ -290,9 +290,11 @@ lp_build_swizzle_aos(struct lp_build_context *bld, * Start with a mixture of 1 and 0. */ for (chan = 0; chan < 4; ++chan) { - cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE; + if (swizzles[chan] == PIPE_SWIZZLE_ONE) { + cond |= 1 << chan; + } } - res = lp_build_select_aos(bld, bld->one, bld->zero, cond); + res = lp_build_select_aos(bld, cond, bld->one, bld->zero); /* * Build a type where each element is an integer that cover the four diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index 315e1bcb54..f9b6a5e725 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -60,7 +60,7 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, * all four channel. */ LLVMValueRef -lp_build_broadcast_aos(struct lp_build_context *bld, +lp_build_swizzle_scalar_aos(struct lp_build_context *bld, LLVMValueRef a, unsigned channel); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 4d415b6d41..97318b3456 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -45,6 +45,15 @@ struct lp_build_context; struct lp_build_mask_context; +enum lp_build_tex_modifier { + LP_BLD_TEX_MODIFIER_NONE = 0, + LP_BLD_TEX_MODIFIER_PROJECTED, + LP_BLD_TEX_MODIFIER_LOD_BIAS, + LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, + LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV +}; + + /** * Sampler code generation interface. * @@ -73,6 +82,20 @@ struct lp_build_sampler_soa }; +struct lp_build_sampler_aos +{ + LLVMValueRef + (*emit_fetch_texel)( struct lp_build_sampler_aos *sampler, + struct lp_build_context *bld, + unsigned target, /* TGSI_TEXTURE_* */ + unsigned unit, + LLVMValueRef coords, + LLVMValueRef ddx, + LLVMValueRef ddy, + enum lp_build_tex_modifier modifier); +}; + + void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, @@ -86,4 +109,16 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_shader_info *info); +void +lp_build_tgsi_aos(LLVMBuilderRef builder, + const struct tgsi_token *tokens, + struct lp_type type, + const unsigned char swizzles[4], + LLVMValueRef consts_ptr, + const LLVMValueRef *inputs, + LLVMValueRef *outputs, + struct lp_build_sampler_aos *sampler, + const struct tgsi_shader_info *info); + + #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c new file mode 100644 index 0000000000..d5f963be58 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -0,0 +1,1176 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * TGSI to LLVM IR translation -- AoS. + * + * FIXME: + * - No control flow support: the existing control flow code should be factored + * out into from the SoA code into a common module and shared. + * - No derivatives. Derivate logic should be pluggable, just like the samplers. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_config.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_debug.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_scan.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_flow.h" +#include "lp_bld_quad.h" +#include "lp_bld_tgsi.h" +#include "lp_bld_limits.h" +#include "lp_bld_debug.h" + + +#define LP_MAX_INSTRUCTIONS 256 + + +struct lp_build_tgsi_aos_context +{ + struct lp_build_context base; + + /* Builder for integer masks and indices */ + struct lp_build_context int_bld; + + /* + * AoS swizzle used: + * - swizzles[0] = red index + * - swizzles[1] = green index + * - swizzles[2] = blue index + * - swizzles[3] = alpha index + */ + unsigned char swizzles[4]; + unsigned char inv_swizzles[4]; + + LLVMValueRef consts_ptr; + const LLVMValueRef *inputs; + LLVMValueRef *outputs; + + struct lp_build_sampler_aos *sampler; + + LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES]; + LLVMValueRef temps[LP_MAX_TGSI_TEMPS]; + LLVMValueRef addr[LP_MAX_TGSI_ADDRS]; + LLVMValueRef preds[LP_MAX_TGSI_PREDS]; + + /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is + * set in the indirect_files field. + * The temps[] array above is unused then. + */ + LLVMValueRef temps_array; + + /** bitmask indicating which register files are accessed indirectly */ + unsigned indirect_files; + + struct tgsi_full_instruction *instructions; + uint max_instructions; +}; + + +/** + * Wrapper around lp_build_swizzle_aos which translates swizzles to another + * ordering. + */ +static LLVMValueRef +swizzle_aos(struct lp_build_tgsi_aos_context *bld, + LLVMValueRef a, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w) +{ + unsigned char swizzles[4]; + + assert(swizzle_x < 4); + assert(swizzle_y < 4); + assert(swizzle_z < 4); + assert(swizzle_w < 4); + + swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x]; + swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y]; + swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z]; + swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w]; + + return lp_build_swizzle_aos(&bld->base, a, swizzles); +} + + +static LLVMValueRef +swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld, + LLVMValueRef a, + unsigned chan) +{ + chan = bld->swizzles[chan]; + return lp_build_swizzle_scalar_aos(&bld->base, a, chan); +} + + +/** + * Register fetch. + */ +static LLVMValueRef +emit_fetch( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + unsigned src_op) +{ + struct lp_type type = bld->base.type; + const struct tgsi_full_src_register *reg = &inst->Src[src_op]; + LLVMValueRef res; + unsigned chan; + + assert(!reg->Register.Indirect); + + /* + * Fetch the from the register file. + */ + + switch (reg->Register.File) { + case TGSI_FILE_CONSTANT: + /* + * Get the constants components + */ + + res = bld->base.undef; + for (chan = 0; chan < 4; ++chan) { + LLVMValueRef index; + LLVMValueRef scalar_ptr; + LLVMValueRef scalar; + LLVMValueRef swizzle; + + index = LLVMConstInt(LLVMInt32Type(), + reg->Register.Index*4 + chan, + 0); + + scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, + &index, 1, ""); + + scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + + lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); + + /* + * NOTE: constants array is always assumed to be RGBA + */ + + swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0); + + res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, ""); + } + + /* + * Broadcast the first quaternion to all others. + * + * XXX: could be factored into a reusable function. + */ + + if (type.length > 4) { + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + for (chan = 0; chan < 4; ++chan) { + shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0); + } + + for (i = 4; i < type.length; ++i) { + shuffles[i] = shuffles[i % 4]; + } + + res = LLVMBuildShuffleVector(bld->base.builder, + res, bld->base.undef, + LLVMConstVector(shuffles, type.length), + ""); + } + break; + + case TGSI_FILE_IMMEDIATE: + res = bld->immediates[reg->Register.Index]; + assert(res); + break; + + case TGSI_FILE_INPUT: + res = bld->inputs[reg->Register.Index]; + assert(res); + break; + + case TGSI_FILE_TEMPORARY: + { + LLVMValueRef temp_ptr; + temp_ptr = bld->temps[reg->Register.Index]; + res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); + if (!res) + return bld->base.undef; + } + break; + + default: + assert(0 && "invalid src register in emit_fetch()"); + return bld->base.undef; + } + + /* + * Apply sign modifier. + */ + + if (reg->Register.Absolute) { + res = lp_build_abs(&bld->base, res); + } + + if(reg->Register.Negate) { + res = lp_build_negate(&bld->base, res); + } + + /* + * Swizzle the argument + */ + + res = swizzle_aos(bld, res, + reg->Register.SwizzleX, + reg->Register.SwizzleY, + reg->Register.SwizzleZ, + reg->Register.SwizzleW); + + return res; +} + + +/** + * Register store. + */ +static void +emit_store( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + unsigned index, + LLVMValueRef value) +{ + const struct tgsi_full_dst_register *reg = &inst->Dst[index]; + LLVMValueRef mask = NULL; + LLVMValueRef ptr; + + /* + * Saturate the value + */ + + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + value = lp_build_max(&bld->base, value, bld->base.zero); + value = lp_build_min(&bld->base, value, bld->base.one); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); + value = lp_build_min(&bld->base, value, bld->base.one); + break; + + default: + assert(0); + } + + /* + * Translate the register file + */ + + assert(!reg->Register.Indirect); + + switch (reg->Register.File) { + case TGSI_FILE_OUTPUT: + ptr = bld->outputs[reg->Register.Index]; + break; + + case TGSI_FILE_TEMPORARY: + ptr = bld->temps[reg->Register.Index]; + break; + + case TGSI_FILE_ADDRESS: + ptr = bld->addr[reg->Indirect.Index]; + break; + + case TGSI_FILE_PREDICATE: + ptr = bld->preds[reg->Register.Index]; + break; + + default: + assert(0); + return; + } + + /* + * Predicate + */ + + if (inst->Instruction.Predicate) { + LLVMValueRef pred; + + assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS); + + pred = LLVMBuildLoad(bld->base.builder, + bld->preds[inst->Predicate.Index], ""); + + /* + * Convert the value to an integer mask. + */ + pred = lp_build_compare(bld->base.builder, + bld->base.type, + PIPE_FUNC_NOTEQUAL, + pred, + bld->base.zero); + + if (inst->Predicate.Negate) { + pred = LLVMBuildNot(bld->base.builder, pred, ""); + } + + pred = swizzle_aos(bld, pred, + inst->Predicate.SwizzleX, + inst->Predicate.SwizzleY, + inst->Predicate.SwizzleZ, + inst->Predicate.SwizzleW); + + if (mask) { + mask = LLVMBuildAnd(bld->base.builder, mask, pred, ""); + } else { + mask = pred; + } + } + + /* + * Writemask + */ + + if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { + LLVMValueRef writemask; + + writemask = lp_build_const_mask_aos(bld->base.type, reg->Register.WriteMask); + + if (mask) { + mask = LLVMBuildAnd(bld->base.builder, mask, writemask, ""); + } else { + mask = writemask; + } + } + + if (mask) { + LLVMValueRef orig_value; + + orig_value = LLVMBuildLoad(bld->base.builder, ptr, ""); + value = lp_build_select(&bld->base, + mask, value, orig_value); + } + + LLVMBuildStore(bld->base.builder, value, ptr); +} + + +/** + * High-level instruction translators. + */ + +static LLVMValueRef +emit_tex(struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + enum lp_build_tex_modifier modifier) +{ + unsigned target; + unsigned unit; + LLVMValueRef coords; + LLVMValueRef ddx; + LLVMValueRef ddy; + + if (!bld->sampler) { + _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); + return bld->base.undef; + } + + target = inst->Texture.Texture; + + coords = emit_fetch( bld, inst, 0 ); + + if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + ddx = emit_fetch( bld, inst, 1 ); + ddy = emit_fetch( bld, inst, 2 ); + unit = inst->Src[3].Register.Index; + } else { +#if 0 + ddx = lp_build_ddx( &bld->base, coords ); + ddy = lp_build_ddy( &bld->base, coords ); +#else + /* TODO */ + ddx = bld->base.one; + ddy = bld->base.one; +#endif + unit = inst->Src[1].Register.Index; + } + + return bld->sampler->emit_fetch_texel(bld->sampler, + &bld->base, + target, unit, + coords, ddx, ddy, + modifier); +} + + +static void +emit_declaration( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_declaration *decl) +{ + LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); + + unsigned first = decl->Range.First; + unsigned last = decl->Range.Last; + unsigned idx; + + for (idx = first; idx <= last; ++idx) { + switch (decl->Declaration.File) { + case TGSI_FILE_TEMPORARY: + assert(idx < LP_MAX_TGSI_TEMPS); + if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), + last + 1, 0); + bld->temps_array = lp_build_array_alloca(bld->base.builder, + vec_type, array_size, ""); + } else { + bld->temps[idx] = lp_build_alloca(bld->base.builder, + vec_type, ""); + } + break; + + case TGSI_FILE_OUTPUT: + bld->outputs[idx] = lp_build_alloca(bld->base.builder, + vec_type, ""); + break; + + case TGSI_FILE_ADDRESS: + assert(idx < LP_MAX_TGSI_ADDRS); + bld->addr[idx] = lp_build_alloca(bld->base.builder, + vec_type, ""); + break; + + case TGSI_FILE_PREDICATE: + assert(idx < LP_MAX_TGSI_PREDS); + bld->preds[idx] = lp_build_alloca(bld->base.builder, + vec_type, ""); + break; + + default: + /* don't need to declare other vars */ + break; + } + } +} + + +/** + * Emit LLVM for one TGSI instruction. + * \param return TRUE for success, FALSE otherwise + */ +static boolean +emit_instruction( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info, + int *pc) +{ + LLVMValueRef src0, src1, src2; + LLVMValueRef tmp0, tmp1; + LLVMValueRef dst0; + + /* + * Stores and write masks are handled in a general fashion after the long + * instruction opcode switch statement. + * + * Although not stricitly necessary, we avoid generating instructions for + * channels which won't be stored, in cases where's that easy. For some + * complex instructions, like texture sampling, it is more convenient to + * assume a full writemask and then let LLVM optimization passes eliminate + * redundant code. + */ + + (*pc)++; + + assert(info->num_dst <= 1); + if (info->num_dst) { + dst0 = bld->base.undef; + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_floor(&bld->base, src0); + break; + + case TGSI_OPCODE_MOV: + dst0 = emit_fetch(bld, inst, 0); + break; + + case TGSI_OPCODE_LIT: + return FALSE; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_rcp(&bld->base, src0); + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + src0 = emit_fetch(bld, inst, 0); + tmp0 = lp_build_abs(&bld->base, src0); + dst0 = lp_build_rsqrt(&bld->base, tmp0); + break; + + case TGSI_OPCODE_EXP: + return FALSE; + + case TGSI_OPCODE_LOG: + return FALSE; + + case TGSI_OPCODE_MUL: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_mul(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_ADD: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_add(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + return FALSE; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + return FALSE; + + case TGSI_OPCODE_DST: + return FALSE; + + case TGSI_OPCODE_MIN: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_max(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_MAX: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_max(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp0 = lp_build_mul(&bld->base, src0, src1); + dst0 = lp_build_add(&bld->base, tmp0, src2); + break; + + case TGSI_OPCODE_SUB: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_sub(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_LRP: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp0 = lp_build_sub(&bld->base, src1, src2); + tmp0 = lp_build_mul(&bld->base, src0, tmp0); + dst0 = lp_build_add(&bld->base, tmp0, src2); + break; + + case TGSI_OPCODE_CND: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp1 = lp_build_const_vec(bld->base.type, 0.5); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1); + dst0 = lp_build_select(&bld->base, tmp0, src0, src1); + break; + + case TGSI_OPCODE_DP2A: + return FALSE; + + case TGSI_OPCODE_FRC: + src0 = emit_fetch(bld, inst, 0); + tmp0 = lp_build_floor(&bld->base, src0); + dst0 = lp_build_sub(&bld->base, src0, tmp0); + break; + + case TGSI_OPCODE_CLAMP: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp0 = lp_build_max(&bld->base, src0, src1); + dst0 = lp_build_min(&bld->base, tmp0, src2); + break; + + case TGSI_OPCODE_FLR: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_floor(&bld->base, src0); + break; + + case TGSI_OPCODE_ROUND: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_round(&bld->base, src0); + break; + + case TGSI_OPCODE_EX2: + src0 = emit_fetch(bld, inst, 0); + tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X); + dst0 = lp_build_exp2(&bld->base, tmp0); + break; + + case TGSI_OPCODE_LG2: + src0 = emit_fetch(bld, inst, 0); + tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); + dst0 = lp_build_log2(&bld->base, tmp0); + break; + + case TGSI_OPCODE_POW: + src0 = emit_fetch(bld, inst, 0); + src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); + src1 = emit_fetch(bld, inst, 1); + src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); + dst0 = lp_build_pow(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_XPD: + return FALSE; + + case TGSI_OPCODE_ABS: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_abs(&bld->base, src0); + break; + + case TGSI_OPCODE_RCC: + /* deprecated? */ + assert(0); + return FALSE; + + case TGSI_OPCODE_DPH: + return FALSE; + + case TGSI_OPCODE_COS: + src0 = emit_fetch(bld, inst, 0); + tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); + dst0 = lp_build_cos(&bld->base, tmp0); + break; + + case TGSI_OPCODE_DDX: + return FALSE; + + case TGSI_OPCODE_DDY: + return FALSE; + + case TGSI_OPCODE_KILP: + /* predicated kill */ + return FALSE; + + case TGSI_OPCODE_KIL: + /* conditional kill */ + return FALSE; + + case TGSI_OPCODE_PK2H: + return FALSE; + break; + + case TGSI_OPCODE_PK2US: + return FALSE; + break; + + case TGSI_OPCODE_PK4B: + return FALSE; + break; + + case TGSI_OPCODE_PK4UB: + return FALSE; + + case TGSI_OPCODE_RFL: + return FALSE; + + case TGSI_OPCODE_SEQ: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_SFL: + dst0 = bld->base.zero; + break; + + case TGSI_OPCODE_SGT: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_SIN: + src0 = emit_fetch(bld, inst, 0); + tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); + dst0 = lp_build_sin(&bld->base, tmp0); + break; + + case TGSI_OPCODE_SLE: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_SNE: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_STR: + dst0 = bld->base.one; + break; + + case TGSI_OPCODE_TEX: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); + break; + + case TGSI_OPCODE_TXD: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); + break; + + case TGSI_OPCODE_UP2H: + /* deprecated */ + assert (0); + return FALSE; + break; + + case TGSI_OPCODE_UP2US: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_UP4B: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_UP4UB: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_X2D: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_ARA: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_ARR: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_round(&bld->base, src0); + break; + + case TGSI_OPCODE_BRA: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_CAL: + return FALSE; + + case TGSI_OPCODE_RET: + return FALSE; + + case TGSI_OPCODE_END: + *pc = -1; + break; + + case TGSI_OPCODE_SSG: + /* TGSI_OPCODE_SGN */ + tmp0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_sgn(&bld->base, tmp0); + break; + + case TGSI_OPCODE_CMP: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero); + dst0 = lp_build_select(&bld->base, tmp0, src1, src2); + break; + + case TGSI_OPCODE_SCS: + return FALSE; + + case TGSI_OPCODE_TXB: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); + break; + + case TGSI_OPCODE_NRM: + /* fall-through */ + case TGSI_OPCODE_NRM4: + return FALSE; + + case TGSI_OPCODE_DIV: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_DP2: + return FALSE; + + case TGSI_OPCODE_TXL: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); + break; + + case TGSI_OPCODE_TXP: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); + break; + + case TGSI_OPCODE_BRK: + return FALSE; + + case TGSI_OPCODE_IF: + return FALSE; + + case TGSI_OPCODE_BGNLOOP: + return FALSE; + + case TGSI_OPCODE_BGNSUB: + return FALSE; + + case TGSI_OPCODE_ELSE: + return FALSE; + + case TGSI_OPCODE_ENDIF: + return FALSE; + + case TGSI_OPCODE_ENDLOOP: + return FALSE; + + case TGSI_OPCODE_ENDSUB: + return FALSE; + + case TGSI_OPCODE_PUSHA: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_POPA: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_CEIL: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_ceil(&bld->base, src0); + break; + + case TGSI_OPCODE_I2F: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_NOT: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_TRUNC: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_trunc(&bld->base, src0); + break; + + case TGSI_OPCODE_SHL: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_ISHR: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_AND: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_OR: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_MOD: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_XOR: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_SAD: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_TXF: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_TXQ: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_CONT: + return FALSE; + + case TGSI_OPCODE_EMIT: + return FALSE; + break; + + case TGSI_OPCODE_ENDPRIM: + return FALSE; + break; + + case TGSI_OPCODE_NOP: + break; + + default: + return FALSE; + } + + if (info->num_dst) { + emit_store(bld, inst, 0, dst0); + } + + return TRUE; +} + + +void +lp_build_tgsi_aos(LLVMBuilderRef builder, + const struct tgsi_token *tokens, + struct lp_type type, + const unsigned char swizzles[4], + LLVMValueRef consts_ptr, + const LLVMValueRef *inputs, + LLVMValueRef *outputs, + struct lp_build_sampler_aos *sampler, + const struct tgsi_shader_info *info) +{ + struct lp_build_tgsi_aos_context bld; + struct tgsi_parse_context parse; + uint num_immediates = 0; + uint num_instructions = 0; + unsigned chan; + int pc = 0; + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + lp_build_context_init(&bld.int_bld, builder, lp_int_type(type)); + + for (chan = 0; chan < 4; ++chan) { + bld.swizzles[chan] = swizzles[chan]; + bld.inv_swizzles[swizzles[chan]] = chan; + } + + bld.inputs = inputs; + bld.outputs = outputs; + bld.consts_ptr = consts_ptr; + bld.sampler = sampler; + bld.indirect_files = info->indirect_files; + bld.instructions = (struct tgsi_full_instruction *) + MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction)); + bld.max_instructions = LP_MAX_INSTRUCTIONS; + + if (!bld.instructions) { + return; + } + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch(parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* Inputs already interpolated */ + emit_declaration(&bld, &parse.FullToken.FullDeclaration); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + /* save expanded instruction */ + if (num_instructions == bld.max_instructions) { + struct tgsi_full_instruction *instructions; + instructions = REALLOC(bld.instructions, + bld.max_instructions + * sizeof(struct tgsi_full_instruction), + (bld.max_instructions + LP_MAX_INSTRUCTIONS) + * sizeof(struct tgsi_full_instruction)); + if (!instructions) { + break; + } + bld.instructions = instructions; + bld.max_instructions += LP_MAX_INSTRUCTIONS; + } + + memcpy(bld.instructions + num_instructions, + &parse.FullToken.FullInstruction, + sizeof(bld.instructions[0])); + + num_instructions++; + } + + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + float imm[4]; + assert(size <= 4); + assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); + for (chan = 0; chan < 4; ++chan) { + imm[chan] = 0.0f; + } + for (chan = 0; chan < size; ++chan) { + unsigned swizzle = bld.swizzles[chan]; + imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float; + } + bld.immediates[num_immediates] = + lp_build_const_aos(type, + imm[0], imm[1], imm[2], imm[3], + NULL); + num_immediates++; + } + break; + + case TGSI_TOKEN_TYPE_PROPERTY: + break; + + default: + assert(0); + } + } + + while (pc != -1) { + struct tgsi_full_instruction *instr = bld.instructions + pc; + const struct tgsi_opcode_info *opcode_info = + tgsi_get_opcode_info(instr->Instruction.Opcode); + if (!emit_instruction(&bld, instr, opcode_info, &pc)) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + opcode_info->mnemonic); + } + + if (0) { + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + debug_printf("11111111111111111111111111111 \n"); + tgsi_dump(tokens, 0); + lp_debug_dump_value(function); + debug_printf("2222222222222222222222222222 \n"); + } + tgsi_parse_free(&parse); + + if (0) { + LLVMModuleRef module = LLVMGetGlobalParent( + LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); + LLVMDumpModule(module); + } + + FREE(bld.instructions); +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 0e07f7f3f3..cd5b132b41 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -612,7 +612,6 @@ emit_fetch( break; case TGSI_UTIL_SIGN_SET: - /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); /* fall through */ case TGSI_UTIL_SIGN_TOGGLE: @@ -817,18 +816,10 @@ emit_store( * High-level instruction translators. */ -enum tex_modifier { - TEX_MODIFIER_NONE = 0, - TEX_MODIFIER_PROJECTED, - TEX_MODIFIER_LOD_BIAS, - TEX_MODIFIER_EXPLICIT_LOD, - TEX_MODIFIER_EXPLICIT_DERIV -}; - static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, - enum tex_modifier modifier, + enum lp_build_tex_modifier modifier, LLVMValueRef *texel) { unsigned unit; @@ -868,11 +859,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, return; } - if (modifier == TEX_MODIFIER_LOD_BIAS) { + if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { lod_bias = emit_fetch( bld, inst, 0, 3 ); explicit_lod = NULL; } - else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { lod_bias = NULL; explicit_lod = emit_fetch( bld, inst, 0, 3 ); } @@ -881,21 +872,21 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, explicit_lod = NULL; } - if (modifier == TEX_MODIFIER_PROJECTED) { + if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { oow = emit_fetch( bld, inst, 0, 3 ); oow = lp_build_rcp(&bld->base, oow); } for (i = 0; i < num_coords; i++) { coords[i] = emit_fetch( bld, inst, 0, i ); - if (modifier == TEX_MODIFIER_PROJECTED) + if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) coords[i] = lp_build_mul(&bld->base, coords[i], oow); } for (i = num_coords; i < 3; i++) { coords[i] = bld->base.undef; } - if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) { + if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { for (i = 0; i < num_coords; i++) { ddx[i] = emit_fetch( bld, inst, 1, i ); ddy[i] = emit_fetch( bld, inst, 2, i ); @@ -1628,11 +1619,11 @@ emit_instruction( break; case TGSI_OPCODE_TEX: - emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 ); + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); break; case TGSI_OPCODE_TXD: - emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); break; case TGSI_OPCODE_UP2H: @@ -1736,7 +1727,7 @@ emit_instruction( break; case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 ); + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); break; case TGSI_OPCODE_NRM: @@ -1841,11 +1832,11 @@ emit_instruction( break; case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 ); + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); break; case TGSI_OPCODE_TXP: - emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 ); + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); break; case TGSI_OPCODE_BRK: @@ -2063,11 +2054,16 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, { /* save expanded instruction */ if (num_instructions == bld.max_instructions) { - bld.instructions = REALLOC(bld.instructions, - bld.max_instructions - * sizeof(struct tgsi_full_instruction), - (bld.max_instructions + LP_MAX_INSTRUCTIONS) - * sizeof(struct tgsi_full_instruction)); + struct tgsi_full_instruction *instructions; + instructions = REALLOC(bld.instructions, + bld.max_instructions + * sizeof(struct tgsi_full_instruction), + (bld.max_instructions + LP_MAX_INSTRUCTIONS) + * sizeof(struct tgsi_full_instruction)); + if (!instructions) { + break; + } + bld.instructions = instructions; bld.max_instructions += LP_MAX_INSTRUCTIONS; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index a6c50dcf0c..5a13f39849 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -130,7 +130,7 @@ struct pb_vtbl * flags is bitmask of PB_USAGE_CPU_READ/WRITE. */ void *(*map)( struct pb_buffer *buf, - unsigned flags ); + unsigned flags, void *flush_ctx ); void (*unmap)( struct pb_buffer *buf ); @@ -164,13 +164,13 @@ struct pb_vtbl */ static INLINE void * pb_map(struct pb_buffer *buf, - unsigned flags) + unsigned flags, void *flush_ctx) { assert(buf); if(!buf) return NULL; assert(pipe_is_referenced(&buf->base.reference)); - return buf->vtbl->map(buf, flags); + return buf->vtbl->map(buf, flags, flush_ctx); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index d6cf640582..c310f28f51 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -624,7 +624,7 @@ fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf) assert(fenced_buf->data); assert(fenced_buf->buffer); - map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE); + map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE, NULL); if(!map) return PIPE_ERROR; @@ -644,7 +644,7 @@ fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf) assert(fenced_buf->data); assert(fenced_buf->buffer); - map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ); + map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ, NULL); if(!map) return PIPE_ERROR; @@ -674,7 +674,7 @@ fenced_buffer_destroy(struct pb_buffer *buf) static void * fenced_buffer_map(struct pb_buffer *buf, - unsigned flags) + unsigned flags, void *flush_ctx) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_manager *fenced_mgr = fenced_buf->mgr; @@ -712,7 +712,7 @@ fenced_buffer_map(struct pb_buffer *buf, } if(fenced_buf->buffer) { - map = pb_map(fenced_buf->buffer, flags); + map = pb_map(fenced_buf->buffer, flags, flush_ctx); } else { assert(fenced_buf->data); diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index b706f429be..c2322eed19 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -70,7 +70,8 @@ malloc_buffer_destroy(struct pb_buffer *buf) static void * malloc_buffer_map(struct pb_buffer *buf, - unsigned flags) + unsigned flags, + void *flush_ctx) { return malloc_buffer(buf)->data; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 88501e8d72..b4d8107372 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -167,10 +167,10 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf) static void * pb_cache_buffer_map(struct pb_buffer *_buf, - unsigned flags) + unsigned flags, void *flush_ctx) { struct pb_cache_buffer *buf = pb_cache_buffer(_buf); - return pb_map(buf->buffer, flags); + return pb_map(buf->buffer, flags, flush_ctx); } @@ -242,7 +242,7 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, if(!pb_check_usage(desc->usage, buf->base.base.usage)) return FALSE; - map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK); + map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL); if (!map) { return FALSE; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 0dc5b31a75..7604e75af8 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -158,7 +158,7 @@ pb_debug_buffer_fill(struct pb_debug_buffer *buf) { uint8_t *map; - map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE); + map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE, NULL); assert(map); if(map) { fill_random_pattern(map, buf->underflow_size); @@ -181,7 +181,7 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf) map = pb_map(buf->buffer, PB_USAGE_CPU_READ | - PB_USAGE_UNSYNCHRONIZED); + PB_USAGE_UNSYNCHRONIZED, NULL); assert(map); if(map) { boolean underflow, overflow; @@ -247,14 +247,14 @@ pb_debug_buffer_destroy(struct pb_buffer *_buf) static void * pb_debug_buffer_map(struct pb_buffer *_buf, - unsigned flags) + unsigned flags, void *flush_ctx) { struct pb_debug_buffer *buf = pb_debug_buffer(_buf); void *map; pb_debug_buffer_check(buf); - map = pb_map(buf->buffer, flags); + map = pb_map(buf->buffer, flags, flush_ctx); if(!map) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index faf7c35267..88da786216 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -108,11 +108,14 @@ mm_buffer_destroy(struct pb_buffer *buf) static void * mm_buffer_map(struct pb_buffer *buf, - unsigned flags) + unsigned flags, + void *flush_ctx) { struct mm_buffer *mm_buf = mm_buffer(buf); struct mm_pb_manager *mm = mm_buf->mgr; + /* XXX: it will be necessary to remap here to propagate flush_ctx */ + return (unsigned char *) mm->map + mm_buf->block->ofs; } @@ -269,7 +272,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, mm->map = pb_map(mm->buffer, PB_USAGE_CPU_READ | - PB_USAGE_CPU_WRITE); + PB_USAGE_CPU_WRITE, NULL); if(!mm->map) goto failure; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c index 31f1ebbeb7..694a092f3c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c @@ -103,13 +103,13 @@ pb_ondemand_buffer_destroy(struct pb_buffer *_buf) static void * pb_ondemand_buffer_map(struct pb_buffer *_buf, - unsigned flags) + unsigned flags, void *flush_ctx) { struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); if(buf->buffer) { assert(!buf->data); - return pb_map(buf->buffer, flags); + return pb_map(buf->buffer, flags, flush_ctx); } else { assert(buf->data); @@ -150,7 +150,7 @@ pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf) if(!buf->buffer) return PIPE_ERROR_OUT_OF_MEMORY; - map = pb_map(buf->buffer, PB_USAGE_CPU_READ); + map = pb_map(buf->buffer, PB_USAGE_CPU_READ, NULL); if(!map) { pb_reference(&buf->buffer, NULL); return PIPE_ERROR; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index fdcce42878..2f7c7389ff 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -118,12 +118,14 @@ pool_buffer_destroy(struct pb_buffer *buf) static void * -pool_buffer_map(struct pb_buffer *buf, unsigned flags) +pool_buffer_map(struct pb_buffer *buf, unsigned flags, void *flush_ctx) { struct pool_buffer *pool_buf = pool_buffer(buf); struct pool_pb_manager *pool = pool_buf->mgr; void *map; + /* XXX: it will be necessary to remap here to propagate flush_ctx */ + pipe_mutex_lock(pool->mutex); map = (unsigned char *) pool->map + pool_buf->start; pipe_mutex_unlock(pool->mutex); @@ -285,7 +287,7 @@ pool_bufmgr_create(struct pb_manager *provider, pool->map = pb_map(pool->buffer, PB_USAGE_CPU_READ | - PB_USAGE_CPU_WRITE); + PB_USAGE_CPU_WRITE, NULL); if(!pool->map) goto failure; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 7a3305aaf3..176f9aa38a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -227,10 +227,13 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf) static void * pb_slab_buffer_map(struct pb_buffer *_buf, - unsigned flags) + unsigned flags, + void *flush_ctx) { struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + /* XXX: it will be necessary to remap here to propagate flush_ctx */ + ++buf->mapCount; return (void *) ((uint8_t *) buf->slab->virtual + buf->start); } @@ -316,7 +319,7 @@ pb_slab_create(struct pb_slab_manager *mgr) * through this address so it is required that the buffer is pinned. */ slab->virtual = pb_map(slab->bo, PB_USAGE_CPU_READ | - PB_USAGE_CPU_WRITE); + PB_USAGE_CPU_WRITE, NULL); if(!slab->virtual) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out_err1; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 6dee362d58..9d62c1d7e7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -377,6 +377,36 @@ tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, const unsigned *buf_sizes); +static INLINE int +tgsi_exec_get_shader_param(enum pipe_shader_cap param) +{ + switch(param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return INT_MAX; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return TGSI_EXEC_MAX_NESTING; + case PIPE_SHADER_CAP_MAX_INPUTS: + return TGSI_EXEC_MAX_INPUT_ATTRIBS; + case PIPE_SHADER_CAP_MAX_CONSTS: + return TGSI_EXEC_MAX_CONST_BUFFER; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return PIPE_MAX_CONSTANT_BUFFERS; + case PIPE_SHADER_CAP_MAX_TEMPS: + return TGSI_EXEC_NUM_TEMPS; + case PIPE_SHADER_CAP_MAX_ADDRS: + return TGSI_EXEC_NUM_ADDRS; + case PIPE_SHADER_CAP_MAX_PREDS: + return TGSI_EXEC_NUM_PREDS; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + default: + return 0; + } +} + #if defined __cplusplus } /* extern "C" */ #endif diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c index 94d5bd3027..e209a98b70 100644 --- a/src/gallium/auxiliary/util/u_caps.c +++ b/src/gallium/auxiliary/util/u_caps.c @@ -75,6 +75,14 @@ util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out) return FALSE; } break; + case UTIL_CAPS_CHECK_SHADER: + tmpi = screen->get_shader_param(screen, list[i] >> 24, list[i] & ((1 << 24) - 1)); + ++i; + if (tmpi < (int)list[i++]) { + *out = i - 3; + return FALSE; + } + break; case UTIL_CAPS_CHECK_UNIMPLEMENTED: *out = i - 1; return FALSE; @@ -188,17 +196,17 @@ static unsigned caps_opengl_2_1[] = { /* Shader Model 3 */ static unsigned caps_sm3[] = { - UTIL_CHECK_INT(MAX_FS_INSTRUCTIONS, 512), - UTIL_CHECK_INT(MAX_FS_INPUTS, 10), - UTIL_CHECK_INT(MAX_FS_TEMPS, 32), - UTIL_CHECK_INT(MAX_FS_ADDRS, 1), - UTIL_CHECK_INT(MAX_FS_CONSTS, 224), + UTIL_CHECK_SHADER(FRAGMENT, MAX_INSTRUCTIONS, 512), + UTIL_CHECK_SHADER(FRAGMENT, MAX_INPUTS, 10), + UTIL_CHECK_SHADER(FRAGMENT, MAX_TEMPS, 32), + UTIL_CHECK_SHADER(FRAGMENT, MAX_ADDRS, 1), + UTIL_CHECK_SHADER(FRAGMENT, MAX_CONSTS, 224), - UTIL_CHECK_INT(MAX_VS_INSTRUCTIONS, 512), - UTIL_CHECK_INT(MAX_VS_INPUTS, 16), - UTIL_CHECK_INT(MAX_VS_TEMPS, 32), - UTIL_CHECK_INT(MAX_VS_ADDRS, 2), - UTIL_CHECK_INT(MAX_VS_CONSTS, 256), + UTIL_CHECK_SHADER(VERTEX, MAX_INSTRUCTIONS, 512), + UTIL_CHECK_SHADER(VERTEX, MAX_INPUTS, 16), + UTIL_CHECK_SHADER(VERTEX, MAX_TEMPS, 32), + UTIL_CHECK_SHADER(VERTEX, MAX_ADDRS, 2), + UTIL_CHECK_SHADER(VERTEX, MAX_CONSTS, 256), UTIL_CHECK_TERMINATE }; diff --git a/src/gallium/auxiliary/util/u_caps.h b/src/gallium/auxiliary/util/u_caps.h index b1074f9eb2..7bd2380041 100644 --- a/src/gallium/auxiliary/util/u_caps.h +++ b/src/gallium/auxiliary/util/u_caps.h @@ -38,6 +38,7 @@ enum u_caps_check_enum { UTIL_CAPS_CHECK_INT, UTIL_CAPS_CHECK_FLOAT, UTIL_CAPS_CHECK_FORMAT, + UTIL_CAPS_CHECK_SHADER, UTIL_CAPS_CHECK_UNIMPLEMENTED, }; @@ -54,6 +55,9 @@ enum u_caps_check_enum { #define UTIL_CHECK_FORMAT(format) \ UTIL_CAPS_CHECK_FORMAT, PIPE_FORMAT_##format +#define UTIL_CHECK_SHADER(shader, cap, higher) \ + UTIL_CAPS_CHECK_SHADER, (PIPE_SHADER_##shader << 24) | PIPE_SHADER_CAP_##cap, (unsigned)(higher) + #define UTIL_CHECK_UNIMPLEMENTED \ UTIL_CAPS_CHECK_UNIMPLEMENTED diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index 43d09f1960..a4ee91b0cf 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/auxiliary/util/u_format.c @@ -121,6 +121,54 @@ util_format_write_4ub(enum pipe_format format, const uint8_t *src, unsigned src_ boolean +util_is_format_compatible(const struct util_format_description *src_desc, + const struct util_format_description *dst_desc) +{ + unsigned chan; + + if (src_desc->format == dst_desc->format) { + return TRUE; + } + + if (src_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + dst_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + return FALSE; + } + + if (src_desc->block.bits != dst_desc->block.bits || + src_desc->nr_channels != dst_desc->nr_channels || + src_desc->colorspace != dst_desc->colorspace) { + return FALSE; + } + + for (chan = 0; chan < 4; ++chan) { + if (src_desc->channel[chan].size != + dst_desc->channel[chan].size) { + return FALSE; + } + } + + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = dst_desc->swizzle[chan]; + + if (swizzle < 4) { + if (src_desc->swizzle[chan] != swizzle) { + return FALSE; + } + if ((src_desc->channel[swizzle].type != + dst_desc->channel[swizzle].type) || + (src_desc->channel[swizzle].normalized != + dst_desc->channel[swizzle].normalized)) { + return FALSE; + } + } + } + + return TRUE; +} + + +boolean util_format_fits_8unorm(const struct util_format_description *format_desc) { unsigned chan; @@ -193,7 +241,10 @@ util_format_translate(enum pipe_format dst_format, unsigned dst_step; unsigned src_step; - if (dst_format == src_format) { + dst_format_desc = util_format_description(dst_format); + src_format_desc = util_format_description(src_format); + + if (util_is_format_compatible(src_format_desc, dst_format_desc)) { /* * Trivial case. */ @@ -204,9 +255,6 @@ util_format_translate(enum pipe_format dst_format, return; } - dst_format_desc = util_format_description(dst_format); - src_format_desc = util_format_description(src_format); - assert(dst_x % dst_format_desc->block.width == 0); assert(dst_y % dst_format_desc->block.height == 0); assert(src_x % src_format_desc->block.width == 0); diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 8e786a390a..03b73c0e98 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -440,6 +440,48 @@ util_format_is_depth_and_stencil(enum pipe_format format) desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE) ? TRUE : FALSE; } + +/** + * Give the RGBA colormask of the channels that can be represented in this + * format. + * + * That is, the channels whose values are preserved. + */ +static INLINE unsigned +util_format_colormask(const struct util_format_description *desc) +{ + unsigned colormask; + unsigned chan; + + switch (desc->colorspace) { + case UTIL_FORMAT_COLORSPACE_RGB: + case UTIL_FORMAT_COLORSPACE_SRGB: + case UTIL_FORMAT_COLORSPACE_YUV: + colormask = 0; + for (chan = 0; chan < 4; ++chan) { + if (desc->swizzle[chan] < 4) { + colormask |= (1 << chan); + } + } + return colormask; + case UTIL_FORMAT_COLORSPACE_ZS: + return 0; + default: + assert(0); + return 0; + } +} + + +/** + * Whether the src format can be blitted to destation format with a simple + * memcpy. + */ +boolean +util_is_format_compatible(const struct util_format_description *src_desc, + const struct util_format_description *dst_desc); + + /** * Whether this format is a rgab8 variant. * diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 78473bf35a..6ed39561fb 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -399,7 +399,6 @@ static INLINE boolean util_get_offset( } } - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h index 42c40b2aa7..81ffc9fb27 100644 --- a/src/gallium/auxiliary/util/u_linear.h +++ b/src/gallium/auxiliary/util/u_linear.h @@ -33,6 +33,7 @@ #ifndef U_LINEAR_H #define U_LINEAR_H +#include "pipe/p_compiler.h" #include "pipe/p_format.h" struct u_linear_format_block |