diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_format_soa.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 91 |
1 files changed, 77 insertions, 14 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index e1b94adc85..9f405921b0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -36,7 +36,7 @@ #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_swizzle.h" -#include "lp_bld_sample.h" /* for lp_build_gather */ +#include "lp_bld_gather.h" #include "lp_bld_format.h" @@ -251,6 +251,41 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, } +void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); + unsigned chan; + + packed = LLVMBuildBitCast(builder, packed, + lp_build_int_vec_type(dst_type), ""); + + /* Decode the input vector components */ + for (chan = 0; chan < 4; ++chan) { + unsigned start = chan*8; + unsigned stop = start + 8; + LLVMValueRef input; + + input = packed; + + if (start) + input = LLVMBuildLShr(builder, input, + lp_build_const_int_vec(dst_type, start), ""); + + if (stop < 32) + input = LLVMBuildAnd(builder, input, mask, ""); + + input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + + rgba[chan] = input; + } +} + + + /** * Fetch a texels from a texture, returning them in SoA layout. * @@ -311,20 +346,49 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, format_desc, type, packed, rgba_out); + return; } - else { - /* - * Fallback to calling lp_build_fetch_rgba_aos for each pixel. - * - * This is not the most efficient way of fetching pixels, as we - * miss some opportunities to do vectorization, but this is - * convenient for formats or scenarios for which there was no - * opportunity or incentive to optimize. - */ + /* + * Try calling lp_build_fetch_rgba_aos for all pixels. + */ + + if (util_format_fits_8unorm(format_desc) && + type.floating && type.width == 32 && type.length == 4) { + struct lp_type tmp_type; + LLVMValueRef tmp; + + memset(&tmp_type, 0, sizeof tmp_type); + tmp_type.width = 8; + tmp_type.length = type.length * 4; + tmp_type.norm = TRUE; + + tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type, + base_ptr, offset, i, j); + + lp_build_rgba8_to_f32_soa(builder, + type, + tmp, + rgba_out); + + return; + } + + /* + * Fallback to calling lp_build_fetch_rgba_aos for each pixel. + * + * This is not the most efficient way of fetching pixels, as we + * miss some opportunities to do vectorization, but this is + * convenient for formats or scenarios for which there was no + * opportunity or incentive to optimize. + */ + + { unsigned k, chan; + struct lp_type tmp_type; - assert(type.floating); + tmp_type = type; + tmp_type.length = 4; for (chan = 0; chan < 4; ++chan) { rgba_out[chan] = lp_build_undef(type); @@ -334,18 +398,17 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, for(k = 0; k < type.length; ++k) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); LLVMValueRef offset_elem; - LLVMValueRef ptr; LLVMValueRef i_elem, j_elem; LLVMValueRef tmp; offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); - ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, ""); i_elem = LLVMBuildExtractElement(builder, i, index, ""); j_elem = LLVMBuildExtractElement(builder, j, index, ""); /* Get a single float[4]={R,G,B,A} pixel */ - tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, + tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type, + base_ptr, offset_elem, i_elem, j_elem); /* |