From b919bb7f6119d59751fe846cabe5b0d587f46edc Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 1 Jul 2010 12:33:34 +0100 Subject: gallivm: Allow conversions to/from registers of different sizes. Allow for example to convert from 4 x float32 to 4 x unorm8 and vice versa. Uses code and ideas from Brian Paul. --- src/gallium/auxiliary/gallivm/lp_bld_conv.c | 32 +++------ src/gallium/auxiliary/gallivm/lp_bld_pack.c | 106 ++++++++++++++++++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_pack.h | 8 +++ src/gallium/drivers/llvmpipe/lp_test_conv.c | 20 ++++-- 4 files changed, 141 insertions(+), 25 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 3f7f2ebde9..5e7260dc21 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -83,6 +83,9 @@ * * Although the result values can be scaled to an arbitrary bit width specified * by dst_width, the actual result type will have the same width. + * + * Ex: src = { float, float, float, float } + * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1]. */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, @@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, /** * Inverse of lp_build_clamped_float_to_unsigned_norm above. + * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1] + * return {float, float, float, float} with values in range [0, 1]. */ LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, @@ -219,14 +224,13 @@ lp_build_conv(LLVMBuilderRef builder, unsigned num_tmps; unsigned i; - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - /* We must not loose or gain channels. 
Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); assert(src_type.length <= LP_MAX_VECTOR_LENGTH); assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); + assert(num_srcs <= LP_MAX_VECTOR_LENGTH); + assert(num_dsts <= LP_MAX_VECTOR_LENGTH); tmp_type = src_type; for(i = 0; i < num_srcs; ++i) @@ -330,25 +334,11 @@ lp_build_conv(LLVMBuilderRef builder, assert(!tmp_type.floating || tmp_type.width == dst_type.width); - if(tmp_type.width > dst_type.width) { - assert(num_dsts == 1); - tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); - tmp_type.width = dst_type.width; - tmp_type.length = dst_type.length; - num_tmps = 1; - } - - if(tmp_type.width < dst_type.width) { - assert(num_tmps == 1); - lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); - tmp_type.width = dst_type.width; - tmp_type.length = dst_type.length; - num_tmps = num_dsts; - } + lp_build_resize(builder, tmp_type, dst_type, tmp, num_srcs, tmp, num_dsts); - assert(tmp_type.width == dst_type.width); - assert(tmp_type.length == dst_type.length); - assert(num_tmps == num_dsts); + tmp_type.width = dst_type.width; + tmp_type.length = dst_type.length; + num_tmps = num_dsts; /* * Scale to the widest range diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 186f8849b8..dfe83b36c4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -427,3 +427,109 @@ lp_build_pack(LLVMBuilderRef builder, return tmp[0]; } + + +/** + * Truncate or expand the bitwidth + */ +void +lp_build_resize(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *src, unsigned num_srcs, + LLVMValueRef *dst, unsigned num_dsts) +{ + LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(!src_type.floating || src_type.width == dst_type.width); + + /* We must not lose or gain channels. 
Only precision */ + assert(src_type.length * num_srcs == dst_type.length * num_dsts); + + /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */ + assert(num_srcs == 1 || num_dsts == 1); + + assert(src_type.length <= LP_MAX_VECTOR_LENGTH); + assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); + assert(num_srcs <= LP_MAX_VECTOR_LENGTH); + assert(num_dsts <= LP_MAX_VECTOR_LENGTH); + + if (src_type.width > dst_type.width) { + /* + * Truncate bit width. + */ + + assert(num_dsts == 1); + + if (src_type.width * src_type.length == dst_type.width * dst_type.length) { + /* + * Register width remains constant -- use vector packing intrinsics + */ + + tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); + } + else { + /* + * Do it element-wise. + */ + + assert(src_type.length == dst_type.length); + tmp[0] = lp_build_undef(dst_type); + for (i = 0; i < dst_type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); + val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), ""); + tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); + } + } + } + else if (src_type.width < dst_type.width) { + /* + * Expand bit width. + */ + + assert(num_srcs == 1); + + if (src_type.width * src_type.length == dst_type.width * dst_type.length) { + /* + * Register width remains constant -- use vector unpack intrinsics + */ + lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts); + } + else { + /* + * Do it element-wise. 
+ */ + + assert(src_type.length == dst_type.length); + tmp[0] = lp_build_undef(dst_type); + for (i = 0; i < dst_type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); + + if (src_type.sign && dst_type.sign) { + val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), ""); + } else { + val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), ""); + } + tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); + } + } + } + else { + /* + * No-op + */ + + assert(num_srcs == 1); + assert(num_dsts == 1); + + tmp[0] = src[0]; + } + + for(i = 0; i < num_dsts; ++i) + dst[i] = tmp[i]; +} + + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index 41adeed220..e470082b97 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -92,4 +92,12 @@ lp_build_pack(LLVMBuilderRef builder, const LLVMValueRef *src, unsigned num_srcs); +void +lp_build_resize(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *src, unsigned num_srcs, + LLVMValueRef *dst, unsigned num_dsts); + + #endif /* !LP_BLD_PACK_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 9b02f436c5..081f2d324b 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -167,19 +167,26 @@ test_one(unsigned verbose, unsigned i, j; void *code; + if (src_type.width * src_type.length != dst_type.width * dst_type.length || + src_type.length != dst_type.length) { + return TRUE; + } + if(verbose >= 1) dump_conv_types(stdout, src_type, dst_type); - if(src_type.length > dst_type.length) { + if (src_type.length > dst_type.length) { num_srcs = 1; num_dsts = src_type.length/dst_type.length; } - else { + else if (src_type.length < dst_type.length) { num_dsts 
= 1; num_srcs = dst_type.length/src_type.length; } - - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + else { + num_dsts = 1; + num_srcs = 1; + } /* We must not loose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); @@ -381,6 +388,11 @@ const struct lp_type conv_types[] = { { FALSE, FALSE, TRUE, FALSE, 8, 16 }, { FALSE, FALSE, FALSE, TRUE, 8, 16 }, { FALSE, FALSE, FALSE, FALSE, 8, 16 }, + + { FALSE, FALSE, TRUE, TRUE, 8, 4 }, + { FALSE, FALSE, TRUE, FALSE, 8, 4 }, + { FALSE, FALSE, FALSE, TRUE, 8, 4 }, + { FALSE, FALSE, FALSE, FALSE, 8, 4 }, }; -- cgit v1.2.3