summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: José Fonseca <jfonseca@vmware.com>, 2010-07-01 12:33:34 +0100
committer: José Fonseca <jfonseca@vmware.com>, 2010-07-01 15:02:15 +0100
commit: b919bb7f6119d59751fe846cabe5b0d587f46edc (patch)
tree: 3ad3cffb62e5dff83cb611451b23bf35e9a1bc8c /src
parent: a70ec096aaece3aaadc1a8307e32554f7ad4d082 (diff)
gallivm: Allow conversions to/from registers of different sizes.
Allow, for example, converting from 4 x float32 to 4 x unorm8 and vice versa. Uses code and ideas from Brian Paul.
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_conv.c 32
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_pack.c 106
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_pack.h 8
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_test_conv.c 20
4 files changed, 141 insertions, 25 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 3f7f2ebde9..5e7260dc21 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -83,6 +83,9 @@
*
* Although the result values can be scaled to an arbitrary bit width specified
* by dst_width, the actual result type will have the same width.
+ *
+ * Ex: src = { float, float, float, float }
+ * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
*/
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
@@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
/**
* Inverse of lp_build_clamped_float_to_unsigned_norm above.
+ * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
+ * return {float, float, float, float} with values in range [0, 1].
*/
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
@@ -219,14 +224,13 @@ lp_build_conv(LLVMBuilderRef builder,
unsigned num_tmps;
unsigned i;
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
/* We must not loose or gain channels. Only precision */
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+ assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
tmp_type = src_type;
for(i = 0; i < num_srcs; ++i)
@@ -330,25 +334,11 @@ lp_build_conv(LLVMBuilderRef builder,
assert(!tmp_type.floating || tmp_type.width == dst_type.width);
- if(tmp_type.width > dst_type.width) {
- assert(num_dsts == 1);
- tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
- tmp_type.width = dst_type.width;
- tmp_type.length = dst_type.length;
- num_tmps = 1;
- }
-
- if(tmp_type.width < dst_type.width) {
- assert(num_tmps == 1);
- lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
- tmp_type.width = dst_type.width;
- tmp_type.length = dst_type.length;
- num_tmps = num_dsts;
- }
+ lp_build_resize(builder, tmp_type, dst_type, tmp, num_srcs, tmp, num_dsts);
- assert(tmp_type.width == dst_type.width);
- assert(tmp_type.length == dst_type.length);
- assert(num_tmps == num_dsts);
+ tmp_type.width = dst_type.width;
+ tmp_type.length = dst_type.length;
+ num_tmps = num_dsts;
/*
* Scale to the widest range
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 186f8849b8..dfe83b36c4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -427,3 +427,109 @@ lp_build_pack(LLVMBuilderRef builder,
return tmp[0];
}
+
+
+/**
+ * Truncate or expand the bit width of vector elements.
+ *
+ * Converts num_srcs vectors of src_type into num_dsts vectors of dst_type,
+ * keeping the total number of channels. Supported shapes are M:1
+ * (truncation), 1:N (expansion) and N:N (equal widths, values are passed
+ * through unchanged).
+ *
+ * src and dst may be the same array: results are staged in a local array
+ * and only copied to dst at the end.
+ */
+void
+lp_build_resize(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                const LLVMValueRef *src, unsigned num_srcs,
+                LLVMValueRef *dst, unsigned num_dsts)
+{
+   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   assert(!src_type.floating || src_type.width == dst_type.width);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+   /* We don't support arbitrary M:N conversion, only 1:N, M:1, or N:N */
+   assert(num_srcs == 1 || num_dsts == 1 || num_srcs == num_dsts);
+
+   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
+   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
+
+   if (src_type.width > dst_type.width) {
+      /* Truncate bit width. */
+
+      assert(num_dsts == 1);
+
+      if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+         /* Register width remains constant -- use vector packing intrinsics */
+         tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
+      }
+      else {
+         /* Same channel count, narrower register: do it element-wise. */
+         assert(src_type.length == dst_type.length);
+         tmp[0] = lp_build_undef(dst_type);
+         for (i = 0; i < dst_type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+            val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), "");
+            tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+         }
+      }
+   }
+   else if (src_type.width < dst_type.width) {
+      /* Expand bit width. */
+
+      assert(num_srcs == 1);
+
+      if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+         /* Register width remains constant -- use vector unpack intrinsics */
+         lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts);
+      }
+      else {
+         /* Same channel count, wider register: do it element-wise. */
+         assert(src_type.length == dst_type.length);
+         tmp[0] = lp_build_undef(dst_type);
+         for (i = 0; i < dst_type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+
+            /* Sign-extend only when both types are signed. */
+            if (src_type.sign && dst_type.sign) {
+               val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), "");
+            } else {
+               val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), "");
+            }
+            tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+         }
+      }
+   }
+   else {
+      /*
+       * Same bit width: nothing to convert, pass the vectors through.
+       *
+       * This must handle N:N with N > 1, since lp_build_conv calls this
+       * function unconditionally and a same-width conversion may involve
+       * several vectors.
+       */
+      assert(num_srcs == num_dsts);
+
+      for (i = 0; i < num_dsts; ++i) {
+         tmp[i] = src[i];
+      }
+   }
+
+   /* Publish the results last so that dst may alias src. */
+   for (i = 0; i < num_dsts; ++i) {
+      dst[i] = tmp[i];
+   }
+}
+
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index 41adeed220..e470082b97 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -92,4 +92,12 @@ lp_build_pack(LLVMBuilderRef builder,
const LLVMValueRef *src, unsigned num_srcs);
+/* Truncate or expand the bit width of the elements of src into dst. */
+void
+lp_build_resize(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef *src, unsigned num_srcs,
+ LLVMValueRef *dst, unsigned num_dsts);
+
+
#endif /* !LP_BLD_PACK_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 9b02f436c5..081f2d324b 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -167,19 +167,26 @@ test_one(unsigned verbose,
unsigned i, j;
void *code;
+   /*
+    * Conversions are only supported when they preserve either the register
+    * size or the number of channels; skip type pairs that preserve neither.
+    */
+   if (src_type.width * src_type.length != dst_type.width * dst_type.length &&
+       src_type.length != dst_type.length) {
+      return TRUE;
+   }
+
if(verbose >= 1)
dump_conv_types(stdout, src_type, dst_type);
- if(src_type.length > dst_type.length) {
+ if (src_type.length > dst_type.length) {
num_srcs = 1;
num_dsts = src_type.length/dst_type.length;
}
- else {
+ else if (src_type.length < dst_type.length) {
num_dsts = 1;
num_srcs = dst_type.length/src_type.length;
}
-
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+ else {
+ num_dsts = 1;
+ num_srcs = 1;
+ }
/* We must not loose or gain channels. Only precision */
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
@@ -381,6 +388,11 @@ const struct lp_type conv_types[] = {
{ FALSE, FALSE, TRUE, FALSE, 8, 16 },
{ FALSE, FALSE, FALSE, TRUE, 8, 16 },
{ FALSE, FALSE, FALSE, FALSE, 8, 16 },
+
+ { FALSE, FALSE, TRUE, TRUE, 8, 4 },
+ { FALSE, FALSE, TRUE, FALSE, 8, 4 },
+ { FALSE, FALSE, FALSE, TRUE, 8, 4 },
+ { FALSE, FALSE, FALSE, FALSE, 8, 4 },
};