From b919bb7f6119d59751fe846cabe5b0d587f46edc Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 1 Jul 2010 12:33:34 +0100
Subject: gallivm: Allow conversions to/from registers of different sizes.

For example, allow converting from 4 x float32 to 4 x unorm8 and vice versa.

Uses code and ideas from Brian Paul.
---
 src/gallium/auxiliary/gallivm/lp_bld_conv.c | 32 ++++++++++-------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

(limited to 'src/gallium/auxiliary/gallivm/lp_bld_conv.c')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 3f7f2ebde9..5e7260dc21 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -83,6 +83,9 @@
  *
  * Although the result values can be scaled to an arbitrary bit width specified
  * by dst_width, the actual result type will have the same width.
+ *
+ * Ex: src = { float, float, float, float }
+ * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
  */
 LLVMValueRef
 lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
@@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
 
 /**
  * Inverse of lp_build_clamped_float_to_unsigned_norm above.
+ * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
+ * return {float, float, float, float} with values in range [0, 1].
  */
 LLVMValueRef
 lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
@@ -219,14 +224,13 @@ lp_build_conv(LLVMBuilderRef builder,
    unsigned num_tmps;
    unsigned i;
 
-   /* Register width must remain constant */
-   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
    /* We must not loose or gain channels. Only precision */
    assert(src_type.length * num_srcs == dst_type.length * num_dsts);
 
    assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
    assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
 
    tmp_type = src_type;
    for(i = 0; i < num_srcs; ++i)
@@ -330,25 +334,11 @@ lp_build_conv(LLVMBuilderRef builder,
 
    assert(!tmp_type.floating || tmp_type.width == dst_type.width);
 
-   if(tmp_type.width > dst_type.width) {
-      assert(num_dsts == 1);
-      tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
-      tmp_type.width = dst_type.width;
-      tmp_type.length = dst_type.length;
-      num_tmps = 1;
-   }
-
-   if(tmp_type.width < dst_type.width) {
-      assert(num_tmps == 1);
-      lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
-      tmp_type.width = dst_type.width;
-      tmp_type.length = dst_type.length;
-      num_tmps = num_dsts;
-   }
+   lp_build_resize(builder, tmp_type, dst_type, tmp, num_srcs, tmp, num_dsts);
 
-   assert(tmp_type.width == dst_type.width);
-   assert(tmp_type.length == dst_type.length);
-   assert(num_tmps == num_dsts);
+   tmp_type.width  = dst_type.width;
+   tmp_type.length = dst_type.length;
+   num_tmps        = num_dsts;
 
    /*
     * Scale to the widest range
-- 
cgit v1.2.3