summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/auxiliary/Makefile5
-rw-r--r--src/gallium/auxiliary/SConscript8
-rw-r--r--src/gallium/auxiliary/util/u_format.c1
-rw-r--r--src/gallium/auxiliary/util/u_half.c165
-rw-r--r--src/gallium/auxiliary/util/u_half.h3
-rw-r--r--src/gallium/auxiliary/util/u_half.py179
6 files changed, 191 insertions, 170 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 1db4aaa439..843778d810 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -169,7 +169,8 @@ GALLIVM_CPP_SOURCES = \
GENERATED_SOURCES = \
indices/u_indices_gen.c \
indices/u_unfilled_gen.c \
- util/u_format_table.c
+ util/u_format_table.c \
+ util/u_half.c
ifeq ($(MESA_LLVM),1)
@@ -198,3 +199,5 @@ util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_forma
util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv
python util/u_format_access.py util/u_format.csv > $@
+util/u_half.c: util/u_half.py
+ python util/u_half.py > $@
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index d0443db3f7..73d4150448 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -29,6 +29,14 @@ env.CodeGenerate(
source = ['util/u_format.csv'],
command = 'python $SCRIPT $SOURCE > $TARGET'
)
+
+env.CodeGenerate(
+ target = 'util/u_half.c',
+ script = 'util/u_half.py',
+ source = [],
+ command = 'python $SCRIPT > $TARGET'
+)
+
env.Depends('util/u_format_table.c', [
'util/u_format_parse.py',
'util/u_format_pack.py',
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
index d3ee1f0339..fae0a462dc 100644
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -124,5 +124,4 @@ void
util_format_do_init(void)
{
util_format_s3tc_init();
- util_half_init();
}
diff --git a/src/gallium/auxiliary/util/u_half.c b/src/gallium/auxiliary/util/u_half.c
deleted file mode 100644
index 4c8f8a51c5..0000000000
--- a/src/gallium/auxiliary/util/u_half.c
+++ /dev/null
@@ -1,165 +0,0 @@
-
-/*
- * Copyright 2010 Luca Barbieri
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* The code is a reimplementation of the algorithm in
- * www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
- * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
- *
- * The table contents have been slightly changed so that the exponent
- * bias is now in the exponent table instead of the mantissa table (mostly
- * for cosmetic reasons, and because it theoretically allows a variant
- * that flushes denormal to zero but uses a mantissa table with 24-bit
- * entries).
- *
- * The tables are also constructed slightly differently.
- */
-
-/* Note that using a 64K * 4 table is a terrible idea since it will not fit
- * in the L1 cache and will massively pollute the L2 cache as well
- *
- * These should instead fit in the L1 cache.
- *
- * TODO: we could use a denormal bias table instead of the mantissa/offset
- * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
- * but would involve more computation
- *
- * Note however that if denormals are never encountered, the L1 cache usage
- * is only about 4608 bytes anyway.
- */
-
-#include "util/u_half.h"
-#include "util/u_init.h"
-
-uint32_t util_half_to_float_mantissa_table[2048];
-uint32_t util_half_to_float_exponent_table[64];
-uint32_t util_half_to_float_offset_table[64];
-uint16_t util_float_to_half_base_table[512];
-uint8_t util_float_to_half_shift_table[512];
-
-boolean util_half_inited;
-
-void
-util_half_do_init(void)
-{
- int i;
-
- /* zero */
- util_half_to_float_mantissa_table[0] = 0;
-
- /* denormals */
- for(i = 1; i < 1024; ++i)
- {
- unsigned int m = i << 13;
- unsigned int e = 0;
-
- /* Normalize number */
- while(!(m & 0x00800000))
- {
- e -= 0x00800000;
- m <<= 1;
- }
- m &= ~0x00800000;
- e += 0x38800000;
- util_half_to_float_mantissa_table[i] = m | e;
- }
-
- /* normals */
- for(i = 1024; i < 2048; ++i)
- util_half_to_float_mantissa_table[i] = ((i - 1024) << 13);
-
- /* positive zero or denormals */
- util_half_to_float_exponent_table[0] = 0;
-
- /* positive numbers */
- for(i = 1; i <= 30; ++i)
- util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
-
- /* positive infinity/NaN */
- util_half_to_float_exponent_table[31] = 0x7f800000;
-
- /* negative zero or denormals */
- util_half_to_float_exponent_table[32] = 0x80000000;
-
- /* negative numbers */
- for(i = 33; i <= 62; ++i)
- util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
-
- /* negative infinity/NaN */
- util_half_to_float_exponent_table[63] = 0xff800000;
-
- /* positive zero or denormals */
- util_half_to_float_offset_table[0] = 0;
-
- /* positive normals */
- for(i = 1; i < 32; ++i)
- util_half_to_float_offset_table[i] = 1024;
-
- /* negative zero or denormals */
- util_half_to_float_offset_table[32] = 0;
-
- /* negative normals */
- for(i = 33; i < 64; ++i)
- util_half_to_float_offset_table[i] = 1024;
-
- /* very small numbers mapping to zero */
- for(i = -127; i < -24; ++i)
- {
- util_float_to_half_base_table[127 + i] = 0;
- util_float_to_half_shift_table[127 + i] = 24;
- }
-
- /* small numbers mapping to denormals */
- for(i = -24; i < -14; ++i)
- {
- util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
- util_float_to_half_shift_table[127 + i] = -i - 1;
- }
-
- /* normal numbers */
- for(i = -14; i < 16; ++i)
- {
- util_float_to_half_base_table[127 + i] = (i + 15) << 10;
- util_float_to_half_shift_table[127 + i] = 13;
- }
-
- /* large numbers mapping to infinity */
- for(i = 16; i < 128; ++i)
- {
- util_float_to_half_base_table[127 + i] = 0x7c00;
- util_float_to_half_shift_table[127 + i] = 24;
- }
-
- /* infinity and NaNs */
- util_float_to_half_base_table[255] = 0x7c00;
- util_float_to_half_shift_table[255] = 13;
-
- /* negative numbers */
- for(i = 0; i < 256; ++i)
- {
- util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000;
- util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i];
- }
-}
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
index 02f0f24193..a28b1fd1d9 100644
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -3,7 +3,6 @@
#include "pipe/p_compiler.h"
#include "util/u_math.h"
-#include "util/u_inline_init.h"
#ifdef __cplusplus
extern "C" {
@@ -56,8 +55,6 @@ util_float_to_half(float f)
return util_floatui_to_half(i.ui);
}
-UTIL_INLINE_INIT(util_half);
-
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_half.py b/src/gallium/auxiliary/util/u_half.py
new file mode 100644
index 0000000000..a92f758750
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_half.py
@@ -0,0 +1,179 @@
+# Copyright 2010 Luca Barbieri
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial
+# portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# *************************************************************************
+
+# The code is a reimplementation of the algorithm in
+# www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
+# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
+#
+# The table contents have been slightly changed so that the exponent
+# bias is now in the exponent table instead of the mantissa table (mostly
+# for cosmetic reasons, and because it theoretically allows a variant
+# that flushes denormals to zero but uses a mantissa table with 24-bit
+# entries).
+#
+# The tables are also constructed slightly differently.
+#
+
+# Note that using a 64K * 4 table is a terrible idea since it will not fit
+# in the L1 cache and will massively pollute the L2 cache as well.
+#
+# The tables generated below should instead fit in the L1 cache.
+#
+# TODO: we could use a denormal bias table instead of the mantissa/offset
+# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
+# but would involve more computation
+#
+# Note however that if denormals are never encountered, the L1 cache usage
+# is only about 4608 bytes anyway.
+
+table_index = None
+table_length = None
+
+def begin(t, n, l):
+ global table_length
+ global table_index
+ table_index = 0
+ table_length = l
+ print
+ print t + " " + n + "[" + str(l) + "] = {"
+
+def value(v):
+ global table_index
+ table_index += 1
+ print "\t" + hex(v) + ","
+
+def end():
+ global table_length
+ global table_index
+ print "};"
+ assert table_index == table_length
+
+print "/* This file is autogenerated by u_half.py. Do not edit directly. */"
+print "#include \"util/u_half.h\""
+
+begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
+# zero
+value(0)
+
+# denormals
+for i in xrange(1, 1024):
+ m = i << 13
+ e = 0
+
+ # normalize number
+ while (m & 0x00800000) == 0:
+ e -= 0x00800000;
+ m <<= 1;
+
+ m &= ~0x00800000;
+ e += 0x38800000;
+ value(m | e)
+
+# normals
+for i in xrange(1024, 2048):
+ value((i - 1024) << 13)
+end()
+
+begin("uint32_t", "util_half_to_float_exponent_table", 64)
+# positive zero or denormals
+value(0)
+
+# positive normal numbers
+for i in xrange(1, 31):
+ value(0x38000000 + (i << 23))
+
+# positive infinity/NaN
+value(0x7f800000)
+
+# negative zero or denormals
+value(0x80000000)
+
+# negative normal numbers
+for i in range(33, 63):
+ value(0xb8000000 + ((i - 32) << 23))
+
+# negative infinity/NaN
+value(0xff800000)
+end()
+
+begin("uint32_t", "util_half_to_float_offset_table", 64)
+# positive zero or denormals
+value(0)
+
+# positive normals, infinity and NaN
+for i in range(1, 32):
+ value(1024)
+
+# negative zero or denormals
+value(0)
+
+# negative normals, infinity and NaN
+for i in xrange(33, 64):
+ value(1024)
+end()
+
+begin("uint16_t", "util_float_to_half_base_table", 512)
+for sign in (0, 0x8000):
+ # very small numbers mapping to zero
+ for i in xrange(-127, -24):
+ value(sign | 0)
+
+ # small numbers mapping to denormals
+ for i in xrange(-24, -14):
+ value(sign | (0x400 >> (-14 -i)))
+
+ # normal numbers
+ for i in xrange(-14, 16):
+ value(sign | ((i + 15) << 10))
+
+ # large numbers mapping to infinity
+ for i in xrange(16, 128):
+ value(sign | 0x7c00)
+
+ # infinity and NaNs
+ value(sign | 0x7c00)
+end()
+
+begin("uint8_t", "util_float_to_half_shift_table", 512)
+for sign in (0, 0x8000):
+ # very small numbers mapping to zero
+ for i in xrange(-127, -24):
+ value(24)
+
+ # small numbers mapping to denormals
+ for i in xrange(-24, -14):
+ value(-1 - i)
+
+ # normal numbers
+ for i in xrange(-14, 16):
+ value(13)
+
+ # large numbers mapping to infinity
+ for i in xrange(16, 128):
+ value(24)
+
+ # infinity and NaNs
+ value(13)
+end()
+