diff options
author | Luca Barbieri <luca@luca-barbieri.com> | 2010-03-24 18:12:45 +0100 |
---|---|---|
committer | Michal Krol <michal@vmware.com> | 2010-04-01 13:33:07 +0200 |
commit | 3ff175d6de89ad92d167362355501f99d06f0f97 (patch) | |
tree | 6332294693fc3581785927a9df6013aecf9aca22 /src/gallium/auxiliary/util/u_half.c | |
parent | 110e039d0df08ae1642adf4bd20f07992b9ffe9c (diff) |
gallium/util: add fast half float conversion functions
This adds a fast half float conversion facility to Gallium.
Mesa already contains such a facility, but using a much worse algorithm.
This one is an implementation of
www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
and uses a branch-less algorithm with some lookup tables small enough
to fit in the L1 cache.
Ideally, Mesa should start using these functions too, but I'm not sure
how to arrange that with the current build system.
A new "u_gctors.cpp" is added that defines a global C++ constructor
that initializes the conversion lookup tables at library init.
Diffstat (limited to 'src/gallium/auxiliary/util/u_half.c')
-rw-r--r-- | src/gallium/auxiliary/util/u_half.c | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/util/u_half.c b/src/gallium/auxiliary/util/u_half.c new file mode 100644 index 0000000000..8865acb76b --- /dev/null +++ b/src/gallium/auxiliary/util/u_half.c @@ -0,0 +1,123 @@ +#include "util/u_half.h" + +/* see www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf + * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008 + */ + +/* Note that using a 64K * 4 table is a terrible idea since it will not fit + * in the L1 cache and will massively pollute the L2 cache as well + * + * These should instead fit in the L1 cache. + * + * TODO: we could use a denormal bias table instead of the mantissa/offset + * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes + * but would involve more computation + * + * Note however that if denormals are never encountered, the L1 cache usage + * is only about 4608 bytes anyway. + */ +uint32_t util_half_to_float_mantissa_table[2048]; +uint32_t util_half_to_float_exponent_table[64]; +uint32_t util_half_to_float_offset_table[64]; +uint16_t util_float_to_half_base_table[512]; +uint8_t util_float_to_half_shift_table[512]; + +/* called by u_gctors.cpp, which defines the prototype itself */ +void util_half_init_tables(void); + +void util_half_init_tables(void) +{ + int i; + + /* zero */ + util_half_to_float_mantissa_table[0] = 0; + + /* denormals */ + for(i = 1; i < 1024; ++i) { + unsigned int m = i << 13; + unsigned int e = 0; + + /* Normalize number */ + while(!(m & 0x00800000)) { + e -= 0x00800000; + m<<=1; + } + m &= ~0x00800000; + e+= 0x38800000; + util_half_to_float_mantissa_table[i] = m | e; + } + + /* normals */ + for(i = 1024; i < 2048; ++i) + util_half_to_float_mantissa_table[i] = ((i-1024)<<13); + + /* positive zero or denormals */ + util_half_to_float_exponent_table[0] = 0; + + /* positive numbers */ + for(i = 1; i <= 30; ++i) + util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23); + + /* positive infinity/NaN */ + util_half_to_float_exponent_table[31] = 
0x7f800000; + + /* negative zero or denormals */ + util_half_to_float_exponent_table[32] = 0x80000000; + + /* negative numbers */ + for(i = 33; i <= 62; ++i) + util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23); + + /* negative infinity/NaN */ + util_half_to_float_exponent_table[63] = 0xff800000; + + /* positive zero or denormals */ + util_half_to_float_offset_table[0] = 0; + + /* positive normals */ + for(i = 1; i < 32; ++i) + util_half_to_float_offset_table[i] = 1024; + + /* negative zero or denormals */ + util_half_to_float_offset_table[32] = 0; + + /* negative normals */ + for(i = 33; i < 64; ++i) + util_half_to_float_offset_table[i] = 1024; + + + + /* very small numbers mapping to zero */ + for(i = -127; i < -24; ++i) { + util_float_to_half_base_table[127 + i] = 0; + util_float_to_half_shift_table[127 + i] = 24; + } + + /* small numbers mapping to denormals */ + for(i = -24; i < -14; ++i) { + util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i); + util_float_to_half_shift_table[127 + i] = -i - 1; + } + + /* normal numbers */ + for(i = -14; i < 16; ++i) { + util_float_to_half_base_table[127 + i] = (i + 15) << 10; + util_float_to_half_shift_table[127 + i] = 13; + } + + /* large numbers mapping to infinity */ + for(i = 16; i < 128; ++i) { + util_float_to_half_base_table[127 + i] = 0x7c00; + util_float_to_half_shift_table[127 + i] = 24; + } + + /* infinity and NaNs */ + util_float_to_half_base_table[255] = 0x7c00; + util_float_to_half_shift_table[255] = 13; + + /* negative numbers */ + for(i = 0; i < 256; ++i) { + util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000; + util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i]; + } +} |