diff options
-rw-r--r-- | src/gallium/auxiliary/Makefile | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_gctors.cpp | 17 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_half.c | 123 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_half.h | 55 |
4 files changed, 199 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 4c629924b9..14c0fb1840 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -110,6 +110,7 @@ C_SOURCES = \ util/u_format_table.c \ util/u_format_tests.c \ util/u_gen_mipmap.c \ + util/u_half.c \ util/u_handle_table.c \ util/u_hash_table.c \ util/u_hash.c \ @@ -138,6 +139,9 @@ C_SOURCES = \ #vl/vl_csc.c \ #vl/vl_shader_build.c \ +CPP_SOURCES = \ + util/u_gctors.cpp + GALLIVM_SOURCES = \ gallivm/lp_bld_alpha.c \ gallivm/lp_bld_arit.c \ diff --git a/src/gallium/auxiliary/util/u_gctors.cpp b/src/gallium/auxiliary/util/u_gctors.cpp new file mode 100644 index 0000000000..9ea9819d73 --- /dev/null +++ b/src/gallium/auxiliary/util/u_gctors.cpp @@ -0,0 +1,17 @@ +/* this file uses the C++ global constructor mechanism to automatically + initialize global data + + __attribute__((constructor)) allows to do this in C, but is GCC-only +*/ + +extern "C" void util_half_init_tables(void); + +struct util_gctor_t +{ + util_gctor_t() + { + util_half_init_tables(); + } +}; + +static struct util_gctor_t util_gctor; diff --git a/src/gallium/auxiliary/util/u_half.c b/src/gallium/auxiliary/util/u_half.c new file mode 100644 index 0000000000..8865acb76b --- /dev/null +++ b/src/gallium/auxiliary/util/u_half.c @@ -0,0 +1,123 @@ +#include "util/u_half.h" + +/* see www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf + * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008 + */ + +/* Note that using a 64K * 4 table is a terrible idea since it will not fit + * in the L1 cache and will massively pollute the L2 cache as well + * + * These should instead fit in the L1 cache. + * + * TODO: we could use a denormal bias table instead of the mantissa/offset + * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes + * but would involve more computation + * + * Note however that if denormals are never encountered, the L1 cache usage + * is only about 4608 bytes anyway. + */ +uint32_t util_half_to_float_mantissa_table[2048]; +uint32_t util_half_to_float_exponent_table[64]; +uint32_t util_half_to_float_offset_table[64]; +uint16_t util_float_to_half_base_table[512]; +uint8_t util_float_to_half_shift_table[512]; + +/* called by u_gctors.cpp, which defines the prototype itself */ +void util_half_init_tables(void); + +void util_half_init_tables(void) +{ + int i; + + /* zero */ + util_half_to_float_mantissa_table[0] = 0; + + /* denormals */ + for(i = 1; i < 1024; ++i) { + unsigned int m = i << 13; + unsigned int e = 0; + + /* Normalize number */ + while(!(m & 0x00800000)) { + e -= 0x00800000; + m<<=1; + } + m &= ~0x00800000; + e+= 0x38800000; + util_half_to_float_mantissa_table[i] = m | e; + } + + /* normals */ + for(i = 1024; i < 2048; ++i) + util_half_to_float_mantissa_table[i] = ((i-1024)<<13); + + /* positive zero or denormals */ + util_half_to_float_exponent_table[0] = 0; + + /* positive numbers */ + for(i = 1; i <= 30; ++i) + util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23); + + /* positive infinity/NaN */ + util_half_to_float_exponent_table[31] = 0x7f800000; + + /* negative zero or denormals */ + util_half_to_float_exponent_table[32] = 0x80000000; + + /* negative numbers */ + for(i = 33; i <= 62; ++i) + util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23); + + /* negative infinity/NaN */ + util_half_to_float_exponent_table[63] = 0xff800000; + + /* positive zero or denormals */ + util_half_to_float_offset_table[0] = 0; + + /* positive normals */ + for(i = 1; i < 32; ++i) + util_half_to_float_offset_table[i] = 1024; + + /* negative zero or denormals */ + util_half_to_float_offset_table[32] = 0; + + /* negative normals */ + for(i = 33; i < 64; ++i) + util_half_to_float_offset_table[i] = 1024; + + + + /* very small numbers mapping to zero */ + for(i = -127; i < -24; ++i) { + util_float_to_half_base_table[127 + i] = 0; + util_float_to_half_shift_table[127 + i] = 24; + } + + /* small numbers mapping to denormals */ + for(i = -24; i < -14; ++i) { + util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i); + util_float_to_half_shift_table[127 + i] = -i - 1; + } + + /* normal numbers */ + for(i = -14; i < 16; ++i) { + util_float_to_half_base_table[127 + i] = (i + 15) << 10; + util_float_to_half_shift_table[127 + i] = 13; + } + + /* large numbers mapping to infinity */ + for(i = 16; i < 128; ++i) { + util_float_to_half_base_table[127 + i] = 0x7c00; + util_float_to_half_shift_table[127 + i] = 24; + } + + /* infinity and NaNs */ + util_float_to_half_base_table[255] = 0x7c00; + util_float_to_half_shift_table[255] = 13; + + /* negative numbers */ + for(i = 0; i < 256; ++i) { + util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000; + util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i]; + } +} diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h new file mode 100644 index 0000000000..464d43df8a --- /dev/null +++ b/src/gallium/auxiliary/util/u_half.h @@ -0,0 +1,55 @@ +#ifndef U_HALF_H +#define U_HALF_H + +#include "pipe/p_compiler.h" + +extern uint32_t util_half_to_float_mantissa_table[2048]; +extern uint32_t util_half_to_float_exponent_table[64]; +extern uint32_t util_half_to_float_offset_table[64]; +extern uint16_t util_float_to_half_base_table[512]; +extern uint8_t util_float_to_half_shift_table[512]; + +/* + * Note that if the half float is a signaling NaN, the x87 FPU will turn + * it into a quiet NaN immediately upon loading into a float. + * + * Additionally, denormals may be flushed to zero. + * + * To avoid this, use the floatui functions instead of the float ones + * when just doing conversion rather than computation on the resulting + * floats. + */ + +static INLINE uint32_t +util_half_to_floatui(half h) +{ + unsigned exp = h >> 10; + return util_half_to_float_mantissa_table[util_half_to_float_offset_table[exp] + (h & 0x3ff)] + + util_half_to_float_exponent_table[exp]; +} + +static INLINE float +util_half_to_float(half h) +{ + union {float f; uint32_t v;} r; + r.v = util_half_to_floatui(h); + return r.f; +} + +static INLINE half +util_floatui_to_half(uint32_t v) +{ + unsigned signexp = v >> 23; + return util_float_to_half_base_table[signexp] + + ((v & 0x007fffff) >> util_float_to_half_shift_table[signexp]); +} + +static INLINE half +util_float_to_half(float f) +{ + union {float f; uint32_t v;} i; + i.f = f; + return util_floatui_to_half(i.v); +} + +#endif /* U_HALF_H */ |