From 3ff175d6de89ad92d167362355501f99d06f0f97 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Mar 2010 18:12:45 +0100 Subject: gallium/util: add fast half float conversion functions This adds a fast half float conversion facility to Gallium. Mesa already contains such a facility, but using a much worse algorithm. This one is an implementation of www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf and uses a branch-less algorithm with some lookup tables small enough to fit in the L1 cache. Ideally, Mesa should start using these functions too, but I'm not sure how to arrange that with the current build system. A new "u_gctors.cpp" is added that defines a global C++ constructor which initializes the conversion lookup tables at library init. --- src/gallium/auxiliary/Makefile | 4 ++ src/gallium/auxiliary/util/u_gctors.cpp | 17 +++++ src/gallium/auxiliary/util/u_half.c | 123 ++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_half.h | 55 ++++++++++++++ 4 files changed, 199 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_gctors.cpp create mode 100644 src/gallium/auxiliary/util/u_half.c create mode 100644 src/gallium/auxiliary/util/u_half.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 4c629924b9..14c0fb1840 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -110,6 +110,7 @@ C_SOURCES = \ util/u_format_table.c \ util/u_format_tests.c \ util/u_gen_mipmap.c \ + util/u_half.c \ util/u_handle_table.c \ util/u_hash_table.c \ util/u_hash.c \ @@ -138,6 +139,9 @@ C_SOURCES = \ #vl/vl_csc.c \ #vl/vl_shader_build.c \ +CPP_SOURCES = \ + util/u_gctors.cpp + GALLIVM_SOURCES = \ gallivm/lp_bld_alpha.c \ gallivm/lp_bld_arit.c \ diff --git a/src/gallium/auxiliary/util/u_gctors.cpp b/src/gallium/auxiliary/util/u_gctors.cpp new file mode 100644 index 0000000000..9ea9819d73 --- /dev/null +++ b/src/gallium/auxiliary/util/u_gctors.cpp @@ -0,0 +1,17 
@@
+/* This file uses the C++ global constructor mechanism to automatically
+   initialize global data: constructing the file-scope util_gctor object
+   defined at the bottom calls util_half_init_tables().
+
+   __attribute__((constructor)) would allow doing this in C, but is GCC-only
+*/
+
+extern "C" void util_half_init_tables(void); /* C linkage; defined in u_half.c */
+
+struct util_gctor_t
+{
+   util_gctor_t() /* only job: fill the half-float conversion lookup tables */
+   {
+      util_half_init_tables();
+   }
+};
+
+static struct util_gctor_t util_gctor; /* constructing this object runs the constructor above */
diff --git a/src/gallium/auxiliary/util/u_half.c b/src/gallium/auxiliary/util/u_half.c
new file mode 100644
index 0000000000..8865acb76b
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_half.c
@@ -0,0 +1,123 @@
+#include "util/u_half.h"
+
+/* see www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
+ * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
+ */
+
+/* Note that using a 64K * 4 table is a terrible idea since it will not fit
+ * in the L1 cache and will massively pollute the L2 cache as well
+ *
+ * These should instead fit in the L1 cache.
+ *
+ * TODO: we could use a denormal bias table instead of the mantissa/offset
+ * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
+ * but would involve more computation
+ *
+ * Note however that if denormals are never encountered, the L1 cache usage
+ * is only about 4608 bytes anyway.
+ */
+uint32_t util_half_to_float_mantissa_table[2048]; /* [0..1023] zero/denormals, [1024..2047] normals */
+uint32_t util_half_to_float_exponent_table[64]; /* float sign/exponent bits, indexed by h >> 10 */
+uint32_t util_half_to_float_offset_table[64]; /* 0 or 1024: which half of the mantissa table to use */
+uint16_t util_float_to_half_base_table[512]; /* half sign/exponent bits, indexed by v >> 23 */
+uint8_t util_float_to_half_shift_table[512]; /* right shift applied to the float's mantissa bits */
+
+/*
+ * Fills the five lookup tables defined above. The conversion helpers in
+ * u_half.h only read these tables, so this has to run before the first
+ * conversion. Takes no arguments and returns nothing; every entry is
+ * recomputed from constants on each call.
+ *
+ * called by u_gctors.cpp, which defines the prototype itself
+ */
+void util_half_init_tables(void);
+
+void util_half_init_tables(void)
+{
+   int i;
+
+   /* zero: all-zero exponent and mantissa contribute no float bits */
+   util_half_to_float_mantissa_table[0] = 0;
+
+   /* denormals: renormalise the 10-bit mantissa for the float format and
+    * store it together with the float exponent bits that go with it
+    */
+   for(i = 1; i < 1024; ++i) {
+      unsigned int m = i << 13;
+      unsigned int e = 0;
+
+      /* Normalize number: shift left until bit 23 (the position of the
+       * float's implicit leading one) is set, lowering the exponent
+       * field by one for every shift
+       */
+      while(!(m & 0x00800000)) {
+         e -= 0x00800000;
+         m<<=1;
+      }
+      m &= ~0x00800000; /* the leading one is implicit in the float format */
+      e+= 0x38800000; /* 113 << 23, i.e. (127 - 14) in the exponent field */
+      util_half_to_float_mantissa_table[i] = m | e;
+   }
+
+   /* normals: the 10 mantissa bits moved to the top of the 23-bit float
+    * mantissa; the exponent comes from the exponent table instead
+    */
+   for(i = 1024; i < 2048; ++i)
+      util_half_to_float_mantissa_table[i] = ((i-1024)<<13);
+
+   /* positive zero or denormals: the mantissa table entry already holds
+    * the complete float bits
+    */
+   util_half_to_float_exponent_table[0] = 0;
+
+   /* positive numbers: rebias the exponent, 0x38000000 == (127 - 15) << 23 */
+   for(i = 1; i <= 30; ++i)
+      util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
+
+   /* positive infinity/NaN: all eight float exponent bits set */
+   util_half_to_float_exponent_table[31] = 0x7f800000;
+
+   /* negative zero or denormals: only the float sign bit */
+   util_half_to_float_exponent_table[32] = 0x80000000;
+
+   /* negative numbers: as for the positive ones, with the sign bit set */
+   for(i = 33; i <= 62; ++i)
+      util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
+
+   /* negative infinity/NaN */
+   util_half_to_float_exponent_table[63] = 0xff800000;
+
+   /* positive zero or denormals: use the first half of the mantissa table */
+   util_half_to_float_offset_table[0] = 0;
+
+   /* positive normals: use the second half of the mantissa table */
+   for(i = 1; i < 32; ++i)
+      util_half_to_float_offset_table[i] = 1024;
+
+   /* negative zero or denormals */
+   util_half_to_float_offset_table[32] = 0;
+
+   /* negative normals */
+   for(i = 33; i < 64; ++i)
+      util_half_to_float_offset_table[i] = 1024;
+
+
+
+   /* float -> half tables: from here on i is the unbiased float exponent
+    * and 127 + i the biased exponent field used as table index.
+    *
+    * very small numbers mapping to zero: a shift of 24 discards all 23
+    * mantissa bits
+    */
+   for(i = -127; i < -24; ++i) {
+      util_float_to_half_base_table[127 + i] = 0;
+      util_float_to_half_shift_table[127 + i] = 24;
+   }
+
+   /* small numbers mapping to denormals: the base holds the float's
+    * implicit leading one at its place in the half mantissa, and the
+    * float mantissa is shifted down to sit right below it
+    */
+   for(i = -24; i < -14; ++i) {
+      util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
+      util_float_to_half_shift_table[127 + i] = -i - 1;
+   }
+
+   /* normal numbers: rebias the exponent by 15 and keep the top 10
+    * mantissa bits
+    */
+   for(i = -14; i < 16; ++i) {
+      util_float_to_half_base_table[127 + i] = (i + 15) << 10;
+      util_float_to_half_shift_table[127 + i] = 13;
+   }
+
+   /* large numbers mapping to infinity: the mantissa is discarded */
+   for(i = 16; i < 128; ++i) {
+      util_float_to_half_base_table[127 + i] = 0x7c00;
+      util_float_to_half_shift_table[127 + i] = 24;
+   }
+
+   /* infinity and NaNs: the top 10 mantissa bits are kept, so a NaN with
+    * any of those bits set stays a NaN
+    */
+   util_float_to_half_base_table[255] = 0x7c00;
+   util_float_to_half_shift_table[255] = 13;
+
+   /* negative numbers: same entries with the half sign bit (0x8000) set */
+   for(i = 0; i < 256; ++i) {
+      util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000;
+      util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i];
+   }
+}
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
new file mode 100644
index 0000000000..464d43df8a
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -0,0 +1,55 @@
+#ifndef U_HALF_H
+#define U_HALF_H
+
+#include "pipe/p_compiler.h"
+
+extern uint32_t util_half_to_float_mantissa_table[2048];
+extern uint32_t util_half_to_float_exponent_table[64];
+extern uint32_t util_half_to_float_offset_table[64];
+extern uint16_t util_float_to_half_base_table[512];
+extern uint8_t util_float_to_half_shift_table[512];
+
+/*
+ * Note that if the half float is a signaling NaN, the x87 FPU will turn
+ * it into a quiet NaN immediately upon loading into a float.
+ *
+ * Additionally, denormals may be flushed to zero.
+ *
+ * To avoid this, use the floatui functions instead of the float ones
+ * when just doing conversion rather than computation on the resulting
+ * floats.
+ */
+
+/**
+ * Convert a half float to the bit pattern of the equivalent 32-bit float.
+ *
+ * Pure integer table lookup: the sign/exponent field selects an exponent
+ * entry and one half of the mantissa table, and the two values are added.
+ * The tables are filled by util_half_init_tables(); until that has run
+ * they are all zero and the result is meaningless.
+ *
+ * NOTE(review): assumes `half` is an unsigned 16-bit type, so that
+ * h >> 10 stays within the 64-entry tables - confirm against its typedef.
+ *
+ * @param h  half float bits
+ * @return   IEEE-754 single precision bit pattern
+ */
+static INLINE uint32_t
+util_half_to_floatui(half h)
+{
+   unsigned exp = h >> 10;
+   return util_half_to_float_mantissa_table[util_half_to_float_offset_table[exp] + (h & 0x3ff)]
+      + util_half_to_float_exponent_table[exp];
+}
+
+/**
+ * Convert a half float to a float value.
+ *
+ * Same as util_half_to_floatui(), with the resulting bits reinterpreted
+ * as a float through a union.
+ */
+static INLINE float
+util_half_to_float(half h)
+{
+   union {float f; uint32_t v;} r;
+   r.v = util_half_to_floatui(h);
+   return r.f;
+}
+
+/**
+ * Convert the bit pattern of a 32-bit float to a half float.
+ *
+ * The float's sign/exponent field (v >> 23) selects a base value and a
+ * shift; the 23 mantissa bits are shifted right (truncated, not rounded)
+ * and added to the base. The tables are filled by util_half_init_tables().
+ *
+ * @param v  IEEE-754 single precision bit pattern
+ * @return   half float bits; every NaN input yields a NaN
+ */
+static INLINE half
+util_floatui_to_half(uint32_t v)
+{
+   unsigned signexp = v >> 23;
+   uint32_t mantissa = v & 0x007fffff;
+   unsigned h = util_float_to_half_base_table[signexp]
+      + (mantissa >> util_float_to_half_shift_table[signexp]);
+
+   /* The Inf/NaN table entry keeps only the top 10 mantissa bits, so a NaN
+    * whose payload lies entirely in the low 13 bits would come out as
+    * 0x7c00/0xfc00, i.e. infinity. Set the quiet bit so it stays a NaN.
+    */
+   if ((signexp & 0xff) == 0xff && mantissa != 0 && (h & 0x03ff) == 0) {
+      h |= 0x0200;
+   }
+
+   return h;
+}
+
+/**
+ * Convert a float value to a half float.
+ *
+ * Reinterprets the float as its bit pattern through a union and forwards
+ * to util_floatui_to_half().
+ */
+static INLINE half
+util_float_to_half(float f)
+{
+   union {float f; uint32_t v;} i;
+   i.f = f;
+   return util_floatui_to_half(i.v);
+}
+
+#endif /* U_HALF_H */
--
cgit v1.2.3