# Copyright 2010 Luca Barbieri # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice (including the # next paragraph) shall be included in all copies or substantial # portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # ************************************************************************* # The code is a reimplementation of the algorithm in # www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf # "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008 # # The table contents have been slightly changed so that the exponent # bias is now in the exponent table instead of the mantissa table (mostly # for cosmetic reasons, and because it theoretically allows a variant # that flushes denormal to zero but uses a mantissa table with 24-bit # entries). # # The tables are also constructed slightly differently. # # Note that using a 64K * 4 table is a terrible idea since it will not fit # in the L1 cache and will massively pollute the L2 cache as well # # These should instead fit in the L1 cache. 
#
# TODO: we could use a denormal bias table instead of the mantissa/offset
# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
# but would involve more computation
#
# Note however that if denormals are never encountered, the L1 cache usage
# is only about 4608 bytes anyway.
#
# Compatibility note: every print() call in this script passes exactly one
# string argument.  That form produces the same output whether "print" is
# the Python 2 statement or the Python 3 function, so the script runs
# unchanged under both interpreters without a __future__ import.

# Number of entries emitted so far for the table currently being written.
table_index = None
# Number of entries announced to begin() for the table being written.
table_length = None


def begin(t, n, l):
    """Open a C array definition and reset the entry counter.

    t -- C element type of the array (e.g. "uint32_t")
    n -- name of the C array
    l -- number of entries the array is declared with; end() verifies
         that exactly this many value() calls were made
    """
    global table_length
    global table_index
    table_index = 0
    table_length = l
    # Blank separator line before each table.
    print("")
    print("const " + t + " " + n + "[" + str(l) + "] = {")


def value(v):
    """Emit one array entry as a tab-indented lowercase hex literal.

    v -- non-negative integer entry

    Raises ValueError for a negative v: the tables are all unsigned, and
    a negative number cannot be written as a plain hex literal.
    """
    global table_index
    if v < 0:
        raise ValueError("table entries must be non-negative, got %d" % v)
    table_index += 1
    # "%x" is used instead of hex() because Python 2's hex() appends an
    # "L" suffix to long integers (e.g. 0x80000000 on builds with a 32-bit
    # C long), which would make the generated text platform dependent.
    print("\t0x%x," % v)


def end():
    """Close the current C array and check its entry count.

    Raises AssertionError if the number of value() calls since begin()
    differs from the declared length.
    """
    global table_length
    global table_index
    print("};")
    # Raised explicitly rather than via an "assert" statement so the check
    # is still performed when the interpreter runs with -O.
    if table_index != table_length:
        raise AssertionError(
            "table has %d entries, expected %d" % (table_index, table_length))


def emit_half_to_float_mantissa_table():
    """Emit util_half_to_float_mantissa_table (2048 uint32_t entries).

    Entry 0 is zero, entries 1..1023 are the half denormals already
    normalized to float bit patterns, and entries 1024..2047 are the
    10-bit half mantissas shifted into float mantissa position.
    """
    begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
    # zero
    value(0)

    # denormals
    for i in range(1, 1024):
        m = i << 13
        e = 0

        # normalize number: shift left until the implicit-one bit (bit 23)
        # is set, lowering the exponent by one step per shift
        while (m & 0x00800000) == 0:
            e -= 0x00800000
            m <<= 1

        # drop the implicit-one bit and add the exponent bias
        m &= ~0x00800000
        e += 0x38800000
        value(m | e)

    # normals
    for i in range(1024, 2048):
        value((i - 1024) << 13)
    end()


def emit_half_to_float_exponent_table():
    """Emit util_half_to_float_exponent_table (64 uint32_t entries).

    Entries 0..31 cover the positive sign, entries 32..63 the negative
    sign; within each half the position is the 5-bit half exponent.
    """
    begin("uint32_t", "util_half_to_float_exponent_table", 64)
    # positive zero or denormals
    value(0)

    # positive numbers
    for i in range(1, 31):
        value(0x38000000 + (i << 23))

    # positive infinity/NaN
    value(0x7f800000)

    # negative zero or denormals
    value(0x80000000)

    # negative numbers
    for i in range(33, 63):
        value(0xb8000000 + ((i - 32) << 23))

    # negative infinity/NaN
    value(0xff800000)
    end()


def emit_half_to_float_offset_table():
    """Emit util_half_to_float_offset_table (64 uint16_t-sized offsets).

    Each entry is 0 (zero/denormal part of the mantissa table) or 1024
    (normal part of the mantissa table).  The array is declared uint32_t.
    """
    begin("uint32_t", "util_half_to_float_offset_table", 64)
    # positive zero or denormals
    value(0)

    # positive normals
    for i in range(1, 32):
        value(1024)

    # negative zero or denormals
    value(0)

    # negative normals
    for i in range(33, 64):
        value(1024)
    end()


def emit_float_to_half_base_table():
    """Emit util_float_to_half_base_table (512 uint16_t entries).

    256 entries per sign; within each sign block the loop variable i runs
    over the unbiased float exponents -127..127, followed by one entry
    for infinity and NaNs.
    """
    begin("uint16_t", "util_float_to_half_base_table", 512)
    for sign in (0, 0x8000):
        # very small numbers mapping to zero
        for i in range(-127, -24):
            value(sign | 0)

        # small numbers mapping to denormals
        for i in range(-24, -14):
            value(sign | (0x400 >> (-14 - i)))

        # normal numbers
        for i in range(-14, 16):
            value(sign | ((i + 15) << 10))

        # large numbers mapping to infinity
        for i in range(16, 128):
            value(sign | 0x7c00)

        # infinity and NaNs
        value(sign | 0x7c00)
    end()


def emit_float_to_half_shift_table():
    """Emit util_float_to_half_shift_table (512 uint8_t entries).

    Laid out exactly like the base table; the shift amounts do not depend
    on the sign, so both 256-entry halves are identical.
    """
    begin("uint8_t", "util_float_to_half_shift_table", 512)
    for sign in (0, 0x8000):
        # very small numbers mapping to zero
        for i in range(-127, -24):
            value(24)

        # small numbers mapping to denormals
        for i in range(-24, -14):
            value(-1 - i)

        # normal numbers
        for i in range(-14, 16):
            value(13)

        # large numbers mapping to infinity
        for i in range(16, 128):
            value(24)

        # infinity and NaNs
        value(13)
    end()


def main():
    """Write the complete generated C source to standard output."""
    print("/* This file is autogenerated by u_half.py. Do not edit directly. */")
    print("#include \"util/u_half.h\"")

    emit_half_to_float_mantissa_table()
    emit_half_to_float_exponent_table()
    emit_half_to_float_offset_table()
    emit_float_to_half_base_table()
    emit_float_to_half_shift_table()


if __name__ == "__main__":
    main()