Floating point formats defined by this standard are classified as either interchange or non-interchange. In the standard, storage formats are narrow interchange formats, i.e. the set of floating point values that can be stored by the specified binary encoding is a proper subset of the values of wider floating point formats such as the 32-bit float and 64-bit double. In particular, the standard defines the encodings for one binary storage floating-point format of 16 bits length and radix 2, and one decimal storage floating-point format of 32 bits length. Note that these two formats are for storage only and are not defined for in-memory arithmetic operations. The remainder of this post is about the 16-bit binary storage format, which we will refer to as half precision.
Half precision (also known as a 1.5.10 or s10e5 minifloat) was added to the standard because it is the de facto storage format for certain floating-point values frequently used in modern graphics processing units (GPUs), where minimizing memory usage and bus traffic is a major challenge and priority. It is used in several computer graphics environments including OpenGL and OpenEXR, and by hardware in MP3 decoders and NVIDIA graphics cards. This format became popular because it can store a larger range of values than an int16 without requiring the bandwidth and storage space of a float type. Typically this increased range of numbers is used to preserve more highlight and shadow detail. The minimum positive (subnormal) and maximum representable values are 5.96×10^-8 (2^-24) and 65504 respectively.
I only know of one C or C++ compiler which supports half precision, namely Sourcery G++ Lite. It uses an __fp16 type to represent half precision, with a number of limitations. However, whether __fp16 becomes part of ISO C remains to be seen. The latest version of the C++ ABI also provides some support for name mangling of half-precision types. The GNU debugger appears to have some limited support. Ruby supports half precision (IEEE_binary16) using the float-formats package (but only for little-endian platforms according to the float-formats README). The Python struct module, which is the logical home for half-precision support, does not currently support this format. A cursory search of CPAN did not reveal any modules with support for half precision in Perl.
The following is a small C program which demonstrates how to encode and decode the half precision binary format. It is based on some code from an OGRE (Object-oriented Graphics Rendering Engine) header.
/*The following example shows one way to read a single floating point number (1.0) encoded in a half precision binary floating point two byte string into a float using Python.
** This program is free software; you can redistribute it and/or modify it under
** the terms of the GNU Lesser General Public License, as published by the Free
** Software Foundation; either version 2 of the License, or (at your option) any
** later version.
**
** IEEE 754-2008 Half-precision Floating Point Format
** --------------------------------------------------
**
** | Field | Last | First | Note
** |----------|------|-------|----------
** | Sign | 15 | 15 |
** | Exponent | 14 | 10 | Bias = 15
** | Fraction | 9 | 0 |
*/
#include <stdio.h>
#include <inttypes.h>
/* Raw 16-bit storage for a half-precision bit pattern (layout in the header above). */
typedef uint16_t HALF;
/* ----- prototypes ------ */
/* Public conversions between a HALF bit pattern and a float value. */
float HALFToFloat(HALF);
HALF floatToHALF(float);
/* File-local helpers that work on the 32-bit integer image of a float. */
static uint32_t halfToFloatI(HALF);
static HALF floatToHalfI(uint32_t);
/*
 * Decode a half-precision bit pattern into a float.
 *
 * halfToFloatI() builds the 32-bit pattern; the union then reinterprets
 * those bits as a float without any numeric conversion.
 */
float
HALFToFloat(HALF y)
{
    union { float f; uint32_t i; } bits = { .i = halfToFloatI(y) };

    return bits.f;
}
/*
 * Expand a half-precision bit pattern into the matching single-precision
 * bit pattern.
 *
 * y: 16-bit pattern laid out as sign(1) | exponent(5, bias 15) | fraction(10).
 * Returns the 32-bit pattern sign(1) | exponent(8, bias 127) | fraction(23).
 *
 * Signed zeros, subnormals, infinities and NaNs are all handled here.
 * The sign and fraction are kept in unsigned variables so that moving the
 * sign into bit 31 is well defined (shifting a signed int there is not).
 */
static uint32_t
halfToFloatI(HALF y)
{
    const uint32_t sign = ((uint32_t)y >> 15) & 0x1u;
    int32_t exponent = (int32_t)(((uint32_t)y >> 10) & 0x1fu);
    uint32_t fraction = (uint32_t)y & 0x3ffu;

    if (exponent == 0) {
        if (fraction == 0) {
            /* Plus or minus zero */
            return sign << 31;
        }
        /*
         * Subnormal: shift the fraction up until its leading one reaches
         * bit 10, lowering the exponent once per shift, then drop that
         * bit because the float encoding stores it implicitly.
         */
        while (!(fraction & 0x400u)) {
            fraction <<= 1;
            exponent -= 1;
        }
        exponent += 1;
        fraction &= ~0x400u;
    } else if (exponent == 31) {
        /* Inf when fraction == 0, otherwise NaN with the payload kept */
        return (sign << 31) | 0x7f800000u | (fraction << 13);
    }

    /* Rebias from 15 to 127; the result is always positive here */
    exponent += 127 - 15;
    return (sign << 31) | ((uint32_t)exponent << 23) | (fraction << 13);
}
/*
 * Encode a float as a half-precision bit pattern.
 *
 * The union exposes the float's 32-bit image, which floatToHalfI() then
 * narrows to 16 bits.
 */
HALF
floatToHALF(float i)
{
    union { float f; uint32_t i; } bits = { .f = i };

    return floatToHalfI(bits.i);
}
/*
 * Shift `value` right by `shift` bits (1 <= shift <= 31), rounding the
 * discarded bits to nearest with ties going to the even result.
 */
static uint32_t
roundShiftNearestEven(uint32_t value, unsigned shift)
{
    const uint32_t quotient = value >> shift;
    const uint32_t remainder = value & (((uint32_t)1 << shift) - 1u);
    const uint32_t halfway = (uint32_t)1 << (shift - 1);

    if (remainder > halfway || (remainder == halfway && (quotient & 1u)))
        return quotient + 1u;
    return quotient;
}

/*
 * Narrow a single-precision bit pattern to a half-precision bit pattern.
 *
 * i: 32-bit pattern sign(1) | exponent(8, bias 127) | fraction(23).
 * Returns the 16-bit pattern sign(1) | exponent(5, bias 15) | fraction(10).
 *
 * The discarded fraction bits are rounded to nearest, ties to even, rather
 * than truncated. Values too large for half precision (including those
 * that round past 65504) become signed infinity; values too small for the
 * smallest subnormal become signed zero. NaNs stay NaNs.
 */
static HALF
floatToHalfI(uint32_t i)
{
    const uint32_t sign = (i >> 16) & 0x8000u;
    const int32_t exponent = (int32_t)((i >> 23) & 0xffu) - (127 - 15);
    const uint32_t fraction = i & 0x007fffffu;

    if (exponent == 0xff - (127 - 15)) {
        if (fraction == 0) {
            /* Inf */
            return (HALF)(sign | 0x7c00u);
        }
        /*
         * NaN: keep the top payload bits, and force a nonzero fraction so
         * the result does not collapse into the Inf encoding.
         */
        const uint32_t payload = fraction >> 13;
        return (HALF)(sign | 0x7c00u | payload | (uint32_t)(payload == 0));
    }

    if (exponent > 30) {
        /* Overflow */
        return (HALF)(sign | 0x7c00u);
    }

    if (exponent <= 0) {
        if (exponent < -10) {
            /* Below half of the smallest subnormal: signed zero */
            return (HALF)sign;
        }
        /*
         * Subnormal result: restore the implicit leading one and shift the
         * 24-bit significand down to the subnormal scale. A round-up to
         * 0x400 is exactly the encoding of the smallest normal number.
         */
        const uint32_t significand = fraction | 0x00800000u;
        return (HALF)(sign | roundShiftNearestEven(significand,
                                                   (unsigned)(14 - exponent)));
    }

    /*
     * Normal result. Adding (not OR-ing) the rounded fraction lets a
     * round-up carry into the exponent; from exponent 30 that carry
     * yields 0x7c00, the Inf encoding.
     */
    return (HALF)(sign | (((uint32_t)exponent << 10)
                          + roundShiftNearestEven(fraction, 13)));
}
/*
 * Demo driver: read one float from standard input, convert it to half
 * precision and back, and print the original value, the round-tripped
 * value and the half-precision bit pattern in hex.
 *
 * Returns 0 on success, 1 if no floating point number could be read.
 */
int
main(int argc, char *argv[])
{
    float f1, f2;
    HALF h;

    /* Command-line arguments are not used */
    (void)argc;
    (void)argv;

    printf("Please enter a floating point number: ");
    if (scanf("%f", &f1) != 1) {
        /* f1 was never written; do not convert garbage */
        fprintf(stderr, "Invalid floating point number\n");
        return 1;
    }
    h = floatToHALF(f1);
    f2 = HALFToFloat(h);
    /* h is promoted to int when passed, so print it as unsigned, not long */
    printf("Results are: %f %f %#x\n", f1, f2, (unsigned)h);
    return 0;
}
import struct
I hope this post provided you with some useful information on the interesting subject of the half precision floating point binary format. I suspect within a few years most compilers and scripting languages will support it.
def HalfToFloat(h):
    """Expand a half-precision bit pattern into a single-precision bit pattern.

    h is the 16-bit pattern as an integer: sign (bit 15), exponent
    (bits 14-10, bias 15), fraction (bits 9-0).

    Returns the 32-bit pattern as an integer, not a float; the caller
    reinterprets the bits. Signed zeros, subnormals, infinities and NaNs
    are handled.
    """
    s = int((h >> 15) & 0x00000001)  # sign
    e = int((h >> 10) & 0x0000001f)  # exponent
    f = int(h & 0x000003ff)          # fraction

    if e == 0:
        if f == 0:
            # plus or minus zero
            return int(s << 31)
        # Subnormal: shift the fraction up until its leading one reaches
        # bit 10, lowering the exponent once per shift, then drop that bit
        # because the single-precision encoding stores it implicitly.
        while not (f & 0x00000400):
            f <<= 1
            e -= 1
        e += 1
        f &= ~0x00000400
    elif e == 31:
        if f == 0:
            # infinity
            return int((s << 31) | 0x7f800000)
        # NaN, payload moved to the top of the wider fraction
        return int((s << 31) | 0x7f800000 | (f << 13))

    # rebias the exponent from 15 to 127 and widen the fraction
    e = e + (127 - 15)
    f = f << 13
    return int((s << 31) | (e << 23) | f)
if __name__ == "__main__":
    # Imported here so the demo does not rely on a module-level import.
    import struct

    # 1.0 in half precision is 0x3c00; these are its two bytes,
    # least significant byte first.
    FP16 = b'\x00\x3c'
    # '<' pins little-endian so the result does not depend on the host.
    v = struct.unpack('<H', FP16)
    x = HalfToFloat(v[0])
    # Reinterpret the 32-bit integer pattern as a float by packing it
    # and unpacking the same four bytes.
    packed = struct.pack('<I', x)
    f = struct.unpack('<f', packed)
    # print the resulting floating point
    print(f[0])
2 comments:
This is a great blog, helped a lot.
thanks
This is awesome! I've been trolling around the internet all day searching for something like this!
Wonderful! thanks!
Post a Comment