-
Notifications
You must be signed in to change notification settings - Fork 0
/
ieee754half.h
98 lines (84 loc) · 4.5 KB
/
ieee754half.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#ifdef XM_F16C_INTRINSICS_
namespace rapid
{
/* IEEE 754 half-precision binary floating-point format.
At the time of writing, modern CPUs do not support native 16-bit FP math.
This class emulates it through a series of conversions between half-precision
and single-precision floating-point formats, which requires the F16C instruction set.
Keep in mind that this doesn't improve FPU performance; the implementation is
intended to simplify usage of the FP16 format with C++ operator overloading. */
// 16-bit half-precision value with overloaded arithmetic, comparison and
// conversion operators. Only the default/uint16_t/int constructors and unary
// plus are defined inline here; every other member is declared only and is
// defined elsewhere (presumably in ieee754half.inl — confirm).
struct ieee754half
{
// Raw bit patterns of notable half-precision values, laid out as
// sign:1 | exponent:5 | mantissa:10. These are encodings, not numeric values.
static constexpr uint16_t eps = 0b0001010000000000; // 2^-10 = 9.77e-04 Machine epsilon
static constexpr uint16_t fmin = 0b0000010000000000; // 0.00006103515625 Smallest positive normal number
static constexpr uint16_t fmax = 0b0111101111111111; // 65504.0 Largest normal number
static constexpr uint16_t smin = 0b0000000000000001; // 0.000000059604645 Smallest positive subnormal number
static constexpr uint16_t smax = 0b0000001111111111; // 0.000060975552 Largest subnormal number
static constexpr uint16_t low = 0b1111101111111111; // -65504.0 Smallest negative number
static constexpr uint16_t one = 0b0011110000000000; // 1.0 One
static constexpr uint16_t inf = 0b0111110000000000; // Infinity
static constexpr uint16_t nan = 0b0111111000000000; // Not-a-Number
static constexpr uint16_t bias = 15; // Exponent bias
// Storage: `binary` is the whole 16-bit pattern; m/e/sign overlay the same
// two bytes as mantissa, exponent and sign fields.
// NOTE(review): anonymous structs are a compiler extension in C++, and the
// bit-field allocation order is implementation-defined; the field order below
// assumes allocation starts at the least significant bit — confirm for every
// target compiler.
union
{
uint16_t binary;
struct
{ // little-endian byte order
uint16_t m : 10;
uint16_t e : 5;
uint16_t sign : 1;
};
};
// Default constructor: has no member initializer, so `binary` is left uninitialized.
ieee754half() noexcept {}
// Adopts h as the raw bit pattern; no numeric conversion is performed.
ieee754half(uint16_t h) noexcept: binary(h) {}
// Stores i, converted to uint16_t, as the raw bit pattern. This does NOT
// convert the integer's numeric value to half precision.
ieee754half(int i) noexcept: binary(i) {}
// Declared only; presumably converts the numeric value of s to half
// precision — confirm in the definition.
ieee754half(float s) noexcept;
// Postfix (int parameter) and prefix increment/decrement.
// NOTE(review): the postfix forms are const-qualified, so they cannot assign
// to the members of *this; confirm the intended semantics in the definitions.
ieee754half operator++(int) const noexcept;
ieee754half operator--(int) const noexcept;
ieee754half& operator++() noexcept;
ieee754half& operator--() noexcept;
// Unary plus returns *this unchanged; unary minus is declared only.
const ieee754half& operator+() const noexcept { return *this; }
ieee754half operator-() const noexcept;
// Binary arithmetic between two half values, returning a new half by value.
ieee754half operator*(ieee754half h) const noexcept;
ieee754half operator/(ieee754half h) const noexcept;
ieee754half operator+(ieee754half h) const noexcept;
ieee754half operator-(ieee754half h) const noexcept;
// Compound assignment with a half operand; each returns a reference to a half.
ieee754half& operator*=(ieee754half h) noexcept;
ieee754half& operator/=(ieee754half h) noexcept;
ieee754half& operator+=(ieee754half h) noexcept;
ieee754half& operator-=(ieee754half h) noexcept;
// Assignment from a single-precision float.
ieee754half& operator=(float s) noexcept;
// Comparisons against another half value.
bool operator<(ieee754half h) const noexcept;
bool operator>(ieee754half h) const noexcept;
bool operator<=(ieee754half h) const noexcept;
bool operator>=(ieee754half h) const noexcept;
bool operator==(ieee754half h) const noexcept;
bool operator!=(ieee754half h) const noexcept;
// Comparisons against a single-precision float on the right-hand side.
bool operator<(float s) const noexcept;
bool operator>(float s) const noexcept;
bool operator<=(float s) const noexcept;
bool operator>=(float s) const noexcept;
bool operator==(float s) const noexcept;
bool operator!=(float s) const noexcept;
// Implicit (non-explicit) conversions to built-in arithmetic types.
operator int() const noexcept;
operator unsigned int() const noexcept;
operator float() const noexcept;
operator double() const noexcept;
};
// Mixed arithmetic with a float on the left-hand side and a half on the right.
// The result type is float. Declared only; the definitions are not visible in
// this header (presumably in ieee754half.inl — confirm).
float operator*(float s, ieee754half h) noexcept;
float operator/(float s, ieee754half h) noexcept;
float operator+(float s, ieee754half h) noexcept;
float operator-(float s, ieee754half h) noexcept;
// Compound assignment on a float lvalue with a half operand. Each takes the
// float by non-const reference and returns a float reference (presumably to
// the same object s — confirm in the definitions).
float& operator*=(float& s, ieee754half h) noexcept;
float& operator/=(float& s, ieee754half h) noexcept;
float& operator+=(float& s, ieee754half h) noexcept;
float& operator-=(float& s, ieee754half h) noexcept;
// Comparisons with a float on the left-hand side and a half on the right.
// Each one forwards to the ieee754half member comparison against float with
// the operands swapped and the relation mirrored (s < h  <=>  h > s), so the
// result always agrees with the member operators.
//
// These functions are defined in a header, so they must be `inline`: a
// non-inline function with external linkage that is defined in every
// translation unit including this header violates the one-definition rule
// and produces duplicate-symbol errors at link time.
inline bool operator<(float s, ieee754half h) noexcept { return h > s; }
inline bool operator>(float s, ieee754half h) noexcept { return h < s; }
inline bool operator<=(float s, ieee754half h) noexcept { return h >= s; }
inline bool operator>=(float s, ieee754half h) noexcept { return h <= s; }
inline bool operator==(float s, ieee754half h) noexcept { return h == s; }
inline bool operator!=(float s, ieee754half h) noexcept { return h != s; }
} // namespace rapid
#include "ieee754half.inl"
#endif // XM_F16C_INTRINSICS_