-
Notifications
You must be signed in to change notification settings - Fork 1
/
Float8m2e5s1.h
36 lines (33 loc) · 2.96 KB
/
Float8m2e5s1.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
//-----------------------------------------------------------------------------
//
// See:
// https://arxiv.org/abs/2209.05433 FP8 Formats for Deep Learning
// https://arxiv.org/abs/2206.02915 8-bit Numerical Formats for Deep Neural Networks 2022-10-24
//
//-----------------------------------------------------------------------------
#pragma once
// 8-bit floating-point type: an instantiation of the FloatNumber template
// with uint8_t as its first template argument.
// NOTE(review): going by the type name (m2 / e5 / s1), the arguments 2 and 5
// are presumably the mantissa and exponent bit widths, with one sign bit --
// confirm against the FloatNumber declaration, which is not visible in this
// header and which also defines the meaning of the four boolean flags.
using float8m2e5s1_t = FloatNumber<uint8_t, 2, 5, true, true, true, true>;
// Binary arithmetic on two float8m2e5s1_t operands.
// Both operands are converted to float, the operation is evaluated in float,
// and the float result is converted back to float8m2e5s1_t on return.

// Returns a + b.
inline float8m2e5s1_t operator+(float8m2e5s1_t a, float8m2e5s1_t b) noexcept
{
    return static_cast<float>(a) + static_cast<float>(b);
}

// Returns a - b.
inline float8m2e5s1_t operator-(float8m2e5s1_t a, float8m2e5s1_t b) noexcept
{
    return static_cast<float>(a) - static_cast<float>(b);
}

// Returns a * b.
inline float8m2e5s1_t operator*(float8m2e5s1_t a, float8m2e5s1_t b) noexcept
{
    return static_cast<float>(a) * static_cast<float>(b);
}

// Returns a / b.
inline float8m2e5s1_t operator/(float8m2e5s1_t a, float8m2e5s1_t b) noexcept
{
    return static_cast<float>(a) / static_cast<float>(b);
}
// Binary arithmetic with a float8m2e5s1_t on the left and a double on the right.
// The double is narrowed to float first, so the operation itself is evaluated
// in float (not double); the float result is converted to float8m2e5s1_t on
// return.

// Returns a + b.
inline float8m2e5s1_t operator+(float8m2e5s1_t a, double b) noexcept
{
    return static_cast<float>(a) + static_cast<float>(b);
}

// Returns a - b.
inline float8m2e5s1_t operator-(float8m2e5s1_t a, double b) noexcept
{
    return static_cast<float>(a) - static_cast<float>(b);
}

// Returns a * b.
inline float8m2e5s1_t operator*(float8m2e5s1_t a, double b) noexcept
{
    return static_cast<float>(a) * static_cast<float>(b);
}

// Returns a / b.
inline float8m2e5s1_t operator/(float8m2e5s1_t a, double b) noexcept
{
    return static_cast<float>(a) / static_cast<float>(b);
}
// Binary arithmetic with a double on the left and a float8m2e5s1_t on the right.
// The double is narrowed to float first, so the operation itself is evaluated
// in float (not double); the float result is converted to float8m2e5s1_t on
// return.

// Returns a + b.
inline float8m2e5s1_t operator+(double a, float8m2e5s1_t b) noexcept
{
    return static_cast<float>(a) + static_cast<float>(b);
}

// Returns a - b.
inline float8m2e5s1_t operator-(double a, float8m2e5s1_t b) noexcept
{
    return static_cast<float>(a) - static_cast<float>(b);
}

// Returns a * b.
inline float8m2e5s1_t operator*(double a, float8m2e5s1_t b) noexcept
{
    return static_cast<float>(a) * static_cast<float>(b);
}

// Returns a / b.
inline float8m2e5s1_t operator/(double a, float8m2e5s1_t b) noexcept
{
    return static_cast<float>(a) / static_cast<float>(b);
}
// Compound assignment for float8m2e5s1_t.
// Both operands are converted to float, the operation is evaluated in float,
// and the float result is assigned to `a`. The value of that assignment
// expression is what gets returned.

// a += b
inline float8m2e5s1_t& operator+=(float8m2e5s1_t& a, float8m2e5s1_t b) noexcept
{
    return a = (static_cast<float>(a) + static_cast<float>(b));
}

// a -= b
inline float8m2e5s1_t& operator-=(float8m2e5s1_t& a, float8m2e5s1_t b) noexcept
{
    return a = (static_cast<float>(a) - static_cast<float>(b));
}

// a *= b
inline float8m2e5s1_t& operator*=(float8m2e5s1_t& a, float8m2e5s1_t b) noexcept
{
    return a = (static_cast<float>(a) * static_cast<float>(b));
}

// a /= b
inline float8m2e5s1_t& operator/=(float8m2e5s1_t& a, float8m2e5s1_t b) noexcept
{
    return a = (static_cast<float>(a) / static_cast<float>(b));
}
// Prefix increment: converts `a` to float, adds 1, assigns the float result
// to `a`, and returns the value of that assignment expression.
inline float8m2e5s1_t& operator++(float8m2e5s1_t& a) noexcept
{
    return a = static_cast<float>(a) + 1;
}
// Prefix decrement: converts `a` to float, subtracts 1, assigns the float
// result to `a`, and returns the value of that assignment expression.
// Fix: the body previously added 1 (copied from operator++), so `--a`
// incremented the value instead of decrementing it.
inline float8m2e5s1_t& operator --(float8m2e5s1_t& a) noexcept { return a = float(a) - 1; }
// Comparisons between two float8m2e5s1_t values.
// Both operands are converted to float and compared with the corresponding
// built-in float operator, so the result is whatever float comparison yields
// for the converted values.

// True when lhs equals rhs after conversion to float.
inline bool operator==(float8m2e5s1_t lhs, float8m2e5s1_t rhs) noexcept
{
    return static_cast<float>(lhs) == static_cast<float>(rhs);
}

// True when lhs differs from rhs after conversion to float.
inline bool operator!=(float8m2e5s1_t lhs, float8m2e5s1_t rhs) noexcept
{
    return static_cast<float>(lhs) != static_cast<float>(rhs);
}

// True when lhs is less than rhs after conversion to float.
inline bool operator<(float8m2e5s1_t lhs, float8m2e5s1_t rhs) noexcept
{
    return static_cast<float>(lhs) < static_cast<float>(rhs);
}

// True when lhs is greater than rhs after conversion to float.
inline bool operator>(float8m2e5s1_t lhs, float8m2e5s1_t rhs) noexcept
{
    return static_cast<float>(lhs) > static_cast<float>(rhs);
}

// True when lhs is less than or equal to rhs after conversion to float.
inline bool operator<=(float8m2e5s1_t lhs, float8m2e5s1_t rhs) noexcept
{
    return static_cast<float>(lhs) <= static_cast<float>(rhs);
}

// True when lhs is greater than or equal to rhs after conversion to float.
inline bool operator>=(float8m2e5s1_t lhs, float8m2e5s1_t rhs) noexcept
{
    return static_cast<float>(lhs) >= static_cast<float>(rhs);
}