Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 77 additions & 11 deletions libdevice/imf_bf16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,10 @@ static inline float __bfloat162float(_iml_bf16_internal b) {

static inline _iml_bf16_internal
__float2bfloat16(float f, __iml_rounding_mode rounding_mode) {
union {
float f_val;
uint32_t u32_val;
} fp32_bits;

fp32_bits.f_val = f;
uint16_t bf16_sign =
static_cast<uint16_t>((fp32_bits.u32_val & 0x80000000) >> 31);
uint16_t bf16_exp =
static_cast<uint16_t>((fp32_bits.u32_val & 0x7F800000) >> 23);
uint32_t f_mant = fp32_bits.u32_val & 0x7FFFFF;
uint32_t u32_val = __builtin_bit_cast(uint32_t, f);
uint16_t bf16_sign = static_cast<uint16_t>((u32_val >> 31) & 0x1);
uint16_t bf16_exp = static_cast<uint16_t>((u32_val >> 23) & 0x7FF);
uint32_t f_mant = u32_val & 0x7F'FFFF;
uint16_t bf16_mant = static_cast<uint16_t>(f_mant >> 16);
// +/-infinity and NAN
if (bf16_exp == 0xFF) {
Expand Down Expand Up @@ -82,6 +75,79 @@ __float2bfloat16(float f, __iml_rounding_mode rounding_mode) {
return (bf16_sign << 15) | (bf16_exp << 7) | bf16_mant;
}

// Converts a double to bfloat16 bits with round-to-nearest-even (RTE), which
// is the only rounding mode needed for double inputs.
//
// Special cases:
//   * NaN               -> quiet NaN 0x7FC0 (sign and payload are dropped)
//   * +/-infinity       -> +/-infinity
//   * |d| >= 2^128      -> +/-infinity; finite values that round up past the
//                          largest finite bfloat16 also become +/-infinity
//   * subnormal doubles -> +/-0
//   * |d| <= 2^-134     -> +/-0 (2^-134 is exactly half of the smallest
//                          bfloat16 subnormal and ties to the even value 0)
// Every other value is rounded exactly once, directly from the full 53-bit
// significand, so no sticky bits are lost before the rounding decision.
static _iml_bf16_internal __double2bfloat16(double d) {
  const uint64_t fp64_bits = __builtin_bit_cast(uint64_t, d);
  // Sign already positioned at bit 15 of the bfloat16 encoding.
  const uint16_t sign_bit = static_cast<uint16_t>((fp64_bits >> 63) << 15);
  const int biased_exp = static_cast<int>((fp64_bits >> 52) & 0x7FF);
  uint64_t fp64_mant = fp64_bits & 0xF'FFFF'FFFF'FFFF;

  // +/-infinity and NaN for double input.
  if (biased_exp == 0x7FF) {
    if (fp64_mant != 0)
      return 0x7FC0; // quiet NaN
    return sign_bit | 0x7F80;
  }

  // Subnormal (and zero) double precision is converted to signed zero.
  if (biased_exp == 0)
    return sign_bit;

  const int unbiased_exp = biased_exp - 1023;

  // Overflow: magnitude is at least 2^128, convert to +/-infinity.
  if (unbiased_exp > 127)
    return sign_bit | 0x7F80;

  // Underflow: magnitude is below 2^-134, i.e. strictly less than half of the
  // smallest bfloat16 subnormal (2^-133), so it rounds to zero.
  if (unbiased_exp < -134)
    return sign_bit;

  // -134 <= unbiased_exp < -126: a normal double that becomes a bfloat16
  // subnormal (or rounds to zero / the smallest normal).
  if (unbiased_exp < -126) {
    // Make the implicit leading 1 explicit (bit 52).
    fp64_mant |= 0x10'0000'0000'0000;
    // A normal result drops 45 bits; each step below 2^-126 drops one more.
    // The shift therefore ranges from 46 (exp -127) to 53 (exp -134).
    const int shift = 45 + (-126 - unbiased_exp);
    const uint64_t half = uint64_t{1} << (shift - 1);
    const uint64_t discard_bits = fp64_mant & ((uint64_t{1} << shift) - 1);
    uint16_t bf16_mant = static_cast<uint16_t>(fp64_mant >> shift);
    if (discard_bits > half ||
        ((discard_bits == half) && ((bf16_mant & 0x1) == 0x1)))
      bf16_mant += 1;
    // The exponent field is 0 here. If rounding carried the mantissa to 0x80,
    // the encoding is exactly the smallest normal number (exponent field 1,
    // mantissa 0), so no extra fix-up is required.
    return sign_bit | bf16_mant;
  }

  // Normal result: 1.significand * 2^unbiased_exp, discard 45 mantissa bits.
  const uint64_t discard_bits = fp64_mant & 0x1FFF'FFFF'FFFF;
  uint16_t bf16_mant = static_cast<uint16_t>(fp64_mant >> 45);
  if (discard_bits > 0x1000'0000'0000 ||
      ((discard_bits == 0x1000'0000'0000) && ((bf16_mant & 0x1) == 0x1)))
    bf16_mant += 1;

  // Adding (instead of OR-ing) the mantissa lets a rounding carry (0x80) bump
  // the exponent field; from the largest exponent this yields 0x7F80, which is
  // the encoding of infinity.
  return sign_bit | (((unbiased_exp + 127) << 7) + bf16_mant);
}

template <typename Ty>
static Ty __iml_bfloat162integral_u(uint16_t b,
__iml_rounding_mode rounding_mode) {
Expand Down
5 changes: 5 additions & 0 deletions libdevice/imf_utils/double_convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,4 +450,9 @@ _iml_half_internal __devicelib_imf_double2half(double x) {
return __spirv_FConvert_Rhalf_rte(x);
#endif
}

// Device-library entry point for double -> bfloat16 conversion; forwards the
// argument unchanged to __double2bfloat16 and returns its result.
DEVICE_EXTERN_C_INLINE
_iml_bf16_internal __devicelib_imf_double2bfloat16(double x) {
  const _iml_bf16_internal bf16_bits = __double2bfloat16(x);
  return bf16_bits;
}
#endif // __LIBDEVICE_IMF_ENABLED__
8 changes: 8 additions & 0 deletions libdevice/imf_wrapper_fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,4 +369,12 @@ DEVICE_EXTERN_C_INLINE
_iml_half_internal __imf_double2half(double x) {
return __devicelib_imf_double2half(x);
}

// Forward declaration of the device-library double -> bfloat16 conversion
// that __imf_double2bfloat16 below forwards to.
DEVICE_EXTERN_C_INLINE
_iml_bf16_internal __devicelib_imf_double2bfloat16(double);

// __imf_* wrapper for double -> bfloat16 conversion; passes the argument
// straight through to __devicelib_imf_double2bfloat16.
DEVICE_EXTERN_C_INLINE
_iml_bf16_internal __imf_double2bfloat16(double x) {
  const _iml_bf16_internal bf16_bits = __devicelib_imf_double2bfloat16(x);
  return bf16_bits;
}
#endif // __LIBDEVICE_IMF_ENABLED__
2 changes: 2 additions & 0 deletions llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,8 @@ SYCLDeviceLibFuncMap SDLMap = {
{"__devicelib_imf_vsadu2", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_vsadu4", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_double2half", DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_double2bfloat16",
DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_fma", DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_floor", DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_ceil", DeviceLibExt::cl_intel_devicelib_imf_fp64},
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2254,6 +2254,7 @@ extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rd(long long x);
extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rn(long long x);
extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_ru(long long x);
extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rz(long long x);
extern SYCL_EXTERNAL uint16_t __imf_double2bfloat16(double x);
extern SYCL_EXTERNAL short __imf_bfloat16_as_short(uint16_t x);
extern SYCL_EXTERNAL unsigned short __imf_bfloat16_as_ushort(uint16_t x);
extern SYCL_EXTERNAL uint16_t __imf_short_as_bfloat16(short x);
Expand Down