intel · AlexeySachkov · Mar 2, 2023 · Feb 20, 2023 · Feb 21, 2023 · Feb 23, 2023
@@ -28,17 +28,10 @@ static inline float __bfloat162float(_iml_bf16_internal b) {
 
 static inline _iml_bf16_internal
 __float2bfloat16(float f, __iml_rounding_mode rounding_mode) {
-  union {
-    float f_val;
-    uint32_t u32_val;
-  } fp32_bits;
-
-  fp32_bits.f_val = f;
-  uint16_t bf16_sign =
-      static_cast<uint16_t>((fp32_bits.u32_val & 0x80000000) >> 31);
-  uint16_t bf16_exp =
-      static_cast<uint16_t>((fp32_bits.u32_val & 0x7F800000) >> 23);
-  uint32_t f_mant = fp32_bits.u32_val & 0x7FFFFF;
+  // Reinterpret the float's bits. binary32 layout:
+  // sign[31] | exponent[30:23] | mantissa[22:0].
+  uint32_t u32_val = __builtin_bit_cast(uint32_t, f);
+  uint16_t bf16_sign = static_cast<uint16_t>((u32_val >> 31) & 0x1);
+  // The exponent field is 8 bits wide, so mask with 0xFF. A wider mask such
+  // as 0x7FF lets the sign bit leak into bit 8 after the shift, and negative
+  // inputs then never compare equal to 0xFF in the infinity/NaN check.
+  uint16_t bf16_exp = static_cast<uint16_t>((u32_val >> 23) & 0xFF);
+  uint32_t f_mant = u32_val & 0x7F'FFFF;
   uint16_t bf16_mant = static_cast<uint16_t>(f_mant >> 16);
   // +/-infinity and NAN
   if (bf16_exp == 0xFF) {
@@ -82,6 +75,79 @@ __float2bfloat16(float f, __iml_rounding_mode rounding_mode) {
   return (bf16_sign << 15) | (bf16_exp << 7) | bf16_mant;
 }
 
+// Converts a double to its bfloat16 bit pattern using round-to-nearest-even
+// (RTE); this is the only rounding mode needed for double inputs.
+//  - NaN inputs return the quiet NaN 0x7FC0 (input sign/payload are dropped).
+//  - Infinite inputs and finite inputs that overflow return signed infinity.
+//  - Subnormal doubles and values that round below the smallest bfloat16
+//    subnormal (2^-133) return signed zero.
+static _iml_bf16_internal __double2bfloat16(double d) {
+  const uint64_t u64_val = __builtin_bit_cast(uint64_t, d);
+  const int16_t bf16_sign = (u64_val >> 63) & 0x1;
+  const uint16_t fp64_exp = static_cast<uint16_t>((u64_val >> 52) & 0x7FF);
+  uint64_t fp64_mant = (u64_val & 0xF'FFFF'FFFF'FFFF);
+  const uint16_t signed_zero = bf16_sign ? 0x8000 : 0x0;
+  const uint16_t signed_inf = bf16_sign ? 0xFF80 : 0x7F80;
+  uint16_t bf16_mant;
+
+  // handling +/-infinity and NAN for double input
+  if (fp64_exp == 0x7FF) {
+    // A non-zero mantissa means NaN: always return the same quiet NaN.
+    return fp64_mant ? 0x7FC0 : signed_inf;
+  }
+
+  // Subnormal double precision is far below the bfloat16 range: convert to 0
+  if (fp64_exp == 0) {
+    return signed_zero;
+  }
+
+  // Remove the double-precision bias; the value is 1.significand * 2^exp.
+  const int16_t unbiased_exp = static_cast<int16_t>(fp64_exp - 1023);
+
+  // handling overflow, convert to +/-infinity
+  if (unbiased_exp > 127) {
+    return signed_inf;
+  }
+
+  // handling underflow. The smallest bfloat16 subnormal is 2^-133, so every
+  // value below 2^-134 rounds to zero. Values in [2^-134, 2^-133) must still
+  // be rounded: exactly 2^-134 is a tie that goes to even (zero), anything
+  // larger is nearer to 2^-133 and rounds up to it.
+  if (unbiased_exp < -134) {
+    return signed_zero;
+  }
+
+  uint64_t discard_bits;
+  // -134 <= unbiased_exp <= -127: these inputs are NOT subnormal
+  // double-precision numbers but turn into subnormals (or round up to the
+  // smallest normal) when converted to bfloat16.
+  if (unbiased_exp < -126) {
+    // Make the implicit leading 1 explicit before denormalizing.
+    fp64_mant |= 0x10'0000'0000'0000;
+    const int denorm_shift = -126 - unbiased_exp - 1; // in [0, 7]
+    // Bits shifted out here still matter for rounding: if any of them is
+    // set, an apparent tie is really above the halfway point.
+    const bool sticky =
+        (fp64_mant & ((uint64_t{1} << denorm_shift) - 1)) != 0;
+    fp64_mant >>= denorm_shift;
+    discard_bits = fp64_mant & 0x3FFF'FFFF'FFFF;
+    bf16_mant = static_cast<uint16_t>(fp64_mant >> 46);
+    if (discard_bits > 0x2000'0000'0000 ||
+        ((discard_bits == 0x2000'0000'0000) &&
+         (sticky || ((bf16_mant & 0x1) == 0x1))))
+      bf16_mant += 1;
+    uint16_t bf16_exp = 0;
+    // Rounding carried out of the 7-bit mantissa: the result is the smallest
+    // normal bfloat16 (exponent field 1, mantissa 0).
+    if (bf16_mant == 0x80) {
+      bf16_mant = 0;
+      bf16_exp = 1;
+    }
+    return (bf16_sign << 15) | (bf16_exp << 7) | bf16_mant;
+  }
+
+  // For normal value, discard 45 bits from mantissa (52 -> 7 bits)
+  discard_bits = fp64_mant & 0x1FFF'FFFF'FFFF;
+  bf16_mant = static_cast<uint16_t>(fp64_mant >> 45);
+  if (discard_bits > 0x1000'0000'0000 ||
+      ((discard_bits == 0x1000'0000'0000) && ((bf16_mant & 0x1) == 0x1)))
+    bf16_mant += 1;
+
+  // Re-bias the exponent for bfloat16 (bias 127).
+  uint16_t bf16_exp = static_cast<uint16_t>(unbiased_exp + 127);
+  if (bf16_mant == 0x80) {
+    // Mantissa overflowed while rounding: bump the exponent, or saturate to
+    // infinity when the exponent is already the largest finite one.
+    if (unbiased_exp == 127) {
+      return signed_inf;
+    }
+    bf16_mant = 0;
+    bf16_exp++;
+  }
+
+  return (bf16_sign << 15) | (bf16_exp << 7) | bf16_mant;
+}
+
 template <typename Ty>
 static Ty __iml_bfloat162integral_u(uint16_t b,
                                     __iml_rounding_mode rounding_mode) {

@@ -450,4 +450,9 @@ _iml_half_internal __devicelib_imf_double2half(double x) {
   return __spirv_FConvert_Rhalf_rte(x);
 #endif
 }
+
+// Device-library entry point for double -> bfloat16 conversion; the work is
+// done by __double2bfloat16 and its result is returned unchanged.
+DEVICE_EXTERN_C_INLINE
+_iml_bf16_internal __devicelib_imf_double2bfloat16(double x) {
+  const _iml_bf16_internal bf16_bits = __double2bfloat16(x);
+  return bf16_bits;
+}
 #endif // __LIBDEVICE_IMF_ENABLED__
@@ -369,4 +369,12 @@ DEVICE_EXTERN_C_INLINE
 _iml_half_internal __imf_double2half(double x) {
   return __devicelib_imf_double2half(x);
 }
+
+// Forward declaration of the __devicelib_ implementation called below.
+DEVICE_EXTERN_C_INLINE
+_iml_bf16_internal __devicelib_imf_double2bfloat16(double);
+
+// __imf_ wrapper: passes the double straight through to the __devicelib_
+// implementation and returns its bfloat16 result unchanged.
+DEVICE_EXTERN_C_INLINE
+_iml_bf16_internal __imf_double2bfloat16(double x) {
+  const _iml_bf16_internal bf16_bits = __devicelib_imf_double2bfloat16(x);
+  return bf16_bits;
+}
 #endif // __LIBDEVICE_IMF_ENABLED__
@@ -401,6 +401,8 @@ SYCLDeviceLibFuncMap SDLMap = {
     {"__devicelib_imf_vsadu2", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_vsadu4", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_double2half", DeviceLibExt::cl_intel_devicelib_imf_fp64},
+    {"__devicelib_imf_double2bfloat16",
+     DeviceLibExt::cl_intel_devicelib_imf_fp64},
     {"__devicelib_imf_fma", DeviceLibExt::cl_intel_devicelib_imf_fp64},
     {"__devicelib_imf_floor", DeviceLibExt::cl_intel_devicelib_imf_fp64},
     {"__devicelib_imf_ceil", DeviceLibExt::cl_intel_devicelib_imf_fp64},

@@ -2254,6 +2254,7 @@ extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rd(long long x);
 extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rn(long long x);
 extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_ru(long long x);
 extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rz(long long x);
+extern SYCL_EXTERNAL uint16_t __imf_double2bfloat16(double x);
 extern SYCL_EXTERNAL short __imf_bfloat16_as_short(uint16_t x);
 extern SYCL_EXTERNAL unsigned short __imf_bfloat16_as_ushort(uint16_t x);
 extern SYCL_EXTERNAL uint16_t __imf_short_as_bfloat16(short x);