ROCm · emankov · Nov 27, 2024 · Nov 27, 2024
@@ -6675,7 +6675,8 @@ sub simpleSubstitutions {
     subst("__half2", "__half2", "device_type");
     subst("__half2_raw", "__half2_raw", "device_type");
     subst("__half_raw", "__half_raw", "device_type");
-    subst("__nv_bfloat16", "hip_bfloat16", "device_type");
+    subst("__nv_bfloat16", "__hip_bfloat16", "device_type");
+    subst("__nv_bfloat162", "__hip_bfloat162", "device_type");
     subst("__nv_bfloat162_raw", "__hip_bfloat162_raw", "device_type");
     subst("__nv_bfloat16_raw", "__hip_bfloat16_raw", "device_type");
     subst("__nv_fp8_e4m3", "__hip_fp8_e4m3_fnuz", "device_type");
@@ -9835,6 +9836,7 @@ sub warnUnsupportedFunctions {
         "nvrtcGetLTOIRSize",
         "nvrtcGetLTOIR",
         "nv_bfloat162",
+        "nv_bfloat16",
         "mulhi",
         "mul64hi",
         "mul24",
@@ -11071,7 +11073,6 @@ sub warnUnsupportedFunctions {
         "__pm2",
         "__pm1",
         "__pm0",
-        "__nv_bfloat162",
         "__lows2bfloat162",
         "__low2bfloat162",
         "__low2bfloat16",

@@ -817,8 +817,8 @@
 |`__half2`| | | | |`__half2`|1.6.0| | | | |
 |`__half2_raw`| | | | |`__half2_raw`|1.9.0| | | | |
 |`__half_raw`| | | | |`__half_raw`|1.9.0| | | | |
-|`__nv_bfloat16`|11.0| | | |`hip_bfloat16`|3.5.0| | | | |
-|`__nv_bfloat162`|11.0| | | | | | | | | |
+|`__nv_bfloat16`|11.0| | | |`__hip_bfloat16`|5.7.0| | | | |
+|`__nv_bfloat162`|11.0| | | |`__hip_bfloat162`|5.7.0| | | | |
 |`__nv_bfloat162_raw`|11.0| | | |`__hip_bfloat162_raw`|6.2.0| | | | |
 |`__nv_bfloat16_raw`|11.0| | | |`__hip_bfloat16_raw`|6.2.0| | | | |
 |`__nv_fp8_e4m3`|11.8| | | |`__hip_fp8_e4m3_fnuz`|6.2.0| | | | |
@@ -832,6 +832,7 @@
 |`__nv_fp8x4_e5m2`|11.8| | | |`__hip_fp8x4_e5m2_fnuz`|6.2.0| | | | |
 |`__nv_fp8x4_storage_t`|11.8| | | |`__hip_fp8x4_storage_t`|6.2.0| | | | |
 |`__nv_saturation_t`|11.8| | | |`__hip_saturation_t`|6.2.0| | | | |
+|`nv_bfloat16`|11.0| | | | | | | | | |
 |`nv_bfloat162`|11.0| | | | | | | | | |
 
 

@@ -30,11 +30,12 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_TYPE_NAME_MAP {
   {"__half2",                              {"__half2",                               "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2}},
   {"__half2_raw",                          {"__half2_raw",                           "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2}},
   // Bfloat16 Precision Device types
-  {"__nv_bfloat16",                        {"hip_bfloat16",                          "rocblas_bfloat16",                        CONV_DEVICE_TYPE, API_RUNTIME, 2}},
-  {"__nv_bfloat16_raw",                    {"__hip_bfloat16_raw",                    "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2, ROC_UNSUPPORTED}},
-  {"__nv_bfloat162",                       {"__hip_bfloat162",                       "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_bfloat16",                        {"__hip_bfloat16",                        "rocblas_bfloat16",                        CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+  {"nv_bfloat16",                          {"hip_bfloat16",                          "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_bfloat16_raw",                    {"__hip_bfloat16_raw",                    "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+  {"__nv_bfloat162",                       {"__hip_bfloat162",                       "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2}},
   {"nv_bfloat162",                         {"hip_bfloat162",                         "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
-  {"__nv_bfloat162_raw",                   {"__hip_bfloat162_raw",                   "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2, ROC_UNSUPPORTED}},
+  {"__nv_bfloat162_raw",                   {"__hip_bfloat162_raw",                   "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2}},
   // float8 Precision Device types
   {"__nv_fp8_storage_t",                   {"__hip_fp8_storage_t",                   "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2}},
   {"__nv_fp8x2_storage_t",                 {"__hip_fp8x2_storage_t",                 "",                                        CONV_DEVICE_TYPE, API_RUNTIME, 2}},
@@ -82,7 +83,7 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_TYPE_NAME_VER_MAP {
   {"__half2",                              {HIP_1060, HIP_0,    HIP_0   }},
   {"__half_raw",                           {HIP_1090, HIP_0,    HIP_0   }},
   {"__half2_raw",                          {HIP_1090, HIP_0,    HIP_0   }},
-  {"hip_bfloat16",                         {HIP_3050, HIP_0,    HIP_0   }},
+  {"__hip_bfloat16",                       {HIP_5070, HIP_0,    HIP_0   }},
   {"__hip_fp8_e4m3_fnuz",                  {HIP_6020, HIP_0,    HIP_0   }},
   {"__hip_fp8_storage_t",                  {HIP_6020, HIP_0,    HIP_0   }},
   {"__hip_fp8x2_storage_t",                {HIP_6020, HIP_0,    HIP_0   }},
@@ -100,6 +101,7 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_TYPE_NAME_VER_MAP {
   {"__hip_fp8x4_e5m2_fnuz",                {HIP_6020, HIP_0,    HIP_0   }},
   {"__hip_bfloat16_raw",                   {HIP_6020, HIP_0,    HIP_0   }},
   {"__hip_bfloat162_raw",                  {HIP_6020, HIP_0,    HIP_0   }},
+  {"__hip_bfloat162",                      {HIP_5070, HIP_0,    HIP_0   }},
 
   {"rocblas_half",                         {HIP_1050, HIP_0,    HIP_0   }},
   {"rocblas_bfloat16",                     {HIP_3050, HIP_0,    HIP_0   }},

@@ -1679,15 +1679,15 @@ int main() {
   cublasMath_t BLAS_TF32_TENSOR_OP_MATH = CUBLAS_TF32_TENSOR_OP_MATH;
   cublasMath_t BLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION = CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION;
 
-  // CHECK: hip_bfloat16** bf16Aarray = 0;
+  // CHECK: __hip_bfloat16** bf16Aarray = 0;
   __nv_bfloat16** bf16Aarray = 0;
-  // CHECK: const hip_bfloat16** const bf16Aarray_const = const_cast<const hip_bfloat16**>(bf16Aarray);
+  // CHECK: const __hip_bfloat16** const bf16Aarray_const = const_cast<const __hip_bfloat16**>(bf16Aarray);
   const __nv_bfloat16** const bf16Aarray_const = const_cast<const __nv_bfloat16**>(bf16Aarray);
-  // CHECK: hip_bfloat16** bf16xarray = 0;
+  // CHECK: __hip_bfloat16** bf16xarray = 0;
   __nv_bfloat16** bf16xarray = 0;
-  // CHECK: const hip_bfloat16** const bf16xarray_const = const_cast<const hip_bfloat16**>(bf16xarray_const);
+  // CHECK: const __hip_bfloat16** const bf16xarray_const = const_cast<const __hip_bfloat16**>(bf16xarray_const);
   const __nv_bfloat16** const bf16xarray_const = const_cast<const __nv_bfloat16**>(bf16xarray_const);
-  // CHECK: hip_bfloat16** bf16yarray = 0;
+  // CHECK: __hip_bfloat16** bf16yarray = 0;
   __nv_bfloat16** bf16yarray = 0;
 
   // CHECK: hipblasComputeType_t blasComputeType;

@@ -1864,23 +1864,23 @@ int main() {
   cublasDataType_t R_16BF = CUDA_R_16BF;
   cublasDataType_t C_16BF = CUDA_C_16BF;
 
-  // CHECK: hip_bfloat16* bf16A = nullptr;
+  // CHECK: __hip_bfloat16* bf16A = nullptr;
   __nv_bfloat16* bf16A = nullptr;
-  // CHECK: hip_bfloat16** bf16Aarray = 0;
+  // CHECK: __hip_bfloat16** bf16Aarray = 0;
   __nv_bfloat16** bf16Aarray = 0;
-  // CHECK: const hip_bfloat16** const bf16Aarray_const = const_cast<const hip_bfloat16**>(bf16Aarray);
+  // CHECK: const __hip_bfloat16** const bf16Aarray_const = const_cast<const __hip_bfloat16**>(bf16Aarray);
   const __nv_bfloat16** const bf16Aarray_const = const_cast<const __nv_bfloat16**>(bf16Aarray);
-  // CHECK: hip_bfloat16* bf16X = nullptr;
+  // CHECK: __hip_bfloat16* bf16X = nullptr;
   __nv_bfloat16* bf16X = nullptr;
-  // CHECK: hip_bfloat16** bf16xarray = 0;
+  // CHECK: __hip_bfloat16** bf16xarray = 0;
   __nv_bfloat16** bf16xarray = 0;
-  // CHECK: const hip_bfloat16** const bf16xarray_const = const_cast<const hip_bfloat16**>(bf16xarray);
+  // CHECK: const __hip_bfloat16** const bf16xarray_const = const_cast<const __hip_bfloat16**>(bf16xarray);
   const __nv_bfloat16** const bf16xarray_const = const_cast<const __nv_bfloat16**>(bf16xarray);
-  // CHECK: hip_bfloat16* bf16Y = nullptr;
+  // CHECK: __hip_bfloat16* bf16Y = nullptr;
   __nv_bfloat16* bf16Y = nullptr;
-  // CHECK: hip_bfloat16** bf16yarray = 0;
+  // CHECK: __hip_bfloat16** bf16yarray = 0;
   __nv_bfloat16** bf16yarray = 0;
-  // CHECK: const hip_bfloat16** const bf16yarray_const = const_cast<const hip_bfloat16**>(bf16yarray);
+  // CHECK: const __hip_bfloat16** const bf16yarray_const = const_cast<const __hip_bfloat16**>(bf16yarray);
   const __nv_bfloat16** const bf16yarray_const = const_cast<const __nv_bfloat16**>(bf16yarray);
 
   // CHECK: hipblasComputeType_t blasComputeType;

@@ -18,10 +18,16 @@ int main() {
   __half_raw hrx = { 0 };
   __half2_raw h2rx = { 0, 0 };
 
-#if CUDA_VERSION >= 11080
+#if CUDA_VERSION >= 11000
+  // CHECK: __hip_bfloat16 bf16 = { 0 };
+  __nv_bfloat16 bf16 = { 0 };
+
   // CHECK: __hip_bfloat16_raw bf16r = { 0 };
   __nv_bfloat16_raw bf16r = { 0 };
 
+  // CHECK: __hip_bfloat162 bf162 = { 0, 0 };
+  __nv_bfloat162 bf162 = { 0, 0 };
+
   // CHECK: __hip_bfloat162_raw bf162r = { 0, 0 };
   __nv_bfloat162_raw bf162r = { 0, 0 };
 #endif