Skip to content

Commit

Permalink
Merge pull request #1770 from emankov/HIPIFY
Browse files Browse the repository at this point in the history
[HIPIFY][#1769][fp16] Support for `fp16` math - Part 1 - Data Types
  • Loading branch information
emankov authored Nov 27, 2024
2 parents 80ac36e + 5466c48 commit c66e133
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 24 deletions.
5 changes: 3 additions & 2 deletions bin/hipify-perl
Original file line number Diff line number Diff line change
Expand Up @@ -6675,7 +6675,8 @@ sub simpleSubstitutions {
subst("__half2", "__half2", "device_type");
subst("__half2_raw", "__half2_raw", "device_type");
subst("__half_raw", "__half_raw", "device_type");
subst("__nv_bfloat16", "hip_bfloat16", "device_type");
subst("__nv_bfloat16", "__hip_bfloat16", "device_type");
subst("__nv_bfloat162", "__hip_bfloat162", "device_type");
subst("__nv_bfloat162_raw", "__hip_bfloat162_raw", "device_type");
subst("__nv_bfloat16_raw", "__hip_bfloat16_raw", "device_type");
subst("__nv_fp8_e4m3", "__hip_fp8_e4m3_fnuz", "device_type");
Expand Down Expand Up @@ -9835,6 +9836,7 @@ sub warnUnsupportedFunctions {
"nvrtcGetLTOIRSize",
"nvrtcGetLTOIR",
"nv_bfloat162",
"nv_bfloat16",
"mulhi",
"mul64hi",
"mul24",
Expand Down Expand Up @@ -11071,7 +11073,6 @@ sub warnUnsupportedFunctions {
"__pm2",
"__pm1",
"__pm0",
"__nv_bfloat162",
"__lows2bfloat162",
"__low2bfloat162",
"__low2bfloat16",
Expand Down
5 changes: 3 additions & 2 deletions docs/tables/CUDA_Device_API_supported_by_HIP.md
Original file line number Diff line number Diff line change
Expand Up @@ -817,8 +817,8 @@
|`__half2`| | | | |`__half2`|1.6.0| | | | |
|`__half2_raw`| | | | |`__half2_raw`|1.9.0| | | | |
|`__half_raw`| | | | |`__half_raw`|1.9.0| | | | |
|`__nv_bfloat16`|11.0| | | |`hip_bfloat16`|3.5.0| | | | |
|`__nv_bfloat162`|11.0| | | | | | | | | |
|`__nv_bfloat16`|11.0| | | |`__hip_bfloat16`|5.7.0| | | | |
|`__nv_bfloat162`|11.0| | | |`__hip_bfloat162`|5.7.0| | | | |
|`__nv_bfloat162_raw`|11.0| | | |`__hip_bfloat162_raw`|6.2.0| | | | |
|`__nv_bfloat16_raw`|11.0| | | |`__hip_bfloat16_raw`|6.2.0| | | | |
|`__nv_fp8_e4m3`|11.8| | | |`__hip_fp8_e4m3_fnuz`|6.2.0| | | | |
Expand All @@ -832,6 +832,7 @@
|`__nv_fp8x4_e5m2`|11.8| | | |`__hip_fp8x4_e5m2_fnuz`|6.2.0| | | | |
|`__nv_fp8x4_storage_t`|11.8| | | |`__hip_fp8x4_storage_t`|6.2.0| | | | |
|`__nv_saturation_t`|11.8| | | |`__hip_saturation_t`|6.2.0| | | | |
|`nv_bfloat16`|11.0| | | | | | | | | |
|`nv_bfloat162`|11.0| | | | | | | | | |


Expand Down
12 changes: 7 additions & 5 deletions src/CUDA2HIP_Device_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_TYPE_NAME_MAP {
{"__half2", {"__half2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"__half2_raw", {"__half2_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
// Bfloat16 Precision Device types
{"__nv_bfloat16", {"hip_bfloat16", "rocblas_bfloat16", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"__nv_bfloat16_raw", {"__hip_bfloat16_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, ROC_UNSUPPORTED}},
{"__nv_bfloat162", {"__hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
{"__nv_bfloat16", {"__hip_bfloat16", "rocblas_bfloat16", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"nv_bfloat16", {"hip_bfloat16", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
{"__nv_bfloat16_raw", {"__hip_bfloat16_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"__nv_bfloat162", {"__hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"nv_bfloat162", {"hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
{"__nv_bfloat162_raw", {"__hip_bfloat162_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, ROC_UNSUPPORTED}},
{"__nv_bfloat162_raw", {"__hip_bfloat162_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
// float8 Precision Device types
{"__nv_fp8_storage_t", {"__hip_fp8_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"__nv_fp8x2_storage_t", {"__hip_fp8x2_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
Expand Down Expand Up @@ -82,7 +83,7 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_TYPE_NAME_VER_MAP {
{"__half2", {HIP_1060, HIP_0, HIP_0 }},
{"__half_raw", {HIP_1090, HIP_0, HIP_0 }},
{"__half2_raw", {HIP_1090, HIP_0, HIP_0 }},
{"hip_bfloat16", {HIP_3050, HIP_0, HIP_0 }},
{"__hip_bfloat16", {HIP_5070, HIP_0, HIP_0 }},
{"__hip_fp8_e4m3_fnuz", {HIP_6020, HIP_0, HIP_0 }},
{"__hip_fp8_storage_t", {HIP_6020, HIP_0, HIP_0 }},
{"__hip_fp8x2_storage_t", {HIP_6020, HIP_0, HIP_0 }},
Expand All @@ -100,6 +101,7 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_TYPE_NAME_VER_MAP {
{"__hip_fp8x4_e5m2_fnuz", {HIP_6020, HIP_0, HIP_0 }},
{"__hip_bfloat16_raw", {HIP_6020, HIP_0, HIP_0 }},
{"__hip_bfloat162_raw", {HIP_6020, HIP_0, HIP_0 }},
{"__hip_bfloat162", {HIP_5070, HIP_0, HIP_0 }},

{"rocblas_half", {HIP_1050, HIP_0, HIP_0 }},
{"rocblas_bfloat16", {HIP_3050, HIP_0, HIP_0 }},
Expand Down
10 changes: 5 additions & 5 deletions tests/unit_tests/synthetic/libraries/cublas2hipblas.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1679,15 +1679,15 @@ int main() {
cublasMath_t BLAS_TF32_TENSOR_OP_MATH = CUBLAS_TF32_TENSOR_OP_MATH;
cublasMath_t BLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION = CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION;

// CHECK: hip_bfloat16** bf16Aarray = 0;
// CHECK: __hip_bfloat16** bf16Aarray = 0;
__nv_bfloat16** bf16Aarray = 0;
// CHECK: const hip_bfloat16** const bf16Aarray_const = const_cast<const hip_bfloat16**>(bf16Aarray);
// CHECK: const __hip_bfloat16** const bf16Aarray_const = const_cast<const __hip_bfloat16**>(bf16Aarray);
const __nv_bfloat16** const bf16Aarray_const = const_cast<const __nv_bfloat16**>(bf16Aarray);
// CHECK: hip_bfloat16** bf16xarray = 0;
// CHECK: __hip_bfloat16** bf16xarray = 0;
__nv_bfloat16** bf16xarray = 0;
// CHECK: const hip_bfloat16** const bf16xarray_const = const_cast<const hip_bfloat16**>(bf16xarray_const);
// CHECK: const __hip_bfloat16** const bf16xarray_const = const_cast<const __hip_bfloat16**>(bf16xarray_const);
const __nv_bfloat16** const bf16xarray_const = const_cast<const __nv_bfloat16**>(bf16xarray_const);
// CHECK: hip_bfloat16** bf16yarray = 0;
// CHECK: __hip_bfloat16** bf16yarray = 0;
__nv_bfloat16** bf16yarray = 0;

// CHECK: hipblasComputeType_t blasComputeType;
Expand Down
18 changes: 9 additions & 9 deletions tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1864,23 +1864,23 @@ int main() {
cublasDataType_t R_16BF = CUDA_R_16BF;
cublasDataType_t C_16BF = CUDA_C_16BF;

// CHECK: hip_bfloat16* bf16A = nullptr;
// CHECK: __hip_bfloat16* bf16A = nullptr;
__nv_bfloat16* bf16A = nullptr;
// CHECK: hip_bfloat16** bf16Aarray = 0;
// CHECK: __hip_bfloat16** bf16Aarray = 0;
__nv_bfloat16** bf16Aarray = 0;
// CHECK: const hip_bfloat16** const bf16Aarray_const = const_cast<const hip_bfloat16**>(bf16Aarray);
// CHECK: const __hip_bfloat16** const bf16Aarray_const = const_cast<const __hip_bfloat16**>(bf16Aarray);
const __nv_bfloat16** const bf16Aarray_const = const_cast<const __nv_bfloat16**>(bf16Aarray);
// CHECK: hip_bfloat16* bf16X = nullptr;
// CHECK: __hip_bfloat16* bf16X = nullptr;
__nv_bfloat16* bf16X = nullptr;
// CHECK: hip_bfloat16** bf16xarray = 0;
// CHECK: __hip_bfloat16** bf16xarray = 0;
__nv_bfloat16** bf16xarray = 0;
// CHECK: const hip_bfloat16** const bf16xarray_const = const_cast<const hip_bfloat16**>(bf16xarray);
// CHECK: const __hip_bfloat16** const bf16xarray_const = const_cast<const __hip_bfloat16**>(bf16xarray);
const __nv_bfloat16** const bf16xarray_const = const_cast<const __nv_bfloat16**>(bf16xarray);
// CHECK: hip_bfloat16* bf16Y = nullptr;
// CHECK: __hip_bfloat16* bf16Y = nullptr;
__nv_bfloat16* bf16Y = nullptr;
// CHECK: hip_bfloat16** bf16yarray = 0;
// CHECK: __hip_bfloat16** bf16yarray = 0;
__nv_bfloat16** bf16yarray = 0;
// CHECK: const hip_bfloat16** const bf16yarray_const = const_cast<const hip_bfloat16**>(bf16yarray);
// CHECK: const __hip_bfloat16** const bf16yarray_const = const_cast<const __hip_bfloat16**>(bf16yarray);
const __nv_bfloat16** const bf16yarray_const = const_cast<const __nv_bfloat16**>(bf16yarray);

// CHECK: hipblasComputeType_t blasComputeType;
Expand Down
8 changes: 7 additions & 1 deletion tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,16 @@ int main() {
__half_raw hrx = { 0 };
__half2_raw h2rx = { 0, 0 };

#if CUDA_VERSION >= 11080
#if CUDA_VERSION >= 11000
// CHECK: __hip_bfloat16 bf16 = { 0 };
__nv_bfloat16 bf16 = { 0 };

// CHECK: __hip_bfloat16_raw bf16r = { 0 };
__nv_bfloat16_raw bf16r = { 0 };

// CHECK: __hip_bfloat162 bf162 = { 0, 0 };
__nv_bfloat162 bf162 = { 0, 0 };

// CHECK: __hip_bfloat162_raw bf162r = { 0, 0 };
__nv_bfloat162_raw bf162r = { 0, 0 };
#endif
Expand Down

0 comments on commit c66e133

Please sign in to comment.