Skip to content

Commit

Permalink
[HIPIFY][ROCm#1769][fp16] Support for fp16 math - Part 2 - Functions
Browse files Browse the repository at this point in the history
+ Updated the synthetic tests, regenerated `hipify-perl`, and updated the `Device` `CUDA2HIP` docs accordingly
  • Loading branch information
emankov committed Nov 29, 2024
1 parent 1681e5d commit 53b4c87
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 32 deletions.
32 changes: 16 additions & 16 deletions bin/hipify-perl
Original file line number Diff line number Diff line change
Expand Up @@ -6152,6 +6152,9 @@ sub simpleSubstitutions {
subst("__assertfail", "__assertfail", "device_function");
subst("__ballot", "__ballot", "device_function");
subst("__ballot_sync", "__ballot_sync", "device_function");
subst("__bfloat1622float2", "__bfloat1622float2", "device_function");
subst("__bfloat162bfloat162", "__bfloat162bfloat162", "device_function");
subst("__bfloat162float", "__bfloat162float", "device_function");
subst("__brev", "__brev", "device_function");
subst("__brevll", "__brevll", "device_function");
subst("__byte_perm", "__byte_perm", "device_function");
Expand All @@ -6161,6 +6164,7 @@ sub simpleSubstitutions {
subst("__dadd_rn", "__dadd_rn", "device_function");
subst("__ddiv_rn", "__ddiv_rn", "device_function");
subst("__dmul_rn", "__dmul_rn", "device_function");
subst("__double2bfloat16", "__double2bfloat16", "device_function");
subst("__double2float_rd", "__double2float_rd", "device_function");
subst("__double2float_rn", "__double2float_rn", "device_function");
subst("__double2float_ru", "__double2float_ru", "device_function");
Expand Down Expand Up @@ -6195,6 +6199,7 @@ sub simpleSubstitutions {
subst("__ffs", "__ffs", "device_function");
subst("__ffsll", "__ffsll", "device_function");
subst("__float22half2_rn", "__float22half2_rn", "device_function");
subst("__float2bfloat16", "__float2bfloat16", "device_function");
subst("__float2half", "__float2half", "device_function");
subst("__float2half2_rn", "__float2half2_rn", "device_function");
subst("__float2half_rd", "__float2half_rd", "device_function");
Expand Down Expand Up @@ -6297,9 +6302,11 @@ sub simpleSubstitutions {
subst("__hgt2", "__hgt2", "device_function");
subst("__hgtu", "__hgtu", "device_function");
subst("__hgtu2", "__hgtu2", "device_function");
subst("__high2bfloat16", "__high2bfloat16", "device_function");
subst("__high2float", "__high2float", "device_function");
subst("__high2half", "__high2half", "device_function");
subst("__high2half2", "__high2half2", "device_function");
subst("__highs2bfloat162", "__highs2bfloat162", "device_function");
subst("__highs2half2", "__highs2half2", "device_function");
subst("__hiloint2double", "__hiloint2double", "device_function");
subst("__hisinf", "__hisinf", "device_function");
Expand Down Expand Up @@ -6365,6 +6372,7 @@ sub simpleSubstitutions {
subst("__low2half", "__low2half", "device_function");
subst("__low2half2", "__low2half2", "device_function");
subst("__lowhigh2highlow", "__lowhigh2highlow", "device_function");
subst("__lows2bfloat162", "__lows2bfloat162", "device_function");
subst("__lows2half2", "__lows2half2", "device_function");
subst("__match_all_sync", "__match_all_sync", "device_function");
subst("__match_any_sync", "__match_any_sync", "device_function");
Expand Down Expand Up @@ -9237,6 +9245,7 @@ sub countSupportedDeviceFunctions {
"__match_any_sync",
"__match_all_sync",
"__lows2half2",
"__lows2bfloat162",
"__lowhigh2highlow",
"__low2half2",
"__low2half",
Expand Down Expand Up @@ -9302,9 +9311,11 @@ sub countSupportedDeviceFunctions {
"__hisinf",
"__hiloint2double",
"__highs2half2",
"__highs2bfloat162",
"__high2half2",
"__high2half",
"__high2float",
"__high2bfloat16",
"__hgtu2",
"__hgtu",
"__hgt2",
Expand Down Expand Up @@ -9407,6 +9418,7 @@ sub countSupportedDeviceFunctions {
"__float2half_rd",
"__float2half2_rn",
"__float2half",
"__float2bfloat16",
"__float22half2_rn",
"__ffsll",
"__ffs",
Expand Down Expand Up @@ -9441,6 +9453,7 @@ sub countSupportedDeviceFunctions {
"__double2float_ru",
"__double2float_rn",
"__double2float_rd",
"__double2bfloat16",
"__dmul_rn",
"__ddiv_rn",
"__dadd_rn",
Expand All @@ -9450,6 +9463,9 @@ sub countSupportedDeviceFunctions {
"__byte_perm",
"__brevll",
"__brev",
"__bfloat162float",
"__bfloat162bfloat162",
"__bfloat1622float2",
"__ballot_sync",
"__ballot",
"__assertfail",
Expand Down Expand Up @@ -9610,7 +9626,6 @@ sub warnUnsupportedDeviceFunctions {
"__pm2",
"__pm1",
"__pm0",
"__lows2bfloat162",
"__low2bfloat162",
"__low2bfloat16",
"__ll2bfloat16_rz",
Expand Down Expand Up @@ -9643,9 +9658,7 @@ sub warnUnsupportedDeviceFunctions {
"__hlt2_mask",
"__hleu2_mask",
"__hle2_mask",
"__highs2bfloat162",
"__high2bfloat162",
"__high2bfloat16",
"__hgtu2_mask",
"__hgt2_mask",
"__hgeu2_mask",
Expand Down Expand Up @@ -9684,7 +9697,6 @@ sub warnUnsupportedDeviceFunctions {
"__float2bfloat16_rn",
"__float2bfloat16_rd",
"__float2bfloat162_rn",
"__float2bfloat16",
"__float22bfloat162_rn",
"__finitel",
"__finitef",
Expand All @@ -9705,7 +9717,6 @@ sub warnUnsupportedDeviceFunctions {
"__drcp_ru",
"__drcp_rd",
"__double2half",
"__double2bfloat16",
"__dmul_rz",
"__dmul_ru",
"__dmul_rd",
Expand Down Expand Up @@ -9743,10 +9754,7 @@ sub warnUnsupportedDeviceFunctions {
"__bfloat162int_ru",
"__bfloat162int_rn",
"__bfloat162int_rd",
"__bfloat162float",
"__bfloat162char_rz",
"__bfloat162bfloat162",
"__bfloat1622float2",
"_Pow_int"
)
{
Expand Down Expand Up @@ -11073,7 +11081,6 @@ sub warnUnsupportedFunctions {
"__pm2",
"__pm1",
"__pm0",
"__lows2bfloat162",
"__low2bfloat162",
"__low2bfloat16",
"__ll2bfloat16_rz",
Expand Down Expand Up @@ -11106,9 +11113,7 @@ sub warnUnsupportedFunctions {
"__hlt2_mask",
"__hleu2_mask",
"__hle2_mask",
"__highs2bfloat162",
"__high2bfloat162",
"__high2bfloat16",
"__hgtu2_mask",
"__hgt2_mask",
"__hgeu2_mask",
Expand Down Expand Up @@ -11147,7 +11152,6 @@ sub warnUnsupportedFunctions {
"__float2bfloat16_rn",
"__float2bfloat16_rd",
"__float2bfloat162_rn",
"__float2bfloat16",
"__float22bfloat162_rn",
"__finitel",
"__finitef",
Expand All @@ -11168,7 +11172,6 @@ sub warnUnsupportedFunctions {
"__drcp_ru",
"__drcp_rd",
"__double2half",
"__double2bfloat16",
"__dmul_rz",
"__dmul_ru",
"__dmul_rd",
Expand Down Expand Up @@ -11206,10 +11209,7 @@ sub warnUnsupportedFunctions {
"__bfloat162int_ru",
"__bfloat162int_rn",
"__bfloat162int_rd",
"__bfloat162float",
"__bfloat162char_rz",
"__bfloat162bfloat162",
"__bfloat1622float2",
"__CUB_LP64__",
"_Pow_int",
"_CUB_ASM_PTR_SIZE_",
Expand Down
16 changes: 8 additions & 8 deletions docs/tables/CUDA_Device_API_supported_by_HIP.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
|`__assertfail`| | | | |`__assertfail`|1.9.0| | | | |
|`__ballot`| | | | |`__ballot`|1.6.0| | | | |
|`__ballot_sync`|9.0| | | |`__ballot_sync`|6.2.0| | | | |
|`__bfloat1622float2`|11.0| | | | | | | | | |
|`__bfloat162bfloat162`|11.0| | | | | | | | | |
|`__bfloat1622float2`|11.0| | | |`__bfloat1622float2`|5.7.0| | | | |
|`__bfloat162bfloat162`|11.0| | | |`__bfloat162bfloat162`|5.7.0| | | | |
|`__bfloat162char_rz`|12.2| | | | | | | | | |
|`__bfloat162float`|11.0| | | | | | | | | |
|`__bfloat162float`|11.0| | | |`__bfloat162float`|5.7.0| | | | |
|`__bfloat162int_rd`|11.0| | | | | | | | | |
|`__bfloat162int_rn`|11.0| | | | | | | | | |
|`__bfloat162int_ru`|11.0| | | | | | | | | |
Expand Down Expand Up @@ -64,7 +64,7 @@
|`__dmul_rn`| | | | |`__dmul_rn`|1.6.0| | | | |
|`__dmul_ru`| | | | | | | | | | |
|`__dmul_rz`| | | | | | | | | | |
|`__double2bfloat16`|11.0| | | | | | | | | |
|`__double2bfloat16`|11.0| | | |`__double2bfloat16`|5.7.0| | | | |
|`__double2float_rd`| | | | |`__double2float_rd`|1.6.0| | | | |
|`__double2float_rn`| | | | |`__double2float_rn`|1.6.0| | | | |
|`__double2float_ru`| | | | |`__double2float_ru`|1.6.0| | | | |
Expand Down Expand Up @@ -119,7 +119,7 @@
|`__finitel`| | | | | | | | | | |
|`__float22bfloat162_rn`|11.0| | | | | | | | | |
|`__float22half2_rn`| | | | |`__float22half2_rn`|1.6.0| | | | |
|`__float2bfloat16`|11.0| | | | | | | | | |
|`__float2bfloat16`|11.0| | | |`__float2bfloat16`|5.7.0| | | | |
|`__float2bfloat162_rn`|11.0| | | | | | | | | |
|`__float2bfloat16_rd`|11.0| | | | | | | | | |
|`__float2bfloat16_rn`|11.0| | | | | | | | | |
Expand Down Expand Up @@ -260,12 +260,12 @@
|`__hgtu`| | | | |`__hgtu`|1.9.0| | | | |
|`__hgtu2`| | | | |`__hgtu2`|1.9.0| | | | |
|`__hgtu2_mask`|12.0| | | | | | | | | |
|`__high2bfloat16`|11.0| | | | | | | | | |
|`__high2bfloat16`|11.0| | | |`__high2bfloat16`|5.7.0| | | | |
|`__high2bfloat162`|11.0| | | | | | | | | |
|`__high2float`| | | | |`__high2float`|1.6.0| | | | |
|`__high2half`| | | | |`__high2half`|1.6.0| | | | |
|`__high2half2`| | | | |`__high2half2`|1.6.0| | | | |
|`__highs2bfloat162`|11.0| | | | | | | | | |
|`__highs2bfloat162`|11.0| | | |`__highs2bfloat162`|5.7.0| | | | |
|`__highs2half2`| | | | |`__highs2half2`|1.6.0| | | | |
|`__hiloint2double`| | | | |`__hiloint2double`|1.6.0| | | | |
|`__hisinf`| | | | |`__hisinf`|1.6.0| | | | |
Expand Down Expand Up @@ -363,7 +363,7 @@
|`__low2half`| | | | |`__low2half`|1.6.0| | | | |
|`__low2half2`| | | | |`__low2half2`|1.6.0| | | | |
|`__lowhigh2highlow`| | | | |`__lowhigh2highlow`|1.6.0| | | | |
|`__lows2bfloat162`|11.0| | | | | | | | | |
|`__lows2bfloat162`|11.0| | | |`__lows2bfloat162`|5.7.0| | | | |
|`__lows2half2`| | | | |`__lows2half2`|1.6.0| | | | |
|`__match_all_sync`|9.0| | | |`__match_all_sync`|6.2.0| | | | |
|`__match_any_sync`|9.0| | | |`__match_any_sync`|6.2.0| | | | |
Expand Down
24 changes: 16 additions & 8 deletions src/CUDA2HIP_Device_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -714,16 +714,16 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
{"__half2char_rz", {"__half2char_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__half2uchar_rz", {"__half2uchar_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
// bfp16 functions
{"__double2bfloat16", {"__double2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__float2bfloat16", {"__float2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__double2bfloat16", {"__double2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__float2bfloat16", {"__float2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__float2bfloat16_rn", {"__float2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__float2bfloat16_rz", {"__float2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__float2bfloat16_rd", {"__float2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__float2bfloat16_ru", {"__float2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162float", {"__bfloat162float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162float", {"__bfloat162float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__float2bfloat162_rn", {"__float2bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__floats2bfloat162_rn", {"__floats2bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat1622float2", {"__bfloat1622float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat1622float2", {"__bfloat1622float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__bfloat162int_rn", {"__bfloat162int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162int_rz", {"__bfloat162int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162int_rd", {"__bfloat162int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
Expand Down Expand Up @@ -772,10 +772,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
{"__ll2bfloat16_rz", {"__ll2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ll2bfloat16_rd", {"__ll2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ll2bfloat16_ru", {"__ll2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162bfloat162", {"__bfloat162bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__lows2bfloat162", {"__lows2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__highs2bfloat162", {"__highs2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__high2bfloat16", {"__high2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162bfloat162", {"__bfloat162bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__lows2bfloat162", {"__lows2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__highs2bfloat162", {"__highs2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__high2bfloat16", {"__high2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__low2bfloat16", {"__low2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__halves2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__low2bfloat162", {"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
Expand Down Expand Up @@ -1508,6 +1508,14 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP {
{"__hip_cvt_bfloat16raw2_to_fp8x2", {HIP_6020, HIP_0, HIP_0 }},
{"__hip_cvt_fp8_to_halfraw", {HIP_6020, HIP_0, HIP_0 }},
{"__hip_cvt_fp8x2_to_halfraw2", {HIP_6020, HIP_0, HIP_0 }},
{"__double2bfloat16", {HIP_5070, HIP_0, HIP_0 }},
{"__float2bfloat16", {HIP_5070, HIP_0, HIP_0 }},
{"__bfloat162float", {HIP_5070, HIP_0, HIP_0 }},
{"__bfloat1622float2", {HIP_5070, HIP_0, HIP_0 }},
{"__bfloat162bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__lows2bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__highs2bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__high2bfloat16", {HIP_5070, HIP_0, HIP_0 }},
};

const std::map<unsigned int, llvm::StringRef> CUDA_DEVICE_FUNCTION_API_SECTION_MAP {
Expand Down
44 changes: 44 additions & 0 deletions tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
int main() {
printf("24. CUDA Device API to HIP Device API synthetic test\n");

double da = 0.0f;
double dx = 0.0f;
float fa = 0.0f;
float fx = 0.0f;
double2 d2 = { 0.0f, 0.0f };
float2 f2 = { 0.0f, 0.0f };
Expand All @@ -27,9 +29,51 @@ int main() {

// CHECK: __hip_bfloat162 bf162 = { 0, 0 };
__nv_bfloat162 bf162 = { 0, 0 };
__nv_bfloat162 bf162a = { 0, 0 };
__nv_bfloat162 bf162b = { 0, 0 };

// CHECK: __hip_bfloat162_raw bf162r = { 0, 0 };
__nv_bfloat162_raw bf162r = { 0, 0 };

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __double2bfloat16(const double a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __double2bfloat16(const double a);
// CHECK: bf16 = __double2bfloat16(da);
bf16 = __double2bfloat16(da);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __float2bfloat16(const float a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __float2bfloat16(float f);
// CHECK: bf16 = __float2bfloat16(fa);
bf16 = __float2bfloat16(fa);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ float __bfloat162float(const __nv_bfloat16 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ float __bfloat162float(__hip_bfloat16 a);
// CHECK: bf16 = __bfloat162float(fa);
bf16 = __bfloat162float(fa);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ float2 __bfloat1622float2(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ float2 __bfloat1622float2(const __hip_bfloat162 a);
// CHECK: f2 = __bfloat1622float2(bf162);
f2 = __bfloat1622float2(bf162);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __bfloat162bfloat162(const __nv_bfloat16 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a);
// CHECK: bf162 = __bfloat162bfloat162(bf16);
bf162 = __bfloat162bfloat162(bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __lows2bfloat162(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __lows2bfloat162(bf162a, bf162b);
bf162 = __lows2bfloat162(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __highs2bfloat162(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __highs2bfloat162(bf162a, bf162b);
bf162 = __highs2bfloat162(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __high2bfloat16(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a);
// CHECK: bf16 = __high2bfloat16(bf162a);
bf16 = __high2bfloat16(bf162a);
#endif

#if CUDA_VERSION >= 11080
Expand Down

0 comments on commit 53b4c87

Please sign in to comment.