diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0ceea395b81d2..f4b4f992d5d9c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4847,9 +4847,9 @@ def fno_sycl_dead_args_optimization : Flag<["-"], "fno-sycl-dead-args-optimizati Group, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Disables " "elimination of DPC++ dead kernel arguments">; def fsycl_device_lib_EQ : CommaJoined<["-"], "fsycl-device-lib=">, Group, Flags<[NoXarchOption, CoreOption]>, - Values<"libc, libm-fp32, libm-fp64, all">, HelpText<"Control inclusion of " + Values<"libc, libm-fp32, libm-fp64, libimf-fp32, libimf-fp64, all">, HelpText<"Control inclusion of " "device libraries into device binary linkage. Valid arguments " - "are libc, libm-fp32, libm-fp64, all">; + "are libc, libm-fp32, libm-fp64, libimf-fp32, libimf-fp64, all">; def fno_sycl_device_lib_EQ : CommaJoined<["-"], "fno-sycl-device-lib=">, Group, Flags<[NoXarchOption, CoreOption]>, Values<"libc, libm-fp32, libm-fp64, all">, HelpText<"Control exclusion of " "device libraries from device binary linkage. Valid arguments " diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 88b94ef474e2c..35d94364e5fcf 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4714,10 +4714,9 @@ class OffloadingActionBuilder final { int NumOfDeviceLibLinked = 0; // Currently, all SYCL device libraries will be linked by default. Linkage // of "internal" libraries cannot be affected via -fno-sycl-device-lib. - llvm::StringMap devicelib_link_info = {{"libc", true}, - {"libm-fp32", true}, - {"libm-fp64", true}, - {"internal", true}}; + llvm::StringMap devicelib_link_info = { + {"libc", true}, {"libm-fp32", true}, {"libm-fp64", true}, + {"libimf-fp32", true}, {"libimf-fp64", true}, {"internal", true}}; if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ, options::OPT_fno_sycl_device_lib_EQ)) { if (A->getValues().size() == 0) @@ -4764,6 +4763,8 @@ class OffloadingActionBuilder final { #if defined(_WIN32) {"libsycl-msvc-math", "libm-fp32"}, #endif + {"libsycl-imf", "libimf-fp32"}, + {"libsycl-imf-fp64", "libimf-fp64"} }; // For AOT compilation, we need to link sycl_device_fallback_libs as // default too. @@ -4773,7 +4774,9 @@ class OffloadingActionBuilder final { {"libsycl-fallback-complex", "libm-fp32"}, {"libsycl-fallback-complex-fp64", "libm-fp64"}, {"libsycl-fallback-cmath", "libm-fp32"}, - {"libsycl-fallback-cmath-fp64", "libm-fp64"}}; + {"libsycl-fallback-cmath-fp64", "libm-fp64"}, + {"libsycl-fallback-imf", "libimf-fp32"}, + {"libsycl-fallback-imf-fp64", "libimf-fp64"}}; // ITT annotation libraries are linked in separately whenever the device // code instrumentation is enabled. const SYCLDeviceLibsList sycl_device_annotation_libs = { diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 390c637a0aff6..59dfa268dc38e 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -714,6 +714,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_fsycl) && !Args.hasArg(options::OPT_nolibsycl)) { CmdArgs.push_back("-lsycl"); + CmdArgs.push_back("-lsycl-devicelib-host"); // Use of -fintelfpga implies -lOpenCL. // FIXME: Adjust to use plugin interface when available. if (Args.hasArg(options::OPT_fintelfpga)) diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 54ebdac2ded42..bb6d1db23e614 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -139,6 +139,7 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-defaultlib:sycld.lib"); else CmdArgs.push_back("-defaultlib:sycl.lib"); + CmdArgs.push_back("-defaultlib:sycl-devicelib-host.lib"); } for (const auto *A : Args.filtered(options::OPT_foffload_static_lib_EQ)) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 246bd51acb694..ae83f2a4da222 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -130,15 +130,15 @@ void SYCL::constructLLVMForeachCommand(Compilation &C, const JobAction &JA, // The list should match pre-built SYCL device library files located in // compiler package. Once we add or remove any SYCL device library files, // the list should be updated accordingly. - static llvm::SmallVector SYCLDeviceLibList { "crt", "cmath", "cmath-fp64", "complex", "complex-fp64", #if defined(_WIN32) "msvc-math", #endif - "itt-compiler-wrappers", "itt-stubs", "itt-user-wrappers", - "fallback-cassert", "fallback-cstring", "fallback-cmath", - "fallback-cmath-fp64", "fallback-complex", "fallback-complex-fp64" + "imf", "imf-fp64", "itt-compiler-wrappers", "itt-stubs", + "itt-user-wrappers", "fallback-cassert", "fallback-cstring", + "fallback-cmath", "fallback-cmath-fp64", "fallback-complex", + "fallback-complex-fp64", "fallback-imf", "fallback-imf-fp64" }; const char *SYCL::Linker::constructLLVMLinkCommand( diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf-fp64.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf-fp64.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf-fp64.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf-fp64.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-msvc-math.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-msvc-math.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf-fp64.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf-fp64.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-imf-fp64.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-imf-fp64.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-imf.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-imf.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/sycl-device-lib-win.cpp b/clang/test/Driver/sycl-device-lib-win.cpp index 329cbd6bc3afe..f3b3c76499f24 100644 --- a/clang/test/Driver/sycl-device-lib-win.cpp +++ b/clang/test/Driver/sycl-device-lib-win.cpp @@ -25,12 +25,15 @@ // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-msvc-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test sycl fallback device libraries are not linked when using online link. // RUN: %clangxx -fsycl -fsycl-device-lib-online-link %s --sysroot=%S/Inputs/SYCL-windows -### 2>&1 \ @@ -57,13 +60,16 @@ // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-msvc-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.obj" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test behavior of -fno-sycl-device-lib=libc @@ -74,20 +80,26 @@ // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-msvc-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.obj" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test behavior of -fno-sycl-device-lib=libm-fp32,libm-fp64 // RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32,libm-fp64 --sysroot=%S/Inputs/SYCL-windows -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test behavior of disabling all device libraries @@ -134,12 +146,16 @@ // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-msvc-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.obj" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings" /// ########################################################################### diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp index 5543ef085b31c..542db07142c20 100644 --- a/clang/test/Driver/sycl-device-lib.cpp +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -24,13 +24,16 @@ // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test sycl fallback device libraries are not linked by default // RUN: %clangxx -fsycl -fsycl-device-lib-online-link %s --sysroot=%S/Inputs/SYCL -### 2>&1 \ @@ -56,13 +59,16 @@ // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test behavior of -fno-sycl-device-lib=libc @@ -72,20 +78,26 @@ // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test behavior of -fno-sycl-device-lib=libm-fp32,libm-fp64 // RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32,libm-fp64 --sysroot=%S/Inputs/SYCL -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test behavior of disabling all device libraries @@ -131,12 +143,16 @@ // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-user-wrappers.o" "-output={{.*}}libsycl-itt-user-wrappers-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-compiler-wrappers.o" "-output={{.*}}libsycl-itt-compiler-wrappers-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-stubs.o" "-output={{.*}}libsycl-itt-stubs-{{.*}}.o" "-unbundle" diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index bba16a7549179..569bf018b7f02 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -3,13 +3,21 @@ if (WIN32) set(lib-suffix obj) set(spv_binary_dir "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") set(install_dest_spv bin) + set(devicelib_host_static sycl-devicelib-host.lib) else() set(lib-suffix o) set(spv_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") set(install_dest_spv lib${LLVM_LIBDIR_SUFFIX}) + set(devicelib_host_static libsycl-devicelib-host.a) endif() set(install_dest_lib lib${LLVM_LIBDIR_SUFFIX}) + set(clang $) +set(llvm-link $) +set(llc $) +set(llvm-spirv $) +set(llvm-ar $) +set(clang-offload-bundler $) string(CONCAT sycl_targets_opt "-fsycl-targets=" @@ -87,6 +95,7 @@ endfunction() set(crt_obj_deps wrapper.h device.h spirv_vars.h sycl-compiler) set(complex_obj_deps device_complex.h device.h sycl-compiler) set(cmath_obj_deps device_math.h device.h sycl-compiler) +set(imf_obj_deps device_imf.hpp imf_half.hpp device.h sycl-compiler) set(itt_obj_deps device_itt.h spirv_vars.h device.h sycl-compiler) add_devicelib_obj(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps}) @@ -98,6 +107,8 @@ add_devicelib_obj(libsycl-complex SRC complex_wrapper.cpp DEP ${complex_obj_deps add_devicelib_obj(libsycl-complex-fp64 SRC complex_wrapper_fp64.cpp DEP ${complex_obj_deps} ) add_devicelib_obj(libsycl-cmath SRC cmath_wrapper.cpp DEP ${cmath_obj_deps}) add_devicelib_obj(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp DEP ${cmath_obj_deps} ) +add_devicelib_obj(libsycl-imf SRC imf_wrapper.cpp DEP ${imf_obj_deps}) +add_devicelib_obj(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEP ${imf_obj_deps}) if(WIN32) add_devicelib_obj(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps}) endif() @@ -108,3 +119,163 @@ add_fallback_devicelib(libsycl-fallback-complex SRC fallback-complex.cpp DEP ${c add_fallback_devicelib(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp DEP ${complex_obj_deps} ) add_fallback_devicelib(libsycl-fallback-cmath SRC fallback-cmath.cpp DEP ${cmath_obj_deps}) add_fallback_devicelib(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp DEP ${cmath_obj_deps}) + +# imf fallback is different, we have many separate sources instead of single one including all functions. +# So, we need to combine all LLVM IR to a complete one and run llvm-spirv for it. +file(MAKE_DIRECTORY ${obj_binary_dir}/libdevice) +set(bc_binary_dir ${obj_binary_dir}/libdevice) + +set(fallback-imf-src imf_utils/float_convert.cpp + imf_utils/half_convert.cpp + imf_utils/integer_misc.cpp + imf/imf_inline_fp32.cpp) +set(fallback-imf-fp64-src imf_utils/double_convert.cpp + imf/imf_inline_fp64.cpp) +set(wrapper-imf-src imf_wrapper.cpp imf_wrapper_fp64.cpp) +set(imf-src ${wrapper-imf-src} ${fallback-imf-src} ${fallback-imf-fp64-src}) + +add_custom_target(imf-fallback-spv + COMMAND ${llvm-spirv} + ${bc_binary_dir}/fallback-imf-spir64-unknown-unknown.bc + -o ${spv_binary_dir}/libsycl-fallback-imf.spv) +add_custom_target(imf-fp64-fallback-spv + COMMAND ${llvm-spirv} + ${bc_binary_dir}/fallback-imf-fp64-spir64-unknown-unknown.bc + -o ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv) + +add_dependencies(libsycldevice-spv imf-fallback-spv) +add_dependencies(libsycldevice-spv imf-fp64-fallback-spv) +install(FILES ${spv_binary_dir}/libsycl-fallback-imf.spv + ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv + DESTINATION ${install_dest_spv} + COMPONENT libsycldevice) + +set(sycl_offload_targets sycl-spir64_x86_64-unknown-unknown + sycl-spir64_gen-unknown-unknown + sycl-spir64_fpga-unknown-unknown + sycl-spir64-unknown-unknow + host-x86_64-unknown-linux-gnu) + +string(REPLACE ";" "," sycl_offload_targets "${sycl_offload_targets}") +set(imf-offload-inputs ${bc_binary_dir}/fallback-imf-spir64-unknown-unknown.bc + ${bc_binary_dir}/fallback-imf-spir64_x86_64-unknown-unknown.bc + ${bc_binary_dir}/fallback-imf-spir64_gen-unknown-unknown.bc + ${bc_binary_dir}/fallback-imf-spir64_fpga-unknown-unknown.bc + ${bc_binary_dir}/fallback-imf-dummy-host.bc) +string(REPLACE ";" "," imf-offload-inputs "${imf-offload-inputs}") +add_custom_target(imf-fallback-obj + COMMAND ${clang-offload-bundler} -type=o -targets=${sycl_offload_targets} + -outputs=${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} + -inputs=${imf-offload-inputs}) + +add_dependencies(libsycldevice-obj imf-fallback-obj) + +set(imf-fp64-offload-inputs ${bc_binary_dir}/fallback-imf-fp64-spir64-unknown-unknown.bc + ${bc_binary_dir}/fallback-imf-fp64-spir64_x86_64-unknown-unknown.bc + ${bc_binary_dir}/fallback-imf-fp64-spir64_gen-unknown-unknown.bc + ${bc_binary_dir}/fallback-imf-fp64-spir64_fpga-unknown-unknown.bc + ${bc_binary_dir}/fallback-imf-fp64-dummy-host.bc) +string(REPLACE ";" "," imf-fp64-offload-inputs "${imf-fp64-offload-inputs}") +add_custom_target(imf-fp64-fallback-obj + COMMAND ${clang-offload-bundler} -type=o -targets=${sycl_offload_targets} + -outputs=${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} + -inputs=${imf-fp64-offload-inputs}) + +add_dependencies(libsycldevice-obj imf-fp64-fallback-obj) + +install(FILES ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} + ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} + DESTINATION ${install_dest_lib} + COMPONENT libsycldevice) + +function(add_devicelib_bc src_file sycl_target) + cmake_parse_arguments(BC "" "" "DEPS;DEPED" ${ARGN}) + get_filename_component(fn ${src_file} NAME_WE) + set(temp_bc_fn ${fn}-${sycl_target}.bc) + set(devicelib-bc ${bc_binary_dir}/${temp_bc_fn}) + if(sycl_target STREQUAL "dummy-host") + set(bc_compile_flags -c -emit-llvm) + elseif(sycl_target STREQUAL "host") + set(bc_compile_flags -c -emit-llvm -D__LIBDEVICE_HOST_IMPL__) + else() + set(bc_compile_flags -fsycl -fsycl-device-only -fsycl-targets=${sycl_target}) + endif() + if (WIN32) + list(APPEND bc_compile_flags -D_ALLOW_RUNTIME_LIBRARY_MISMATCH) + list(APPEND bc_compile_flags -D_ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH) + endif() + add_custom_command(OUTPUT ${devicelib-bc} + COMMAND ${clang} ${bc_compile_flags} + ${CMAKE_CURRENT_SOURCE_DIR}/${src_file} + -o ${devicelib-bc} + MAIN_DEPENDENCY ${src_file} + DEPENDS ${BC_DEPS} + VERBATIM) + add_custom_target(${temp_bc_fn} DEPENDS ${devicelib-bc}) + add_dependencies(${BC_DEPED} ${temp_bc_fn}) +endfunction() + +function(merge_devicelib_bc bc_filename sycl_target) + cmake_parse_arguments(FBC "" "" "SRCS;DEPS;DEPED" ${ARGN}) + set(bc_file_list) + foreach(src ${FBC_SRCS}) + get_filename_component(fn ${src} NAME_WE) + set(temp_bc_fn ${fn}-${sycl_target}.bc) + list(APPEND bc_file_list ${bc_binary_dir}/${temp_bc_fn}) + endforeach() + set(bc_target ${bc_filename}-${sycl_target}) + add_custom_target(${bc_target} + COMMAND ${llvm-link} ${bc_file_list} -o ${bc_binary_dir}/${bc_target}.bc + VERBATIM) + foreach(src ${FBC_SRCS}) + add_devicelib_bc(${src} ${sycl_target} + DEPS ${FBC_DEPS} + DEPED ${bc_target}) + endforeach() + foreach(deped ${FBC_DEPED}) + add_dependencies(${deped} ${bc_target}) + endforeach() +endfunction() + +set(imf_sycl_targets spir64_x86_64-unknown-unknown + spir64_gen-unknown-unknown + spir64_fpga-unknown-unknown + spir64-unknown-unknown + dummy-host) + +foreach(imf_target ${imf_sycl_targets}) + if(imf_target STREQUAL "spir64-unknown-unknown") + set(deped_list imf-fallback-obj imf-fallback-spv) + set(deped64_list imf-fp64-fallback-obj imf-fp64-fallback-spv) + else() + set(deped_list imf-fallback-obj) + set(deped64_list imf-fp64-fallback-obj) + endif() + merge_devicelib_bc(fallback-imf ${imf_target} + SRCS ${fallback-imf-src} + DEPS ${imf_obj_deps} + DEPED ${deped_list}) + + merge_devicelib_bc(fallback-imf-fp64 ${imf_target} + SRCS ${fallback-imf-fp64-src} + DEPS ${imf_obj_deps} + DEPED ${deped64_list}) +endforeach() + +add_custom_target(imf-host-obj + COMMAND ${llc} -filetype=obj + ${bc_binary_dir}/imf-host.bc -o + ${bc_binary_dir}/imf-host.${lib-suffix} + COMMAND ${llvm-ar} rcs ${obj_binary_dir}/${devicelib_host_static} ${bc_binary_dir}/imf-host.${lib-suffix} + VERBATIM) + +add_dependencies(libsycldevice imf-host-obj) + +install(FILES ${obj_binary_dir}/${devicelib_host_static} + DESTINATION ${install_dest_lib} + COMPONENT libsycldevice) + +merge_devicelib_bc(imf host + SRCS ${imf-src} + DEPS ${imf_obj_deps} + DEPED imf-host-obj) diff --git a/libdevice/device.h b/libdevice/device.h index e85a9bc91593e..710df0b25efcb 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -27,4 +27,13 @@ DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline)) #endif // __SPIR__ +#if defined(__SPIR__) || defined(__LIBDEVICE_HOST_IMPL__) +#define __LIBDEVICE_IMF_ENABLED__ +#endif // __SPIR__ || __LIBDEVICE_HOST_IMPL__ + +#ifdef __LIBDEVICE_HOST_IMPL__ +#define DEVICE_EXTERN_C __attribute__((weak)) EXTERN_C +#define DEVICE_EXTERN_C_INLINE DEVICE_EXTERN_C __attribute__((always_inline)) +#endif // __LIBDEVICE_HOST_IMPL__ + #endif // __LIBDEVICE_DEVICE_H__ diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp new file mode 100644 index 0000000000000..9466f2ce3970a --- /dev/null +++ b/libdevice/device_imf.hpp @@ -0,0 +1,536 @@ +//==------- device_imf.h - intel math devicelib functions declarations------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//==------------------------------------------------------------------------==// + +#ifndef __LIBDEVICE_DEVICE_IMF_H__ +#define __LIBDEVICE_DEVICE_IMF_H__ + +#include "device.h" +#include "imf_half.hpp" +#include +#include + +#ifdef __LIBDEVICE_IMF_ENABLED__ + +#if !defined(__SPIR__) && !defined(__LIBDEVICE_HOST_IMPL__) +#error \ + "__SPIR__ or __LIBDEVICE_HOST_IMPL__ must be defined to enable device imf functions!" +#endif + +// TODO: Bitcast is valid to trivially copyable object only but using +// is_trivially_copyable check will lead to compiling error in some +// pre-ci tests, the pre-ci environment used some legacy c++ std library +// which doesn't include this function. Need to report to pre-ci owners. +template +static inline constexpr To __bit_cast(const From &from) { + static_assert(sizeof(To) == sizeof(From), + "Can't do bit cast between 2 types with different sizes!"); + /*static_assert(std::is_trivially_copyable::value && + std::is_trivially_copyable::value, + "Can't do bit cast for type which is not trivially + copyable!");*/ + return __builtin_bit_cast(To, from); +} + +#if defined(__LIBDEVICE_HOST_IMPL__) +#include +#pragma STDC FENV_ACCESS ON + +template static inline Tp __double2Tp_host(double x, int rdMode) { + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, + "Invalid type for double conversion!"); + + const int roundingOriginal = fegetround(); + fesetround(rdMode); + Tp res; + if (std::is_same::value) + res = static_cast(x); + else + res = static_cast(__builtin_nearbyint(x)); + fesetround(roundingOriginal); + return res; +} + +template static inline Tp __float2Tp_host(float x, int rdMode) { + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, + "Invalid type for float conversion!"); + + const int roundingOriginal = fegetround(); + fesetround(rdMode); + Tp res = static_cast(__builtin_nearbyintf(x)); + fesetround(roundingOriginal); + return res; +} + +template +static inline TyFP __integral2FP_host(TyINT x, int rdMode) { + static_assert((std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && + (std::is_same::value || + std::is_same::value), + "Invalid integral to FP conversion!"); + const int roundingOriginal = fegetround(); + fesetround(rdMode); + TyFP res = static_cast(x); + fesetround(roundingOriginal); + return res; +} +#endif // __LIBDEVICE_HOST_IMPL__ + +static inline float __fclamp(float x, float y, float z) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fmin(__builtin_fmax(x, y), z); +#elif defined(__SPIR__) + return __spirv_ocl_fclamp(x, y, z); +#endif +} + +// fma for float, double, half math, covers both device and host. +static inline float __fma(float x, float y, float z) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fmaf(x, y, z); +#elif defined(__SPIR__) + return __spirv_ocl_fma(x, y, z); +#endif +} + +static inline double __fma(double x, double y, double z) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fma(x, y, z); +#elif defined(__SPIR__) + return __spirv_ocl_fma(x, y, z); +#endif +} + +static inline _iml_half __fma(_iml_half x, _iml_half y, _iml_half z) { + _iml_half_internal x_i = x.get_internal(); + _iml_half_internal y_i = y.get_internal(); + _iml_half_internal z_i = z.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float tmp_y = __half2float(y_i); + float tmp_z = __half2float(z_i); + float res = __builtin_fmaf(tmp_x, tmp_y, tmp_z); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_fma(x_i, y_i, z_i)); +#endif +} + +// sqrt for float, double, half math, covers both device and host. +static inline float __sqrt(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_sqrtf(x); +#elif defined(__SPIR__) + return __spirv_ocl_sqrt(x); +#endif +} + +static inline double __sqrt(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_sqrt(x); +#elif defined(__SPIR__) + return __spirv_ocl_sqrt(x); +#endif +} + +static inline _iml_half __sqrt(_iml_half x) { + _iml_half_internal x_i = x.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float res = __builtin_sqrtf(tmp_x); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_sqrt(x_i)); +#endif +} + +// rsqrt for float, double, half math, covers both device and host. +static inline float __rsqrt(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return 1.f / __builtin_sqrtf(x); +#elif defined(__SPIR__) + return __spirv_ocl_rsqrt(x); +#endif +} + +static inline double __rsqrt(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return 1.0 / __builtin_sqrt(x); +#elif defined(__SPIR__) + return __spirv_ocl_rsqrt(x); +#endif +} + +static inline _iml_half __rsqrt(_iml_half x) { + _iml_half_internal x_i = x.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float res = 1.f / __builtin_sqrtf(tmp_x); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_rsqrt(x_i)); +#endif +} + +// fmin for float, double, half math, covers both device and host. +static inline float __fmin(float x, float y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fminf(x, y); +#elif defined(__SPIR__) + return __spirv_ocl_fmin(x, y); +#endif +} + +static inline double __fmin(double x, double y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fmin(x, y); +#elif defined(__SPIR__) + return __spirv_ocl_fmin(x, y); +#endif +} + +static inline _iml_half __fmin(_iml_half x, _iml_half y) { + _iml_half_internal x_i = x.get_internal(); + _iml_half_internal y_i = y.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float tmp_y = __half2float(y_i); + float res = __builtin_fminf(tmp_x, tmp_y); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_fmin(x_i, y_i)); +#endif +} + +// fmax for float, double, half math, covers both device and host. +static inline float __fmax(float x, float y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fmaxf(x, y); +#elif defined(__SPIR__) + return __spirv_ocl_fmax(x, y); +#endif +} + +static inline double __fmax(double x, double y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fmax(x, y); +#elif defined(__SPIR__) + return __spirv_ocl_fmax(x, y); +#endif +} + +static inline _iml_half __fmax(_iml_half x, _iml_half y) { + _iml_half_internal x_i = x.get_internal(); + _iml_half_internal y_i = y.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float tmp_y = __half2float(y_i); + float res = __builtin_fmaxf(tmp_x, tmp_y); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_fmax(x_i, y_i)); +#endif +} + +// copysign for float, double, half math, covers both device and host. +static inline float __copysign(float x, float y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_copysignf(x, y); +#elif defined(__SPIR__) + return __spirv_ocl_copysign(x, y); +#endif +} + +static inline double __copysign(double x, double y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_copysign(x, y); +#elif defined(__SPIR__) + return __spirv_ocl_copysign(x, y); +#endif +} + +static inline _iml_half __copysign(_iml_half x, _iml_half y) { + _iml_half_internal x_i = x.get_internal(); + _iml_half_internal y_i = y.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float tmp_y = __half2float(y_i); + float res = __builtin_copysignf(tmp_x, tmp_y); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_copysign(x_i, y_i)); +#endif +} + +// fabs for float, double, half math, covers both device and host. +static inline float __fabs(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fabsf(x); +#elif defined(__SPIR__) + return __spirv_ocl_fabs(x); +#endif +} + +static inline double __fabs(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fabs(x); +#elif defined(__SPIR__) + return __spirv_ocl_fabs(x); +#endif +} + +static inline _iml_half __fabs(_iml_half x) { + _iml_half_internal x_i = x.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float res = __builtin_fabsf(tmp_x); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_fabs(x_i)); +#endif +} + +// rint for float, double, half math, covers both device and host. +static inline float __rint(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_rintf(x); +#elif defined(__SPIR__) + return __spirv_ocl_rint(x); +#endif +} + +static inline double __rint(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_rint(x); +#elif defined(__SPIR__) + return __spirv_ocl_rint(x); +#endif +} + +static inline _iml_half __rint(_iml_half x) { + _iml_half_internal x_i = x.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float res = __builtin_rintf(tmp_x); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_rint(x_i)); +#endif +} + +// floor for float, double, half math, covers both device and host. +static inline float __floor(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_floorf(x); +#elif defined(__SPIR__) + return __spirv_ocl_floor(x); +#endif +} + +static inline double __floor(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_floor(x); +#elif defined(__SPIR__) + return __spirv_ocl_floor(x); +#endif +} + +static inline _iml_half __floor(_iml_half x) { + _iml_half_internal x_i = x.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float res = __builtin_floorf(tmp_x); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_floor(x_i)); +#endif +} + +// ceil for float, double, half math, covers both device and host. +static inline float __ceil(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_ceilf(x); +#elif defined(__SPIR__) + return __spirv_ocl_ceil(x); +#endif +} + +static inline double __ceil(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_ceil(x); +#elif defined(__SPIR__) + return __spirv_ocl_ceil(x); +#endif +} + +static inline _iml_half __ceil(_iml_half x) { + _iml_half_internal x_i = x.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float res = __builtin_ceilf(tmp_x); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_ceil(x_i)); +#endif +} + +// trunc for float, double, half math, covers both device and host. +static inline float __trunc(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_truncf(x); +#elif defined(__SPIR__) + return __spirv_ocl_trunc(x); +#endif +} + +static inline double __trunc(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_trunc(x); +#elif defined(__SPIR__) + return __spirv_ocl_trunc(x); +#endif +} + +static inline _iml_half __trunc(_iml_half x) { + _iml_half_internal x_i = x.get_internal(); +#if defined(__LIBDEVICE_HOST_IMPL__) + float tmp_x = __half2float(x_i); + float res = __builtin_truncf(tmp_x); + return _iml_half(__float2half(res)); +#elif defined(__SPIR__) + return _iml_half(__spirv_ocl_trunc(x_i)); +#endif +} + +static inline int __clz(int x) { + if (x == 0) + return 32; + uint32_t xi32 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_clz(xi32); +#elif defined(__SPIR__) + return __spirv_ocl_clz(xi32); +#endif +} + +static inline int __clzll(long long int x) { + if (x == 0) + return 64; + uint64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_clzll(xi64); +#elif defined(__SPIR__) + return __spirv_ocl_clz(xi64); +#endif +} + +static inline int __popc(unsigned int x) { + uint32_t xui32 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_popcount(xui32); +#elif defined(__SPIR__) + return __spirv_ocl_popcount(xui32); +#endif +} + +static inline int __popcll(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_popcountll(xui64); +#elif defined(__SPIR__) + return __spirv_ocl_popcount(xui64); +#endif +} + +static inline unsigned int __abs(int x) { return x < 0 ? -x : x; } + +static inline unsigned long long int __abs(long long int x) { + return x < 0 ? -x : x; +} + +template +static inline Ty2 __get_bytes_by_index(Ty1 x, size_t idx) { + static_assert(!std::is_signed::value && !std::is_signed::value, + "__get_bytes_by_index can only accept unsigned value."); + static_assert(std::is_integral::value && std::is_integral::value, + "__get_bytes_by_index can only accept integral type."); + size_t bits_shift = idx * sizeof(Ty2) * 8; + Ty1 mask1 = static_cast(-1); + x >>= bits_shift; + x = x & mask1; + return static_cast(x); +} + +template +Ty1 __assemble_integral_value(Ty2 *x) { + static_assert(!std::is_signed::value && !std::is_signed::value, + "__assemble_integeral_value can only accept unsigned value."); + static_assert(std::is_integral::value && std::is_integral::value, + "__assemble_integeral_value can only accept integral value."); + static_assert(sizeof(Ty1) == N * sizeof(Ty2), + "size mismatch for __assemble_integeral_value"); + Ty1 res = 0; + for (size_t idx = 0; idx < N; ++idx) { + res <<= sizeof(Ty2) * 8; + res |= static_cast(x[N - 1 - idx]); + } + return res; +} + +template static inline Ty __uhadd(Ty x, Ty y) { + static_assert(std::is_integral::value && !std::is_signed::value, + "__uhadd can only accept unsigned integral type."); +#if defined(__LIBDEVICE_HOST_IMPL__) + return (x >> 1) + (y >> 1) + ((x & y) & 0x1); +#elif defined(__SPIR__) + return __spirv_ocl_u_hadd(x, y); +#endif +} + +template static inline Ty __shadd(Ty x, Ty y) { + static_assert(std::is_integral::value && std::is_signed::value, + "__shadd can only accept signed integral type."); +#if defined(__LIBDEVICE_HOST_IMPL__) + return (x >> 1) + (y >> 1) + ((x & y) & 0x1); +#elif defined(__SPIR__) + return __spirv_ocl_s_hadd(x, y); +#endif +} + +template static inline Ty __urhadd(Ty x, Ty y) { + static_assert(std::is_integral::value && !std::is_signed::value, + "__urhadd can only accept unsigned integral type."); +#if defined(__LIBDEVICE_HOST_IMPL__) + return (x >> 1) + (y >> 1) + ((x | y) & 0x1); +#elif defined(__SPIR__) + return __spirv_ocl_u_rhadd(x, y); +#endif +} + +template static inline Ty __srhadd(Ty x, Ty y) { + static_assert(std::is_integral::value && std::is_signed::value, + "__srhadd can only accept signed integral type."); +#if defined(__LIBDEVICE_HOST_IMPL__) + return (x >> 1) + (y >> 1) + ((x | y) & 0x1); +#elif defined(__SPIR__) + return __spirv_ocl_s_rhadd(x, y); +#endif +} +#endif // __LIBDEVICE_IMF_ENABLED__ +#endif // __LIBDEVICE_DEVICE_IMF_H__ diff --git a/libdevice/device_math.h b/libdevice/device_math.h index 14a2634c56641..eb86baba931ae 100644 --- a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -288,5 +288,6 @@ float __devicelib_scalbnf(float x, int n); DEVICE_EXTERN_C double __devicelib_scalbn(double x, int exp); + #endif // __SPIR__ #endif // __LIBDEVICE_DEVICE_MATH_H__ diff --git a/libdevice/imf/imf_inline_fp32.cpp b/libdevice/imf/imf_inline_fp32.cpp new file mode 100644 index 0000000000000..0273a1c4902ed --- /dev/null +++ b/libdevice/imf/imf_inline_fp32.cpp @@ -0,0 +1,141 @@ +//==----- imf_inline_fp32.cpp - some fp32 trivial intel math functions -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "../device_imf.hpp" +#ifdef __LIBDEVICE_IMF_ENABLED__ + +DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_fmaf16( + _iml_half_internal a, _iml_half_internal b, _iml_half_internal c) { + _iml_half ha(a), hb(b), hc(c); + return __fma(ha, hb, hc).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_floorf16(_iml_half_internal x) { + _iml_half hx(x); + return __floor(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_ceilf16(_iml_half_internal x) { + _iml_half hx(x); + return __ceil(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_truncf16(_iml_half_internal x) { + _iml_half hx(x); + return __trunc(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_rintf16(_iml_half_internal x) { + _iml_half hx(x); + return __rint(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_nearbyintf16(_iml_half_internal x) { + _iml_half hx(x); + return __rint(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_sqrtf16(_iml_half_internal a) { + _iml_half ha(a); + return __sqrt(ha).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_rsqrtf16(_iml_half_internal a) { + _iml_half ha(a); + return __rsqrt(ha).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_invf16(_iml_half_internal a) { + _iml_half ha(a), h1(1.0f); + return (h1 / ha).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_fmaxf16(_iml_half_internal a, _iml_half_internal b) { + _iml_half ha(a), hb(b); + return __fmax(ha, hb).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_fminf16(_iml_half_internal a, _iml_half_internal b) { + _iml_half ha(a), hb(b); + return __fmin(ha, hb).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_fabsf16(_iml_half_internal x) { + _iml_half hx(x); + return __fabs(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_copysignf16(_iml_half_internal a, _iml_half_internal b) { + _iml_half ha(a), hb(b); + return __copysign(ha, hb).get_internal(); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_saturatef(float x) { return __fclamp(x, .0f, 1.f); } + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_fmaf(float a, float b, float c) { + return __fma(a, b, c); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_floorf(float x) { + return __floor(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_ceilf(float x) { + return __ceil(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_truncf(float x) { + return __trunc(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_rintf(float x) { + return __rint(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_nearbyintf(float x) { + return __rint(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_sqrtf(float a) { + return __sqrt(a); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_rsqrtf(float a) { + return __rsqrt(a); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_invf(float a) { return 1.0f / a; } + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_fmaxf(float a, float b) { + return __fmax(a, b); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_fminf(float a, float b) { + return __fmin(a, b); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_fabsf(float x) { + return __fabs(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_copysignf(float a, float b) { + return __copysign(a, b); +} +#endif /*__LIBDEVICE_IMF_ENABLED__*/ diff --git a/libdevice/imf/imf_inline_fp64.cpp b/libdevice/imf/imf_inline_fp64.cpp new file mode 100644 index 0000000000000..a0c566ff1d2ea --- /dev/null +++ b/libdevice/imf/imf_inline_fp64.cpp @@ -0,0 +1,61 @@ +//==----- imf_inline_fp64.cpp - some fp64 trivial intel math functions -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "../device_imf.hpp" +#ifdef __LIBDEVICE_IMF_ENABLED__ + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_fma(double a, double b, + double c) { + return __fma(a, b, c); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_floor(double x) { + return __floor(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_ceil(double x) { + return __ceil(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_trunc(double x) { + return __trunc(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_rint(double x) { + return __rint(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_nearbyint(double x) { + return __rint(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_sqrt(double a) { + return __sqrt(a); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_rsqrt(double a) { + return 1.0 / __sqrt(a); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_inv(double a) { return 1.0 / a; } + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_fmax(double a, double b) { + return __fmax(a, b); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_fmin(double a, double b) { + return __fmin(a, b); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_fabs(double x) { + return __fabs(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_copysign(double a, double b) { + return __copysign(a, b); +} +#endif /*__LIBDEVICE_IMF_ENABLED__*/ diff --git a/libdevice/imf_half.hpp b/libdevice/imf_half.hpp new file mode 100644 index 0000000000000..af8a32a389ae9 --- /dev/null +++ b/libdevice/imf_half.hpp @@ -0,0 +1,261 @@ +//==--------- imf_half.hpp - half emulation for intel math functions -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//==------------------------------------------------------------------------==// + +#ifndef __LIBDEVICE_HALF_EMUL_H__ +#define __LIBDEVICE_HALF_EMUL_H__ + +#include "device.h" +#include + +#ifdef __LIBDEVICE_IMF_ENABLED__ + +#if defined(__SPIR__) +typedef _Float16 _iml_half_internal; +#else +typedef uint16_t _iml_half_internal; +#endif + +// TODO: need to support float to half conversion with different +// rounding mode. +static inline _iml_half_internal __float2half(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + uint32_t fp32_bits = __builtin_bit_cast(uint32_t, x); + + const uint16_t sign = (fp32_bits & 0x80000000) >> 16; + const uint32_t frac32 = fp32_bits & 0x7fffff; + const uint8_t exp32 = (fp32_bits & 0x7f800000) >> 23; + const int16_t exp32_diff = exp32 - 127; + + // initialize to 0, covers the case for 0 and small numbers + uint16_t exp16 = 0, frac16 = 0; + + if (__builtin_expect(exp32_diff > 15, 0)) { + // Infinity and big numbers convert to infinity + exp16 = 0x1f; + } else if (__builtin_expect(exp32_diff > -14, 0)) { + // normal range for half type + exp16 = exp32_diff + 15; + // convert 23-bit mantissa to 10-bit mantissa. + frac16 = frac32 >> 13; + if (frac32 >> 12 & 0x01) + frac16 += 1; + } else if (__builtin_expect(exp32_diff > -24, 0)) { + // subnormals + frac16 = (frac32 | (uint32_t(1) << 23)) >> (-exp32_diff - 1); + } + + if (__builtin_expect(exp32 == 0xff && frac32 != 0, 0)) { + // corner case: FP32 is NaN + exp16 = 0x1F; + frac16 = 0x200; + } + + // Compose the final FP16 binary + uint16_t res = 0; + res |= sign; + res |= exp16 << 10; + res += frac16; // Add the carry bit from operation Frac16 += 1; + + return res; +#elif defined(__SPIR__) + return __spirv_FConvert_Rhalf_rte(x); +#endif +} + +static inline float __half2float(_iml_half_internal x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + // Extract the sign from the bits. It is 1 if the sign is negative + const uint32_t sign = static_cast(x & 0x8000) << 16; + // Extract the exponent from the bits + const uint8_t exp16 = (x & 0x7c00) >> 10; + // Extract the fraction from the bits + uint16_t frac16 = x & 0x3ff; + + uint32_t exp32 = 0; + if (__builtin_expect(exp16 == 0x1f, 0)) { + exp32 = 0xff; + } else if (__builtin_expect(exp16 == 0, 0)) { + exp32 = 0; + } else { + exp32 = static_cast(exp16) + 112; + } + // corner case: subnormal -> normal + // The denormal number of FP16 can be represented by FP32, therefore we need + // to recover the exponent and recalculate the fration. + if (__builtin_expect(exp16 == 0 && frac16 != 0, 0)) { + uint8_t offset = 0; + do { + ++offset; + frac16 <<= 1; + } while ((frac16 & 0x400) != 0x400); + // mask the 9th bit + frac16 &= 0x3ff; + exp32 = 113 - offset; + } + + uint32_t frac32 = frac16 << 13; + + uint32_t fp32_bits = 0; + fp32_bits |= sign; + fp32_bits |= (exp32 << 23); + fp32_bits |= frac32; + return __builtin_bit_cast(float, fp32_bits); +#elif defined(__SPIR__) + return __spirv_FConvert_Rfloat_rte(x); +#endif +} + +class _iml_half { +public: + _iml_half(_iml_half_internal h) : _half_internal(h) {} + _iml_half() = default; + _iml_half(const _iml_half &) = default; + _iml_half &operator=(const _iml_half &rh) = default; + _iml_half &operator=(float fval) { + _half_internal = __float2half(fval); + return *this; + } + _iml_half(float fval) : _half_internal(__float2half(fval)) {} + explicit operator float() const { return __half2float(_half_internal); } + + _iml_half_internal get_internal() const { return _half_internal; } + bool operator==(const _iml_half &rh) { + return _half_internal == rh._half_internal; + } + bool operator!=(const _iml_half &rh) { return !operator==(rh); } +#if (__SPIR__) + _iml_half &operator+=(const _iml_half &rh) { + _half_internal += rh._half_internal; + return *this; + } + _iml_half &operator-=(const _iml_half &rh) { + _half_internal -= rh._half_internal; + return *this; + } + _iml_half &operator*=(const _iml_half &rh) { + _half_internal *= rh._half_internal; + return *this; + } + _iml_half &operator/=(const _iml_half &rh) { + _half_internal /= rh._half_internal; + return *this; + } + _iml_half &operator++() { + _half_internal += 1; + return *this; + } + _iml_half operator++(int) { + _iml_half res(*this); + operator++(); + return res; + } + _iml_half &operator--() { + _half_internal -= 1; + return *this; + } + _iml_half operator--(int) { + _iml_half res(*this); + operator--(); + return res; + } + + _iml_half operator-() { + _iml_half res(-_half_internal); + return res; + } + + bool operator<(const _iml_half &rh) { + return _half_internal < rh._half_internal; + } + bool operator>(const _iml_half &rh) { + return _half_internal > rh._half_internal; + } +#else + _iml_half &operator+=(const _iml_half &rh) { + *this = (operator float() + static_cast(rh)); + return *this; + } + _iml_half &operator-=(const _iml_half &rh) { + *this = (operator float() - static_cast(rh)); + return *this; + } + _iml_half &operator*=(const _iml_half &rh) { + *this = (operator float() * static_cast(rh)); + return *this; + } + _iml_half &operator/=(const _iml_half &rh) { + *this = (operator float() / static_cast(rh)); + return *this; + } + _iml_half &operator++() { + *this = operator float() + 1; + return *this; + } + _iml_half operator++(int) { + _iml_half res(*this); + operator++(); + return res; + } + _iml_half &operator--() { + *this = operator float() - 1; + return *this; + } + _iml_half operator--(int) { + _iml_half res(*this); + operator--(); + return res; + } + + _iml_half operator-() { + _iml_half res(-operator float()); + return res; + } + + bool operator<(const _iml_half &rh) { + return operator float() < static_cast(rh); + } + bool operator>(const _iml_half &rh) { + return operator float() > static_cast(rh); + } +#endif + _iml_half operator+(const _iml_half &rh) { + _iml_half res(*this); + res += rh; + return res; + } + + _iml_half operator-(const _iml_half &rh) { + _iml_half res(*this); + res -= rh; + return res; + } + + _iml_half operator*(const _iml_half &rh) { + _iml_half res(*this); + res *= rh; + return res; + } + + _iml_half operator/(const _iml_half &rh) { + _iml_half res(*this); + res /= rh; + return res; + } + bool operator<=(const _iml_half &rh) { + return operator<(rh) || operator==(rh); + } + bool operator>=(const _iml_half &rh) { + return operator>(rh) || operator==(rh); + } + +private: + _iml_half_internal _half_internal; +}; + +#endif // __LIBDEVICE_IMF_ENABLED__ +#endif // __LIBDEVICE_HALF_EMUL_H__ diff --git a/libdevice/imf_utils/double_convert.cpp b/libdevice/imf_utils/double_convert.cpp new file mode 100644 index 0000000000000..6506f8a40e250 --- /dev/null +++ b/libdevice/imf_utils/double_convert.cpp @@ -0,0 +1,444 @@ +//==-- double_convert.cpp - fallback implementation of double to other type +// convert--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../device_imf.hpp" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + +static inline float __double2float_rd(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_FConvert_Rfloat_rtn(x); +#endif +} + +static inline float __double2float_rn(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_FConvert_Rfloat_rte(x); +#endif +} + +static inline float __double2float_ru(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_FConvert_Rfloat_rtp(x); +#endif +} + +static inline float __double2float_rz(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_FConvert_Rfloat_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_double2float_rd(double x) { return __double2float_rd(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_double2float_rn(double x) { return __double2float_rn(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_double2float_ru(double x) { return __double2float_ru(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_double2float_rz(double x) { return __double2float_rz(x); } + +static inline int __double2int_rd(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rint_rtn(x); +#endif +} + +static inline int __double2int_rn(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rint_rte(x); +#endif +} + +static inline int __double2int_ru(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rint_rtp(x); +#endif +} + +static inline int __double2int_rz(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rint_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2int_rd(double x) { return __double2int_rd(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2int_rn(double x) { return __double2int_rn(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2int_ru(double x) { return __double2int_ru(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2int_rz(double x) { return __double2int_rz(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2hiint(double x) { + uint64_t tmp = __bit_cast(x); + tmp = tmp >> 32; + return static_cast(tmp); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2loint(double x) { + uint64_t tmp = __bit_cast(x); + return static_cast(tmp); +} + +// __spirv_ConvertFToU_Ruint_rtn/e/p/z have different behaviors +// on CPU and GPU device when input value is negative. +static inline unsigned int __double2uint_rd(double x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Ruint_rtn(x); +#endif +} + +static inline unsigned int __double2uint_rn(double x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Ruint_rte(x); +#endif +} + +static inline unsigned int __double2uint_ru(double x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Ruint_rtp(x); +#endif +} + +static inline unsigned int __double2uint_rz(double x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Ruint_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_double2uint_rd(double x) { + return __double2uint_rd(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_double2uint_rn(double x) { + return __double2uint_rn(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_double2uint_ru(double x) { + return __double2uint_ru(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_double2uint_rz(double x) { + return __double2uint_rz(x); +} + +static inline long long int __double2ll_rd(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rlong_rtn(x); +#endif +} + +static inline long long int __double2ll_rn(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rlong_rte(x); +#endif +} + +static inline long long int __double2ll_ru(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rlong_rtp(x); +#endif +} + +static inline long long int __double2ll_rz(double x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rlong_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double2ll_rd(double x) { + return __double2ll_rd(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double2ll_rn(double x) { + return __double2ll_rn(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double2ll_ru(double x) { + return __double2ll_ru(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double2ll_rz(double x) { + return __double2ll_rz(x); +} + +static inline unsigned long long int __double2ull_rd(double x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Rulong_rtn(x); +#endif +} + +static inline unsigned long long int __double2ull_rn(double x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Rulong_rte(x); +#endif +} + +static inline unsigned long long int __double2ull_ru(double x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Rulong_rtp(x); +#endif +} + +static inline unsigned long long int __double2ull_rz(double x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __double2Tp_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Rulong_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_double2ull_rd(double x) { + return __double2ull_rd(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_double2ull_rn(double x) { + return __double2ull_rn(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_double2ull_ru(double x) { + return __double2ull_ru(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_double2ull_rz(double x) { + return __double2ull_rz(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double_as_longlong(double x) { + return __bit_cast(x); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_hiloint2double(int hi, int lo) { + uint32_t hiu = __bit_cast(hi); + uint32_t lou = __bit_cast(lo); + uint64_t res_bits = static_cast(hiu); + res_bits = res_bits << 32; + res_bits = res_bits | static_cast(lou); + return __bit_cast(res_bits); +} + +static inline double __int2double_rn(int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rdouble(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_int2double_rn(int x) { return __int2double_rn(x); } + +static inline double __ll2double_rd(long long int x) { + int64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xi64, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rdouble_rtn(xi64); +#endif +} + +static inline double __ll2double_rn(long long int x) { + int64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xi64, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rdouble_rte(xi64); +#endif +} + +static inline double __ll2double_ru(long long int x) { + int64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xi64, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rdouble_rtp(xi64); +#endif +} + +static inline double __ll2double_rz(long long int x) { + int64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xi64, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rdouble_rtz(xi64); +#endif +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ll2double_rd(long long int x) { + return __ll2double_rd(x); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ll2double_rn(long long int x) { + return __ll2double_rn(x); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ll2double_ru(long long int x) { + return __ll2double_ru(x); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ll2double_rz(long long int x) { + return __ll2double_rz(x); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_longlong_as_double(long long int x) { + return __bit_cast(x); +} + +static inline double __uint2double_rn(unsigned int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rdouble_rte(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_uint2double_rn(unsigned int x) { + return __uint2double_rn(x); +} + +static inline double __ull2double_rd(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xui64, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rdouble_rtn(xui64); +#endif +} + +static inline double __ull2double_rn(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xui64, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rdouble_rte(xui64); +#endif +} + +static inline double __ull2double_ru(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xui64, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rdouble_rtp(xui64); +#endif +} + +static inline double __ull2double_rz(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xui64, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rdouble_rtz(xui64); +#endif +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ull2double_rd(unsigned long long int x) { + return __ull2double_rd(x); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ull2double_rn(unsigned long long int x) { + return __ull2double_rn(x); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ull2double_ru(unsigned long long int x) { + return __ull2double_ru(x); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ull2double_rz(unsigned long long int x) { + return __ull2double_rz(x); +} +#endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_utils/float_convert.cpp b/libdevice/imf_utils/float_convert.cpp new file mode 100644 index 0000000000000..d42749c15b0aa --- /dev/null +++ b/libdevice/imf_utils/float_convert.cpp @@ -0,0 +1,422 @@ +//==-- float_convert.cpp - fallback implementation of float to other type +// convert--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../device_imf.hpp" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + +static inline int __float2int_rd(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rint_rtn(x); +#endif +} + +static inline int __float2int_rn(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rint_rte(x); +#endif +} + +static inline int __float2int_ru(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rint_rtp(x); +#endif +} + +static inline int __float2int_rz(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rint_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float2int_rd(float x) { return __float2int_rd(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float2int_rn(float x) { return __float2int_rn(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float2int_ru(float x) { return __float2int_ru(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float2int_rz(float x) { return __float2int_rz(x); } + +static inline unsigned int __float2uint_rd(float x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Ruint_rtn(x); +#endif +} + +static inline unsigned int __float2uint_rn(float x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Ruint_rte(x); +#endif +} + +static inline unsigned int __float2uint_ru(float x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Ruint_rtp(x); +#endif +} + +static inline unsigned int __float2uint_rz(float x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Ruint_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float2uint_rd(float x) { + return __float2uint_rd(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float2uint_rn(float x) { + return __float2uint_rn(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float2uint_ru(float x) { + return __float2uint_ru(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float2uint_rz(float x) { + return __float2uint_rz(x); +} + +static inline long long int __float2ll_rd(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rlong_rtn(x); +#endif +} + +static inline long long int __float2ll_rn(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rlong_rte(x); +#endif +} + +static inline long long int __float2ll_ru(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rlong_rtp(x); +#endif +} + +static inline long long int __float2ll_rz(float x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertFToS_Rlong_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_float2ll_rd(float x) { return __float2ll_rd(x); } + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_float2ll_rn(float x) { return __float2ll_rn(x); } + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_float2ll_ru(float x) { return __float2ll_ru(x); } + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_float2ll_rz(float x) { return __float2ll_rz(x); } + +static inline unsigned long long int __float2ull_rd(float x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Rulong_rtn(x); +#endif +} + +static inline unsigned long long int __float2ull_rn(float x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Rulong_rte(x); +#endif +} + +static inline unsigned long long int __float2ull_ru(float x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Rulong_rtp(x); +#endif +} + +static inline unsigned long long int __float2ull_rz(float x) { + if (x < 0) + return 0; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __float2Tp_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertFToU_Rulong_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_float2ull_rd(float x) { + return __float2ull_rd(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_float2ull_rn(float x) { + return __float2ull_rn(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_float2ull_ru(float x) { + return __float2ull_ru(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_float2ull_rz(float x) { + return __float2ull_rz(x); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float_as_int(float x) { return __bit_cast(x); } + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float_as_uint(float x) { + return __bit_cast(x); +} + +static inline float __int2float_rd(int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rfloat_rtn(x); +#endif +} + +static inline float __int2float_rn(int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rfloat_rte(x); +#endif +} + +static inline float __int2float_ru(int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rfloat_rtp(x); +#endif +} + +static inline float __int2float_rz(int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rfloat_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int2float_rd(int x) { return __int2float_rd(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int2float_rn(int x) { return __int2float_rn(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int2float_ru(int x) { return __int2float_ru(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int2float_rz(int x) { return __int2float_rz(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int_as_float(int x) { return __bit_cast(x); } + +static inline float __ll2float_rd(long long int x) { + int64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xi64, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rfloat_rtn(xi64); +#endif +} + +static inline float __ll2float_rn(long long int x) { + int64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xi64, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rfloat_rte(xi64); +#endif +} + +static inline float __ll2float_ru(long long int x) { + int64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xi64, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rfloat_rtp(xi64); +#endif +} + +static inline float __ll2float_rz(long long int x) { + int64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xi64, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertSToF_Rfloat_rtz(xi64); +#endif +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ll2float_rd(long long int x) { return __ll2float_rd(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ll2float_rn(long long int x) { return __ll2float_rn(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ll2float_ru(long long int x) { return __ll2float_ru(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ll2float_rz(long long int x) { return __ll2float_rz(x); } + +static inline float __uint2float_rd(unsigned int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rfloat_rtn(x); +#endif +} + +static inline float __uint2float_rn(unsigned int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rfloat_rte(x); +#endif +} + +static inline float __uint2float_ru(unsigned int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rfloat_rtp(x); +#endif +} + +static inline float __uint2float_rz(unsigned int x) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(x, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rfloat_rtz(x); +#endif +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint2float_rd(unsigned int x) { + return __uint2float_rd(x); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint2float_rn(unsigned int x) { + return __uint2float_rn(x); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint2float_ru(unsigned int x) { + return __uint2float_ru(x); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint2float_rz(unsigned int x) { + return __uint2float_rz(x); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint_as_float(unsigned int x) { + return __bit_cast(x); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ull2float_rd(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xui64, FE_DOWNWARD); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rfloat_rtn(xui64); +#endif +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ull2float_rn(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xui64, FE_TONEAREST); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rfloat_rte(xui64); +#endif +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ull2float_ru(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xui64, FE_UPWARD); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rfloat_rtp(xui64); +#endif +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ull2float_rz(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __integral2FP_host(xui64, FE_TOWARDZERO); +#elif defined(__SPIR__) + return __spirv_ConvertUToF_Rfloat_rtz(xui64); +#endif +} + +#endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_utils/half_convert.cpp b/libdevice/imf_utils/half_convert.cpp new file mode 100644 index 0000000000000..25f3d2a010914 --- /dev/null +++ b/libdevice/imf_utils/half_convert.cpp @@ -0,0 +1,18 @@ +//==-- half_convert.cpp - fallback implementation of half to other type +// convert--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../device_imf.hpp" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_half2float(_iml_half_internal x) { + return __half2float(x); +} +#endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp new file mode 100644 index 0000000000000..800a42d69da5a --- /dev/null +++ b/libdevice/imf_utils/integer_misc.cpp @@ -0,0 +1,176 @@ +//==------ integer_misc.cpp - fallback implementation of a bunch of integer +// functions ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../device_imf.hpp" +#ifdef __LIBDEVICE_IMF_ENABLED__ + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_brev(unsigned int x) { + unsigned int res = 0; + size_t bit_count = 8 * sizeof(unsigned int); + for (size_t idx = 0; idx < bit_count - 1; ++idx) { + res |= x & 0x1; + res <<= 1; + x >>= 1; + } + res |= x & 0x1; + return res; +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_brevll(unsigned long long int x) { + unsigned long long int res = 0; + size_t bit_count = 8 * sizeof(unsigned long long int); + for (size_t idx = 0; idx < bit_count - 1; ++idx) { + res |= x & 0x1; + res <<= 1; + x >>= 1; + } + res |= x & 0x1; + return res; +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_clz(int x) { return __clz(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_clzll(long long int x) { return __clzll(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_popc(unsigned int x) { return __popc(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_popcll(unsigned long long int x) { return __popcll(x); } + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_sad(int x, int y, unsigned int z) { + return __abs(x - y) + z; +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_usad(unsigned int x, unsigned int y, + unsigned int z) { + long long int xll = x, yll = y; + return static_cast(__abs(xll - yll)) + z; +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_byte_perm(unsigned int x, unsigned int y, + unsigned int s) { + uint8_t buf[4] = { + 0, + }; + for (size_t idx = 0; idx < 4; ++idx) { + uint8_t select_idx = static_cast(s & 0x00000007); + if (select_idx < 4) + buf[idx] = __get_bytes_by_index(x, select_idx); + else + buf[idx] = __get_bytes_by_index(y, select_idx - 4); + s >>= 4; + } + return __assemble_integral_value(buf); +} + +template static inline int __do_imf_ffs(Ty x) { + static_assert(std::is_integral::value, + "ffs can only accept integral type."); + if (x == 0) + return 0; + size_t idx; + for (idx = 0; idx < sizeof(Ty) * 8; ++idx) { + if (0x1 == (0x1 & x)) + break; + x >>= 1; + } + return idx + 1; +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_ffs(int x) { return __do_imf_ffs(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_ffsll(long long int x) { return __do_imf_ffs(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_rhadd(int x, int y) { return __srhadd(x, y); } + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_uhadd(unsigned int x, unsigned int y) { + return __uhadd(x, y); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_urhadd(unsigned int x, unsigned int y) { + return __urhadd(x, y); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_mul24(int x, int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return x * y; +#elif defined(__SPIR__) + return __spirv_ocl_s_mul24(x, y); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_umul24(unsigned int x, unsigned int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return x * y; +#elif defined(__SPIR__) + return __spirv_ocl_u_mul24(x, y); +#endif +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_mulhi(int x, int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + int64_t p = static_cast(x) * static_cast(y); + p >>= 32; + return static_cast(p); +#elif defined(__SPIR__) + return __spirv_ocl_s_mul_hi(x, y); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_umulhi(unsigned int x, unsigned int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + uint64_t p = static_cast(x) * static_cast(y); + p >>= 32; + return static_cast(p); +#elif defined(__SPIR__) + return __spirv_ocl_u_mul_hi(x, y); +#endif +} + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_mul64hi(long long int x, long long int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + __int128_t p = static_cast<__int128_t>(x) * static_cast<__int128_t>(y); + p >>= 64; + return static_cast(p); +#elif defined(__SPIR__) + return __spirv_ocl_s_mul_hi(static_cast(x), static_cast(y)); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_umul64hi(unsigned long long int x, + unsigned long long int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + __uint128_t p = static_cast<__uint128_t>(x) * static_cast<__uint128_t>(y); + p >>= 64; + return static_cast(p); +#elif defined(__SPIR__) + return __spirv_ocl_u_mul_hi(static_cast(x), + static_cast(y)); +#endif +} +#endif //__LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp new file mode 100644 index 0000000000000..6197845aad440 --- /dev/null +++ b/libdevice/imf_wrapper.cpp @@ -0,0 +1,630 @@ +//==----- imf_wrapper.cpp - wrappers for intel math library functions ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "device_imf.hpp" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_saturatef(float); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_half2float(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float2int_rd(float); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float2int_rn(float); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float2int_ru(float); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float2int_rz(float); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float2uint_rd(float); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float2uint_rn(float); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float2uint_ru(float); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float2uint_rz(float); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_float2ll_rd(float); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_float2ll_rn(float); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_float2ll_ru(float); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_float2ll_rz(float); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_float2ull_rd(float); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_float2ull_rn(float); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_float2ull_ru(float); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_float2ull_rz(float); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_float_as_int(float); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_float_as_uint(float); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int2float_rd(int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int2float_rn(int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int2float_ru(int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int2float_rz(int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_int_as_float(int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ll2float_rd(long long int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ll2float_rn(long long int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ll2float_ru(long long int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ll2float_rz(long long int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint2float_rd(unsigned int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint2float_rn(unsigned int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint2float_ru(unsigned int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint2float_rz(unsigned int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_uint_as_float(unsigned int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ull2float_rd(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ull2float_rn(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ull2float_ru(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ull2float_rz(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +float __imf_saturatef(float x) { return __devicelib_imf_saturatef(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_float2int_rd(float x) { return __devicelib_imf_float2int_rd(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_float2int_rn(float x) { return __devicelib_imf_float2int_rn(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_float2int_ru(float x) { return __devicelib_imf_float2int_ru(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_float2int_rz(float x) { return __devicelib_imf_float2int_rz(x); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_float2uint_rd(float x) { + return __devicelib_imf_float2uint_rd(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_float2uint_rn(float x) { + return __devicelib_imf_float2uint_rn(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_float2uint_ru(float x) { + return __devicelib_imf_float2uint_ru(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_float2uint_rz(float x) { + return __devicelib_imf_float2uint_rz(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __imf_float2ll_rd(float x) { + return __devicelib_imf_float2ll_rd(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __imf_float2ll_rn(float x) { + return __devicelib_imf_float2ll_rn(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __imf_float2ll_ru(float x) { + return __devicelib_imf_float2ll_ru(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __imf_float2ll_rz(float x) { + return __devicelib_imf_float2ll_rz(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_float2ull_rd(float x) { + return __devicelib_imf_float2ull_rd(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_float2ull_rn(float x) { + return __devicelib_imf_float2ull_rn(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_float2ull_ru(float x) { + return __devicelib_imf_float2ull_ru(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_float2ull_rz(float x) { + return __devicelib_imf_float2ull_rz(x); +} + +DEVICE_EXTERN_C_INLINE +int __imf_float_as_int(float x) { return __devicelib_imf_float_as_int(x); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_float_as_uint(float x) { + return __devicelib_imf_float_as_uint(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_int2float_rd(int x) { return __devicelib_imf_int2float_rd(x); } + +DEVICE_EXTERN_C_INLINE +float __imf_int2float_rn(int x) { return __devicelib_imf_int2float_rn(x); } + +DEVICE_EXTERN_C_INLINE +float __imf_int2float_ru(int x) { return __devicelib_imf_int2float_ru(x); } + +DEVICE_EXTERN_C_INLINE +float __imf_int2float_rz(int x) { return __devicelib_imf_int2float_rz(x); } + +DEVICE_EXTERN_C_INLINE +float __imf_int_as_float(int x) { return __devicelib_imf_int_as_float(x); } + +DEVICE_EXTERN_C_INLINE +float __imf_ll2float_rd(long long int x) { + return __devicelib_imf_ll2float_rd(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_ll2float_rn(long long int x) { + return __devicelib_imf_ll2float_rn(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_ll2float_ru(long long int x) { + return __devicelib_imf_ll2float_ru(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_ll2float_rz(long long int x) { + return __devicelib_imf_ll2float_rz(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_uint2float_rd(unsigned int x) { + return __devicelib_imf_uint2float_rd(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_uint2float_rn(unsigned int x) { + return __devicelib_imf_uint2float_rn(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_uint2float_ru(unsigned int x) { + return __devicelib_imf_uint2float_ru(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_uint2float_rz(unsigned int x) { + return __devicelib_imf_uint2float_rz(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_uint_as_float(unsigned int x) { + return __devicelib_imf_uint_as_float(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_ull2float_rd(unsigned long long int x) { + return __devicelib_imf_ull2float_rd(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_ull2float_rn(unsigned long long int x) { + return __devicelib_imf_ull2float_rn(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_ull2float_ru(unsigned long long int x) { + return __devicelib_imf_ull2float_ru(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_ull2float_rz(unsigned long long int x) { + return __devicelib_imf_ull2float_rz(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_half2float(_iml_half_internal x) { + return __devicelib_imf_half2float(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_brev(unsigned int); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_brevll(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_brev(unsigned int x) { return __devicelib_imf_brev(x); } + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_brevll(unsigned long long int x) { + return __devicelib_imf_brevll(x); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_clz(int); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_clzll(long long int); + +DEVICE_EXTERN_C_INLINE +int __imf_clz(int x) { return __devicelib_imf_clz(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_clzll(long long int x) { return __devicelib_imf_clzll(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_popc(unsigned int); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_popcll(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +int __imf_popc(unsigned int x) { return __devicelib_imf_popc(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_popcll(unsigned long long int x) { return __devicelib_imf_popcll(x); } + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_sad(int, int, unsigned int); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_usad(unsigned int, unsigned int, unsigned int); + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_sad(int x, int y, unsigned int z) { + return __devicelib_imf_sad(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_usad(unsigned int x, unsigned int y, unsigned int z) { + return __devicelib_imf_usad(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_byte_perm(unsigned int, unsigned int, + unsigned int); + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_byte_perm(unsigned int x, unsigned int y, unsigned int s) { + return __devicelib_imf_byte_perm(x, y, s); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_ffs(int); + +DEVICE_EXTERN_C_INLINE +int __imf_ffs(int x) { return __devicelib_imf_ffs(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_ffsll(long long int); + +DEVICE_EXTERN_C_INLINE +int __imf_ffsll(long long int x) { return __devicelib_imf_ffsll(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_rhadd(int, int); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_uhadd(int, int); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_urhadd(unsigned int, unsigned int); + +DEVICE_EXTERN_C_INLINE +int __imf_rhadd(int x, int y) { return __devicelib_imf_rhadd(x, y); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_uhadd(unsigned int x, unsigned int y) { + return __devicelib_imf_uhadd(x, y); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_urhadd(unsigned int x, unsigned int y) { + return __devicelib_imf_urhadd(x, y); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_mul24(int, int); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_umul24(unsigned int, unsigned int); + +DEVICE_EXTERN_C_INLINE +int __imf_mul24(int x, int y) { return __devicelib_imf_mul24(x, y); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_umul24(unsigned int x, unsigned int y) { + return __devicelib_imf_umul24(x, y); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_mulhi(int, int); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_umulhi(unsigned int, unsigned int); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_mul64hi(long long int, long long int); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_umul64hi(unsigned long long int, + unsigned long long int); + +DEVICE_EXTERN_C_INLINE +long long int __imf_mul64hi(long long int x, long long int y) { + return __devicelib_imf_mul64hi(x, y); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_umul64hi(unsigned long long int x, + unsigned long long int y) { + return __devicelib_imf_umul64hi(x, y); +} + +DEVICE_EXTERN_C_INLINE +int __imf_mulhi(int x, int y) { return __devicelib_imf_mulhi(x, y); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_umulhi(unsigned int x, unsigned int y) { + return __devicelib_imf_umulhi(x, y); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fmaf(float, float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fmaf(float x, float y, float z) { + return __devicelib_imf_fmaf(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_floorf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_floorf(float x) { return __devicelib_imf_floorf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ceilf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_ceilf(float x) { return __devicelib_imf_ceilf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_truncf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_truncf(float x) { return __devicelib_imf_truncf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_rintf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_rintf(float x) { return __devicelib_imf_rintf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_nearbyintf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_nearbyintf(float x) { return __devicelib_imf_nearbyintf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_sqrtf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_sqrtf(float x) { return __devicelib_imf_sqrtf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_rsqrtf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_rsqrtf(float x) { return __devicelib_imf_rsqrtf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_invf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_invf(float x) { return __devicelib_imf_invf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fabsf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_fabsf(float x) { return __devicelib_imf_fabsf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fmaxf(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fmaxf(float x, float y) { return __devicelib_imf_fmaxf(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fminf(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fminf(float x, float y) { return __devicelib_imf_fminf(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_copysignf(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_copysignf(float x, float y) { + return __devicelib_imf_copysignf(x, y); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_fmaf16(_iml_half_internal, + _iml_half_internal, + _iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_fmaf16(_iml_half_internal x, _iml_half_internal y, + _iml_half_internal z) { + return __devicelib_imf_fmaf16(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_floorf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_floorf16(_iml_half_internal x) { + return __devicelib_imf_floorf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_ceilf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_ceilf16(_iml_half_internal x) { + return __devicelib_imf_ceilf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_truncf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_truncf16(_iml_half_internal x) { + return __devicelib_imf_truncf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_rintf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_rintf16(_iml_half_internal x) { + return __devicelib_imf_rintf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_nearbyintf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_nearbyintf16(_iml_half_internal x) { + return __devicelib_imf_nearbyintf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_sqrtf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_sqrtf16(_iml_half_internal x) { + return __devicelib_imf_sqrtf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_rsqrtf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_rsqrtf16(_iml_half_internal x) { + return __devicelib_imf_rsqrtf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_invf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_invf16(_iml_half_internal x) { + return __devicelib_imf_invf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_fabsf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_fabsf16(_iml_half_internal x) { + return __devicelib_imf_fabsf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_fmaxf16(_iml_half_internal, + _iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_fmaxf16(_iml_half_internal x, _iml_half_internal y) { + return __devicelib_imf_fmaxf16(x, y); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_fminf16(_iml_half_internal, + _iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_fminf16(_iml_half_internal x, _iml_half_internal y) { + return __devicelib_imf_fminf16(x, y); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_copysignf16(_iml_half_internal, + _iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_copysignf16(_iml_half_internal x, + _iml_half_internal y) { + return __devicelib_imf_copysignf16(x, y); +} +#endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_wrapper_fp64.cpp b/libdevice/imf_wrapper_fp64.cpp new file mode 100644 index 0000000000000..eddba077a3f30 --- /dev/null +++ b/libdevice/imf_wrapper_fp64.cpp @@ -0,0 +1,364 @@ +//==----- imf_wrapper_fp64.cpp - wrappers for double precision intel math +// library functions ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "device_imf.hpp" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_double2float_rd(double); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_double2float_rn(double); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_double2float_ru(double); + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_double2float_rz(double); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2hiint(double); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2int_rd(double); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2int_rn(double); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2int_ru(double); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2int_rz(double); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_double2uint_rd(double); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_double2uint_rn(double); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_double2uint_ru(double); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_double2uint_rz(double); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double2ll_rd(double); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double2ll_rn(double); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double2ll_ru(double); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double2ll_rz(double); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_double2loint(double); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_double2ull_rd(double); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_double2ull_rn(double); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_double2ull_ru(double); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_double2ull_rz(double); + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_double_as_longlong(double); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_hiloint2double(int, int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_int2double_rn(int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ll2double_rd(long long int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ll2double_rn(long long int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ll2double_ru(long long int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ll2double_rz(long long int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_longlong_as_double(long long int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_uint2double_rn(unsigned int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ull2double_rd(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ull2double_rn(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ull2double_ru(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ull2double_rz(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +float __imf_double2float_rd(double x) { + return __devicelib_imf_double2float_rd(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_double2float_rn(double x) { + return __devicelib_imf_double2float_rn(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_double2float_ru(double x) { + return __devicelib_imf_double2float_ru(x); +} + +DEVICE_EXTERN_C_INLINE +float __imf_double2float_rz(double x) { + return __devicelib_imf_double2float_rz(x); +} + +DEVICE_EXTERN_C_INLINE +int __imf_double2int_rd(double x) { return __devicelib_imf_double2int_rd(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_double2int_rn(double x) { return __devicelib_imf_double2int_rn(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_double2int_ru(double x) { return __devicelib_imf_double2int_ru(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_double2int_rz(double x) { return __devicelib_imf_double2int_rz(x); } + +// TODO: For __imf_double2hiint and __imf_double2loint, we assume underlying +// device is little-endian. We need to check if it is necessary to provide an +// endian independent implementation. +DEVICE_EXTERN_C_INLINE +int __imf_double2hiint(double x) { return __devicelib_imf_double2hiint(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_double2loint(double x) { return __devicelib_imf_double2loint(x); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_double2uint_rd(double x) { + return __devicelib_imf_double2uint_rd(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_double2uint_rn(double x) { + return __devicelib_imf_double2uint_rn(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_double2uint_ru(double x) { + return __devicelib_imf_double2uint_ru(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_double2uint_rz(double x) { + return __devicelib_imf_double2uint_rz(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __imf_double2ll_rd(double x) { + return __devicelib_imf_double2ll_rd(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __imf_double2ll_rn(double x) { + return __devicelib_imf_double2ll_rn(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __imf_double2ll_ru(double x) { + return __devicelib_imf_double2ll_ru(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __imf_double2ll_rz(double x) { + return __devicelib_imf_double2ll_rz(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_double2ull_rd(double x) { + return __devicelib_imf_double2ull_rd(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_double2ull_rn(double x) { + return __devicelib_imf_double2ull_rn(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_double2ull_ru(double x) { + return __devicelib_imf_double2ull_ru(x); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_double2ull_rz(double x) { + return __devicelib_imf_double2ull_rz(x); +} + +DEVICE_EXTERN_C_INLINE +long long int __imf_double_as_longlong(double x) { + return __devicelib_imf_double_as_longlong(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_hiloint2double(int hi, int lo) { + return __devicelib_imf_hiloint2double(hi, lo); +} + +DEVICE_EXTERN_C_INLINE +double __imf_int2double_rn(int x) { return __devicelib_imf_int2double_rn(x); } + +DEVICE_EXTERN_C_INLINE +double __imf_ll2double_rd(long long int x) { + return __devicelib_imf_ll2double_rd(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_ll2double_rn(long long int x) { + return __devicelib_imf_ll2double_rn(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_ll2double_ru(long long int x) { + return __devicelib_imf_ll2double_ru(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_ll2double_rz(long long int x) { + return __devicelib_imf_ll2double_rz(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_longlong_as_double(long long int x) { + return __devicelib_imf_longlong_as_double(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_uint2double_rn(unsigned int x) { + return __devicelib_imf_uint2double_rn(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_ull2double_rd(unsigned long long int x) { + return __devicelib_imf_ull2double_rd(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_ull2double_rn(unsigned long long int x) { + return __devicelib_imf_ull2double_rn(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_ull2double_ru(unsigned long long int x) { + return __devicelib_imf_ull2double_ru(x); +} + +DEVICE_EXTERN_C_INLINE +double __imf_ull2double_rz(unsigned long long int x) { + return __devicelib_imf_ull2double_rz(x); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_fma(double, double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_fma(double x, double y, double z) { + return __devicelib_imf_fma(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_floor(double); + +DEVICE_EXTERN_C_INLINE +double __imf_floor(double x) { return __devicelib_imf_floor(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ceil(double); + +DEVICE_EXTERN_C_INLINE +double __imf_ceil(double x) { return __devicelib_imf_ceil(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_trunc(double); + +DEVICE_EXTERN_C_INLINE +double __imf_trunc(double x) { return __devicelib_imf_trunc(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_rint(double); + +DEVICE_EXTERN_C_INLINE +double __imf_rint(double x) { return __devicelib_imf_rint(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_nearbyint(double); + +DEVICE_EXTERN_C_INLINE +double __imf_nearbyint(double x) { return __devicelib_imf_nearbyint(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_sqrt(double); + +DEVICE_EXTERN_C_INLINE +double __imf_sqrt(double x) { return __devicelib_imf_sqrt(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_rsqrt(double); + +DEVICE_EXTERN_C_INLINE +double __imf_rsqrt(double x) { return __devicelib_imf_rsqrt(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_inv(double); + +DEVICE_EXTERN_C_INLINE +double __imf_inv(double x) { return __devicelib_imf_inv(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_fabs(double); + +DEVICE_EXTERN_C_INLINE +double __imf_fabs(double x) { return __devicelib_imf_fabs(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_fmax(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_fmax(double x, double y) { return __devicelib_imf_fmax(x, y); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_fmin(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_fmin(double x, double y) { return __devicelib_imf_fmin(x, y); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_copysign(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_copysign(double x, double y) { + return __devicelib_imf_copysign(x, y); +} +#endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index 59e53f6c0cac9..ca20506d14160 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -171,6 +171,162 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_memcmp", DeviceLibExt::cl_intel_devicelib_cstring}, {"__devicelib_assert_read", DeviceLibExt::cl_intel_devicelib_assert}, {"__devicelib_assert_fail", DeviceLibExt::cl_intel_devicelib_assert}, + {"__devicelib_imf_brev", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_brevll", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_byte_perm", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ffs", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ffsll", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_clz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_clzll", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_popc", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_popcll", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_sad", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_usad", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uhadd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_urhadd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rhadd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_mul24", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_umul24", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_mulhi", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_umulhi", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_mul64hi", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_umul64hi", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fmaf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_floorf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ceilf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fabsf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_truncf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rintf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_nearbyintf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_invf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_sqrtf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rsqrtf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fmaxf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fminf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_copysignf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2int_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2int_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2uint_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2uint_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2uint_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2uint_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ll_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ll_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ll_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ll_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ull_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ull_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ull_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ull_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float_as_int", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int_as_float", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float_as_uint", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ll2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ll2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ll2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ll2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint_as_float", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ull2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ull2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ull2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ull2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fmaf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_floorf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ceilf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fabsf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_truncf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rintf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_nearbyintf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_invf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_sqrtf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rsqrtf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fmaxf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fminf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_copysignf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fma", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_floor", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ceil", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_fabs", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_trunc", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_rint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_nearbyint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_inv", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_sqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_rsqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_fmax", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_fmin", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_copysign", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2float_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2float_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2float_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2float_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2int_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2int_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2int_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2int_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2uint_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2uint_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2uint_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2uint_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2hiint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2loint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ll_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ll_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ll_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ll_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ull_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ull_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ull_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ull_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double_as_longlong", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_hiloint2double", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_int2double_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ll2double_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ll2double_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ll2double_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ll2double_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ull2double_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ull2double_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ull2double_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ull2double_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_uint2double_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_longlong_as_double", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, }; // Each fallback device library corresponds to one bit in "require mask" which @@ -183,6 +339,8 @@ SYCLDeviceLibFuncMap SDLMap = { // fallback-complex: 0x8 // fallback-complex-fp64: 0x10 // fallback-cstring: 0x20 +// fallback-imf: 0x40 +// fallback-imf-fp64: 0x80 uint32_t getDeviceLibBits(const std::string &FuncName) { auto DeviceLibFuncIter = SDLMap.find(FuncName); return ((DeviceLibFuncIter == SDLMap.end()) diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h index ba73ad3cdb39a..15cae43da0779 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h @@ -32,6 +32,8 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_complex, cl_intel_devicelib_complex_fp64, cl_intel_devicelib_cstring, + cl_intel_devicelib_imf, + cl_intel_devicelib_imf_fp64, }; uint32_t getSYCLDeviceLibReqMask(const Module &M); diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index c6cd1c03f87d0..3ca2f945be111 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -751,7 +751,7 @@ sycl::detail::enable_if_t::value, T> ctz( } // namespace ext namespace __SYCL2020_DEPRECATED("use 'ext::intel' instead") intel { - using namespace ext::intel; +using namespace ext::intel; } // geninteger mad_hi (geninteger a, geninteger b, geninteger c) @@ -1738,6 +1738,147 @@ extern SYCL_EXTERNAL double hypot(double x, double y); extern SYCL_EXTERNAL void *memcpy(void *dest, const void *src, size_t n); extern SYCL_EXTERNAL void *memset(void *dest, int c, size_t n); extern SYCL_EXTERNAL int memcmp(const void *s1, const void *s2, size_t n); +extern SYCL_EXTERNAL unsigned int __imf_brev(unsigned int x); +extern SYCL_EXTERNAL unsigned long long int +__imf_brevll(unsigned long long int x); +extern SYCL_EXTERNAL unsigned int +__imf_byte_perm(unsigned int x, unsigned int y, unsigned int s); +extern SYCL_EXTERNAL int __imf_ffs(int x); +extern SYCL_EXTERNAL int __imf_ffsll(long long int x); +extern SYCL_EXTERNAL int __imf_clz(int x); +extern SYCL_EXTERNAL int __imf_clzll(long long int x); +extern SYCL_EXTERNAL int __imf_popc(unsigned int x); +extern SYCL_EXTERNAL int __imf_popcll(unsigned long long int x); +extern SYCL_EXTERNAL unsigned int __imf_sad(int x, int y, unsigned int z); +extern SYCL_EXTERNAL unsigned int __imf_usad(unsigned int x, unsigned int y, + unsigned int z); +extern SYCL_EXTERNAL int __imf_rhadd(int x, int y); +extern SYCL_EXTERNAL unsigned int __imf_urhadd(unsigned int x, unsigned int y); +extern SYCL_EXTERNAL unsigned int __imf_uhadd(unsigned int x, unsigned int y); +extern SYCL_EXTERNAL int __imf_mul24(int x, int y); +extern SYCL_EXTERNAL unsigned int __imf_umul24(unsigned int x, unsigned int y); +extern SYCL_EXTERNAL int __imf_mulhi(int x, int y); +extern SYCL_EXTERNAL unsigned int __imf_umulhi(unsigned int x, unsigned int y); +extern SYCL_EXTERNAL long long int __imf_mul64hi(long long int x, + long long int y); +extern SYCL_EXTERNAL unsigned long long int +__imf_umul64hi(unsigned long long int x, unsigned long long int y); +extern SYCL_EXTERNAL float __imf_saturatef(float x); +extern SYCL_EXTERNAL float __imf_fmaf(float x, float y, float z); +extern SYCL_EXTERNAL float __imf_fabsf(float x); +extern SYCL_EXTERNAL float __imf_floorf(float x); +extern SYCL_EXTERNAL float __imf_ceilf(float x); +extern SYCL_EXTERNAL float __imf_truncf(float x); +extern SYCL_EXTERNAL float __imf_rintf(float x); +extern SYCL_EXTERNAL float __imf_nearbyintf(float x); +extern SYCL_EXTERNAL float __imf_sqrtf(float x); +extern SYCL_EXTERNAL float __imf_rsqrtf(float x); +extern SYCL_EXTERNAL float __imf_invf(float x); +extern SYCL_EXTERNAL float __imf_fmaxf(float x, float y); +extern SYCL_EXTERNAL float __imf_fminf(float x, float y); +extern SYCL_EXTERNAL float __imf_copysignf(float x, float y); +extern SYCL_EXTERNAL int __imf_float2int_rd(float x); +extern SYCL_EXTERNAL int __imf_float2int_rn(float x); +extern SYCL_EXTERNAL int __imf_float2int_ru(float x); +extern SYCL_EXTERNAL int __imf_float2int_rz(float x); +extern SYCL_EXTERNAL unsigned int __imf_float2uint_rd(float x); +extern SYCL_EXTERNAL unsigned int __imf_float2uint_rn(float x); +extern SYCL_EXTERNAL unsigned int __imf_float2uint_ru(float x); +extern SYCL_EXTERNAL unsigned int __imf_float2uint_rz(float x); +extern SYCL_EXTERNAL long long int __imf_float2ll_rd(float x); +extern SYCL_EXTERNAL long long int __imf_float2ll_rn(float x); +extern SYCL_EXTERNAL long long int __imf_float2ll_ru(float x); +extern SYCL_EXTERNAL long long int __imf_float2ll_rz(float x); +extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_rd(float x); +extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_rn(float x); +extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_ru(float x); +extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_rz(float x); +extern SYCL_EXTERNAL int __imf_float_as_int(float x); +extern SYCL_EXTERNAL unsigned int __imf_float_as_uint(float x); +extern SYCL_EXTERNAL float __imf_int2float_rd(int x); +extern SYCL_EXTERNAL float __imf_int2float_rn(int x); +extern SYCL_EXTERNAL float __imf_int2float_ru(int x); +extern SYCL_EXTERNAL float __imf_int2float_rz(int x); +extern SYCL_EXTERNAL float __imf_int_as_float(int x); +extern SYCL_EXTERNAL float __imf_ll2float_rd(long long int x); +extern SYCL_EXTERNAL float __imf_ll2float_rn(long long int x); +extern SYCL_EXTERNAL float __imf_ll2float_ru(long long int x); +extern SYCL_EXTERNAL float __imf_ll2float_rz(long long int x); +extern SYCL_EXTERNAL float __imf_uint2float_rd(unsigned int x); +extern SYCL_EXTERNAL float __imf_uint2float_rn(unsigned int x); +extern SYCL_EXTERNAL float __imf_uint2float_ru(unsigned int x); +extern SYCL_EXTERNAL float __imf_uint2float_rz(unsigned int x); +extern SYCL_EXTERNAL float __imf_uint_as_float(unsigned int x); +extern SYCL_EXTERNAL float __imf_ull2float_rd(unsigned long long int x); +extern SYCL_EXTERNAL float __imf_ull2float_rn(unsigned long long int x); +extern SYCL_EXTERNAL float __imf_ull2float_ru(unsigned long long int x); +extern SYCL_EXTERNAL float __imf_ull2float_rz(unsigned long long int x); + +extern SYCL_EXTERNAL _Float16 __imf_fmaf16(_Float16 x, _Float16 y, _Float16 z); +extern SYCL_EXTERNAL _Float16 __imf_fabsf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_floorf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_ceilf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_truncf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_rintf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_nearbyintf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_sqrtf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_rsqrtf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_invf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_fmaxf16(_Float16 x, _Float16 y); +extern SYCL_EXTERNAL _Float16 __imf_fminf16(_Float16 x, _Float16 y); +extern SYCL_EXTERNAL _Float16 __imf_copysignf16(_Float16 x, _Float16 y); +extern SYCL_EXTERNAL float __imf_half2float(_Float16 x); +extern SYCL_EXTERNAL double __imf_fma(double x, double y, double z); +extern SYCL_EXTERNAL double __imf_fabs(double x); +extern SYCL_EXTERNAL double __imf_floor(double x); +extern SYCL_EXTERNAL double __imf_ceil(double x); +extern SYCL_EXTERNAL double __imf_trunc(double x); +extern SYCL_EXTERNAL double __imf_rint(double x); +extern SYCL_EXTERNAL double __imf_nearbyint(double x); +extern SYCL_EXTERNAL double __imf_sqrt(double x); +extern SYCL_EXTERNAL double __imf_rsqrt(double x); +extern SYCL_EXTERNAL double __imf_inv(double x); +extern SYCL_EXTERNAL double __imf_fmax(double x, double y); +extern SYCL_EXTERNAL double __imf_fmin(double x, double y); +extern SYCL_EXTERNAL double __imf_copysign(double x, double y); +extern SYCL_EXTERNAL float __imf_double2float_rd(double x); +extern SYCL_EXTERNAL float __imf_double2float_rn(double x); +extern SYCL_EXTERNAL float __imf_double2float_ru(double x); +extern SYCL_EXTERNAL float __imf_double2float_rz(double x); +extern SYCL_EXTERNAL int __imf_double2hiint(double x); +extern SYCL_EXTERNAL int __imf_double2loint(double x); +extern SYCL_EXTERNAL int __imf_double2int_rd(double x); +extern SYCL_EXTERNAL int __imf_double2int_rn(double x); +extern SYCL_EXTERNAL int __imf_double2int_ru(double x); +extern SYCL_EXTERNAL int __imf_double2int_rz(double x); +extern SYCL_EXTERNAL double __imf_int2double_rn(int x); +extern SYCL_EXTERNAL unsigned int __imf_double2uint_rd(double x); +extern SYCL_EXTERNAL unsigned int __imf_double2uint_rn(double x); +extern SYCL_EXTERNAL unsigned int __imf_double2uint_ru(double x); +extern SYCL_EXTERNAL unsigned int __imf_double2uint_rz(double x); +extern SYCL_EXTERNAL long long int __imf_double2ll_rd(double x); +extern SYCL_EXTERNAL long long int __imf_double2ll_rn(double x); +extern SYCL_EXTERNAL long long int __imf_double2ll_ru(double x); +extern SYCL_EXTERNAL long long int __imf_double2ll_rz(double x); +extern SYCL_EXTERNAL double __imf_ll2double_rd(long long int x); +extern SYCL_EXTERNAL double __imf_ll2double_rn(long long int x); +extern SYCL_EXTERNAL double __imf_ll2double_ru(long long int x); +extern SYCL_EXTERNAL double __imf_ll2double_rz(long long int x); +extern SYCL_EXTERNAL double __imf_ull2double_rd(unsigned long long int x); +extern SYCL_EXTERNAL double __imf_ull2double_rn(unsigned long long int x); +extern SYCL_EXTERNAL double __imf_ull2double_ru(unsigned long long int x); +extern SYCL_EXTERNAL double __imf_ull2double_rz(unsigned long long int x); +extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_rd(double x); +extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_rn(double x); +extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_ru(double x); +extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_rz(double x); +extern SYCL_EXTERNAL long long int __imf_double_as_longlong(double x); +extern SYCL_EXTERNAL double __imf_longlong_as_double(long long int x); +extern SYCL_EXTERNAL double __imf_uint2double_rd(unsigned int x); +extern SYCL_EXTERNAL double __imf_uint2double_rn(unsigned int x); +extern SYCL_EXTERNAL double __imf_uint2double_ru(unsigned int x); +extern SYCL_EXTERNAL double __imf_uint2double_rz(unsigned int x); +extern SYCL_EXTERNAL double __imf_hiloint2double(int hi, int lo); } #ifdef __GLIBC__ extern "C" { diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 6297b084c7e07..0b20c182f238f 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -754,6 +754,10 @@ static const char *getDeviceLibFilename(DeviceLibExt Extension) { return "libsycl-fallback-complex-fp64.spv"; case DeviceLibExt::cl_intel_devicelib_cstring: return "libsycl-fallback-cstring.spv"; + case DeviceLibExt::cl_intel_devicelib_imf: + return "libsycl-fallback-imf.spv"; + case DeviceLibExt::cl_intel_devicelib_imf_fp64: + return "libsycl-fallback-imf-fp64.spv"; } throw compile_program_error("Unhandled (new?) device library extension", PI_INVALID_OPERATION); @@ -773,6 +777,10 @@ static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { return "cl_intel_devicelib_complex_fp64"; case DeviceLibExt::cl_intel_devicelib_cstring: return "cl_intel_devicelib_cstring"; + case DeviceLibExt::cl_intel_devicelib_imf: + return "cl_intel_devicelib_imf"; + case DeviceLibExt::cl_intel_devicelib_imf_fp64: + return "cl_intel_devicelib_imf_fp64"; } throw compile_program_error("Unhandled (new?) device library extension", PI_INVALID_OPERATION); @@ -935,7 +943,9 @@ getDeviceLibPrograms(const ContextImplPtr Context, const RT::PiDevice &Device, {DeviceLibExt::cl_intel_devicelib_math_fp64, false}, {DeviceLibExt::cl_intel_devicelib_complex, false}, {DeviceLibExt::cl_intel_devicelib_complex_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_cstring, false}}; + {DeviceLibExt::cl_intel_devicelib_cstring, false}, + {DeviceLibExt::cl_intel_devicelib_imf, false}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, false}}; // Disable all devicelib extensions requiring fp64 support if at least // one underlying device doesn't support cl_khr_fp64. diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 2c1f0ebddf473..1975964517498 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -63,7 +63,9 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_math_fp64, cl_intel_devicelib_complex, cl_intel_devicelib_complex_fp64, - cl_intel_devicelib_cstring + cl_intel_devicelib_cstring, + cl_intel_devicelib_imf, + cl_intel_devicelib_imf_fp64, }; // Provides single loading and building OpenCL programs with unique contexts