From 57dcd402cd5365cd2af11353305ad0f1435a950b Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 4 Jan 2022 17:35:39 +0000 Subject: [PATCH 01/10] [SYCL] Add -fsycl-fp32-prec-sqrt flag This flag enables correctly rounded `sycl::sqrt` (the default precision requirement is 3 ULP). And enables the flag for CUDA and HIP targets. --- clang/include/clang/Basic/CodeGenOptions.def | 1 + clang/include/clang/Driver/Options.td | 3 +++ clang/include/clang/Driver/ToolChain.h | 3 ++- clang/lib/Driver/ToolChain.cpp | 3 ++- clang/lib/Driver/ToolChains/AMDGPU.cpp | 12 +++++++--- clang/lib/Driver/ToolChains/AMDGPU.h | 3 ++- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 3 ++- clang/lib/Driver/ToolChains/Cuda.cpp | 4 ++++ clang/lib/Driver/ToolChains/HIPAMD.cpp | 18 +++++++++------ clang/lib/Driver/ToolChains/HIPAMD.h | 5 ++-- clang/lib/Driver/ToolChains/HIPSPV.cpp | 6 +++-- clang/lib/Driver/ToolChains/HIPSPV.h | 3 ++- clang/test/Driver/sycl-amdgcn-sqrt.cpp | 24 ++++++++++++++++++++ clang/test/Driver/sycl-nvptx-sqrt.cpp | 20 ++++++++++++++++ sycl/doc/GetStartedGuide.md | 2 +- sycl/doc/UsersManual.md | 8 +++++++ 16 files changed, 98 insertions(+), 20 deletions(-) create mode 100644 clang/test/Driver/sycl-amdgcn-sqrt.cpp create mode 100644 clang/test/Driver/sycl-nvptx-sqrt.cpp diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 4a6253f4eec87..35b59aafe818c 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -177,6 +177,7 @@ CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enable CODEGENOPT(NullPointerIsValid , 1, 0) ///< Assume Null pointer deference is defined. CODEGENOPT(OpenCLCorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt CODEGENOPT(HIPCorrectlyRoundedDivSqrt, 1, 1) ///< -fno-hip-fp32-correctly-rounded-divide-sqrt +CODEGENOPT(SYCLFp32PrecSqrt, 1, 0) ///< -fsycl-fp32-prec-sqrt CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names. CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 6ef29fbf833be..0fd08c4ef237d 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4732,6 +4732,9 @@ def fno_sycl_device_lib_EQ : CommaJoined<["-"], "fno-sycl-device-lib=">, Group, HelpText<"Control exclusion of " "device libraries from device binary linkage. Valid arguments " "are libc, libm-fp32, libm-fp64, all">; +def fsycl_fp32_prec_sqrt : Flag<["-"], "fsycl-fp32-prec-sqrt">, Group, Flags<[CC1Option]>, + HelpText<"SYCL only. Specify that single precision floating-point sqrt used in the program source are correctly rounded.">, + MarshallingInfoFlag>; //===----------------------------------------------------------------------===// // FLangOption + CoreOption + NoXarchOption diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index abb986a42f221..cdd60b4eee871 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -705,7 +705,8 @@ class ToolChain { /// Get paths of HIP device libraries. virtual llvm::SmallVector - getHIPDeviceLibs(const llvm::opt::ArgList &Args) const; + getHIPDeviceLibs(const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadingKind) const; /// Return sanitizers which are available in this toolchain. virtual SanitizerMask getSupportedSanitizers() const; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 5ce9f942efd27..5266832969d05 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1097,7 +1097,8 @@ void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const {} llvm::SmallVector -ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs) const { +ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs, + const Action::OffloadKind OffloadKind) const { return {}; } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 43ce33750ebac..21a116c52c0cd 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -895,9 +895,9 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const { return false; } -llvm::SmallVector -ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, - const std::string &GPUArch) const { +llvm::SmallVector ROCMToolChain::getCommonDeviceLibNames( + const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch, + const Action::OffloadKind DeviceOffloadingKind) const { auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch); const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); @@ -923,6 +923,12 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, bool CorrectSqrt = DriverArgs.hasFlag( options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); + + if (DeviceOffloadingKind == Action::OFK_SYCL) { + // When using SYCL, sqrt is only correctly rounded if the flag is specified + CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt); + } + bool Wave64 = isWave64(DriverArgs, Kind); return RocmInstallation.getCommonBitcodeLibs( diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 156bfd1fbdb2a..c459f4629d343 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -142,7 +142,8 @@ class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { // Returns a list of device library names shared by different languages llvm::SmallVector getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, - const std::string &GPUArch) const; + const std::string &GPUArch, + const Action::OffloadKind DeviceOffloadingKind) const; }; } // end namespace toolchains diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index f282f04b79311..07d2f60866260 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -123,7 +123,8 @@ const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( // - write an opt pass that sets that on every function it sees and pipe // the device-libs bitcode through that on the way to this llvm-link SmallVector BCLibs = - AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str()); + AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str(), + Action::OFK_OpenMP); llvm::for_each(BCLibs, [&](StringRef BCFile) { CmdArgs.push_back(Args.MakeArgString(BCFile)); }); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 0058def1aacfa..d1c3d575b598c 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -707,6 +707,10 @@ void CudaToolChain::addClangTargetOptions( if (DeviceOffloadingKind == Action::OFK_SYCL) { toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs, CC1Args); + + if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt)) { + CC1Args.push_back("-fcuda-prec-sqrt"); + } } auto NoLibSpirv = DriverArgs.hasArg(options::OPT_fno_sycl_libspirv, diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 6bf5b1a220be1..fff8d455568e6 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -256,11 +256,12 @@ void HIPAMDToolChain::addClangTargetOptions( CC1Args.push_back(DriverArgs.MakeArgString(LibSpirvFile)); } - llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](auto BCFile) { - CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" - : "-mlink-bitcode-file"); - CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); - }); + llvm::for_each( + getHIPDeviceLibs(DriverArgs, DeviceOffloadingKind), [&](auto BCFile) { + CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" + : "-mlink-bitcode-file"); + CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); + }); } llvm::opt::DerivedArgList * @@ -355,7 +356,9 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, } llvm::SmallVector -HIPAMDToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { +HIPAMDToolChain::getHIPDeviceLibs( + const llvm::opt::ArgList &DriverArgs, + const Action::OffloadKind DeviceOffloadingKind) const { llvm::SmallVector BCLibs; if (DriverArgs.hasArg(options::OPT_nogpulib)) return {}; @@ -412,7 +415,8 @@ HIPAMDToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { BCLibs.push_back(RocmInstallation.getHIPPath()); // Add common device libraries like ocml etc. - for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str())) + for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str(), + DeviceOffloadingKind)) BCLibs.push_back(StringRef(N)); // Add instrument lib. diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h index e4a2f74796484..3b2c2383857a3 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.h +++ b/clang/lib/Driver/ToolChains/HIPAMD.h @@ -86,8 +86,9 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain { llvm::opt::ArgStringList &CC1Args) const override; void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; - llvm::SmallVector - getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override; + llvm::SmallVector getHIPDeviceLibs( + const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadingKind) const override; SanitizerMask getSupportedSanitizers() const override; diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp index d68c87e9b3e71..e6717e244545c 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.cpp +++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp @@ -154,7 +154,7 @@ void HIPSPVToolChain::addClangTargetOptions( CC1Args.append( {"-fvisibility", "hidden", "-fapply-global-visibility-to-externs"}); - llvm::for_each(getHIPDeviceLibs(DriverArgs), + llvm::for_each(getHIPDeviceLibs(DriverArgs, DeviceOffloadingKind), [&](const BitCodeLibraryInfo &BCFile) { CC1Args.append({"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)}); @@ -206,7 +206,9 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, } llvm::SmallVector -HIPSPVToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { +HIPSPVToolChain::getHIPDeviceLibs( + const llvm::opt::ArgList &DriverArgs, + const Action::OffloadKind DeviceOffloadingKind) const { llvm::SmallVector BCLibs; if (DriverArgs.hasArg(options::OPT_nogpulib)) return {}; diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h index 79520f77c742f..5d85a3af2b673 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.h +++ b/clang/lib/Driver/ToolChains/HIPSPV.h @@ -69,7 +69,8 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain { void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector - getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override; + getHIPDeviceLibs(const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadKind) const override; SanitizerMask getSupportedSanitizers() const override; diff --git a/clang/test/Driver/sycl-amdgcn-sqrt.cpp b/clang/test/Driver/sycl-amdgcn-sqrt.cpp new file mode 100644 index 0000000000000..4eb1f28603e1a --- /dev/null +++ b/clang/test/Driver/sycl-amdgcn-sqrt.cpp @@ -0,0 +1,24 @@ +// REQUIRES: clang-driver +// REQUIRES: amdgpu-registered-target +// REQUIRES: !system-windows + +// RUN: %clang -### \ +// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa \ +// RUN: -Xsycl-target-backend --offload-arch=gfx900 \ +// RUN: -fsycl-fp32-prec-sqrt \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s \ +// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s + +// CHECK-CORRECT: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc" + +// RUN: %clang -### \ +// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa \ +// RUN: -Xsycl-target-backend --offload-arch=gfx900 \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s \ +// RUN: 2>&1 | FileCheck --check-prefix=CHECK-APPROX %s + +// CHECK-APPROX: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_off.bc" + +void func(){}; diff --git a/clang/test/Driver/sycl-nvptx-sqrt.cpp b/clang/test/Driver/sycl-nvptx-sqrt.cpp new file mode 100644 index 0000000000000..5ef66b775dd54 --- /dev/null +++ b/clang/test/Driver/sycl-nvptx-sqrt.cpp @@ -0,0 +1,20 @@ +// REQUIRES: clang-driver +// REQUIRES: nvptx-registered-target +// REQUIRES: !system-windows + +// RUN: %clang -### \ +// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -fsycl-fp32-prec-sqrt \ +// RUN: %s \ +// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s + +// CHECK-CORRECT: "-fcuda-prec-sqrt" + +// RUN: %clang -### \ +// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: %s \ +// RUN: 2>&1 | FileCheck --check-prefix=CHECK-APPROX %s + +// CHECK-APPROX-NOT: "-fcuda-prec-sqrt" + +void func(){}; diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md index 026411d2aed50..5e6f9042851a6 100644 --- a/sycl/doc/GetStartedGuide.md +++ b/sycl/doc/GetStartedGuide.md @@ -832,7 +832,7 @@ which contains all the symbols required. project and may cause compilation issues on some platforms * `sycl::sqrt` is not correctly rounded by default as the SYCL specification allows lower precision, when porting from CUDA it may be helpful to use - `-Xclang -fcuda-prec-sqrt` to use the correctly rounded square root, this is + `-fsycl-fp32-prec-sqrt` to use the correctly rounded square root, this is significantly slower but matches the default precision used by `nvcc`, and this `clang++` flag is equivalent to the `nvcc` `-prec-sqrt` flag, except that it defaults to `false`. diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 0e39e271471d5..046475cbcc571 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -257,6 +257,14 @@ and not recommended to use in production environment. options (e.g. -c, -E, -S) may interfere with the expected output set during the host compilation. Doing so is considered undefined behavior. +**`-fsycl-fp32-prec-sqrt`** + + Enable use of correctly rounded `sycl::sqrt` function as defined by IEE754. + Without this flag, the default precision requirement for `sycl::sqrt` is 3 + ULP. + + NOTE: This flag is currently only supported with the CUDA and HIP targets. + # Example: SYCL device code compilation To invoke SYCL device compiler set `-fsycl-device-only` flag. From eb27453dd90279dbc9a6dd00d078dac74d12bffa Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 14 Jan 2022 15:51:05 +0000 Subject: [PATCH 02/10] [SYCL] Update command line help text --- clang/include/clang/Driver/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0fd08c4ef237d..d0c5827eb9c2e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4733,7 +4733,7 @@ def fno_sycl_device_lib_EQ : CommaJoined<["-"], "fno-sycl-device-lib=">, Group; def fsycl_fp32_prec_sqrt : Flag<["-"], "fsycl-fp32-prec-sqrt">, Group, Flags<[CC1Option]>, - HelpText<"SYCL only. Specify that single precision floating-point sqrt used in the program source are correctly rounded.">, + HelpText<"SYCL only. Specify that single precision floating-point sqrt is correctly rounded.">, MarshallingInfoFlag>; //===----------------------------------------------------------------------===// From f90cb8d24516347378b9e61f120af958a89a5fb0 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 14 Jan 2022 16:29:03 +0000 Subject: [PATCH 03/10] [SYCL] Add test for unused argument warning --- clang/test/Driver/sycl-no-prec-sqrt.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 clang/test/Driver/sycl-no-prec-sqrt.cpp diff --git a/clang/test/Driver/sycl-no-prec-sqrt.cpp b/clang/test/Driver/sycl-no-prec-sqrt.cpp new file mode 100644 index 0000000000000..a1100be284949 --- /dev/null +++ b/clang/test/Driver/sycl-no-prec-sqrt.cpp @@ -0,0 +1,17 @@ +// REQUIRES: clang-driver + +// RUN: %clang -### -fsycl \ +// RUN: -fsycl-fp32-prec-sqrt %s 2>&1 | FileCheck %s + +// RUN: %clang -### -fsycl -fsycl-targets=spir64_gen \ +// RUN: -fsycl-fp32-prec-sqrt %s 2>&1 | FileCheck %s +// +// RUN: %clang -### -fsycl -fsycl-targets=spir64_x86_64 \ +// RUN: -fsycl-fp32-prec-sqrt %s 2>&1 | FileCheck %s +// +// RUN: %clang -### -fsycl -fsycl-targets=spir64_fpga \ +// RUN: -fsycl-fp32-prec-sqrt %s 2>&1 | FileCheck %s + +// CHECK: warning: argument unused during compilation: '-fsycl-fp32-prec-sqrt' + +void func(){}; From 5cebae7dc4055775f98bbd05a169788e977258d3 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 14 Jan 2022 16:58:07 +0000 Subject: [PATCH 04/10] Update clang/lib/Driver/ToolChains/HIPSPV.h Co-authored-by: premanandrao --- clang/lib/Driver/ToolChains/HIPSPV.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h index 5d85a3af2b673..ba9389c46b272 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.h +++ b/clang/lib/Driver/ToolChains/HIPSPV.h @@ -70,7 +70,7 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain { llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector getHIPDeviceLibs(const llvm::opt::ArgList &Args, - const Action::OffloadKind DeviceOffloadKind) const override; + const Action::OffloadKind DeviceOffloadingKind) const override; SanitizerMask getSupportedSanitizers() const override; From 71e578adbb9a95872356ef0d7bfcdd724e8f547c Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 14 Jan 2022 17:01:44 +0000 Subject: [PATCH 05/10] Update clang/lib/Driver/ToolChain.cpp Co-authored-by: premanandrao --- clang/lib/Driver/ToolChain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 5266832969d05..71ca23cc7905e 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1098,7 +1098,7 @@ void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, llvm::SmallVector ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs, - const Action::OffloadKind OffloadKind) const { + const Action::OffloadKind DeviceOffloadingKind) const { return {}; } From b701652fee1aacd3434b2286f798dcc69b0e311a Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 14 Jan 2022 17:03:01 +0000 Subject: [PATCH 06/10] [SYCL] Fix formatting --- clang/lib/Driver/ToolChain.cpp | 5 +++-- clang/lib/Driver/ToolChains/HIPSPV.h | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 71ca23cc7905e..106d87d3dddfe 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1097,8 +1097,9 @@ void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const {} llvm::SmallVector -ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs, - const Action::OffloadKind DeviceOffloadingKind) const { +ToolChain::getHIPDeviceLibs( + const ArgList &DriverArgs, + const Action::OffloadKind DeviceOffloadingKind) const { return {}; } diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h index ba9389c46b272..036f09e5872f7 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.h +++ b/clang/lib/Driver/ToolChains/HIPSPV.h @@ -68,9 +68,9 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain { llvm::opt::ArgStringList &CC1Args) const override; void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; - llvm::SmallVector - getHIPDeviceLibs(const llvm::opt::ArgList &Args, - const Action::OffloadKind DeviceOffloadingKind) const override; + llvm::SmallVector getHIPDeviceLibs( + const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadingKind) const override; SanitizerMask getSupportedSanitizers() const override; From eb0abf26f64402566859571f9f79a8a2f868a617 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 17 Jan 2022 10:31:23 +0000 Subject: [PATCH 07/10] [SYCL] sycl-nvptx-sqrt.cpp test works on Windows --- clang/test/Driver/sycl-nvptx-sqrt.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/test/Driver/sycl-nvptx-sqrt.cpp b/clang/test/Driver/sycl-nvptx-sqrt.cpp index 5ef66b775dd54..d5320b8c0e21e 100644 --- a/clang/test/Driver/sycl-nvptx-sqrt.cpp +++ b/clang/test/Driver/sycl-nvptx-sqrt.cpp @@ -1,6 +1,5 @@ // REQUIRES: clang-driver // REQUIRES: nvptx-registered-target -// REQUIRES: !system-windows // RUN: %clang -### \ // RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda \ From 486eef3a595f8647b65daf50d0f1273a686deb05 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 17 Jan 2022 10:40:34 +0000 Subject: [PATCH 08/10] Update clang/lib/Driver/ToolChains/AMDGPU.cpp Co-authored-by: Artem Gindinson --- clang/lib/Driver/ToolChains/AMDGPU.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 21a116c52c0cd..5b5981c8eac84 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -920,14 +920,14 @@ llvm::SmallVector ROCMToolChain::getCommonDeviceLibNames( options::OPT_fno_unsafe_math_optimizations, false); bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math, options::OPT_fno_fast_math, false); - bool CorrectSqrt = DriverArgs.hasFlag( - options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, - options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); - + bool CorrectSqrt = false; if (DeviceOffloadingKind == Action::OFK_SYCL) { // When using SYCL, sqrt is only correctly rounded if the flag is specified CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt); - } + } else + CorrectSqrt = DriverArgs.hasFlag( + options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, + options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); bool Wave64 = isWave64(DriverArgs, Kind); From db77ba8856d9add16c3097432506e5235bf2738b Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 17 Jan 2022 10:41:03 +0000 Subject: [PATCH 09/10] [SYCL] Fix formatting --- clang/lib/Driver/ToolChains/AMDGPU.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 5b5981c8eac84..96a7c23c1778b 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -926,8 +926,8 @@ llvm::SmallVector ROCMToolChain::getCommonDeviceLibNames( CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt); } else CorrectSqrt = DriverArgs.hasFlag( - options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, - options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); + options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, + options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); bool Wave64 = isWave64(DriverArgs, Kind); From 0ff1e2161cecbd06b9bd0975992e7a026ad56766 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 17 Jan 2022 10:57:00 +0000 Subject: [PATCH 10/10] [SYCL] Check conflicting flag on AMD --- clang/test/Driver/sycl-amdgcn-sqrt.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/clang/test/Driver/sycl-amdgcn-sqrt.cpp b/clang/test/Driver/sycl-amdgcn-sqrt.cpp index 4eb1f28603e1a..1c0ee7077dd69 100644 --- a/clang/test/Driver/sycl-amdgcn-sqrt.cpp +++ b/clang/test/Driver/sycl-amdgcn-sqrt.cpp @@ -21,4 +21,15 @@ // CHECK-APPROX: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_off.bc" +// RUN: %clang -### \ +// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa \ +// RUN: -Xsycl-target-backend --offload-arch=gfx900 \ +// RUN: -fsycl-fp32-prec-sqrt -fno-hip-fp32-correctly-rounded-divide-sqrt \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s \ +// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CONFLICT %s + +// CHECK-CONFLICT: warning: argument unused during compilation: '-fno-hip-fp32-correctly-rounded-divide-sqrt' +// CHECK-CONFLICT: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc" + void func(){};