diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 0937f2839ca71..20cea13278efa 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -1056,7 +1056,8 @@ llvm::SmallVector ROCMToolChain::getCommonDeviceLibNames( bool CorrectSqrt = false; if (DeviceOffloadingKind == Action::OFK_SYCL) { // When using SYCL, sqrt is only correctly rounded if the flag is specified - CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt); + CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt) || + DriverArgs.hasArg(options::OPT_foffload_fp32_prec_sqrt); } else CorrectSqrt = DriverArgs.hasFlag( options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 2e8a59b5c0366..3726b70197202 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -965,7 +965,8 @@ void CudaToolChain::addClangTargetOptions( if (DeviceOffloadingKind == Action::OFK_SYCL) { SYCLInstallation.addSYCLIncludeArgs(DriverArgs, CC1Args); - if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt)) + if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt) || + DriverArgs.hasArg(options::OPT_foffload_fp32_prec_sqrt)) CC1Args.push_back("-fcuda-prec-sqrt"); bool FastRelaxedMath = DriverArgs.hasFlag( diff --git a/clang/test/Driver/sycl-amdgcn-sqrt.cpp b/clang/test/Driver/sycl-amdgcn-sqrt.cpp index 6b4aca10b0dfe..f52728d544741 100644 --- a/clang/test/Driver/sycl-amdgcn-sqrt.cpp +++ b/clang/test/Driver/sycl-amdgcn-sqrt.cpp @@ -9,6 +9,14 @@ // RUN: %s \ // RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s +// RUN: %clang -### \ +// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fno-sycl-libspirv \ +// RUN: -Xsycl-target-backend --offload-arch=gfx900 \ +// RUN: -foffload-fp32-prec-sqrt \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s \ +// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s + // CHECK-CORRECT: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc" // RUN: %clang -### \ @@ -28,6 +36,14 @@ // RUN: %s \ // RUN: 2>&1 | FileCheck --check-prefix=CHECK-CONFLICT %s +// RUN: %clang -### \ +// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fno-sycl-libspirv \ +// RUN: -Xsycl-target-backend --offload-arch=gfx900 \ +// RUN: -foffload-fp32-prec-sqrt -fno-hip-fp32-correctly-rounded-divide-sqrt \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s \ +// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CONFLICT %s + // CHECK-CONFLICT: warning: argument unused during compilation: '-fno-hip-fp32-correctly-rounded-divide-sqrt' // CHECK-CONFLICT: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc" diff --git a/clang/test/Driver/sycl-nvptx-sqrt.cpp b/clang/test/Driver/sycl-nvptx-sqrt.cpp index a08a7bad782ba..62b69d01a1abc 100644 --- a/clang/test/Driver/sycl-nvptx-sqrt.cpp +++ b/clang/test/Driver/sycl-nvptx-sqrt.cpp @@ -6,6 +6,12 @@ // RUN: %s \ // RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s +// RUN: %clang -### -nocudalib \ +// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -foffload-fp32-prec-sqrt \ +// RUN: %s \ +// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s + // CHECK-CORRECT: "-fcuda-prec-sqrt" // RUN: %clang -### -nocudalib \ diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 7899de547d7e8..055bee5239ade 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -1049,8 +1049,9 @@ The following sets the ftz flag to 1, and the precise sqrt flag to 1. .. code-block:: llvm - !llvm.module.flags = !{!0} + !llvm.module.flags = !{!0, !1} !0 = !{i32 4, !"nvvm-reflect-ftz", i32 1} + !1 = !{i32 4, !"nvvm-reflect-prec-sqrt", i32 1} (``i32 4`` indicates that the value set here overrides the value in another module we link with. See the `LangRef `