Configure nvvm-reflect-ftz for NVPTX device code from the denormal modes

Summary of the changes carried by this patch:

* CodeGenModule::Release(): the NVPTX module-flag block is entered for
  LangOpts.CUDAIsDevice or LangOpts.SYCLIsDevice (previously
  LangOpts.isSYCL()), and "nvvm-reflect-ftz" is set when either
  CodeGenOpts.FP32DenormalMode.Output or CodeGenOpts.FPDenormalMode.Output
  is not llvm::DenormalMode::IEEE.
* RenderFloatingPointOptions(): -fcuda-flush-denormals-to-zero and
  -fgpu-flush-denormals-to-zero set DenormalFP32Math to preserve-sign.
* clang/test/CodeGenSYCL/flush-denormals.cpp: the CHECK prefix is passed to
  both FileCheck invocations so the CHECK-LABEL directive is evaluated (it
  was silently ignored before), and the label pattern tolerates linkage
  tokens between "define" and "void".
* clang/test/Driver/ftz-cuda.c: covers both flag spellings handled by the
  driver change.

NOTE(review): only the positive flag forms are handled in
RenderFloatingPointOptions(); whether the -fno-* forms should reset
DenormalFP32Math needs to be confirmed against the rest of that function.
NOTE(review): the CodeGenSYCL test only runs with -fcuda-is-device; a
-fsycl-is-device run would be needed to cover the SYCLIsDevice condition.
NOTE(review): the blob ids on the "index" lines of the two new test files
are those of the original submission and do not reflect the test edits above.
NOTE(review): context-line indentation was reconstructed from a
whitespace-collapsed copy of this patch; verify it against the target tree.

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index eb1607c7cfa11..98aedff0e585a 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -815,13 +815,15 @@ void CodeGenModule::Release() {
         llvm::MDString::get(Ctx, CodeGenOpts.MemoryProfileOutput));
   }
 
-  if ((LangOpts.CUDAIsDevice || LangOpts.isSYCL()) && getTriple().isNVPTX()) {
+  if ((LangOpts.CUDAIsDevice || LangOpts.SYCLIsDevice) && getTriple().isNVPTX()) {
     // Indicate whether __nvvm_reflect should be configured to flush denormal
     // floating point values to 0. (This corresponds to its "__CUDA_FTZ"
     // property.)
     getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
-                              CodeGenOpts.FP32DenormalMode.Output !=
-                                  llvm::DenormalMode::IEEE);
+                              (CodeGenOpts.FP32DenormalMode.Output !=
+                               llvm::DenormalMode::IEEE) ||
+                                  (CodeGenOpts.FPDenormalMode.Output !=
+                                   llvm::DenormalMode::IEEE));
     getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-prec-sqrt",
                               getTarget().getTargetOpts().NVVMCudaPrecSqrt);
   }
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 03ae003593edc..08d7eb165c3e7 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3003,6 +3003,11 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
       RoundingMathPresent = false;
       break;
 
+    case options::OPT_fcuda_flush_denormals_to_zero:
+    case options::OPT_fgpu_flush_denormals_to_zero:
+      DenormalFP32Math = llvm::DenormalMode::getPreserveSign();
+      break;
+
     case options::OPT_fdenormal_fp_math_EQ:
       DenormalFPMath = llvm::parseDenormalFPAttribute(A->getValue());
       DenormalFP32Math = DenormalFPMath;
diff --git a/clang/test/CodeGenSYCL/flush-denormals.cpp b/clang/test/CodeGenSYCL/flush-denormals.cpp
new file mode 100644
index 0000000000000..192f2600ad895
--- /dev/null
+++ b/clang/test/CodeGenSYCL/flush-denormals.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=preserve-sign \
+// RUN:   -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
+// RUN:   FileCheck -check-prefixes=CHECK,FTZ32,PTXFTZ32 %s
+
+// RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math=preserve-sign \
+// RUN:   -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
+// RUN:   FileCheck -check-prefixes=CHECK,FTZ,PTXFTZ %s
+
+// CHECK-LABEL: define{{.*}} void @_Z3foov() #0
+void foo() {}
+
+// FTZ32: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+// PTXFTZ32:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
+// PTXFTZ32:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
+
+// FTZ: attributes #0 = {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign"
+// PTXFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
+// PTXFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
diff --git a/clang/test/Driver/ftz-cuda.c b/clang/test/Driver/ftz-cuda.c
new file mode 100644
index 0000000000000..33d3fb5b9351c
--- /dev/null
+++ b/clang/test/Driver/ftz-cuda.c
@@ -0,0 +1,6 @@
+// RUN: %clang -### -fcuda-flush-denormals-to-zero -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FTZ %s
+// RUN: %clang -### -fgpu-flush-denormals-to-zero -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FTZ %s
+// CHECK-FTZ: "-cc1"
+// CHECK-FTZ: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"