diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 96a5968175c37..496d12abbbcd7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3530,6 +3530,7 @@ def fsycl_esimd : Flag<["-"], "fsycl-explicit-simd">, Group, Flags<[ HelpText<"Enable SYCL explicit SIMD extension">; def fno_sycl_esimd : Flag<["-"], "fno-sycl-explicit-simd">, Group, HelpText<"Disable SYCL explicit SIMD extension">, Flags<[NoArgumentUnused, CoreOption]>; +defm sycl_std_optimizations : OptOutFFlag<"sycl-std-optimizations", "Enable", "Disable", " standard optimization pipeline for SYCL device compiler">; //===----------------------------------------------------------------------===// // CC1 Options @@ -4454,8 +4455,6 @@ def fsycl_std_layout_kernel_params: Flag<["-"], "fsycl-std-layout-kernel-params" def fsycl_allow_func_ptr : Flag<["-"], "fsycl-allow-func-ptr">, HelpText<"Allow function pointers in SYCL device.">; def fno_sycl_allow_func_ptr : Flag<["-"], "fno-sycl-allow-func-ptr">; -def fsycl_enable_optimizations: Flag<["-"], "fsycl-enable-optimizations">, - HelpText<"Experimental flag enabling standard optimization in the front-end.">; } // let Flags = [CC1Option] diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 64a03682bb748..c47bf377f0ad5 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4102,6 +4102,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasFlag(options::OPT_fsycl_esimd, options::OPT_fno_sycl_esimd, false)) CmdArgs.push_back("-fsycl-explicit-simd"); + + if (!Args.hasFlag(options::OPT_fsycl_std_optimizations, + options::OPT_fno_sycl_std_optimizations, true)) + CmdArgs.push_back("-fno-sycl-std-optimizations"); + // Pass the triple of host when doing SYCL auto AuxT = llvm::Triple(llvm::sys::getProcessTriple()); std::string NormalizedTriple = AuxT.normalize(); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index b64c7105a142f..8d4ebebb7da24 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -823,7 +823,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.DisableLLVMPasses = Args.hasArg(OPT_disable_llvm_passes) || (Args.hasArg(OPT_fsycl_is_device) && Triple.isSPIR() && - !Args.hasArg(OPT_fsycl_enable_optimizations) && !IsSyclESIMD); + Args.hasArg(OPT_fno_sycl_std_optimizations) && !IsSyclESIMD); Opts.DisableLifetimeMarkers = Args.hasArg(OPT_disable_lifetimemarkers); const llvm::Triple::ArchType DebugEntryValueArchs[] = { diff --git a/clang/test/CodeGenSYCL/inline_asm.cpp b/clang/test/CodeGenSYCL/inline_asm.cpp index 60015f8ca6a82..db1956673a15a 100644 --- a/clang/test/CodeGenSYCL/inline_asm.cpp +++ b/clang/test/CodeGenSYCL/inline_asm.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-enable-optimizations -triple spir64-unknown-unknown-sycldevice -emit-llvm -x c++ %s -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm -x c++ %s -o - | FileCheck %s class kernel; diff --git a/clang/test/CodeGenSYCL/remove-ur-inst.cpp b/clang/test/CodeGenSYCL/remove-ur-inst.cpp index 7866aff7f07a3..cf70f7c61f435 100644 --- a/clang/test/CodeGenSYCL/remove-ur-inst.cpp +++ b/clang/test/CodeGenSYCL/remove-ur-inst.cpp @@ -1,5 +1,5 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fno-sycl-std-optimizations -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-enable-optimizations -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s SYCL_EXTERNAL void doesNotReturn() throw() __attribute__((__noreturn__)); diff --git a/clang/test/Driver/sycl-device-optimizations.cpp b/clang/test/Driver/sycl-device-optimizations.cpp new file mode 100644 index 0000000000000..d945bfa09f0d8 --- /dev/null +++ b/clang/test/Driver/sycl-device-optimizations.cpp @@ -0,0 +1,14 @@ +/// Check that optimizations for sycl device are enabled by default: +// RUN: %clang -### -fsycl %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s +// RUN: %clang -### -fsycl -fsycl-device-only %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s +// CHECK-DEFAULT-NOT: "-fno-sycl-std-optimizations" +// CHECK-DEFAULT-NOT: "-disable-llvm-passes" + +/// Check "-fno-sycl-std-optimizations" is passed to the front-end: +// RUN: %clang -### -fsycl -fno-sycl-std-optimizations %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-NO-SYCL-STD-OPTS %s +// RUN: %clang -### -fsycl -fsycl-device-only -fno-sycl-std-optimizations %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-NO-SYCL-STD-OPTS %s +// CHECK-NO-SYCL-STD-OPTS: "-fno-sycl-std-optimizations" \ No newline at end of file diff --git a/sycl/test/basic_tests/accessor/accessor.cpp b/sycl/test/basic_tests/accessor/accessor.cpp index 413ed9ddd0c13..08b647b15f210 100644 --- a/sycl/test/basic_tests/accessor/accessor.cpp +++ b/sycl/test/basic_tests/accessor/accessor.cpp @@ -1,4 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/basic_tests/boolean.cpp b/sycl/test/basic_tests/boolean.cpp index cac65ddaa80bd..d8adee7652666 100644 --- a/sycl/test/basic_tests/boolean.cpp +++ b/sycl/test/basic_tests/boolean.cpp @@ -1,4 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/basic_tests/stream/stream.cpp b/sycl/test/basic_tests/stream/stream.cpp index 6b5e6925298d1..7d9f8889c38d0 100644 --- a/sycl/test/basic_tests/stream/stream.cpp +++ b/sycl/test/basic_tests/stream/stream.cpp @@ -1,4 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out | FileCheck %s // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER // RUN: %GPU_RUN_ON_LINUX_PLACEHOLDER %t.out %GPU_CHECK_ON_LINUX_PLACEHOLDER diff --git a/sycl/test/check_device_code/fpga_ihs_float.cpp b/sycl/test/check_device_code/fpga_ihs_float.cpp index f452fc2517c35..d12f48b168ae6 100644 --- a/sycl/test/check_device_code/fpga_ihs_float.cpp +++ b/sycl/test/check_device_code/fpga_ihs_float.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // RUN: %clangxx -I %sycl_include -S -emit-llvm -fsycl -fsycl-device-only %s -o - | FileCheck %s -// RUN: %clangxx -I %sycl_include -S -emit-llvm -fsycl -fsycl-device-only %s -Xclang -fsycl-enable-optimizations -o - | FileCheck %s +// RUN: %clangxx -I %sycl_include -S -emit-llvm -fsycl -fno-sycl-std-optimizations -fsycl-device-only %s -o - | FileCheck %s #include "CL/__spirv/spirv_ops.hpp" diff --git a/sycl/test/fpga_tests/fpga_lsu.cpp b/sycl/test/fpga_tests/fpga_lsu.cpp index 65b35e09dbd69..fda4ac5e3b60a 100644 --- a/sycl/test/fpga_tests/fpga_lsu.cpp +++ b/sycl/test/fpga_tests/fpga_lsu.cpp @@ -1,4 +1,7 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations %s -o %t.out // RUNx: %ACC_RUN_PLACEHOLDER %t.out //==----------------- fpga_lsu.cpp - SYCL FPGA LSU test --------------------==// // diff --git a/sycl/test/hier_par/hier_par_wgscope.cpp b/sycl/test/hier_par/hier_par_wgscope.cpp index 6c608b35ab142..f2146241e911b 100644 --- a/sycl/test/hier_par/hier_par_wgscope.cpp +++ b/sycl/test/hier_par/hier_par_wgscope.cpp @@ -1,3 +1,12 @@ +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + //==- hier_par_wgscope.cpp --- hierarchical parallelism test for WG scope---==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -6,12 +15,6 @@ // //===----------------------------------------------------------------------===// -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: env SYCL_DEVICE_TYPE=HOST %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out - // This test checks correctness of hierarchical kernel execution when there is // code and data in the work group scope. diff --git a/sycl/test/regression/group.cpp b/sycl/test/regression/group.cpp index c53dc3cc64360..0ecd8c569df9f 100644 --- a/sycl/test/regression/group.cpp +++ b/sycl/test/regression/group.cpp @@ -1,4 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp index b8d161cbac204..c2a0bf168a5cc 100644 --- a/sycl/test/spec_const/spec_const_hw.cpp +++ b/sycl/test/spec_const/spec_const_hw.cpp @@ -1,6 +1,9 @@ +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + // UNSUPPORTED: cuda || level_zero // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/sub_group/generic-shuffle.cpp b/sycl/test/sub_group/generic-shuffle.cpp index 2f7788172a5ad..18c1cfa08a50b 100644 --- a/sycl/test/sub_group/generic-shuffle.cpp +++ b/sycl/test/sub_group/generic-shuffle.cpp @@ -1,7 +1,10 @@ +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + // UNSUPPORTED: cuda // CUDA compilation and runtime do not yet support sub-groups. // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/sub_group/generic_reduce.cpp b/sycl/test/sub_group/generic_reduce.cpp index cfeea7f459b69..1c7df6a9f7619 100644 --- a/sycl/test/sub_group/generic_reduce.cpp +++ b/sycl/test/sub_group/generic_reduce.cpp @@ -1,8 +1,11 @@ +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + // UNSUPPORTED: cuda // CUDA compilation and runtime do not yet support sub-groups. // -// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -std=c++14 %s -o %t.out -// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple -std=c++14 -D SG_GPU %s -o %t_gpu.out +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-unnamed-lambda -std=c++14 %s -o %t.out +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple -std=c++14 -D SG_GPU %s -o %t_gpu.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out diff --git a/sycl/test/sub_group/load_store.cpp b/sycl/test/sub_group/load_store.cpp index cc84ebcab5bb9..2c5d71c240f4f 100644 --- a/sycl/test/sub_group/load_store.cpp +++ b/sycl/test/sub_group/load_store.cpp @@ -1,9 +1,12 @@ +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + // UNSUPPORTED: cuda || cpu // CUDA compilation and runtime do not yet support sub-groups. // #2252 Disable until all variants of built-ins are available in OpenCL CPU // runtime for every supported ISA // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/sub_group/scan_fp16.cpp b/sycl/test/sub_group/scan_fp16.cpp index bfdb09f8f7f31..a837055a3aae7 100644 --- a/sycl/test/sub_group/scan_fp16.cpp +++ b/sycl/test/sub_group/scan_fp16.cpp @@ -1,7 +1,10 @@ +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + // UNSUPPORTED: cuda // CUDA compilation and runtime do not yet support sub-groups. // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out //==--------------- scan_fp16.cpp - SYCL sub_group scan test --------*- C++ -*---==// diff --git a/sycl/test/sub_group/shuffle.cpp b/sycl/test/sub_group/shuffle.cpp index 5207716148ef6..6a93dfb83a003 100644 --- a/sycl/test/sub_group/shuffle.cpp +++ b/sycl/test/sub_group/shuffle.cpp @@ -1,7 +1,10 @@ +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + // UNSUPPORTED: cuda // CUDA compilation and runtime do not yet support sub-groups. // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/sub_group/shuffle_fp16.cpp b/sycl/test/sub_group/shuffle_fp16.cpp index 62f07fc612de8..c48795702308f 100644 --- a/sycl/test/sub_group/shuffle_fp16.cpp +++ b/sycl/test/sub_group/shuffle_fp16.cpp @@ -1,7 +1,10 @@ +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + // UNSUPPORTED: cuda // CUDA compilation and runtime do not yet support sub-groups. // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // //==------------ shuffle_fp16.cpp - SYCL sub_group shuffle test -----*- C++ -*---==// diff --git a/sycl/test/sub_group/shuffle_fp64.cpp b/sycl/test/sub_group/shuffle_fp64.cpp index 3b1ed56907601..c6465f1ec7917 100644 --- a/sycl/test/sub_group/shuffle_fp64.cpp +++ b/sycl/test/sub_group/shuffle_fp64.cpp @@ -1,7 +1,10 @@ +// TODO: Enable compilation w/o -fno-sycl-std-optimizations option. +// See https://github.com/intel/llvm/issues/2264 for more details. + // UNSUPPORTED: cuda // CUDA compilation and runtime do not yet support sub-groups. // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fno-sycl-std-optimizations -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out