diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 628d5643623da..fdca78487b0ce 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4189,6 +4189,10 @@ def fsycl_remove_unused_external_funcs : Flag<["-"], "fsycl-remove-unused-extern Group, HelpText<"Allow removal of unused `SYCL_EXTERNAL` functions (default)">; def fno_sycl_remove_unused_external_funcs : Flag<["-"], "fno-sycl-remove-unused-external-funcs">, Group, HelpText<"Prevent removal of unused `SYCL_EXTERNAL` functions">; +def fsycl_allow_device_dependencies : Flag<["-"], "fsycl-allow-device-dependencies">, + Group, HelpText<"Allow dependencies between device code images">; +def fno_sycl_allow_device_dependencies : Flag<["-"], "fno-sycl-allow-device-dependencies">, + Group, HelpText<"Do not allow dependencies between device code images (default)">; def fsave_optimization_record : Flag<["-"], "fsave-optimization-record">, Visibility<[ClangOption, FlangOption]>, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f0a3e3ca02099..5f859caa99def 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10703,6 +10703,14 @@ static void addArgs(ArgStringList &DstArgs, const llvm::opt::ArgList &Alloc, } } +static bool supportDynamicLinking(const llvm::opt::ArgList &TCArgs) { + if (TCArgs.hasFlag(options::OPT_fsycl_allow_device_dependencies, + options::OPT_fno_sycl_allow_device_dependencies, + false)) + return true; + return false; +} + static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA, const llvm::opt::ArgList &TCArgs, @@ -10729,6 +10737,9 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, if (TCArgs.hasFlag(options::OPT_fno_sycl_esimd_force_stateless_mem, options::OPT_fsycl_esimd_force_stateless_mem, false)) addArgs(PostLinkArgs, TCArgs, {"-lower-esimd-force-stateless-mem=false"}); + + if (supportDynamicLinking(TCArgs)) + addArgs(PostLinkArgs, TCArgs, {"-support-dynamic-linking"}); } // Add any sycl-post-link options that rely on a specific Triple in addition @@ -10776,6 +10787,8 @@ static void getTripleBasedSYCLPostLinkOpts(const ToolChain &TC, options::OPT_fsycl_remove_unused_external_funcs, false) && !isSYCLNativeCPU(TC)) && + // When supporting dynamic linking, non-kernels in a device image can be called + !supportDynamicLinking(TCArgs) && !Triple.isNVPTX() && !Triple.isAMDGPU()) addArgs(PostLinkArgs, TCArgs, {"-emit-only-kernels-as-entry-points"}); diff --git a/clang/test/Driver/sycl-offload-old-model.cpp b/clang/test/Driver/sycl-offload-old-model.cpp index e17db2e115683..cbb38a1d8859e 100644 --- a/clang/test/Driver/sycl-offload-old-model.cpp +++ b/clang/test/Driver/sycl-offload-old-model.cpp @@ -174,9 +174,12 @@ // RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_OPT_PASS %s // CHECK_SYCL_POST_LINK_OPT_PASS: sycl-post-link{{.*}}emit-only-kernels-as-entry-points // RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen -fno-sycl-remove-unused-external-funcs %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_OPT_NO_PASS %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen -fsycl-allow-device-dependencies %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_OPT_NO_PASS %s // CHECK_SYCL_POST_LINK_OPT_NO_PASS-NOT: sycl-post-link{{.*}}emit-only-kernels-as-entry-points /// Check selective passing of -support-dynamic-linking to sycl-post-link tool +// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga -fsycl-allow-device-dependencies %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_SHARED_PASS %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen -fsycl-allow-device-dependencies %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_SHARED_PASS %s // TODO: Enable when SYCL RT supports dynamic linking // RUNx: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga -shared %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_SHARED_PASS %s // RUNx: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen -shared %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_SHARED_PASS %s diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp index 824f5de06f53e..cea7afb2ffe1a 100644 --- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp +++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp @@ -182,11 +182,8 @@ class DependencyGraph { FuncTypeToFuncsMap[F.getFunctionType()].insert(&F); } - // We add every function into the graph except if - // SupportDynamicLinking is true for (const auto &F : M.functions()) { - - if (SupportDynamicLinking && canBeImportedFunction(F)) + if (canBeImportedFunction(F)) continue; // case (1), see comment above the class definition @@ -1312,8 +1309,26 @@ splitSYCLModule(std::unique_ptr M, ModuleSplitterSettings Settings) { } bool canBeImportedFunction(const Function &F) { + // It may be theoretically possible to determine what is importable + // based solely on function F, but the "SYCL/imported symbols" + // property list MUST NOT have any imported symbols that are not supplied + // the exported symbols from another device image. This will lead to a + // runtime crash "No device image found for external symbol". Generating + // precise "SYCL/imported symbols" can be difficult because there exist + // functions that may look like they can be imported, but are supplied outside + // of user device code (e.g. _Z38__spirv_JointMatrixWorkItemLength...) In + // order to be safe and not require perfect name analysis just start with this + // simple check. + if (!SupportDynamicLinking) + return false; + + // SYCL_EXTERNAL property is not recorded for a declaration + // in a header file. Thus SYCL IR that is a declaration + // will be considered as SYCL_EXTERNAL for the purposes of + // this function. if (F.isIntrinsic() || F.getName().starts_with("__") || - !llvm::sycl::utils::isSYCLExternalFunction(&F)) + isSpirvSyclBuiltin(F.getName()) || isESIMDBuiltin(F.getName()) || + (!F.isDeclaration() && !llvm::sycl::utils::isSYCLExternalFunction(&F))) return false; bool ReturnValue = true; diff --git a/llvm/test/tools/sycl-post-link/emit_imported_symbols.ll b/llvm/test/tools/sycl-post-link/emit_imported_symbols.ll index bade08d34147e..e2580c2628c28 100644 --- a/llvm/test/tools/sycl-post-link/emit_imported_symbols.ll +++ b/llvm/test/tools/sycl-post-link/emit_imported_symbols.ll @@ -1,12 +1,12 @@ ; This test checks that the -emit-imported-symbols option generates a list of imported symbols ; Function names were chosen so that no function with a 'inside' in their function name is imported -; +; Note that -emit-imported-symbols will not emit any imported symbols without -support-dynamic-linking. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Test with -split=kernel ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; RUN: sycl-post-link -properties -symbols -emit-imported-symbols -split=kernel -S < %s -o %t_kernel.table +; RUN: sycl-post-link -properties -symbols -support-dynamic-linking -emit-imported-symbols -split=kernel -S < %s -o %t_kernel.table ; RUN: FileCheck %s -input-file=%t_kernel_0.sym --check-prefixes CHECK-KERNEL-SYM-0 ; RUN: FileCheck %s -input-file=%t_kernel_1.sym --check-prefixes CHECK-KERNEL-SYM-1 @@ -23,17 +23,17 @@ ; CHECK-KERNEL-SYM-1: foo ; CHECK-KERNEL-IMPORTED-SYM-1: [SYCL/imported symbols] +; CHECK-KERNEL-IMPORTED-SYM-1-NEXT: middle ; CHECK-KERNEL-IMPORTED-SYM-1-NEXT: childA ; CHECK-KERNEL-IMPORTED-SYM-1-NEXT: childC -; CHECK-KERNEL-IMPORTED-SYM-1-NEXT: childD ; CHECK-KERNEL-IMPORTED-SYM-1-EMPTY: ; CHECK-KERNEL-SYM-2: bar ; CHECK-KERNEL-IMPORTED-SYM-2: [SYCL/imported symbols] +; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: middle ; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: childB ; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: childC -; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: childD ; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: _Z7outsidev ; CHECK-KERNEL-IMPORTED-SYM-2-EMPTY: @@ -41,11 +41,11 @@ ; Test with -split=source ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; RUN: sycl-post-link -properties -symbols -emit-imported-symbols -split=source -S < %s -o %t_source.table +; RUN: sycl-post-link -properties -symbols -support-dynamic-linking -emit-imported-symbols -split=source -S < %s -o %t_source.table ; RUN: FileCheck %s -input-file=%t_source_0.sym --check-prefixes CHECK-SOURCE-SYM-0 ; RUN: FileCheck %s -input-file=%t_source_0.prop --check-prefixes CHECK-SOURCE-IMPORTED-SYM-0 -; RUN: sycl-post-link -properties -symbols -emit-imported-symbols -split=source -S < %s -o %t_source.table -O0 +; RUN: sycl-post-link -properties -symbols -support-dynamic-linking -emit-imported-symbols -split=source -S < %s -o %t_source.table -O0 ; RUN: FileCheck %s -input-file=%t_source_0.sym --check-prefixes CHECK-SOURCE-SYM-0 ; RUN: FileCheck %s -input-file=%t_source_0.prop --check-prefixes CHECK-SOURCE-IMPORTED-SYM-0 @@ -73,7 +73,7 @@ define weak_odr spir_kernel void @foo() #0 { } define weak_odr spir_kernel void @bar() #0 { - ;; Functions that are not SYCL External (i.e. they have no sycl-module-id) cannot be imported + ;; Functions whose name start with '__' cannot be imported call spir_func void @__itt_offload_wi_start_wrapper() call void @childB() diff --git a/llvm/test/tools/sycl-post-link/internalize_functions.ll b/llvm/test/tools/sycl-post-link/internalize_functions.ll index 03179dd8f5ed4..83f0e6af1698b 100644 --- a/llvm/test/tools/sycl-post-link/internalize_functions.ll +++ b/llvm/test/tools/sycl-post-link/internalize_functions.ll @@ -1,5 +1,5 @@ ; Test that when -support-dynamic-linking is used -; non SYCL-EXTERNAL functions are internalized. +; non SYCL_EXTERNAL functions are internalized. ; Variables must not be internalized. ; RUN: sycl-post-link -symbols -support-dynamic-linking -split=kernel -S < %s -o %t.table @@ -8,8 +8,8 @@ ; CHECK-SYM-0: foo0 -; Non SYCL-EXTERNAL Functions are internalized -; foo0 is a SYCL-EXTERNAL function +; Non SYCL_EXTERNAL Functions are internalized +; foo0 is a SYCL_EXTERNAL function ; CHECK-LL-0-DAG: define weak_odr spir_kernel void @foo0() #0 { ; Internalize does not change available_externally ; CHECK-LL-0-DAG: define available_externally spir_func void @internalA() { diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/a.cpp b/sycl/test-e2e/DeviceDependencies/Inputs/a.cpp new file mode 100644 index 0000000000000..eab521745eda5 --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/a.cpp @@ -0,0 +1,13 @@ +#include +#include "a.hpp" +#include "b.hpp" + +SYCL_EXTERNAL int levelA(int val) { +#ifndef __SYCL_DEVICE_ONLY__ + std::cerr << "Host symbol used" << std::endl; + return 0; +#endif + val=levelB(val); + return val|=(0xA<<0); +} + diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/a.hpp b/sycl/test-e2e/DeviceDependencies/Inputs/a.hpp new file mode 100644 index 0000000000000..7578451418c06 --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/a.hpp @@ -0,0 +1,3 @@ +#include + +SYCL_EXTERNAL int levelA(int val); diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/b.cpp b/sycl/test-e2e/DeviceDependencies/Inputs/b.cpp new file mode 100644 index 0000000000000..f4bb9b056eadf --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/b.cpp @@ -0,0 +1,13 @@ +#include +#include "b.hpp" +#include "c.hpp" + +SYCL_EXTERNAL int levelB(int val) { +#ifndef __SYCL_DEVICE_ONLY__ + std::cerr << "Host symbol used" << std::endl; + return 0; +#endif + val=levelC(val); + return val|=(0xB<<4); +} + diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/b.hpp b/sycl/test-e2e/DeviceDependencies/Inputs/b.hpp new file mode 100644 index 0000000000000..a1e689ef1f9be --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/b.hpp @@ -0,0 +1,3 @@ +#include + +SYCL_EXTERNAL int levelB(int val); diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/c.cpp b/sycl/test-e2e/DeviceDependencies/Inputs/c.cpp new file mode 100644 index 0000000000000..35324615afc79 --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/c.cpp @@ -0,0 +1,13 @@ +#include +#include "c.hpp" +#include "d.hpp" + +SYCL_EXTERNAL int levelC(int val) { +#ifndef __SYCL_DEVICE_ONLY__ + std::cerr << "Host symbol used" << std::endl; + return 0; +#endif + val=levelD(val); + return val|=(0xC<<8); +} + diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/c.hpp b/sycl/test-e2e/DeviceDependencies/Inputs/c.hpp new file mode 100644 index 0000000000000..d1451809bca2a --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/c.hpp @@ -0,0 +1,3 @@ +#include + +SYCL_EXTERNAL int levelC(int val); diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/d.cpp b/sycl/test-e2e/DeviceDependencies/Inputs/d.cpp new file mode 100644 index 0000000000000..d93e83d3d8d50 --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/d.cpp @@ -0,0 +1,11 @@ +#include +#include "d.hpp" + +SYCL_EXTERNAL int levelD(int val) { +#ifndef __SYCL_DEVICE_ONLY__ + std::cerr << "Host symbol used" << std::endl; + return 0; +#endif + return val|=(0xD<<12); +} + diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/d.hpp b/sycl/test-e2e/DeviceDependencies/Inputs/d.hpp new file mode 100644 index 0000000000000..e9c05bc874593 --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/d.hpp @@ -0,0 +1,3 @@ +#include + +SYCL_EXTERNAL int levelD(int val); diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/wrapper.cpp b/sycl/test-e2e/DeviceDependencies/Inputs/wrapper.cpp new file mode 100644 index 0000000000000..7f7952e7d2810 --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/wrapper.cpp @@ -0,0 +1,26 @@ +#include +#include "a.hpp" +#include +#define EXPORT +#include "wrapper.hpp" + +using namespace sycl; + +class ExeKernel; + +int wrapper() { + int val = 0; + { + buffer buf(&val, range<1>(1)); + queue q; + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() {acc[0] = levelA(acc[0]);}); + }); + } + + std::cout << "val=" << std::hex << val << "\n"; + if (val!=0xDCBA) + return (1); + return(0); +} diff --git a/sycl/test-e2e/DeviceDependencies/Inputs/wrapper.hpp b/sycl/test-e2e/DeviceDependencies/Inputs/wrapper.hpp new file mode 100644 index 0000000000000..b22d122913140 --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/Inputs/wrapper.hpp @@ -0,0 +1,8 @@ +#if defined(_WIN32) +#ifdef EXPORT +__declspec(dllexport) +#else +__declspec(dllimport) +#endif +#endif +int wrapper(); diff --git a/sycl/test-e2e/DeviceDependencies/dynamic.cpp b/sycl/test-e2e/DeviceDependencies/dynamic.cpp new file mode 100644 index 0000000000000..f2b730c732b32 --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/dynamic.cpp @@ -0,0 +1,36 @@ +// Test -fsycl-allow-device-dependencies with dynamic libraries. + +// REQUIRES: linux +// UNSUPPORTED: cuda || hip + +// RUN: %clangxx -fsycl -fPIC -shared -fsycl-allow-device-dependencies %S/Inputs/a.cpp -I %S/Inputs -o %T/libdevice_a.so +// RUN: %clangxx -fsycl -fPIC -shared -fsycl-allow-device-dependencies %S/Inputs/b.cpp -I %S/Inputs -o %T/libdevice_b.so +// RUN: %clangxx -fsycl -fPIC -shared -fsycl-allow-device-dependencies %S/Inputs/c.cpp -I %S/Inputs -o %T/libdevice_c.so +// RUN: %clangxx -fsycl -fPIC -shared -fsycl-allow-device-dependencies %S/Inputs/d.cpp -I %S/Inputs -o %T/libdevice_d.so +// RUN: %{build} -fsycl-allow-device-dependencies -L%T -ldevice_a -ldevice_b -ldevice_c -ldevice_d -I %S/Inputs -o %t.out -Wl,-rpath=%T +// RUN: %{run} %t.out + +#include +#include "a.hpp" +#include + +using namespace sycl; + +class ExeKernel; + +int main() { + int val = 0; + { + buffer buf(&val, range<1>(1)); + queue q; + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() {acc[0] = levelA(acc[0]);}); + }); + } + + std::cout << "val=" << std::hex << val << "\n"; + if (val!=0xDCBA) + return (1); + return(0); +} diff --git a/sycl/test-e2e/DeviceDependencies/objects.cpp b/sycl/test-e2e/DeviceDependencies/objects.cpp new file mode 100644 index 0000000000000..365632b9ed23f --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/objects.cpp @@ -0,0 +1,35 @@ +// Test -fsycl-allow-device-dependencies with objects. + +// UNSUPPORTED: cuda || hip + +// RUN: %clangxx -fsycl %S/Inputs/a.cpp -I %S/Inputs -c -o %t_a.o +// RUN: %clangxx -fsycl %S/Inputs/b.cpp -I %S/Inputs -c -o %t_b.o +// RUN: %clangxx -fsycl %S/Inputs/c.cpp -I %S/Inputs -c -o %t_c.o +// RUN: %clangxx -fsycl %S/Inputs/d.cpp -I %S/Inputs -c -o %t_d.o +// RUN: %{build} -fsycl-allow-device-dependencies %t_a.o %t_b.o %t_c.o %t_d.o -I %S/Inputs -o %t.out +// RUN: %{run} %t.out + +#include +#include "a.hpp" +#include + +using namespace sycl; + +class ExeKernel; + +int main() { + int val = 0; + { + buffer buf(&val, range<1>(1)); + queue q; + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() {acc[0] = levelA(acc[0]);}); + }); + } + + std::cout << "val=" << std::hex << val << "\n"; + if (val!=0xDCBA) + return (1); + return(0); +} diff --git a/sycl/test-e2e/DeviceDependencies/singleDynamicLibrary.cpp b/sycl/test-e2e/DeviceDependencies/singleDynamicLibrary.cpp new file mode 100644 index 0000000000000..2ec70f8f09378 --- /dev/null +++ b/sycl/test-e2e/DeviceDependencies/singleDynamicLibrary.cpp @@ -0,0 +1,26 @@ +// Test -fsycl-allow-device-dependencies with a single dynamic library on Windows +// and Linux. + +// UNSUPPORTED: cuda || hip + +// RUN: %clangxx -fsycl %fPIC %shared_lib -fsycl-allow-device-dependencies -I %S/Inputs \ +// RUN: %S/Inputs/a.cpp \ +// RUN: %S/Inputs/b.cpp \ +// RUN: %S/Inputs/c.cpp \ +// RUN: %S/Inputs/d.cpp \ +// RUN: %S/Inputs/wrapper.cpp \ +// RUN: -o %if windows %{%T/device_single.dll%} %else %{%T/libdevice_single.so%} + +// RUN: %{build} -I%S/Inputs -o %t.out \ +// RUN: %if windows \ +// RUN: %{%T/device_single.lib%} \ +// RUN: %else \ +// RUN: %{-L%T -ldevice_single -Wl,-rpath=%T%} + +// RUN: %{run} %t.out + +#include "wrapper.hpp" + +int main() { + return(wrapper()); +}