From d5f15037efc9beaf72ac607071472df6d1afe7b6 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Thu, 25 May 2023 15:51:09 -0700 Subject: [PATCH 1/5] [SYCL] Attach sycl_declared_aspects to SYCL_EXTERNAL functions --- clang/lib/CodeGen/CodeGenModule.cpp | 14 ++++- clang/test/CodeGenSYCL/device_has.cpp | 6 +++ .../SYCLLowerIR/SYCLPropagateAspectsUsage.cpp | 8 +++ .../sycl-external-with-optional-features.cpp | 52 +++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d5d1287d169dd..a0c32a300a602 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4546,8 +4546,20 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( } assert(F->getName() == MangledName && "name was uniqued!"); - if (D) + if (D) { SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); + if (const auto *A = D->getAttr()) { + SmallVector AspectsMD; + for (auto *Aspect : A->aspects()) { + llvm::APSInt AspectInt = Aspect->EvaluateKnownConstInt(getContext()); + auto *T = llvm::Type::getInt32Ty(getLLVMContext()); + auto *C = llvm::Constant::getIntegerValue(T, AspectInt); + AspectsMD.push_back(llvm::ConstantAsMetadata::get(C)); + } + F->setMetadata("sycl_declared_aspects", + llvm::MDNode::get(getLLVMContext(), AspectsMD)); + } + } if (ExtraAttrs.hasFnAttrs()) { llvm::AttrBuilder B(F->getContext(), ExtraAttrs.getFnAttrs()); F->addFnAttrs(B); diff --git a/clang/test/CodeGenSYCL/device_has.cpp b/clang/test/CodeGenSYCL/device_has.cpp index 1eb900dc788bb..b7b54d04f5408 100644 --- a/clang/test/CodeGenSYCL/device_has.cpp +++ b/clang/test/CodeGenSYCL/device_has.cpp @@ -38,6 +38,10 @@ template template [[sycl::device_has(Asp, AspPack...)]] void func8() {} +// CHECK: declare !sycl_declared_aspects ![[ASPECTS6:[0-9]+]] spir_func void @{{.*}}func9{{.*}} +[[sycl::device_has(sycl::aspect::fp16)]] +SYCL_EXTERNAL void func9(); + class KernelFunctor { public: [[sycl::device_has(sycl::aspect::cpu)]] void operator()() const { @@ -50,6 +54,7 @@ class KernelFunctor { func7(); func7(); func8(); + func9(); } }; @@ -74,5 +79,6 @@ void foo() { // CHECK: [[SRCLOC6]] = !{i32 {{[0-9]+}}} // CHECK: [[SRCLOC7]] = !{i32 {{[0-9]+}}} // CHECK: [[ASPECTS5]] = !{i32 1, i32 0} +// CHECK: [[ASPECTS6]] = !{i32 5} // CHECK: [[ASPECTS4]] = !{i32 2} // CHECK: [[SRCLOC8]] = !{i32 {{[0-9]+}}} diff --git a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp index af7bd065b7c73..79c6282dcff50 100644 --- a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp @@ -255,6 +255,14 @@ AspectsSetTy getAspectsUsedByInstruction(const Instruction &I, Result.insert(Aspects.begin(), Aspects.end()); } + if (const auto *CI = dyn_cast(&I)) + if (const auto *F = CI->getCalledFunction()) + if (const auto *MD = F->getMetadata("sycl_declared_aspects")) + for (const auto &Op : MD->operands()) { + Constant *C = cast(Op.get())->getValue(); + Result.insert(cast(C)->getSExtValue()); + } + return Result; } diff --git a/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp b/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp new file mode 100644 index 0000000000000..15ebc62bfa792 --- /dev/null +++ b/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp @@ -0,0 +1,52 @@ +// RUN: %{build} -DSOURCE1 -c -o %t1.o +// RUN: %{build} -DSOURCE2 -c -o %t2.o +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t1.o %t2.o -o %t.exe +// RUN: %{run} %t.exe + +#ifdef SOURCE1 +#include +#include + +using accT = sycl::accessor; +constexpr int value = 42; + +template +[[sycl::device_has(aspect)]] SYCL_EXTERNAL void func(const accT &acc); + +int main() { + sycl::queue q; + int data = 0; + sycl::buffer buf{&data, {1}}; + if (q.get_device().has(sycl::aspect::cpu)) { + q.submit([&](sycl::handler &cgh) { + accT acc{buf, cgh}; + cgh.single_task([=] { func(acc); }); + }).wait_and_throw(); + } else if (q.get_device().has(sycl::aspect::gpu)) { + q.submit([&](sycl::handler &cgh) { + accT acc{buf, cgh}; + cgh.single_task([=] { func(acc); }); + }).wait_and_throw(); + } + std::cout << "OK" << std::endl; +} + +#endif // SOURCE1 + +#ifdef SOURCE2 +#include + +constexpr int value = 42; + +using accT = sycl::accessor; + +template +[[sycl::device_has(aspect)]] SYCL_EXTERNAL void func(const accT &acc); +template <> SYCL_EXTERNAL void func(const accT &acc) { + acc[0] = value; +} +template <> SYCL_EXTERNAL void func(const accT &acc) { + acc[0] = value; +} + +#endif // SOURCE2 From 88eeead3697c789324df2cb5ea473141384c76ce Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 26 May 2023 10:13:45 -0700 Subject: [PATCH 2/5] Process declarations --- .../SYCLLowerIR/SYCLPropagateAspectsUsage.cpp | 10 --------- .../propagate-declared-4.ll | 21 +++++++++++++++++++ 2 files changed, 21 insertions(+), 10 deletions(-) create mode 100644 llvm/test/SYCLLowerIR/PropagateAspectsUsage/PropageteDeclaredAspects/propagate-declared-4.ll diff --git a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp index 79c6282dcff50..e846b9141d63c 100644 --- a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp @@ -255,14 +255,6 @@ AspectsSetTy getAspectsUsedByInstruction(const Instruction &I, Result.insert(Aspects.begin(), Aspects.end()); } - if (const auto *CI = dyn_cast(&I)) - if (const auto *F = CI->getCalledFunction()) - if (const auto *MD = F->getMetadata("sycl_declared_aspects")) - for (const auto &Op : MD->operands()) { - Constant *C = cast(Op.get())->getValue(); - Result.insert(cast(C)->getSExtValue()); - } - return Result; } @@ -558,8 +550,6 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects, CallGraphTy CG; for (Function &F : M.functions()) { - if (F.isDeclaration()) - continue; processFunction(F, FunctionToUsedAspects, FunctionToDeclaredAspects, TypesWithAspects, CG); } diff --git a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/PropageteDeclaredAspects/propagate-declared-4.ll b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/PropageteDeclaredAspects/propagate-declared-4.ll new file mode 100644 index 0000000000000..f5e1182004960 --- /dev/null +++ b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/PropageteDeclaredAspects/propagate-declared-4.ll @@ -0,0 +1,21 @@ +; RUN: opt -passes=sycl-propagate-aspects-usage %s -S | FileCheck %s + +source_filename = "main.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +; CHECK: void @kernel() !sycl_used_aspects ![[#ASPECT:]] +define weak_odr dso_local spir_kernel void @kernel() { +entry: + call spir_func void @_Z3foov() + ret void +} + +; CHECK: !sycl_declared_aspects ![[#ASPECT]] !sycl_used_aspects ![[#ASPECT]] {{.*}} @_Z3foov() +declare !sycl_declared_aspects !2 dso_local spir_func void @_Z3foov() + +!sycl_aspects = !{!0, !1} + +!0 = !{!"gpu", i32 2} +!1 = !{!"fp64", i32 6} +!2 = !{i32 2} From 7c210fd74c6154e75356cf3c0d7e0a7bf8a87ab3 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 26 May 2023 19:48:43 +0000 Subject: [PATCH 3/5] Simplify test a bit --- .../PropageteDeclaredAspects/propagate-declared-4.ll | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/PropageteDeclaredAspects/propagate-declared-4.ll b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/PropageteDeclaredAspects/propagate-declared-4.ll index f5e1182004960..72b2b0facbd4f 100644 --- a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/PropageteDeclaredAspects/propagate-declared-4.ll +++ b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/PropageteDeclaredAspects/propagate-declared-4.ll @@ -1,7 +1,5 @@ ; RUN: opt -passes=sycl-propagate-aspects-usage %s -S | FileCheck %s -source_filename = "main.cpp" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" ; CHECK: void @kernel() !sycl_used_aspects ![[#ASPECT:]] @@ -12,10 +10,9 @@ entry: } ; CHECK: !sycl_declared_aspects ![[#ASPECT]] !sycl_used_aspects ![[#ASPECT]] {{.*}} @_Z3foov() -declare !sycl_declared_aspects !2 dso_local spir_func void @_Z3foov() +declare !sycl_declared_aspects !1 dso_local spir_func void @_Z3foov() -!sycl_aspects = !{!0, !1} +!sycl_aspects = !{!0} -!0 = !{!"gpu", i32 2} -!1 = !{!"fp64", i32 6} -!2 = !{i32 2} +!0 = !{!"fp64", i32 6} +!1 = !{i32 2} From 5e34e3d6ad0011c90d624f0b2db0f3b14ac4fb46 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Tue, 30 May 2023 09:32:45 -0700 Subject: [PATCH 4/5] Use check-dag and add test for defined SYCL_EXTERNAL function --- clang/test/CodeGenSYCL/device_has.cpp | 59 +++++++++++++++------------ 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/clang/test/CodeGenSYCL/device_has.cpp b/clang/test/CodeGenSYCL/device_has.cpp index b7b54d04f5408..55eaf08d147d7 100644 --- a/clang/test/CodeGenSYCL/device_has.cpp +++ b/clang/test/CodeGenSYCL/device_has.cpp @@ -6,42 +6,46 @@ using namespace sycl; queue q; -// CHECK: define dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]] +// CHECK-DAG: define dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]] -// CHECK: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] { +// CHECK-DAG: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] { [[sycl::device_has(sycl::aspect::cpu)]] void func1() {} -// CHECK: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] { +// CHECK-DAG: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] { [[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {} -// CHECK: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] { +// CHECK-DAG: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] { [[sycl::device_has()]] void func3() {} -// CHECK: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] { +// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] { template [[sycl::device_has(Aspect)]] void func4() {} -// CHECK: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] { +// CHECK-DAG: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] { [[sycl::device_has(sycl::aspect::cpu)]] void func5(); void func5() {} constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; } -// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] { +// CHECK-DAG: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] { [[sycl::device_has(getAspect())]] void func6() {} -// CHECK: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS1]] -// CHECK: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS5:[0-9]+]] +// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS1]] +// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS5:[0-9]+]] template [[sycl::device_has(Asp...)]] void func7() {} -// CHECK: define linkonce_odr spir_func void @{{.*}}func8{{.*}} !sycl_declared_aspects ![[ASPECTS5]] +// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func8{{.*}} !sycl_declared_aspects ![[ASPECTS5]] template [[sycl::device_has(Asp, AspPack...)]] void func8() {} -// CHECK: declare !sycl_declared_aspects ![[ASPECTS6:[0-9]+]] spir_func void @{{.*}}func9{{.*}} +// CHECK-DAG: declare !sycl_declared_aspects ![[ASPECTS6:[0-9]+]] spir_func void @{{.*}}func9{{.*}} [[sycl::device_has(sycl::aspect::fp16)]] SYCL_EXTERNAL void func9(); +// CHECK-DAG: define dso_local spir_func void @{{.*}}func10{{.*}} !sycl_declared_aspects ![[ASPECTS6]] +[[sycl::device_has(sycl::aspect::fp16)]] +SYCL_EXTERNAL void func10() {} + class KernelFunctor { public: [[sycl::device_has(sycl::aspect::cpu)]] void operator()() const { @@ -55,6 +59,7 @@ class KernelFunctor { func7(); func8(); func9(); + func10(); } }; @@ -62,23 +67,23 @@ void foo() { q.submit([&](handler &h) { KernelFunctor f1; h.single_task(f1); - // CHECK: define dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] !srcloc ![[SRCLOC8:[0-9]+]] + // CHECK-DAG: define dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] !srcloc ![[SRCLOC8:[0-9]+]] h.single_task([]() [[sycl::device_has(sycl::aspect::gpu)]] {}); }); } -// CHECK: [[ASPECTS1]] = !{i32 1} -// CHECK: [[SRCLOC1]] = !{i32 {{[0-9]+}}} -// CHECK: [[EMPTYASPECTS]] = !{} -// CHECK: [[SRCLOC2]] = !{i32 {{[0-9]+}}} -// CHECK: [[ASPECTS2]] = !{i32 5, i32 2} -// CHECK: [[SRCLOC3]] = !{i32 {{[0-9]+}}} -// CHECK: [[SRCLOC4]] = !{i32 {{[0-9]+}}} -// CHECK: [[ASPECTS3]] = !{i32 0} -// CHECK: [[SRCLOC5]] = !{i32 {{[0-9]+}}} -// CHECK: [[SRCLOC6]] = !{i32 {{[0-9]+}}} -// CHECK: [[SRCLOC7]] = !{i32 {{[0-9]+}}} -// CHECK: [[ASPECTS5]] = !{i32 1, i32 0} -// CHECK: [[ASPECTS6]] = !{i32 5} -// CHECK: [[ASPECTS4]] = !{i32 2} -// CHECK: [[SRCLOC8]] = !{i32 {{[0-9]+}}} +// CHECK-DAG: [[ASPECTS1]] = !{i32 1} +// CHECK-DAG: [[SRCLOC1]] = !{i32 {{[0-9]+}}} +// CHECK-DAG: [[EMPTYASPECTS]] = !{} +// CHECK-DAG: [[SRCLOC2]] = !{i32 {{[0-9]+}}} +// CHECK-DAG: [[ASPECTS2]] = !{i32 5, i32 2} +// CHECK-DAG: [[SRCLOC3]] = !{i32 {{[0-9]+}}} +// CHECK-DAG: [[SRCLOC4]] = !{i32 {{[0-9]+}}} +// CHECK-DAG: [[ASPECTS3]] = !{i32 0} +// CHECK-DAG: [[SRCLOC5]] = !{i32 {{[0-9]+}}} +// CHECK-DAG: [[SRCLOC6]] = !{i32 {{[0-9]+}}} +// CHECK-DAG: [[SRCLOC7]] = !{i32 {{[0-9]+}}} +// CHECK-DAG: [[ASPECTS5]] = !{i32 1, i32 0} +// CHECK-DAG: [[ASPECTS6]] = !{i32 5} +// CHECK-DAG: [[ASPECTS4]] = !{i32 2} +// CHECK-DAG: [[SRCLOC8]] = !{i32 {{[0-9]+}}} From 9d4da40d3d954edffd2165b1b1ece9726d01f19b Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Tue, 30 May 2023 09:33:49 -0700 Subject: [PATCH 5/5] Remove the second attachment of sycl_declared_aspects --- clang/lib/CodeGen/CodeGenFunction.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 7159a8d3bd4f8..6e903c24763f9 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1121,16 +1121,6 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, Fn->setMetadata("loop_fuse", llvm::MDNode::get(getLLVMContext(), AttrMDArgs)); } - if (const auto *A = D->getAttr()) { - SmallVector AspectsMD; - for (auto *Aspect : A->aspects()) { - llvm::APSInt AspectInt = Aspect->EvaluateKnownConstInt(getContext()); - AspectsMD.push_back(llvm::ConstantAsMetadata::get( - Builder.getInt32(AspectInt.getZExtValue()))); - } - Fn->setMetadata("sycl_declared_aspects", - llvm::MDNode::get(getLLVMContext(), AspectsMD)); - } if (const auto *A = D->getAttr()) { SmallVector AspectsMD; for (auto *Aspect : A->aspects()) {