From 01daa5a4753d4e3512d0f3fc30cf8ed8a1a80e48 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Mon, 1 Mar 2021 00:44:21 +0300 Subject: [PATCH 01/16] [SYCL] Add ITT annotation instructions This patch introduces InstrumentalAnnotationsPass pass to sycl-post-link tool. The pass is being included to sycl-post-link chain only when '-fsycl_device_code_add_instrumentation_calls' option is passed to clang driver. Current version of the pass create instruction to notify profiling tools that: work item has started/finised or resumed (in case if barrier was called) and it annotates the barrier call itself and for annotation of atomic instructions. Signed-off-by: Dmitry Sidorov --- clang/include/clang/Driver/Options.td | 2 + clang/lib/Driver/ToolChains/Clang.cpp | 4 + clang/test/Driver/sycl-instrumentation.cpp | 10 + .../sycl-post-link/itt_annotations_O2.ll | 169 ++++++++++++++ .../tools/sycl-post-link/itt_atomic_load.ll | 134 +++++++++++ .../tools/sycl-post-link/itt_atomic_store.ll | 118 ++++++++++ llvm/test/tools/sycl-post-link/itt_barrier.ll | 152 ++++++++++++ llvm/tools/sycl-post-link/CMakeLists.txt | 1 + .../InstrumentalAnnotations.cpp | 219 ++++++++++++++++++ .../sycl-post-link/InstrumentalAnnotations.h | 26 +++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 20 +- 11 files changed, 854 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/sycl-instrumentation.cpp create mode 100644 llvm/test/tools/sycl-post-link/itt_annotations_O2.ll create mode 100644 llvm/test/tools/sycl-post-link/itt_atomic_load.ll create mode 100644 llvm/test/tools/sycl-post-link/itt_atomic_store.ll create mode 100644 llvm/test/tools/sycl-post-link/itt_barrier.ll create mode 100644 llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp create mode 100644 llvm/tools/sycl-post-link/InstrumentalAnnotations.h diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 61bad5a0b4aff..203813fb3096b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2402,6 +2402,8 @@ def fsycl_device_code_lower_esimd : Flag<["-"], "fsycl-device-code-lower-esimd"> Flags<[CC1Option, CoreOption]>, HelpText<"Lower ESIMD-specific constructs">; def fno_sycl_device_code_lower_esimd : Flag<["-"], "fno-sycl-device-code-lower-esimd">, Flags<[CC1Option, CoreOption]>, HelpText<"Do not lower ESIMD-specific constructs">; +def fsycl_device_code_add_instrumentation_calls : Flag<["-"], "fsycl-device-code-add-instrumentation-calls">, + Flags<[CC1Option, CoreOption]>, HelpText<"Add instrumentation intrinsics calls">; defm sycl_id_queries_fit_in_int: OptInFFlag<"sycl-id-queries-fit-in-int", "Assume", "Do not assume", " that SYCL ID queries fit within MAX_INT.", [CC1Option,CoreOption], LangOpts<"SYCLValueFitInMaxInt">>; def fsycl_use_bitcode : Flag<["-"], "fsycl-use-bitcode">, Flags<[CC1Option, CoreOption]>, HelpText<"Use LLVM bitcode instead of SPIR-V in fat objects">; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6f33a9c52cd2d..3f5b7962774c4 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8284,6 +8284,10 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, assert(isa(JA) && "Expecting SYCL post link job!"); ArgStringList CmdArgs; + // See if device code instrumentation is requested + if (TCArgs.hasArg(options::OPT_fsycl_device_code_add_instrumentation_calls)) + addArgs(CmdArgs, TCArgs, {"-add-instrumentation-calls"}); + // See if device code splitting is requested if (Arg *A = TCArgs.getLastArg(options::OPT_fsycl_device_code_split_EQ)) { if (StringRef(A->getValue()) == "per_kernel") diff --git a/clang/test/Driver/sycl-instrumentation.cpp b/clang/test/Driver/sycl-instrumentation.cpp new file mode 100644 index 0000000000000..60cbde196a5f2 --- /dev/null +++ b/clang/test/Driver/sycl-instrumentation.cpp @@ -0,0 +1,10 @@ +/// Check that instrumentation is disabled by default: +// RUN: %clang -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s +// CHECK-DEFAULT-NOT: "-add-instrumentation-calls" + +/// Check "fsycl_device_code_add_instrumentation_calls" is passed to sycl post +/// link tool: +// RUN: %clang -### -fsycl -fsycl-device-code-add-instrumentation-calls %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ENABLED %s +// CHECK-ENABLED: sycl-post-link{{.*}}"-add-instrumentation-calls" diff --git a/llvm/test/tools/sycl-post-link/itt_annotations_O2.ll b/llvm/test/tools/sycl-post-link/itt_annotations_O2.ll new file mode 100644 index 0000000000000..f7d57a0e771a8 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/itt_annotations_O2.ll @@ -0,0 +1,169 @@ +;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/KernelAndProgram/kernel-and-program.cpp +;; with following commands: +;; clang++ -fsycl -fsycl-device-only kernel-and-program.cpp -o kernel_and_program_optimized.bc +;; llvm-link kernel_and_program_optimized.bc -o kernel_and_program_optimized_link.bc --suppress-warnings + +; RUN: sycl-post-link -add-instrumentation-calls -split=auto --ir-output-only %s -S -o %t.ll +; RUN: FileCheck %s -input-file=%t.ll + +; ModuleID = 'kernel_and_program_optimized.bc' +source_filename = "llvm-link" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } + +$_ZTSZ4mainE10SingleTask = comdat any + +$_ZTSZ4mainE11ParallelFor = comdat any + +$_ZTSZ4mainE13ParallelForND = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalOffset = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; CHECK: @__spirv_BuiltInWorkgroupId = external dso_local addrspace(1) constant <3 x i64>, align 32 +; CHECK: @__spirv_BuiltInGlobalLinearId = external dso_local addrspace(1) constant i32, align 32 +; CHECK: @__spirv_BuiltInWorkgroupSize = external dso_local addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: norecurse willreturn +define weak_odr dso_local spir_kernel void @_ZTSZ4mainE10SingleTask(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK: _ZTSZ4mainE10SingleTask +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[CAST_WG_ID_1:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupId to i64 addrspace(4)* +; CHECK-NEXT: %[[CAST_WI_ID_1:[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* @__spirv_BuiltInGlobalLinearId to i64 addrspace(4)* +; CHECK-NEXT: %[[LOAD_WI_ID_1:[0-9a-zA-Z._]+]] = load i64, i64 addrspace(4)* %[[CAST_WI_ID_1]], align 8 +; CHECK-NEXT: %[[CAST_WG_SIZE_1:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupSize to <3 x i32> addrspace(4)* +; CHECK-NEXT: %[[LOAD_WG_SIZE_1:[0-9a-zA-Z._]+]] = load <3 x i32>, <3 x i32> addrspace(4)* %[[CAST_WG_SIZE_1]], align 16 +; CHECK-NEXT: %[[EXTRACT_WG_SIZE_1:[0-9a-zA-Z._]+]] = extractelement <3 x i32> %[[LOAD_WG_SIZE_1]], i32 0 +; CHECK-NEXT: call void @__itt_offload_wi_start(i64 addrspace(4)* %[[CAST_WG_ID_1]], i64 %[[LOAD_WI_ID_1]], i32 %[[EXTRACT_WG_SIZE_1]]) + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %ptridx.ascast.i9.i = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)* + %3 = load i32, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 + %add.i = add nsw i32 %3, 1 + store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 +; CHECK: call void @__itt_offload_wi_finish(i64 addrspace(4)* %[[CAST_WG_ID_1]], i64 %[[LOAD_WI_ID_1]] +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: norecurse willreturn +define weak_odr dso_local spir_kernel void @_ZTSZ4mainE11ParallelFor(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK: _ZTSZ4mainE11ParallelFor +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[CAST_WG_ID_2:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupId to i64 addrspace(4)* +; CHECK-NEXT: %[[CAST_WI_ID_2:[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* @__spirv_BuiltInGlobalLinearId to i64 addrspace(4)* +; CHECK-NEXT: %[[LOAD_WI_ID_2:[0-9a-zA-Z._]+]] = load i64, i64 addrspace(4)* %[[CAST_WI_ID_2]], align 8 +; CHECK-NEXT: %[[CAST_WG_SIZE_2:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupSize to <3 x i32> addrspace(4)* +; CHECK-NEXT: %[[LOAD_WG_SIZE_2:[0-9a-zA-Z._]+]] = load <3 x i32>, <3 x i32> addrspace(4)* %[[CAST_WG_SIZE_2]], align 16 +; CHECK-NEXT: %[[EXTRACT_WG_SIZE_2:[0-9a-zA-Z._]+]] = extractelement <3 x i32> %[[LOAD_WG_SIZE_2]], i32 0 +; CHECK-NEXT: call void @__itt_offload_wi_start(i64 addrspace(4)* %[[CAST_WG_ID_2]], i64 %[[LOAD_WI_ID_2]], i32 %[[EXTRACT_WG_SIZE_2]]) + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !9 + %4 = extractelement <3 x i64> %3, i64 0 + %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4 + %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* + %5 = load i32, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 + %add.i = add nsw i32 %5, 1 + store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 +; CHECK: call void @__itt_offload_wi_finish(i64 addrspace(4)* %[[CAST_WG_ID_2]], i64 %[[LOAD_WI_ID_2]] +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSZ4mainE13ParallelForND(i32 addrspace(3)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8) local_unnamed_addr #1 comdat !kernel_arg_buffer_location !16 { +entry: +; CHECK: _ZTSZ4mainE13ParallelForND +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[CAST_WG_ID_3:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupId to i64 addrspace(4)* +; CHECK-NEXT: %[[CAST_WI_ID_3:[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* @__spirv_BuiltInGlobalLinearId to i64 addrspace(4)* +; CHECK-NEXT: %[[LOAD_WI_ID_3:[0-9a-zA-Z._]+]] = load i64, i64 addrspace(4)* %[[CAST_WI_ID_3]], align 8 +; CHECK-NEXT: %[[CAST_WG_SIZE_3:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupSize to <3 x i32> addrspace(4)* +; CHECK-NEXT: %[[LOAD_WG_SIZE_3:[0-9a-zA-Z._]+]] = load <3 x i32>, <3 x i32> addrspace(4)* %[[CAST_WG_SIZE_3]], align 16 +; CHECK-NEXT: %[[EXTRACT_WG_SIZE_3:[0-9a-zA-Z._]+]] = extractelement <3 x i32> %[[LOAD_WG_SIZE_3]], i32 0 +; CHECK-NEXT: call void @__itt_offload_wi_start(i64 addrspace(4)* %[[CAST_WG_ID_3]], i64 %[[LOAD_WI_ID_3]], i32 %[[EXTRACT_WG_SIZE_3]]) + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %2 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !17 + %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalOffset to <3 x i64> addrspace(4)*), align 32, !noalias !24 + %5 = extractelement <3 x i64> %3, i64 0 + %6 = extractelement <3 x i64> %4, i64 0 + %sub.i.i.i.i = sub i64 %5, %6 + %7 = trunc i64 %sub.i.i.i.i to i32 + %conv.i = and i32 %7, 1 + %xor.i = xor i32 %conv.i, 1 + %ptridx.i27.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %sub.i.i.i.i + %ptridx.ascast.i28.i = addrspacecast i32 addrspace(1)* %ptridx.i27.i to i32 addrspace(4)* + %8 = load i32, i32 addrspace(4)* %ptridx.ascast.i28.i, align 4, !tbaa !5 + %9 = zext i32 %conv.i to i64 + %ptridx.i23.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %9 + %ptridx.ascast.i24.i = addrspacecast i32 addrspace(3)* %ptridx.i23.i to i32 addrspace(4)* + store i32 %8, i32 addrspace(4)* %ptridx.ascast.i24.i, align 4, !tbaa !5 +; CHECK: call void @__itt_offload_wg_barrier(i8* null) +; CHECK-NEXT: tail call void @_Z22__spirv_ControlBarrierjjj +; CHECK-NEXT: call void @__itt_offload_wi_resume(i64 addrspace(4)* %[[CAST_WG_ID_3]], i64 %[[LOAD_WI_ID_3]]) + tail call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #3 + %conv6.i = zext i32 %xor.i to i64 + %ptridx.i17.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %conv6.i + %ptridx.ascast.i18.i = addrspacecast i32 addrspace(3)* %ptridx.i17.i to i32 addrspace(4)* + %10 = load i32, i32 addrspace(4)* %ptridx.ascast.i18.i, align 4, !tbaa !5 + store i32 %10, i32 addrspace(4)* %ptridx.ascast.i28.i, align 4, !tbaa !5 +; CHECK: call void @__itt_offload_wi_finish(i64 addrspace(4)* %[[CAST_WG_ID_3]], i64 %[[LOAD_WI_ID_3]] +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent +declare dso_local void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) local_unnamed_addr #2 + +attributes #0 = { norecurse willreturn "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { convergent } + +!opencl.spir.version = !{!0} +!spirv.Source = !{!1} +!llvm.ident = !{!2} +!llvm.module.flags = !{!3} + +!0 = !{i32 1, i32 2} +!1 = !{i32 4, i32 100000} +!2 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git 3d2adc7b3ca269708bcabdc4a40352a5cacb4b9d)"} +!3 = !{i32 1, !"wchar_size", i32 4} +!4 = !{i32 -1, i32 -1, i32 -1, i32 -1} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = !{!10, !12, !14} +!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!12 = distinct !{!12, !13, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!13 = distinct !{!13, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!14 = distinct !{!14, !15, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_2idIXT_EEEPS5_: %agg.result"} +!15 = distinct !{!15, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_2idIXT_EEEPS5_"} +!16 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!17 = !{!18, !20, !22} +!18 = distinct !{!18, !19, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!19 = distinct !{!19, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!20 = distinct !{!20, !21, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!21 = distinct !{!21, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!22 = distinct !{!22, !23, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!23 = distinct !{!23, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_7nd_itemIXT_EEEPS5_"} +!24 = !{!25, !27, !22} +!25 = distinct !{!25, !26, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!26 = distinct !{!26, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!27 = distinct !{!27, !28, !"_ZN7__spirvL16initGlobalOffsetILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!28 = distinct !{!28, !"_ZN7__spirvL16initGlobalOffsetILi1EN2cl4sycl2idILi1EEEEET0_v"} diff --git a/llvm/test/tools/sycl-post-link/itt_atomic_load.ll b/llvm/test/tools/sycl-post-link/itt_atomic_load.ll new file mode 100644 index 0000000000000..e858130061de2 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/itt_atomic_load.ll @@ -0,0 +1,134 @@ +;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp +;; with following commands: +;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc +;; llvm-link load.bc -o load_link.bc --suppress-warnings + +; RUN: sycl-post-link -add-instrumentation-calls -split=auto --ir-output-only %s -S -o %t.ll +; RUN: FileCheck %s -input-file=%t.ll + +; ModuleID = 'load.bc' +source_filename = "/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/load.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } +%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } + +$_ZTSN2cl4sycl6detail19__pf_kernel_wrapperI11load_kernelIiEEE = comdat any + +$_ZTS11load_kernelIiE = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail19__pf_kernel_wrapperI11load_kernelIiEEE(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_, i32 addrspace(1)* %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_4, i32 addrspace(1)* %_arg_5, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_9) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK-LABEL: _ZTSN2cl4sycl6detail19__pf_kernel_wrapperI11load_kernelIiEEE( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 + %4 = extractelement <3 x i64> %3, i64 0 + %cmp.not.i = icmp ult i64 %4, %2 + br i1 %cmp.not.i, label %if.end.i, label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI11load_kernelIiEZZ9load_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit + +if.end.i: ; preds = %entry + %5 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_9, i64 0, i32 0, i32 0, i64 0 + %6 = addrspacecast i64* %5 to i64 addrspace(4)* + %7 = load i64, i64 addrspace(4)* %6, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_5, i64 %7 + %8 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_4, i64 0, i32 0, i32 0, i64 0 + %9 = addrspacecast i64* %8 to i64 addrspace(4)* + %10 = load i64, i64 addrspace(4)* %9, align 8 + %add.ptr.i34 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %10 +; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 [[ATOMIC_INST_1:[0-9]+]], i32 [[MEM_ORDER_1:[0-9]+]]) +; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 [[MEM_ORDER_1]]) +; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 [[ATOMIC_INST_1]], i32 [[MEM_ORDER_1]]) + %call3.i.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i34, i32 1, i32 896) #2 + %ptridx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4 + %ptridx.ascast.i.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i.i to i32 addrspace(4)* + store i32 %call3.i.i.i.i, i32 addrspace(4)* %ptridx.ascast.i.i.i, align 4, !tbaa !14 + br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI11load_kernelIiEZZ9load_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit + +_ZZN2cl4sycl7handler24parallel_for_lambda_implI11load_kernelIiEZZ9load_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit: ; preds = %entry, %if.end.i +; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent +declare dso_local spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)*, i32, i32) local_unnamed_addr #1 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTS11load_kernelIiE(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !18 { +entry: +; CHECK-LABEL: _ZTS11load_kernelIiE( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i32 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %3 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 + %4 = addrspacecast i64* %3 to i64 addrspace(4)* + %5 = load i64, i64 addrspace(4)* %4, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %5 + %6 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !19 + %7 = extractelement <3 x i64> %6, i64 0 +; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 [[ATOMIC_INST_2:[0-9]+]], i32 [[MEM_ORDER_2:[0-9]+]]) +; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 [[MEM_ORDER_2]]) +; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 [[ATOMIC_INST_2]], i32 [[MEM_ORDER_2]]) + %call3.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i32, i32 1, i32 896) #2 + %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %7 + %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* + store i32 %call3.i.i.i, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14 +; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; CHECK: declare void @__itt_spirv_wi_start_wrapper() +; CHECK: declare void @__itt_sync_atomic_op_start(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_sync_atomic_op_finish(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_spirv_wi_finish_wrapper() + +attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/load.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git 51f22c4b69cf01465bdd7b586343f6e19e9ab045)"} +!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!5 = !{!6, !8, !10, !12} +!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} +!14 = !{!15, !15, i64 0} +!15 = !{!"int", !16, i64 0} +!16 = !{!"omnipotent char", !17, i64 0} +!17 = !{!"Simple C++ TBAA"} +!18 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!19 = !{!20, !22, !24, !26} +!20 = distinct !{!20, !21, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!21 = distinct !{!21, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!22 = distinct !{!22, !23, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!23 = distinct !{!23, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!24 = distinct !{!24, !25, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!25 = distinct !{!25, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!26 = distinct !{!26, !27, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!27 = distinct !{!27, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} diff --git a/llvm/test/tools/sycl-post-link/itt_atomic_store.ll b/llvm/test/tools/sycl-post-link/itt_atomic_store.ll new file mode 100644 index 0000000000000..f8eb427a483fc --- /dev/null +++ b/llvm/test/tools/sycl-post-link/itt_atomic_store.ll @@ -0,0 +1,118 @@ +;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp +;; with following commands: +;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc +;; llvm-link load.bc -o load_link.bc --suppress-warnings + +; RUN: sycl-post-link -add-instrumentation-calls -split=auto --ir-output-only %s -S -o %t.ll +; RUN: FileCheck %s -input-file=%t.ll + +; ModuleID = 'store.bc' +source_filename = "/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/store.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } +%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } + +$_ZTSN2cl4sycl6detail19__pf_kernel_wrapperI12store_kernelIiEEE = comdat any + +$_ZTS12store_kernelIiE = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail19__pf_kernel_wrapperI12store_kernelIiEEE(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_, i32 addrspace(1)* %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_4) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK-LABEL: _ZTSN2cl4sycl6detail19__pf_kernel_wrapperI12store_kernelIiEEE( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 + %4 = extractelement <3 x i64> %3, i64 0 + %cmp.not.i = icmp ult i64 %4, %2 + br i1 %cmp.not.i, label %if.end.i, label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit + +if.end.i: ; preds = %entry + %5 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_4, i64 0, i32 0, i32 0, i64 0 + %6 = addrspacecast i64* %5 to i64 addrspace(4)* + %7 = load i64, i64 addrspace(4)* %6, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %7 + %conv.i.i = trunc i64 %4 to i32 +; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 [[ATOMIC_INST_1:[0-9]+]], i32 [[MEM_ORDER_1:[0-9]+]]) +; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 [[MEM_ORDER_1]] +; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 [[ATOMIC_INST_1]], i32 [[MEM_ORDER_1]]) + tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i.i) #2 + br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit + +_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit: ; preds = %entry, %if.end.i +; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent +declare dso_local spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)*, i32, i32, i32) local_unnamed_addr #1 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTS12store_kernelIiE(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !14 { +entry: +; CHECK-LABEL: _ZTS12store_kernelIiE( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15 + %4 = extractelement <3 x i64> %3, i64 0 + %conv.i = trunc i64 %4 to i32 +; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 [[ATOMIC_INST_2:[0-9]+]], i32 [[MEM_ORDER_2:[0-9]+]]) +; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 [[MEM_ORDER_2]] +; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 [[ATOMIC_INST_2]], i32 [[MEM_ORDER_2]]) + tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i) #2 +; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; CHECK: declare void @__itt_spirv_wi_start_wrapper() +; CHECK: declare void @__itt_sync_atomic_op_start(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_sync_atomic_op_finish(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_spirv_wi_finish_wrapper() + +attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/store.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git 51f22c4b69cf01465bdd7b586343f6e19e9ab045)"} +!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!5 = !{!6, !8, !10, !12} +!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} +!14 = !{i32 -1, i32 -1, i32 -1, i32 -1} +!15 = !{!16, !18, !20, !22} +!16 = distinct !{!16, !17, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!17 = distinct !{!17, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!18 = distinct !{!18, !19, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!19 = distinct !{!19, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!20 = distinct !{!20, !21, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!21 = distinct !{!21, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!22 = distinct !{!22, !23, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!23 = distinct !{!23, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} diff --git a/llvm/test/tools/sycl-post-link/itt_barrier.ll b/llvm/test/tools/sycl-post-link/itt_barrier.ll new file mode 100644 index 0000000000000..475dd2bf977a4 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/itt_barrier.ll @@ -0,0 +1,152 @@ +;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/KernelAndProgram/kernel-and-program.cpp +;; with following commands: +;; clang++ -fsycl -fsycl-device-only kernel-and-program.cpp -o kernel_and_program_optimized.bc +;; llvm-link kernel_and_program_optimized.bc -o kernel_and_program_optimized_link.bc --suppress-warnings + +; RUN: sycl-post-link -add-instrumentation-calls -split=auto --ir-output-only %s -S -o %t.ll +; RUN: FileCheck %s -input-file=%t.ll + +; ModuleID = 'kernel_and_program_optimized.bc' +source_filename = "llvm-link" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } + +$_ZTSZ4mainE10SingleTask = comdat any + +$_ZTSZ4mainE11ParallelFor = comdat any + +$_ZTSZ4mainE13ParallelForND = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalOffset = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: norecurse willreturn +define weak_odr dso_local spir_kernel void @_ZTSZ4mainE10SingleTask(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK-LABEL: _ZTSZ4mainE10SingleTask( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %ptridx.ascast.i9.i = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)* + %3 = load i32, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 + %add.i = add nsw i32 %3, 1 + store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 +; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: norecurse willreturn +define weak_odr dso_local spir_kernel void @_ZTSZ4mainE11ParallelFor(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK-LABEL: _ZTSZ4mainE11ParallelFor( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !9 + %4 = extractelement <3 x i64> %3, i64 0 + %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4 + %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* + %5 = load i32, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 + %add.i = add nsw i32 %5, 1 + store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 +; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSZ4mainE13ParallelForND(i32 addrspace(3)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8) local_unnamed_addr #1 comdat !kernel_arg_buffer_location !16 { +entry: +; CHECK-LABEL: _ZTSZ4mainE13ParallelForND( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %2 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !17 + %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalOffset to <3 x i64> addrspace(4)*), align 32, !noalias !24 + %5 = extractelement <3 x i64> %3, i64 0 + %6 = extractelement <3 x i64> %4, i64 0 + %sub.i.i.i.i = sub i64 %5, %6 + %7 = trunc i64 %sub.i.i.i.i to i32 + %conv.i = and i32 %7, 1 + %xor.i = xor i32 %conv.i, 1 + %ptridx.i27.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %sub.i.i.i.i + %ptridx.ascast.i28.i = addrspacecast i32 addrspace(1)* %ptridx.i27.i to i32 addrspace(4)* + %8 = load i32, i32 addrspace(4)* %ptridx.ascast.i28.i, align 4, !tbaa !5 + %9 = zext i32 %conv.i to i64 + %ptridx.i23.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %9 + %ptridx.ascast.i24.i = addrspacecast i32 addrspace(3)* %ptridx.i23.i to i32 addrspace(4)* + store i32 %8, i32 addrspace(4)* %ptridx.ascast.i24.i, align 4, !tbaa !5 +; CHECK: call void @__itt_spirv_wg_barrier_wrapper() +; CHECK-NEXT: tail call void @_Z22__spirv_ControlBarrierjjj +; CHECK-NEXT: call void @__itt_spirv_wi_resume_wrapper() + tail call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #3 + %conv6.i = zext i32 %xor.i to i64 + %ptridx.i17.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %conv6.i + %ptridx.ascast.i18.i = addrspacecast i32 addrspace(3)* %ptridx.i17.i to i32 addrspace(4)* + %10 = load i32, i32 addrspace(4)* %ptridx.ascast.i18.i, align 4, !tbaa !5 + store i32 %10, i32 addrspace(4)* %ptridx.ascast.i28.i, align 4, !tbaa !5 +; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent +declare dso_local void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) local_unnamed_addr #2 + +; CHECK: declare void @__itt_spirv_wi_start_wrapper() +; CHECK: declare void @__itt_spirv_wi_finish_wrapper() +; CHECK: declare void @__itt_spirv_wg_barrier_wrapper() +; CHECK: declare void @__itt_spirv_wi_resume_wrapper() + +attributes #0 = { norecurse willreturn "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { convergent } + +!opencl.spir.version = !{!0} +!spirv.Source = !{!1} +!llvm.ident = !{!2} +!llvm.module.flags = !{!3} + +!0 = !{i32 1, i32 2} +!1 = !{i32 4, i32 100000} +!2 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git 3d2adc7b3ca269708bcabdc4a40352a5cacb4b9d)"} +!3 = !{i32 1, !"wchar_size", i32 4} +!4 = !{i32 -1, i32 -1, i32 -1, i32 -1} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = !{!10, !12, !14} +!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!12 = distinct !{!12, !13, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!13 = distinct !{!13, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!14 = distinct !{!14, !15, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_2idIXT_EEEPS5_: %agg.result"} +!15 = distinct !{!15, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_2idIXT_EEEPS5_"} +!16 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!17 = !{!18, !20, !22} +!18 = distinct !{!18, !19, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!19 = distinct !{!19, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!20 = distinct !{!20, !21, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!21 = distinct !{!21, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!22 = distinct !{!22, !23, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!23 = distinct !{!23, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_7nd_itemIXT_EEEPS5_"} +!24 = !{!25, !27, !22} +!25 = distinct !{!25, !26, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!26 = distinct !{!26, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!27 = distinct !{!27, !28, !"_ZN7__spirvL16initGlobalOffsetILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!28 = distinct !{!28, !"_ZN7__spirvL16initGlobalOffsetILi1EN2cl4sycl2idILi1EEEEET0_v"} diff --git a/llvm/tools/sycl-post-link/CMakeLists.txt b/llvm/tools/sycl-post-link/CMakeLists.txt index 98ef96a6b5aff..31a868847c534 100644 --- a/llvm/tools/sycl-post-link/CMakeLists.txt +++ b/llvm/tools/sycl-post-link/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_tool(sycl-post-link sycl-post-link.cpp SPIRKernelParamOptInfo.cpp SpecConstants.cpp + InstrumentalAnnotations.cpp ADDITIONAL_HEADER_DIRS ${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include diff --git a/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp b/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp new file mode 100644 index 0000000000000..4af136b02a396 --- /dev/null +++ b/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp @@ -0,0 +1,219 @@ +//===-- InstrumentalAnnotations.cpp - SYCL Instrumental Annotations Pass --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass which adds instrumental calls to annotate SYCL +// synchronization instrucations. This can be used for kernel profiling. +//===----------------------------------------------------------------------===// + +#include "InstrumentalAnnotations.h" + +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Function.h" + +/** Following instrumentations will be linked from libdevice: + * * * * * * * * * * * + * Notify tools work-item execution has started + * + * /param[in] group_id Pointer to array of 3 integers that uniquely identify + * group withing a kernel + * /param[in] wi_id Globally unique work-item id + * /param[in] wg_size Number of work-items in given group + * + * void __itt_offload_wi_start(size_t* group_id, size_t wi_id, + * uint32_t wg_size); + * * * * * * * * * * * + * Notify tools work-item execution resumed (e.g. after barrier) + * + * /param[in] group_id Pointer to array of 3 integers that uniquely identify + * group withing a kernel. + * /param[in] wi_id Globally unique work-item id. + * + * void __itt_offload_wi_resume(size_t* group_id, size_t wi_id); + * * * * * * * * * * * + * Notify tools work-item execution has finished + * + * /param[in] group_id Pointer to array of 3 integers that uniquely identify + * group withing a kernel. + * /param[in] wi_id Globally unique work-item id. + * + * void __itt_offload_wi_finish(size_t* group_id, size_t wi_id); + * * * * * * * * * * * + * Notify tools work-item has reached a barier + * + * /param[in] barrier_id Unique barrier id. If multi-barriers are not supported. + * Pass 0 for barrier_id. Notify tools work-item has reached a barier. + * + * void __itt_offload_wg_barrier(uintptr_t barrier_id); + * * * * * * * * * * * + * Purpose of this pass is to add wrapper calls to these instructions. + */ + +namespace { +constexpr char SPIRV_CONTROL_BARRIER[] = "__spirv_ControlBarrier"; +constexpr char SPIRV_GROUP_ALL[] = "__spirv_GroupAll"; +constexpr char SPIRV_GROUP_ANY[] = "__spirv_GroupAny"; +constexpr char SPIRV_GROUP_BROADCAST[] = "__spirv_GroupBroadcast"; +constexpr char SPIRV_GROUP_IADD[] = "__spirv_GroupIAdd"; +constexpr char SPIRV_GROUP_FADD[] = "__spirv_GroupFAdd"; +constexpr char SPIRV_GROUP_FMIN[] = "__spirv_GroupFMin"; +constexpr char SPIRV_GROUP_UMIN[] = "__spirv_GroupUMin"; +constexpr char SPIRV_GROUP_SMIN[] = "__spirv_GroupSMin"; +constexpr char SPIRV_GROUP_FMAX[] = "__spirv_GroupFMax"; +constexpr char SPIRV_GROUP_UMAX[] = "__spirv_GroupUMax"; +constexpr char SPIRV_GROUP_SMAX[] = "__spirv_GroupSMax"; +constexpr char SPIRV_ATOMIC_INST[] = "__spirv_Atomic"; +constexpr char SPIRV_ATOMIC_LOAD[] = "__spirv_AtomicLoad"; +constexpr char SPIRV_ATOMIC_STORE[] = "__spirv_AtomicSTORE"; +constexpr char ITT_ANNOTATION_WI_START[] = "__itt_spirv_wi_start_wrapper"; +constexpr char ITT_ANNOTATION_WI_RESUME[] = "__itt_spirv_wi_resume_wrapper"; +constexpr char ITT_ANNOTATION_WI_FINISH[] = "__itt_spirv_wi_finish_wrapper"; +constexpr char ITT_ANNOTATION_WG_BARRIER[] = "__itt_spirv_wg_barrier_wrapper"; +constexpr char ITT_ANNOTATION_ATOMIC_START[] = "__itt_sync_atomic_op_start"; +constexpr char ITT_ANNOTATION_ATOMIC_FINISH[] = "__itt_sync_atomic_op_finish"; +} // namespace + +namespace llvm { + +// TODO: move to a separate header +// Check for calling convention of a function. If it's spir_kernel - consider +// the function to be a SYCL kernel. +bool isSyclKernel(Function &F) { + return F.getCallingConv() == CallingConv::SPIR_KERNEL; +} + +// TODO: move to a separate header +Instruction *emitCall(Module &M, Type *RetTy, StringRef FunctionName, + ArrayRef Args, Instruction *InsertBefore) { + SmallVector ArgTys(Args.size()); + for (unsigned I = 0; I < Args.size(); ++I) + ArgTys[I] = Args[I]->getType(); + auto *FT = FunctionType::get(RetTy, ArgTys, false /*isVarArg*/); + FunctionCallee FC = M.getOrInsertFunction(FunctionName, FT); + assert(FC.getCallee() && "Instruction creation failed"); + auto *Call = CallInst::Create(FT, FC.getCallee(), Args, "", InsertBefore); + return Call; +} + +// Insert instrumental annotation calls, that has no arguments (for example +// work items start/finish/resume and barrier annotation. +bool insertSimpleInstrumentationCall(Module &M, StringRef Name, + Instruction *Position) { + Type *VoidTy = Type::getVoidTy(M.getContext()); + ArrayRef Args; + Instruction *InstrumentationCall = + emitCall(M, VoidTy, Name, Args, Position); + assert(InstrumentationCall && "Instrumentation call creation failed"); + return true; +} + +// Insert instrumental annotation calls for SPIR-V atomics. +bool insertAtomicInstrumentationCall(Module &M, StringRef Name, + CallInst *AtomicFun, + Instruction *Position) { + LLVMContext &Ctx = M.getContext(); + Type *VoidTy = Type::getVoidTy(Ctx); + Type *Int32Ty = Type::getInt32Ty(Ctx); + // __spirv_Atomic... instructions have following arguments: + // Pointer, Memory Scope, Memory Semantics and others. To construct Atomic + // annotation instructions we need Pointer and Memory Semantic arguments + // taken from the original Atomic instruction. + Value *Ptr = dyn_cast(AtomicFun->getArgOperand(0)); + StringRef AtomicName = AtomicFun->getName(); + Value *AtomicOp; + // Second parameter of Atomic Start/Finish annotation is an Op code of + // the instruction, encoded into a value of enum, defined like this on user's/ + // profiler's side: + // enum __itt_atomic_mem_op_t + // { + // __itt_mem_load = 0, + // __itt_mem_store = 1, + // __itt_mem_update = 2 + // } + if (AtomicName.contains(SPIRV_ATOMIC_LOAD)) + AtomicOp = ConstantInt::get(Int32Ty, 0); + else if (AtomicName.contains(SPIRV_ATOMIC_STORE)) + AtomicOp = ConstantInt::get(Int32Ty, 1); + else + AtomicOp = ConstantInt::get(Int32Ty, 2); + // TODO: Third parameter of Atomic Start/Finish annotation is an ordering + // semanticof the instruction, encoded into a value of enum, defined like this + // on user's/profiler's side: + // enum __itt_atomic_mem_order_t + // { + // __itt_mem_order_relaxed = 0, + // __itt_mem_order_acquire = 1, + // __itt_mem_order_release = 2 + // } + // which isn't 1:1 mapped on SPIR-V memory ordering mask, need to align it. + ConstantInt *MemSemantic = dyn_cast(AtomicFun->getArgOperand(2)); + ArrayRef Args = {Ptr, AtomicOp, MemSemantic}; + Instruction *InstrumentationCall = + emitCall(M, VoidTy, Name, Args, Position); + assert(InstrumentationCall && "Instrumentation call creation failed"); + return true; +} + +PreservedAnalyses InstrumentalAnnotationsPass::run(Module &M, + ModuleAnalysisManager &MAM) { + bool IRModified = false; + std::vector SPIRVCrossWGInstuctions = { + SPIRV_CONTROL_BARRIER, SPIRV_GROUP_ALL, SPIRV_GROUP_ANY, + SPIRV_GROUP_BROADCAST, SPIRV_GROUP_IADD, SPIRV_GROUP_FADD, + SPIRV_GROUP_FMIN, SPIRV_GROUP_UMIN, SPIRV_GROUP_SMIN, SPIRV_GROUP_FMAX, + SPIRV_GROUP_UMAX, SPIRV_GROUP_SMAX }; + + for (Function &F : M) { + // Annotate only SYCL kernels + if (F.isDeclaration() || !isSyclKernel(F)) + continue; + + // At the beggining of a kernel insert work item start annotation + // instruction. + IRModified |= insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_START, + &*inst_begin(F)); + + for (BasicBlock &BB : F) { + // Insert Finish instruction before return instruction + if (ReturnInst *RI = dyn_cast(BB.getTerminator())) + IRModified |= + insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_FINISH, RI); + for (Instruction &I : BB) { + if (CallInst *CI = dyn_cast(&I)) { + if (Function *Callee = CI->getCalledFunction()) { + StringRef CalleeName = Callee->getName(); + // Annotate barrier and other cross WG calls + if (std::any_of(SPIRVCrossWGInstuctions.begin(), + SPIRVCrossWGInstuctions.end(), + [&CalleeName](StringRef Name) { + return CalleeName.contains(Name); + })) { + Instruction *InstAfterBarrier = CI->getNextNode(); + IRModified |= insertSimpleInstrumentationCall( + M, ITT_ANNOTATION_WG_BARRIER, CI); + IRModified |= insertSimpleInstrumentationCall( + M, ITT_ANNOTATION_WI_RESUME, InstAfterBarrier); + } else if (CalleeName.contains(SPIRV_ATOMIC_INST)) { + Instruction *InstAfterAtomic = CI->getNextNode(); + IRModified |= insertAtomicInstrumentationCall( + M, ITT_ANNOTATION_ATOMIC_START, CI, CI); + IRModified |= insertAtomicInstrumentationCall( + M, ITT_ANNOTATION_ATOMIC_FINISH, CI, InstAfterAtomic); + } + } + } + } + } + } + + return IRModified ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +} // namespace llvm diff --git a/llvm/tools/sycl-post-link/InstrumentalAnnotations.h b/llvm/tools/sycl-post-link/InstrumentalAnnotations.h new file mode 100644 index 0000000000000..ac0bc96184b85 --- /dev/null +++ b/llvm/tools/sycl-post-link/InstrumentalAnnotations.h @@ -0,0 +1,26 @@ +//===--- InstrumentalAnnotations.h - SYCL Instrumental Annotations Pass ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass which adds instrumental calls to annotate SYCL +// synchronization instrucations. This can be used for kernel profiling. +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class InstrumentalAnnotationsPass : public PassInfoMixin { +public: + InstrumentalAnnotationsPass() = default; + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; + +} // namespace llvm diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index ea1ae9ee535b3..6695ddbb43b41 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -13,6 +13,7 @@ // - specialization constant intrinsic transformation //===----------------------------------------------------------------------===// +#include "InstrumentalAnnotations.h" #include "SPIRKernelParamOptInfo.h" #include "SpecConstants.h" @@ -108,6 +109,10 @@ static cl::opt SplitEsimd{"split-esimd", static cl::opt LowerEsimd{ "lower-esimd", cl::desc("Lower ESIMD constructs"), cl::cat(PostLinkCat)}; +static cl::opt AddInstrumentationCalls{ + "add-instrumentation-calls", cl::desc("Add instrumentation calls"), + cl::cat(PostLinkCat)}; + static cl::opt OptLevelO0("O0", cl::desc("Optimization level 0. Similar to clang -O0"), cl::cat(PostLinkCat)); @@ -732,6 +737,18 @@ static TableFiles processOneModule(std::unique_ptr M, bool IsEsimd, if (IsEsimd && LowerEsimd) LowerEsimdConstructs(*M); + bool InstrumentalAnnotationsMet = false; + if (AddInstrumentationCalls) { + ModulePassManager RunInstrumentalAnnotations; + ModuleAnalysisManager MAM; + InstrumentalAnnotationsPass IAP; + // Register required analysis + MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); + RunInstrumentalAnnotations.addPass(IAP); + PreservedAnalyses Res = RunInstrumentalAnnotations.run(*M, MAM); + InstrumentalAnnotationsMet = !Res.areAllPreserved(); + } + std::map> GlobalsSet; bool DoSplit = SplitMode.getNumOccurrences() > 0; @@ -788,7 +805,8 @@ static TableFiles processOneModule(std::unique_ptr M, bool IsEsimd, // no spec constants and no splitting. // We cannot reuse input module for ESIMD code since it was transformed. bool CanReuseInputModule = !SpecConstsMet && (ResultModules.size() == 1) && - !SyclAndEsimdKernels && !IsEsimd; + !SyclAndEsimdKernels && !IsEsimd && + !InstrumentalAnnotationsMet; string_vector Files = CanReuseInputModule ? string_vector{InputFilename} From 573670bb056225163a16d04dfc649558e34db78e Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Wed, 3 Mar 2021 23:30:51 +0300 Subject: [PATCH 02/16] Test fix Signed-off-by: Dmitry Sidorov --- clang/test/Driver/sycl-instrumentation.cpp | 4 +- .../sycl-post-link/itt_annotations_O2.ll | 169 ------------------ 2 files changed, 2 insertions(+), 171 deletions(-) delete mode 100644 llvm/test/tools/sycl-post-link/itt_annotations_O2.ll diff --git a/clang/test/Driver/sycl-instrumentation.cpp b/clang/test/Driver/sycl-instrumentation.cpp index 60cbde196a5f2..b65421ac9385a 100644 --- a/clang/test/Driver/sycl-instrumentation.cpp +++ b/clang/test/Driver/sycl-instrumentation.cpp @@ -1,10 +1,10 @@ /// Check that instrumentation is disabled by default: -// RUN: %clang -### %s 2>&1 \ +// RUN: %clang -fsycl -### %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s // CHECK-DEFAULT-NOT: "-add-instrumentation-calls" /// Check "fsycl_device_code_add_instrumentation_calls" is passed to sycl post /// link tool: -// RUN: %clang -### -fsycl -fsycl-device-code-add-instrumentation-calls %s 2>&1 \ +// RUN: %clang -fsycl -### -fsycl-device-code-add-instrumentation-calls %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-ENABLED %s // CHECK-ENABLED: sycl-post-link{{.*}}"-add-instrumentation-calls" diff --git a/llvm/test/tools/sycl-post-link/itt_annotations_O2.ll b/llvm/test/tools/sycl-post-link/itt_annotations_O2.ll deleted file mode 100644 index f7d57a0e771a8..0000000000000 --- a/llvm/test/tools/sycl-post-link/itt_annotations_O2.ll +++ /dev/null @@ -1,169 +0,0 @@ -;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/KernelAndProgram/kernel-and-program.cpp -;; with following commands: -;; clang++ -fsycl -fsycl-device-only kernel-and-program.cpp -o kernel_and_program_optimized.bc -;; llvm-link kernel_and_program_optimized.bc -o kernel_and_program_optimized_link.bc --suppress-warnings - -; RUN: sycl-post-link -add-instrumentation-calls -split=auto --ir-output-only %s -S -o %t.ll -; RUN: FileCheck %s -input-file=%t.ll - -; ModuleID = 'kernel_and_program_optimized.bc' -source_filename = "llvm-link" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown-sycldevice" - -%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } -%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } - -$_ZTSZ4mainE10SingleTask = comdat any - -$_ZTSZ4mainE11ParallelFor = comdat any - -$_ZTSZ4mainE13ParallelForND = comdat any - -@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 -@__spirv_BuiltInGlobalOffset = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 - -; CHECK: @__spirv_BuiltInWorkgroupId = external dso_local addrspace(1) constant <3 x i64>, align 32 -; CHECK: @__spirv_BuiltInGlobalLinearId = external dso_local addrspace(1) constant i32, align 32 -; CHECK: @__spirv_BuiltInWorkgroupSize = external dso_local addrspace(1) constant <3 x i64>, align 32 - -; Function Attrs: norecurse willreturn -define weak_odr dso_local spir_kernel void @_ZTSZ4mainE10SingleTask(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { -entry: -; CHECK: _ZTSZ4mainE10SingleTask -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[CAST_WG_ID_1:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupId to i64 addrspace(4)* -; CHECK-NEXT: %[[CAST_WI_ID_1:[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* @__spirv_BuiltInGlobalLinearId to i64 addrspace(4)* -; CHECK-NEXT: %[[LOAD_WI_ID_1:[0-9a-zA-Z._]+]] = load i64, i64 addrspace(4)* %[[CAST_WI_ID_1]], align 8 -; CHECK-NEXT: %[[CAST_WG_SIZE_1:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupSize to <3 x i32> addrspace(4)* -; CHECK-NEXT: %[[LOAD_WG_SIZE_1:[0-9a-zA-Z._]+]] = load <3 x i32>, <3 x i32> addrspace(4)* %[[CAST_WG_SIZE_1]], align 16 -; CHECK-NEXT: %[[EXTRACT_WG_SIZE_1:[0-9a-zA-Z._]+]] = extractelement <3 x i32> %[[LOAD_WG_SIZE_1]], i32 0 -; CHECK-NEXT: call void @__itt_offload_wi_start(i64 addrspace(4)* %[[CAST_WG_ID_1]], i64 %[[LOAD_WI_ID_1]], i32 %[[EXTRACT_WG_SIZE_1]]) - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 - %1 = addrspacecast i64* %0 to i64 addrspace(4)* - %2 = load i64, i64 addrspace(4)* %1, align 8 - %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 - %ptridx.ascast.i9.i = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)* - %3 = load i32, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 - %add.i = add nsw i32 %3, 1 - store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 -; CHECK: call void @__itt_offload_wi_finish(i64 addrspace(4)* %[[CAST_WG_ID_1]], i64 %[[LOAD_WI_ID_1]] -; CHECK-NEXT: ret void - ret void -} - -; Function Attrs: norecurse willreturn -define weak_odr dso_local spir_kernel void @_ZTSZ4mainE11ParallelFor(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { -entry: -; CHECK: _ZTSZ4mainE11ParallelFor -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[CAST_WG_ID_2:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupId to i64 addrspace(4)* -; CHECK-NEXT: %[[CAST_WI_ID_2:[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* @__spirv_BuiltInGlobalLinearId to i64 addrspace(4)* -; CHECK-NEXT: %[[LOAD_WI_ID_2:[0-9a-zA-Z._]+]] = load i64, i64 addrspace(4)* %[[CAST_WI_ID_2]], align 8 -; CHECK-NEXT: %[[CAST_WG_SIZE_2:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupSize to <3 x i32> addrspace(4)* -; CHECK-NEXT: %[[LOAD_WG_SIZE_2:[0-9a-zA-Z._]+]] = load <3 x i32>, <3 x i32> addrspace(4)* %[[CAST_WG_SIZE_2]], align 16 -; CHECK-NEXT: %[[EXTRACT_WG_SIZE_2:[0-9a-zA-Z._]+]] = extractelement <3 x i32> %[[LOAD_WG_SIZE_2]], i32 0 -; CHECK-NEXT: call void @__itt_offload_wi_start(i64 addrspace(4)* %[[CAST_WG_ID_2]], i64 %[[LOAD_WI_ID_2]], i32 %[[EXTRACT_WG_SIZE_2]]) - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 - %1 = addrspacecast i64* %0 to i64 addrspace(4)* - %2 = load i64, i64 addrspace(4)* %1, align 8 - %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 - %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !9 - %4 = extractelement <3 x i64> %3, i64 0 - %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4 - %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* - %5 = load i32, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 - %add.i = add nsw i32 %5, 1 - store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 -; CHECK: call void @__itt_offload_wi_finish(i64 addrspace(4)* %[[CAST_WG_ID_2]], i64 %[[LOAD_WI_ID_2]] -; CHECK-NEXT: ret void - ret void -} - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSZ4mainE13ParallelForND(i32 addrspace(3)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8) local_unnamed_addr #1 comdat !kernel_arg_buffer_location !16 { -entry: -; CHECK: _ZTSZ4mainE13ParallelForND -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[CAST_WG_ID_3:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupId to i64 addrspace(4)* -; CHECK-NEXT: %[[CAST_WI_ID_3:[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* @__spirv_BuiltInGlobalLinearId to i64 addrspace(4)* -; CHECK-NEXT: %[[LOAD_WI_ID_3:[0-9a-zA-Z._]+]] = load i64, i64 addrspace(4)* %[[CAST_WI_ID_3]], align 8 -; CHECK-NEXT: %[[CAST_WG_SIZE_3:[0-9a-zA-Z._]+]] = addrspacecast <3 x i64> addrspace(1)* @__spirv_BuiltInWorkgroupSize to <3 x i32> addrspace(4)* -; CHECK-NEXT: %[[LOAD_WG_SIZE_3:[0-9a-zA-Z._]+]] = load <3 x i32>, <3 x i32> addrspace(4)* %[[CAST_WG_SIZE_3]], align 16 -; CHECK-NEXT: %[[EXTRACT_WG_SIZE_3:[0-9a-zA-Z._]+]] = extractelement <3 x i32> %[[LOAD_WG_SIZE_3]], i32 0 -; CHECK-NEXT: call void @__itt_offload_wi_start(i64 addrspace(4)* %[[CAST_WG_ID_3]], i64 %[[LOAD_WI_ID_3]], i32 %[[EXTRACT_WG_SIZE_3]]) - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0 - %1 = addrspacecast i64* %0 to i64 addrspace(4)* - %2 = load i64, i64 addrspace(4)* %1, align 8 - %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %2 - %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !17 - %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalOffset to <3 x i64> addrspace(4)*), align 32, !noalias !24 - %5 = extractelement <3 x i64> %3, i64 0 - %6 = extractelement <3 x i64> %4, i64 0 - %sub.i.i.i.i = sub i64 %5, %6 - %7 = trunc i64 %sub.i.i.i.i to i32 - %conv.i = and i32 %7, 1 - %xor.i = xor i32 %conv.i, 1 - %ptridx.i27.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %sub.i.i.i.i - %ptridx.ascast.i28.i = addrspacecast i32 addrspace(1)* %ptridx.i27.i to i32 addrspace(4)* - %8 = load i32, i32 addrspace(4)* %ptridx.ascast.i28.i, align 4, !tbaa !5 - %9 = zext i32 %conv.i to i64 - %ptridx.i23.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %9 - %ptridx.ascast.i24.i = addrspacecast i32 addrspace(3)* %ptridx.i23.i to i32 addrspace(4)* - store i32 %8, i32 addrspace(4)* %ptridx.ascast.i24.i, align 4, !tbaa !5 -; CHECK: call void @__itt_offload_wg_barrier(i8* null) -; CHECK-NEXT: tail call void @_Z22__spirv_ControlBarrierjjj -; CHECK-NEXT: call void @__itt_offload_wi_resume(i64 addrspace(4)* %[[CAST_WG_ID_3]], i64 %[[LOAD_WI_ID_3]]) - tail call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #3 - %conv6.i = zext i32 %xor.i to i64 - %ptridx.i17.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %conv6.i - %ptridx.ascast.i18.i = addrspacecast i32 addrspace(3)* %ptridx.i17.i to i32 addrspace(4)* - %10 = load i32, i32 addrspace(4)* %ptridx.ascast.i18.i, align 4, !tbaa !5 - store i32 %10, i32 addrspace(4)* %ptridx.ascast.i28.i, align 4, !tbaa !5 -; CHECK: call void @__itt_offload_wi_finish(i64 addrspace(4)* %[[CAST_WG_ID_3]], i64 %[[LOAD_WI_ID_3]] -; CHECK-NEXT: ret void - ret void -} - -; Function Attrs: convergent -declare dso_local void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) local_unnamed_addr #2 - -attributes #0 = { norecurse willreturn "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { convergent } - -!opencl.spir.version = !{!0} -!spirv.Source = !{!1} -!llvm.ident = !{!2} -!llvm.module.flags = !{!3} - -!0 = !{i32 1, i32 2} -!1 = !{i32 4, i32 100000} -!2 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git 3d2adc7b3ca269708bcabdc4a40352a5cacb4b9d)"} -!3 = !{i32 1, !"wchar_size", i32 4} -!4 = !{i32 -1, i32 -1, i32 -1, i32 -1} -!5 = !{!6, !6, i64 0} -!6 = !{!"int", !7, i64 0} -!7 = !{!"omnipotent char", !8, i64 0} -!8 = !{!"Simple C++ TBAA"} -!9 = !{!10, !12, !14} -!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} -!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} -!12 = distinct !{!12, !13, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} -!13 = distinct !{!13, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} -!14 = distinct !{!14, !15, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_2idIXT_EEEPS5_: %agg.result"} -!15 = distinct !{!15, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_2idIXT_EEEPS5_"} -!16 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} -!17 = !{!18, !20, !22} -!18 = distinct !{!18, !19, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} -!19 = distinct !{!19, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} -!20 = distinct !{!20, !21, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} -!21 = distinct !{!21, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} -!22 = distinct !{!22, !23, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!23 = distinct !{!23, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_7nd_itemIXT_EEEPS5_"} -!24 = !{!25, !27, !22} -!25 = distinct !{!25, !26, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} -!26 = distinct !{!26, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} -!27 = distinct !{!27, !28, !"_ZN7__spirvL16initGlobalOffsetILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} -!28 = distinct !{!28, !"_ZN7__spirvL16initGlobalOffsetILi1EN2cl4sycl2idILi1EEEEET0_v"} From 0f17bac5ace68e863020a27a5b0ca7dd4e5bbbc1 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Wed, 3 Mar 2021 23:34:45 +0300 Subject: [PATCH 03/16] Comments fixes Signed-off-by: Dmitry Sidorov --- llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp | 2 +- llvm/tools/sycl-post-link/sycl-post-link.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp b/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp index 4af136b02a396..8adda13450b78 100644 --- a/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp +++ b/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // A transformation pass which adds instrumental calls to annotate SYCL -// synchronization instrucations. This can be used for kernel profiling. +// synchronization instructions. This can be used for kernel profiling. //===----------------------------------------------------------------------===// #include "InstrumentalAnnotations.h" diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 6695ddbb43b41..869726155d81f 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -802,7 +802,7 @@ static TableFiles processOneModule(std::unique_ptr M, bool IsEsimd, { // Reuse input module with only regular SYCL kernels if there were - // no spec constants and no splitting. + // no spec constants, no splitting and no instrumentation calls. // We cannot reuse input module for ESIMD code since it was transformed. bool CanReuseInputModule = !SpecConstsMet && (ResultModules.size() == 1) && !SyclAndEsimdKernels && !IsEsimd && From 4d01c8b4dbb3c84f23e0f68092e59e67254543df Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Fri, 5 Mar 2021 13:00:56 +0300 Subject: [PATCH 04/16] Apply several comments Signed-off-by: Dmitry Sidorov --- clang/include/clang/Driver/Options.td | 2 +- clang/test/Driver/sycl-instrumentation.cpp | 2 +- .../tools/sycl-post-link/itt_atomic_load.ll | 11 ++-- .../tools/sycl-post-link/itt_atomic_store.ll | 11 ++-- llvm/test/tools/sycl-post-link/itt_barrier.ll | 3 + .../InstrumentalAnnotations.cpp | 61 ++++++++++--------- 6 files changed, 50 insertions(+), 40 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 203813fb3096b..7816d089e78ec 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2402,7 +2402,7 @@ def fsycl_device_code_lower_esimd : Flag<["-"], "fsycl-device-code-lower-esimd"> Flags<[CC1Option, CoreOption]>, HelpText<"Lower ESIMD-specific constructs">; def fno_sycl_device_code_lower_esimd : Flag<["-"], "fno-sycl-device-code-lower-esimd">, Flags<[CC1Option, CoreOption]>, HelpText<"Do not lower ESIMD-specific constructs">; -def fsycl_device_code_add_instrumentation_calls : Flag<["-"], "fsycl-device-code-add-instrumentation-calls">, +def fsycl_instrument_device_code : Flag<["-"], "fsycl-instrument-device-code">, Flags<[CC1Option, CoreOption]>, HelpText<"Add instrumentation intrinsics calls">; defm sycl_id_queries_fit_in_int: OptInFFlag<"sycl-id-queries-fit-in-int", "Assume", "Do not assume", " that SYCL ID queries fit within MAX_INT.", [CC1Option,CoreOption], LangOpts<"SYCLValueFitInMaxInt">>; def fsycl_use_bitcode : Flag<["-"], "fsycl-use-bitcode">, diff --git a/clang/test/Driver/sycl-instrumentation.cpp b/clang/test/Driver/sycl-instrumentation.cpp index b65421ac9385a..ee40b21ee59c8 100644 --- a/clang/test/Driver/sycl-instrumentation.cpp +++ b/clang/test/Driver/sycl-instrumentation.cpp @@ -5,6 +5,6 @@ /// Check "fsycl_device_code_add_instrumentation_calls" is passed to sycl post /// link tool: -// RUN: %clang -fsycl -### -fsycl-device-code-add-instrumentation-calls %s 2>&1 \ +// RUN: %clang -fsycl -### -fsycl-instrument-device-code %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-ENABLED %s // CHECK-ENABLED: sycl-post-link{{.*}}"-add-instrumentation-calls" diff --git a/llvm/test/tools/sycl-post-link/itt_atomic_load.ll b/llvm/test/tools/sycl-post-link/itt_atomic_load.ll index e858130061de2..6b359dc634c4c 100644 --- a/llvm/test/tools/sycl-post-link/itt_atomic_load.ll +++ b/llvm/test/tools/sycl-post-link/itt_atomic_load.ll @@ -1,3 +1,6 @@ +;; The test serves a purpose to check if Atomic load instruction is being +;; annotated during sycl-post-link +;; ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp ;; with following commands: ;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc @@ -44,9 +47,9 @@ if.end.i: ; preds = %entry %9 = addrspacecast i64* %8 to i64 addrspace(4)* %10 = load i64, i64 addrspace(4)* %9, align 8 %add.ptr.i34 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %10 -; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 [[ATOMIC_INST_1:[0-9]+]], i32 [[MEM_ORDER_1:[0-9]+]]) +; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 0, i32 [[MEM_ORDER_1:[0-9]+]]) ; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 [[MEM_ORDER_1]]) -; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 [[ATOMIC_INST_1]], i32 [[MEM_ORDER_1]]) +; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 0, i32 [[MEM_ORDER_1]]) %call3.i.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i34, i32 1, i32 896) #2 %ptridx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4 %ptridx.ascast.i.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i.i to i32 addrspace(4)* @@ -78,9 +81,9 @@ entry: %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %5 %6 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !19 %7 = extractelement <3 x i64> %6, i64 0 -; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 [[ATOMIC_INST_2:[0-9]+]], i32 [[MEM_ORDER_2:[0-9]+]]) +; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 0, i32 [[MEM_ORDER_2:[0-9]+]]) ; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 [[MEM_ORDER_2]]) -; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 [[ATOMIC_INST_2]], i32 [[MEM_ORDER_2]]) +; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 0, i32 [[MEM_ORDER_2]]) %call3.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i32, i32 1, i32 896) #2 %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %7 %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* diff --git a/llvm/test/tools/sycl-post-link/itt_atomic_store.ll b/llvm/test/tools/sycl-post-link/itt_atomic_store.ll index f8eb427a483fc..7a066a2837561 100644 --- a/llvm/test/tools/sycl-post-link/itt_atomic_store.ll +++ b/llvm/test/tools/sycl-post-link/itt_atomic_store.ll @@ -1,3 +1,6 @@ +;; The test serves a purpose to check if Atomic store instruction is being +;; annotated during sycl-post-link +;; ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp ;; with following commands: ;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc @@ -41,9 +44,9 @@ if.end.i: ; preds = %entry %7 = load i64, i64 addrspace(4)* %6, align 8 %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %7 %conv.i.i = trunc i64 %4 to i32 -; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 [[ATOMIC_INST_1:[0-9]+]], i32 [[MEM_ORDER_1:[0-9]+]]) +; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 1, i32 [[MEM_ORDER_1:[0-9]+]]) ; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 [[MEM_ORDER_1]] -; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 [[ATOMIC_INST_1]], i32 [[MEM_ORDER_1]]) +; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 1, i32 [[MEM_ORDER_1]]) tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i.i) #2 br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit @@ -69,9 +72,9 @@ entry: %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15 %4 = extractelement <3 x i64> %3, i64 0 %conv.i = trunc i64 %4 to i32 -; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 [[ATOMIC_INST_2:[0-9]+]], i32 [[MEM_ORDER_2:[0-9]+]]) +; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 1, i32 [[MEM_ORDER_2:[0-9]+]]) ; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 [[MEM_ORDER_2]] -; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 [[ATOMIC_INST_2]], i32 [[MEM_ORDER_2]]) +; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 1, i32 [[MEM_ORDER_2]]) tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i) #2 ; CHECK: call void @__itt_spirv_wi_finish_wrapper() ; CHECK-NEXT: ret void diff --git a/llvm/test/tools/sycl-post-link/itt_barrier.ll b/llvm/test/tools/sycl-post-link/itt_barrier.ll index 475dd2bf977a4..41866dab91083 100644 --- a/llvm/test/tools/sycl-post-link/itt_barrier.ll +++ b/llvm/test/tools/sycl-post-link/itt_barrier.ll @@ -1,3 +1,6 @@ +;; The test serves a purpose to check if barrier instruction is being annotated +;; during sycl-post-link +;; ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/KernelAndProgram/kernel-and-program.cpp ;; with following commands: ;; clang++ -fsycl -fsycl-device-only kernel-and-program.cpp -o kernel_and_program_optimized.bc diff --git a/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp b/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp index 8adda13450b78..36ed2f939881b 100644 --- a/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp +++ b/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp @@ -56,6 +56,8 @@ * Purpose of this pass is to add wrapper calls to these instructions. */ +using namespace llvm; + namespace { constexpr char SPIRV_CONTROL_BARRIER[] = "__spirv_ControlBarrier"; constexpr char SPIRV_GROUP_ALL[] = "__spirv_GroupAll"; @@ -78,9 +80,6 @@ constexpr char ITT_ANNOTATION_WI_FINISH[] = "__itt_spirv_wi_finish_wrapper"; constexpr char ITT_ANNOTATION_WG_BARRIER[] = "__itt_spirv_wg_barrier_wrapper"; constexpr char ITT_ANNOTATION_ATOMIC_START[] = "__itt_sync_atomic_op_start"; constexpr char ITT_ANNOTATION_ATOMIC_FINISH[] = "__itt_sync_atomic_op_finish"; -} // namespace - -namespace llvm { // TODO: move to a separate header // Check for calling convention of a function. If it's spir_kernel - consider @@ -144,8 +143,8 @@ bool insertAtomicInstrumentationCall(Module &M, StringRef Name, else AtomicOp = ConstantInt::get(Int32Ty, 2); // TODO: Third parameter of Atomic Start/Finish annotation is an ordering - // semanticof the instruction, encoded into a value of enum, defined like this - // on user's/profiler's side: + // semantic of the instruction, encoded into a value of enum, defined like + // this on user's/profiler's side: // enum __itt_atomic_mem_order_t // { // __itt_mem_order_relaxed = 0, @@ -154,13 +153,15 @@ bool insertAtomicInstrumentationCall(Module &M, StringRef Name, // } // which isn't 1:1 mapped on SPIR-V memory ordering mask, need to align it. ConstantInt *MemSemantic = dyn_cast(AtomicFun->getArgOperand(2)); - ArrayRef Args = {Ptr, AtomicOp, MemSemantic}; + Value *Args[] = {Ptr, AtomicOp, MemSemantic}; Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position); assert(InstrumentationCall && "Instrumentation call creation failed"); return true; } +} // namespace + PreservedAnalyses InstrumentalAnnotationsPass::run(Module &M, ModuleAnalysisManager &MAM) { bool IRModified = false; @@ -186,28 +187,30 @@ PreservedAnalyses InstrumentalAnnotationsPass::run(Module &M, IRModified |= insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_FINISH, RI); for (Instruction &I : BB) { - if (CallInst *CI = dyn_cast(&I)) { - if (Function *Callee = CI->getCalledFunction()) { - StringRef CalleeName = Callee->getName(); - // Annotate barrier and other cross WG calls - if (std::any_of(SPIRVCrossWGInstuctions.begin(), - SPIRVCrossWGInstuctions.end(), - [&CalleeName](StringRef Name) { - return CalleeName.contains(Name); - })) { - Instruction *InstAfterBarrier = CI->getNextNode(); - IRModified |= insertSimpleInstrumentationCall( - M, ITT_ANNOTATION_WG_BARRIER, CI); - IRModified |= insertSimpleInstrumentationCall( - M, ITT_ANNOTATION_WI_RESUME, InstAfterBarrier); - } else if (CalleeName.contains(SPIRV_ATOMIC_INST)) { - Instruction *InstAfterAtomic = CI->getNextNode(); - IRModified |= insertAtomicInstrumentationCall( - M, ITT_ANNOTATION_ATOMIC_START, CI, CI); - IRModified |= insertAtomicInstrumentationCall( - M, ITT_ANNOTATION_ATOMIC_FINISH, CI, InstAfterAtomic); - } - } + CallInst *CI = dyn_cast(&I); + if (!CI) + continue; + Function *Callee = CI->getCalledFunction(); + if (!Callee) + continue; + StringRef CalleeName = Callee->getName(); + // Annotate barrier and other cross WG calls + if (std::any_of(SPIRVCrossWGInstuctions.begin(), + SPIRVCrossWGInstuctions.end(), + [&CalleeName](StringRef Name) { + return CalleeName.contains(Name); + })) { + Instruction *InstAfterBarrier = CI->getNextNode(); + IRModified |= insertSimpleInstrumentationCall( + M, ITT_ANNOTATION_WG_BARRIER, CI); + IRModified |= insertSimpleInstrumentationCall( + M, ITT_ANNOTATION_WI_RESUME, InstAfterBarrier); + } else if (CalleeName.contains(SPIRV_ATOMIC_INST)) { + Instruction *InstAfterAtomic = CI->getNextNode(); + IRModified |= insertAtomicInstrumentationCall( + M, ITT_ANNOTATION_ATOMIC_START, CI, CI); + IRModified |= insertAtomicInstrumentationCall( + M, ITT_ANNOTATION_ATOMIC_FINISH, CI, InstAfterAtomic); } } } @@ -215,5 +218,3 @@ PreservedAnalyses InstrumentalAnnotationsPass::run(Module &M, return IRModified ? PreservedAnalyses::none() : PreservedAnalyses::all(); } - -} // namespace llvm From d5839841a0de0ba4006ea3183df0adb2b0112ea3 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Wed, 10 Mar 2021 15:30:44 +0300 Subject: [PATCH 05/16] Move the pass from sycl-post-link to FE Signed-off-by: Dmitry Sidorov --- clang/lib/CodeGen/BackendUtil.cpp | 6 ++- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 10 +++-- .../Instrumentation/SYCLITTAnnotations.h} | 7 +-- .../Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/SYCLITTAnnotations.cpp} | 44 ++++++++++++++++--- .../SYCLITTAnnotations}/itt_atomic_load.ll | 0 .../SYCLITTAnnotations}/itt_atomic_store.ll | 0 .../SYCLITTAnnotations}/itt_barrier.ll | 0 llvm/tools/opt/opt.cpp | 1 + llvm/tools/sycl-post-link/CMakeLists.txt | 1 - llvm/tools/sycl-post-link/sycl-post-link.cpp | 16 +------ 13 files changed, 57 insertions(+), 32 deletions(-) rename llvm/{tools/sycl-post-link/InstrumentalAnnotations.h => include/llvm/Transforms/Instrumentation/SYCLITTAnnotations.h} (76%) rename llvm/{tools/sycl-post-link/InstrumentalAnnotations.cpp => lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp} (88%) rename llvm/test/{tools/sycl-post-link => Transforms/SYCLITTAnnotations}/itt_atomic_load.ll (100%) rename llvm/test/{tools/sycl-post-link => Transforms/SYCLITTAnnotations}/itt_atomic_store.ll (100%) rename llvm/test/{tools/sycl-post-link => Transforms/SYCLITTAnnotations}/itt_barrier.ll (100%) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index db1a71462df88..140136f660634 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -75,6 +75,7 @@ #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" +#include "llvm/Transforms/Instrumentation/SYCLITTAnnotations.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" @@ -839,7 +840,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.populateFunctionPassManager(FPM); PMBuilder.populateModulePassManager(MPM); - // Customize the tail of the module passes list for the ESIMD extension. if (LangOpts.SYCLIsDevice && LangOpts.SYCLExplicitSIMD && CodeGenOpts.OptimizationLevel != 0) { @@ -953,7 +953,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, legacy::FunctionPassManager PerFunctionPasses(TheModule); PerFunctionPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - // ESIMD extension always requires lowering of certain IR constructs, such as // ESIMD C++ intrinsics, as the last FE step. if (LangOpts.SYCLIsDevice && LangOpts.SYCLExplicitSIMD) @@ -979,6 +978,9 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, if (LangOpts.SYCLIsDevice && LangOpts.SYCLExplicitSIMD) PerModulePasses.add(createGenXSPIRVWriterAdaptorPass()); + if (llvm::Triple(TheModule->getTargetTriple()).isSPIR()) + PerModulePasses.add(createSYCLITTAnnotationsPass()); + switch (Action) { case Backend_EmitNothing: break; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3f5b7962774c4..00c1775622563 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8285,7 +8285,7 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, ArgStringList CmdArgs; // See if device code instrumentation is requested - if (TCArgs.hasArg(options::OPT_fsycl_device_code_add_instrumentation_calls)) + if (TCArgs.hasArg(options::OPT_fsycl_instrument_device_code)) addArgs(CmdArgs, TCArgs, {"-add-instrumentation-calls"}); // See if device code splitting is requested diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index acc7273dfdf61..8f8f99824680e 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -427,6 +427,7 @@ void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGLegacyPassPass(PassRegistry &); void initializeSYCLLowerWGScopeLegacyPassPass(PassRegistry &); void initializeSYCLLowerESIMDLegacyPassPass(PassRegistry &); +void initializeSYCLITTAnnotationsLegacyPassPass(PassRegistry &); void initializeESIMDLowerLoadStorePass(PassRegistry &); void initializeESIMDLowerVecArgLegacyPassPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index f25604383627e..46f8e6ce729b3 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -48,6 +48,7 @@ #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" +#include "llvm/Transforms/Instrumentation/SYCLITTAnnotations.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" @@ -200,10 +201,11 @@ namespace { (void) llvm::createMergeFunctionsPass(); (void) llvm::createMergeICmpsLegacyPass(); (void) llvm::createExpandMemCmpPass(); - (void)llvm::createSYCLLowerWGScopePass(); - (void)llvm::createSYCLLowerESIMDPass(); - (void)llvm::createESIMDLowerLoadStorePass(); - (void)llvm::createESIMDLowerVecArgPass(); + (void) llvm::createSYCLLowerWGScopePass(); + (void) llvm::createSYCLLowerESIMDPass(); + (void) llvm::createESIMDLowerLoadStorePass(); + (void) llvm::createESIMDLowerVecArgPass(); + (void) llvm::createSYCLITTAnnotationsPass(); std::string buf; llvm::raw_string_ostream os(buf); (void) llvm::createPrintModulePass(os); diff --git a/llvm/tools/sycl-post-link/InstrumentalAnnotations.h b/llvm/include/llvm/Transforms/Instrumentation/SYCLITTAnnotations.h similarity index 76% rename from llvm/tools/sycl-post-link/InstrumentalAnnotations.h rename to llvm/include/llvm/Transforms/Instrumentation/SYCLITTAnnotations.h index ac0bc96184b85..99d778981c80b 100644 --- a/llvm/tools/sycl-post-link/InstrumentalAnnotations.h +++ b/llvm/include/llvm/Transforms/Instrumentation/SYCLITTAnnotations.h @@ -1,4 +1,4 @@ -//===--- InstrumentalAnnotations.h - SYCL Instrumental Annotations Pass ---===// +//===----- SYCLITTAnnotations.h - SYCL Instrumental Annotations Pass ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -17,10 +17,11 @@ namespace llvm { -class InstrumentalAnnotationsPass : public PassInfoMixin { +class SYCLITTAnnotationsPass : public PassInfoMixin { public: - InstrumentalAnnotationsPass() = default; PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); }; +ModulePass *createSYCLITTAnnotationsPass(); + } // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 3b29c3df64296..28abc7523210b 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_component_library(LLVMInstrumentation PGOMemOPSizeOpt.cpp PoisonChecking.cpp SanitizerCoverage.cpp + SYCLITTAnnotations.cpp ValueProfileCollector.cpp ThreadSanitizer.cpp HWAddressSanitizer.cpp diff --git a/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp similarity index 88% rename from llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp rename to llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp index 36ed2f939881b..03e5e866062ed 100644 --- a/llvm/tools/sycl-post-link/InstrumentalAnnotations.cpp +++ b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp @@ -1,4 +1,4 @@ -//===-- InstrumentalAnnotations.cpp - SYCL Instrumental Annotations Pass --===// +//===---- SYCLITTAnnotations.cpp - SYCL Instrumental Annotations Pass -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,8 +10,9 @@ // synchronization instructions. This can be used for kernel profiling. //===----------------------------------------------------------------------===// -#include "InstrumentalAnnotations.h" +#include "llvm/Transforms/Instrumentation/SYCLITTAnnotations.h" +#include "llvm/InitializePasses.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -81,14 +82,45 @@ constexpr char ITT_ANNOTATION_WG_BARRIER[] = "__itt_spirv_wg_barrier_wrapper"; constexpr char ITT_ANNOTATION_ATOMIC_START[] = "__itt_sync_atomic_op_start"; constexpr char ITT_ANNOTATION_ATOMIC_FINISH[] = "__itt_sync_atomic_op_finish"; -// TODO: move to a separate header +// Wrapper for the pass to make it working with the old pass manager +class SYCLITTAnnotationsLegacyPass : public ModulePass { +public: + static char ID; + SYCLITTAnnotationsLegacyPass() : ModulePass(ID) { + initializeSYCLITTAnnotationsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + // run the SYCLITTAnnotations pass on the specified module + bool runOnModule(Module &M) override { + ModuleAnalysisManager MAM; + auto PA = Impl.run(M, MAM); + return !PA.areAllPreserved(); + } + +private: + SYCLITTAnnotationsPass Impl; +}; + +} // namespace + +char SYCLITTAnnotationsLegacyPass::ID = 0; +INITIALIZE_PASS(SYCLITTAnnotationsLegacyPass, "SYCLITTAnnotations", + "Insert ITT annotations in SYCL code", false, false) + +// Public interface to the SYCLITTAnnotationsPass. +ModulePass *llvm::createSYCLITTAnnotationsPass() { + return new SYCLITTAnnotationsLegacyPass(); +} + +namespace { + // Check for calling convention of a function. If it's spir_kernel - consider // the function to be a SYCL kernel. bool isSyclKernel(Function &F) { return F.getCallingConv() == CallingConv::SPIR_KERNEL; } -// TODO: move to a separate header Instruction *emitCall(Module &M, Type *RetTy, StringRef FunctionName, ArrayRef Args, Instruction *InsertBefore) { SmallVector ArgTys(Args.size()); @@ -162,8 +194,8 @@ bool insertAtomicInstrumentationCall(Module &M, StringRef Name, } // namespace -PreservedAnalyses InstrumentalAnnotationsPass::run(Module &M, - ModuleAnalysisManager &MAM) { +PreservedAnalyses SYCLITTAnnotationsPass::run(Module &M, + ModuleAnalysisManager &MAM) { bool IRModified = false; std::vector SPIRVCrossWGInstuctions = { SPIRV_CONTROL_BARRIER, SPIRV_GROUP_ALL, SPIRV_GROUP_ANY, diff --git a/llvm/test/tools/sycl-post-link/itt_atomic_load.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/itt_atomic_load.ll rename to llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll diff --git a/llvm/test/tools/sycl-post-link/itt_atomic_store.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/itt_atomic_store.ll rename to llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll diff --git a/llvm/test/tools/sycl-post-link/itt_barrier.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/itt_barrier.ll rename to llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index ab14829b7e03b..f14b766433669 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -576,6 +576,7 @@ int main(int argc, char **argv) { initializeTypePromotionPass(Registry); initializeSYCLLowerWGScopeLegacyPassPass(Registry); initializeSYCLLowerESIMDLegacyPassPass(Registry); + initializeSYCLITTAnnotationsLegacyPassPass(Registry); initializeESIMDLowerLoadStorePass(Registry); initializeESIMDLowerVecArgLegacyPassPass(Registry); diff --git a/llvm/tools/sycl-post-link/CMakeLists.txt b/llvm/tools/sycl-post-link/CMakeLists.txt index 31a868847c534..98ef96a6b5aff 100644 --- a/llvm/tools/sycl-post-link/CMakeLists.txt +++ b/llvm/tools/sycl-post-link/CMakeLists.txt @@ -19,7 +19,6 @@ add_llvm_tool(sycl-post-link sycl-post-link.cpp SPIRKernelParamOptInfo.cpp SpecConstants.cpp - InstrumentalAnnotations.cpp ADDITIONAL_HEADER_DIRS ${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 869726155d81f..c54de1ba8e9b7 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -13,7 +13,6 @@ // - specialization constant intrinsic transformation //===----------------------------------------------------------------------===// -#include "InstrumentalAnnotations.h" #include "SPIRKernelParamOptInfo.h" #include "SpecConstants.h" @@ -737,18 +736,6 @@ static TableFiles processOneModule(std::unique_ptr M, bool IsEsimd, if (IsEsimd && LowerEsimd) LowerEsimdConstructs(*M); - bool InstrumentalAnnotationsMet = false; - if (AddInstrumentationCalls) { - ModulePassManager RunInstrumentalAnnotations; - ModuleAnalysisManager MAM; - InstrumentalAnnotationsPass IAP; - // Register required analysis - MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); - RunInstrumentalAnnotations.addPass(IAP); - PreservedAnalyses Res = RunInstrumentalAnnotations.run(*M, MAM); - InstrumentalAnnotationsMet = !Res.areAllPreserved(); - } - std::map> GlobalsSet; bool DoSplit = SplitMode.getNumOccurrences() > 0; @@ -805,8 +792,7 @@ static TableFiles processOneModule(std::unique_ptr M, bool IsEsimd, // no spec constants, no splitting and no instrumentation calls. // We cannot reuse input module for ESIMD code since it was transformed. bool CanReuseInputModule = !SpecConstsMet && (ResultModules.size() == 1) && - !SyclAndEsimdKernels && !IsEsimd && - !InstrumentalAnnotationsMet; + !SyclAndEsimdKernels && !IsEsimd; string_vector Files = CanReuseInputModule ? string_vector{InputFilename} From 1014db00a4ab982cd29b47c1719c08528e8782ff Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Wed, 10 Mar 2021 17:19:53 +0300 Subject: [PATCH 06/16] Rename Signed-off-by: Dmitry Sidorov --- .../Instrumentation/SYCLITTAnnotations.cpp | 13 +++++---- .../SYCLITTAnnotations/itt_atomic_load.ll | 28 +++++++++---------- .../SYCLITTAnnotations/itt_atomic_store.ll | 28 +++++++++---------- .../SYCLITTAnnotations/itt_barrier.ll | 28 +++++++++---------- llvm/tools/sycl-post-link/sycl-post-link.cpp | 6 +--- 5 files changed, 47 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp index 03e5e866062ed..645ccfe718663 100644 --- a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp +++ b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp @@ -75,12 +75,13 @@ constexpr char SPIRV_GROUP_SMAX[] = "__spirv_GroupSMax"; constexpr char SPIRV_ATOMIC_INST[] = "__spirv_Atomic"; constexpr char SPIRV_ATOMIC_LOAD[] = "__spirv_AtomicLoad"; constexpr char SPIRV_ATOMIC_STORE[] = "__spirv_AtomicSTORE"; -constexpr char ITT_ANNOTATION_WI_START[] = "__itt_spirv_wi_start_wrapper"; -constexpr char ITT_ANNOTATION_WI_RESUME[] = "__itt_spirv_wi_resume_wrapper"; -constexpr char ITT_ANNOTATION_WI_FINISH[] = "__itt_spirv_wi_finish_wrapper"; -constexpr char ITT_ANNOTATION_WG_BARRIER[] = "__itt_spirv_wg_barrier_wrapper"; -constexpr char ITT_ANNOTATION_ATOMIC_START[] = "__itt_sync_atomic_op_start"; -constexpr char ITT_ANNOTATION_ATOMIC_FINISH[] = "__itt_sync_atomic_op_finish"; +constexpr char ITT_ANNOTATION_WI_START[] = "__itt_offload_wi_start_wrapper"; +constexpr char ITT_ANNOTATION_WI_RESUME[] = "__itt_offload_wi_resume_wrapper"; +constexpr char ITT_ANNOTATION_WI_FINISH[] = "__itt_offload_wi_finish_wrapper"; +constexpr char ITT_ANNOTATION_WG_BARRIER[] = "__itt_offload_wg_barrier_wrapper"; +constexpr char ITT_ANNOTATION_ATOMIC_START[] = "__itt_offload_atomic_op_start"; +constexpr char ITT_ANNOTATION_ATOMIC_FINISH[] = + "__itt_offload_atomic_op_finish"; // Wrapper for the pass to make it working with the old pass manager class SYCLITTAnnotationsLegacyPass : public ModulePass { diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll index 6b359dc634c4c..902417a7459a5 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll @@ -4,10 +4,8 @@ ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp ;; with following commands: ;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc -;; llvm-link load.bc -o load_link.bc --suppress-warnings -; RUN: sycl-post-link -add-instrumentation-calls -split=auto --ir-output-only %s -S -o %t.ll -; RUN: FileCheck %s -input-file=%t.ll +; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s ; ModuleID = 'load.bc' source_filename = "/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/load.cpp" @@ -29,7 +27,7 @@ define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail19__pf_kernel_wr entry: ; CHECK-LABEL: _ZTSN2cl4sycl6detail19__pf_kernel_wrapperI11load_kernelIiEEE( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_, i64 0, i32 0, i32 0, i64 0 %1 = addrspacecast i64* %0 to i64 addrspace(4)* %2 = load i64, i64 addrspace(4)* %1, align 8 @@ -47,9 +45,9 @@ if.end.i: ; preds = %entry %9 = addrspacecast i64* %8 to i64 addrspace(4)* %10 = load i64, i64 addrspace(4)* %9, align 8 %add.ptr.i34 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %10 -; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 0, i32 [[MEM_ORDER_1:[0-9]+]]) +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 0, i32 [[MEM_ORDER_1:[0-9]+]]) ; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 [[MEM_ORDER_1]]) -; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 0, i32 [[MEM_ORDER_1]]) +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 0, i32 [[MEM_ORDER_1]]) %call3.i.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i34, i32 1, i32 896) #2 %ptridx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4 %ptridx.ascast.i.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i.i to i32 addrspace(4)* @@ -57,7 +55,7 @@ if.end.i: ; preds = %entry br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI11load_kernelIiEZZ9load_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit _ZZN2cl4sycl7handler24parallel_for_lambda_implI11load_kernelIiEZZ9load_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit: ; preds = %entry, %if.end.i -; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK: call void @__itt_offload_wi_finish_wrapper() ; CHECK-NEXT: ret void ret void } @@ -70,7 +68,7 @@ define weak_odr dso_local spir_kernel void @_ZTS11load_kernelIiE(i32 addrspace(1 entry: ; CHECK-LABEL: _ZTS11load_kernelIiE( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 %1 = addrspacecast i64* %0 to i64 addrspace(4)* %2 = load i64, i64 addrspace(4)* %1, align 8 @@ -81,22 +79,22 @@ entry: %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %5 %6 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !19 %7 = extractelement <3 x i64> %6, i64 0 -; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 0, i32 [[MEM_ORDER_2:[0-9]+]]) +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 0, i32 [[MEM_ORDER_2:[0-9]+]]) ; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 [[MEM_ORDER_2]]) -; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 0, i32 [[MEM_ORDER_2]]) +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 0, i32 [[MEM_ORDER_2]]) %call3.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i32, i32 1, i32 896) #2 %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %7 %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* store i32 %call3.i.i.i, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14 -; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK: call void @__itt_offload_wi_finish_wrapper() ; CHECK-NEXT: ret void ret void } -; CHECK: declare void @__itt_spirv_wi_start_wrapper() -; CHECK: declare void @__itt_sync_atomic_op_start(i32 addrspace(1)*, i32, i32) -; CHECK: declare void @__itt_sync_atomic_op_finish(i32 addrspace(1)*, i32, i32) -; CHECK: declare void @__itt_spirv_wi_finish_wrapper() +; CHECK: declare void @__itt_offload_wi_start_wrapper() +; CHECK: declare void @__itt_offload_atomic_op_start(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_offload_wi_finish_wrapper() attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/load.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll index 7a066a2837561..d9d79024c72a5 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll @@ -4,10 +4,8 @@ ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp ;; with following commands: ;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc -;; llvm-link load.bc -o load_link.bc --suppress-warnings -; RUN: sycl-post-link -add-instrumentation-calls -split=auto --ir-output-only %s -S -o %t.ll -; RUN: FileCheck %s -input-file=%t.ll +; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s ; ModuleID = 'store.bc' source_filename = "/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/store.cpp" @@ -29,7 +27,7 @@ define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail19__pf_kernel_wr entry: ; CHECK-LABEL: _ZTSN2cl4sycl6detail19__pf_kernel_wrapperI12store_kernelIiEEE( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_, i64 0, i32 0, i32 0, i64 0 %1 = addrspacecast i64* %0 to i64 addrspace(4)* %2 = load i64, i64 addrspace(4)* %1, align 8 @@ -44,14 +42,14 @@ if.end.i: ; preds = %entry %7 = load i64, i64 addrspace(4)* %6, align 8 %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %7 %conv.i.i = trunc i64 %4 to i32 -; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 1, i32 [[MEM_ORDER_1:[0-9]+]]) +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 1, i32 [[MEM_ORDER_1:[0-9]+]]) ; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 [[MEM_ORDER_1]] -; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 1, i32 [[MEM_ORDER_1]]) +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 1, i32 [[MEM_ORDER_1]]) tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i.i) #2 br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit _ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit: ; preds = %entry, %if.end.i -; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK: call void @__itt_offload_wi_finish_wrapper() ; CHECK-NEXT: ret void ret void } @@ -64,7 +62,7 @@ define weak_odr dso_local spir_kernel void @_ZTS12store_kernelIiE(i32 addrspace( entry: ; CHECK-LABEL: _ZTS12store_kernelIiE( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 %1 = addrspacecast i64* %0 to i64 addrspace(4)* %2 = load i64, i64 addrspace(4)* %1, align 8 @@ -72,19 +70,19 @@ entry: %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15 %4 = extractelement <3 x i64> %3, i64 0 %conv.i = trunc i64 %4 to i32 -; CHECK: call void @__itt_sync_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 1, i32 [[MEM_ORDER_2:[0-9]+]]) +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 1, i32 [[MEM_ORDER_2:[0-9]+]]) ; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 [[MEM_ORDER_2]] -; CHECK-NEXT: call void @__itt_sync_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 1, i32 [[MEM_ORDER_2]]) +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 1, i32 [[MEM_ORDER_2]]) tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i) #2 -; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK: call void @__itt_offload_wi_finish_wrapper() ; CHECK-NEXT: ret void ret void } -; CHECK: declare void @__itt_spirv_wi_start_wrapper() -; CHECK: declare void @__itt_sync_atomic_op_start(i32 addrspace(1)*, i32, i32) -; CHECK: declare void @__itt_sync_atomic_op_finish(i32 addrspace(1)*, i32, i32) -; CHECK: declare void @__itt_spirv_wi_finish_wrapper() +; CHECK: declare void @__itt_offload_wi_start_wrapper() +; CHECK: declare void @__itt_offload_atomic_op_start(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_offload_wi_finish_wrapper() attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/store.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll index 41866dab91083..1455d4897342a 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll @@ -4,10 +4,8 @@ ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/KernelAndProgram/kernel-and-program.cpp ;; with following commands: ;; clang++ -fsycl -fsycl-device-only kernel-and-program.cpp -o kernel_and_program_optimized.bc -;; llvm-link kernel_and_program_optimized.bc -o kernel_and_program_optimized_link.bc --suppress-warnings -; RUN: sycl-post-link -add-instrumentation-calls -split=auto --ir-output-only %s -S -o %t.ll -; RUN: FileCheck %s -input-file=%t.ll +; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s ; ModuleID = 'kernel_and_program_optimized.bc' source_filename = "llvm-link" @@ -31,7 +29,7 @@ define weak_odr dso_local spir_kernel void @_ZTSZ4mainE10SingleTask(i32 addrspac entry: ; CHECK-LABEL: _ZTSZ4mainE10SingleTask( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 %1 = addrspacecast i64* %0 to i64 addrspace(4)* %2 = load i64, i64 addrspace(4)* %1, align 8 @@ -40,7 +38,7 @@ entry: %3 = load i32, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 %add.i = add nsw i32 %3, 1 store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 -; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK: call void @__itt_offload_wi_finish_wrapper() ; CHECK-NEXT: ret void ret void } @@ -50,7 +48,7 @@ define weak_odr dso_local spir_kernel void @_ZTSZ4mainE11ParallelFor(i32 addrspa entry: ; CHECK-LABEL: _ZTSZ4mainE11ParallelFor( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 %1 = addrspacecast i64* %0 to i64 addrspace(4)* %2 = load i64, i64 addrspace(4)* %1, align 8 @@ -62,7 +60,7 @@ entry: %5 = load i32, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 %add.i = add nsw i32 %5, 1 store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 -; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK: call void @__itt_offload_wi_finish_wrapper() ; CHECK-NEXT: ret void ret void } @@ -72,7 +70,7 @@ define weak_odr dso_local spir_kernel void @_ZTSZ4mainE13ParallelForND(i32 addrs entry: ; CHECK-LABEL: _ZTSZ4mainE13ParallelForND( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__itt_spirv_wi_start_wrapper() +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0 %1 = addrspacecast i64* %0 to i64 addrspace(4)* %2 = load i64, i64 addrspace(4)* %1, align 8 @@ -92,16 +90,16 @@ entry: %ptridx.i23.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %9 %ptridx.ascast.i24.i = addrspacecast i32 addrspace(3)* %ptridx.i23.i to i32 addrspace(4)* store i32 %8, i32 addrspace(4)* %ptridx.ascast.i24.i, align 4, !tbaa !5 -; CHECK: call void @__itt_spirv_wg_barrier_wrapper() +; CHECK: call void @__itt_offload_wg_barrier_wrapper() ; CHECK-NEXT: tail call void @_Z22__spirv_ControlBarrierjjj -; CHECK-NEXT: call void @__itt_spirv_wi_resume_wrapper() +; CHECK-NEXT: call void @__itt_offload_wi_resume_wrapper() tail call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #3 %conv6.i = zext i32 %xor.i to i64 %ptridx.i17.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %conv6.i %ptridx.ascast.i18.i = addrspacecast i32 addrspace(3)* %ptridx.i17.i to i32 addrspace(4)* %10 = load i32, i32 addrspace(4)* %ptridx.ascast.i18.i, align 4, !tbaa !5 store i32 %10, i32 addrspace(4)* %ptridx.ascast.i28.i, align 4, !tbaa !5 -; CHECK: call void @__itt_spirv_wi_finish_wrapper() +; CHECK: call void @__itt_offload_wi_finish_wrapper() ; CHECK-NEXT: ret void ret void } @@ -109,10 +107,10 @@ entry: ; Function Attrs: convergent declare dso_local void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) local_unnamed_addr #2 -; CHECK: declare void @__itt_spirv_wi_start_wrapper() -; CHECK: declare void @__itt_spirv_wi_finish_wrapper() -; CHECK: declare void @__itt_spirv_wg_barrier_wrapper() -; CHECK: declare void @__itt_spirv_wi_resume_wrapper() +; CHECK: declare void @__itt_offload_wi_start_wrapper() +; CHECK: declare void @__itt_offload_wi_finish_wrapper() +; CHECK: declare void @__itt_offload_wg_barrier_wrapper() +; CHECK: declare void @__itt_offload_wi_resume_wrapper() attributes #0 = { norecurse willreturn "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index c54de1ba8e9b7..b0aa736d74142 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -108,10 +108,6 @@ static cl::opt SplitEsimd{"split-esimd", static cl::opt LowerEsimd{ "lower-esimd", cl::desc("Lower ESIMD constructs"), cl::cat(PostLinkCat)}; -static cl::opt AddInstrumentationCalls{ - "add-instrumentation-calls", cl::desc("Add instrumentation calls"), - cl::cat(PostLinkCat)}; - static cl::opt OptLevelO0("O0", cl::desc("Optimization level 0. Similar to clang -O0"), cl::cat(PostLinkCat)); @@ -789,7 +785,7 @@ static TableFiles processOneModule(std::unique_ptr M, bool IsEsimd, { // Reuse input module with only regular SYCL kernels if there were - // no spec constants, no splitting and no instrumentation calls. + // no spec constants, no splitting. // We cannot reuse input module for ESIMD code since it was transformed. bool CanReuseInputModule = !SpecConstsMet && (ResultModules.size() == 1) && !SyclAndEsimdKernels && !IsEsimd; From e1d768a2712b20267751282af7cd755c01daaf1c Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Wed, 10 Mar 2021 18:42:45 +0300 Subject: [PATCH 07/16] Change driver part Signed-off-by: Dmitry Sidorov --- clang/include/clang/Basic/CodeGenOptions.def | 3 +++ clang/include/clang/Driver/Options.td | 4 +++- clang/lib/CodeGen/BackendUtil.cpp | 3 ++- clang/lib/Driver/ToolChains/Clang.cpp | 4 ---- .../kernel-simple-instrumentation.cpp | 18 ++++++++++++++++++ clang/test/Driver/sycl-instrumentation.cpp | 4 ++-- 6 files changed, 28 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 5c8af65326edb..75985291d0869 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -414,6 +414,9 @@ CODEGENOPT(PassByValueIsNoAlias, 1, 0) /// according to the field declaring type width. CODEGENOPT(AAPCSBitfieldWidth, 1, 1) +// Whether to instrument SYCL device code with ITT annotations +CODEGENOPT(SYCLITTAnnotations, 1, 0) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 7816d089e78ec..031262491fdfe 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2403,7 +2403,9 @@ def fsycl_device_code_lower_esimd : Flag<["-"], "fsycl-device-code-lower-esimd"> def fno_sycl_device_code_lower_esimd : Flag<["-"], "fno-sycl-device-code-lower-esimd">, Flags<[CC1Option, CoreOption]>, HelpText<"Do not lower ESIMD-specific constructs">; def fsycl_instrument_device_code : Flag<["-"], "fsycl-instrument-device-code">, - Flags<[CC1Option, CoreOption]>, HelpText<"Add instrumentation intrinsics calls">; + Group, Flags<[CC1Option, CoreOption]>, + HelpText<"Add ITT instrumentation intrinsics calls">, + MarshallingInfoFlag>; defm sycl_id_queries_fit_in_int: OptInFFlag<"sycl-id-queries-fit-in-int", "Assume", "Do not assume", " that SYCL ID queries fit within MAX_INT.", [CC1Option,CoreOption], LangOpts<"SYCLValueFitInMaxInt">>; def fsycl_use_bitcode : Flag<["-"], "fsycl-use-bitcode">, Flags<[CC1Option, CoreOption]>, HelpText<"Use LLVM bitcode instead of SPIR-V in fat objects">; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 140136f660634..e496db2e61564 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -978,7 +978,8 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, if (LangOpts.SYCLIsDevice && LangOpts.SYCLExplicitSIMD) PerModulePasses.add(createGenXSPIRVWriterAdaptorPass()); - if (llvm::Triple(TheModule->getTargetTriple()).isSPIR()) + if (llvm::Triple(TheModule->getTargetTriple()).isSPIR() && + CodeGenOpts.SYCLITTAnnotations) PerModulePasses.add(createSYCLITTAnnotationsPass()); switch (Action) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 00c1775622563..6f33a9c52cd2d 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8284,10 +8284,6 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, assert(isa(JA) && "Expecting SYCL post link job!"); ArgStringList CmdArgs; - // See if device code instrumentation is requested - if (TCArgs.hasArg(options::OPT_fsycl_instrument_device_code)) - addArgs(CmdArgs, TCArgs, {"-add-instrumentation-calls"}); - // See if device code splitting is requested if (Arg *A = TCArgs.getLastArg(options::OPT_fsycl_device_code_split_EQ)) { if (StringRef(A->getValue()) == "per_kernel") diff --git a/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp b/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp new file mode 100644 index 0000000000000..c68301c2ed469 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-instrument-device-code -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s + +// CHECK: kernel_function +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() +// CHECK: call void @__itt_offload_wi_finish_wrapper() +// CHECK-NEXT: ret void + +#include "Inputs/sycl.hpp" + +int main() { + cl::sycl::accessor accessorA; + cl::sycl::kernel_single_task( + [=]() { + accessorA.use(); + }); + return 0; +} diff --git a/clang/test/Driver/sycl-instrumentation.cpp b/clang/test/Driver/sycl-instrumentation.cpp index ee40b21ee59c8..8c1a1e80c5bef 100644 --- a/clang/test/Driver/sycl-instrumentation.cpp +++ b/clang/test/Driver/sycl-instrumentation.cpp @@ -1,4 +1,4 @@ -/// Check that instrumentation is disabled by default: +/// Check that SYCL ITT instrumentation is disabled by default: // RUN: %clang -fsycl -### %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s // CHECK-DEFAULT-NOT: "-add-instrumentation-calls" @@ -7,4 +7,4 @@ /// link tool: // RUN: %clang -fsycl -### -fsycl-instrument-device-code %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-ENABLED %s -// CHECK-ENABLED: sycl-post-link{{.*}}"-add-instrumentation-calls" +// CHECK-ENABLED: "-add-instrumentation-calls" From 825f58a2030c5d024094df883dfe0f003be370d5 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Thu, 11 Mar 2021 19:53:17 +0300 Subject: [PATCH 08/16] Few bug fixes Signed-off-by: Dmitry Sidorov --- clang/lib/Driver/ToolChains/Clang.cpp | 4 ++ clang/lib/Frontend/CompilerInvocation.cpp | 5 ++ .../Instrumentation/SYCLITTAnnotations.cpp | 72 ++++++++++++------- .../SYCLITTAnnotations/itt_atomic_load.ll | 14 ++-- .../SYCLITTAnnotations/itt_atomic_store.ll | 14 ++-- .../SYCLITTAnnotations/itt_barrier.ll | 2 +- 6 files changed, 71 insertions(+), 40 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6f33a9c52cd2d..7c4bf3061e7a8 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5866,6 +5866,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Forward -sycl-std option to -cc1 Args.AddLastArg(CmdArgs, options::OPT_sycl_std_EQ); + // Forward -fsycl-instrument-device-code option to cc1 + if (Args.hasArg(options::OPT_fsycl_instrument_device_code)) + CmdArgs.push_back("-fsycl-instrument-device-code"); + if (IsHIP) { if (Args.hasFlag(options::OPT_fhip_new_launch_api, options::OPT_fno_hip_new_launch_api, true)) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 12245434cc704..c47190acf803c 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1050,6 +1050,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, (Args.hasArg(OPT_fsycl_is_device) && T.isSPIR() && Args.hasArg(OPT_fno_sycl_early_optimizations)); + Opts.SYCLITTAnnotations = Args.hasArg(OPT_fsycl_instrument_device_code); + const llvm::Triple::ArchType DebugEntryValueArchs[] = { llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64, llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips, @@ -1083,6 +1085,9 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, if (!Opts.ProfileInstrumentUsePath.empty()) setPGOUseInstrumentor(Opts, Opts.ProfileInstrumentUsePath); + // Insert ITT annotations under the flag + Opts.SYCLITTAnnotations = Args.hasArg(OPT_fsycl_instrument_device_code); + if (const Arg *A = Args.getLastArg(OPT_ftime_report, OPT_ftime_report_EQ)) { Opts.TimePasses = true; diff --git a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp index 645ccfe718663..d7238765c50d1 100644 --- a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp +++ b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp @@ -60,21 +60,22 @@ using namespace llvm; namespace { -constexpr char SPIRV_CONTROL_BARRIER[] = "__spirv_ControlBarrier"; -constexpr char SPIRV_GROUP_ALL[] = "__spirv_GroupAll"; -constexpr char SPIRV_GROUP_ANY[] = "__spirv_GroupAny"; -constexpr char SPIRV_GROUP_BROADCAST[] = "__spirv_GroupBroadcast"; -constexpr char SPIRV_GROUP_IADD[] = "__spirv_GroupIAdd"; -constexpr char SPIRV_GROUP_FADD[] = "__spirv_GroupFAdd"; -constexpr char SPIRV_GROUP_FMIN[] = "__spirv_GroupFMin"; -constexpr char SPIRV_GROUP_UMIN[] = "__spirv_GroupUMin"; -constexpr char SPIRV_GROUP_SMIN[] = "__spirv_GroupSMin"; -constexpr char SPIRV_GROUP_FMAX[] = "__spirv_GroupFMax"; -constexpr char SPIRV_GROUP_UMAX[] = "__spirv_GroupUMax"; -constexpr char SPIRV_GROUP_SMAX[] = "__spirv_GroupSMax"; -constexpr char SPIRV_ATOMIC_INST[] = "__spirv_Atomic"; -constexpr char SPIRV_ATOMIC_LOAD[] = "__spirv_AtomicLoad"; -constexpr char SPIRV_ATOMIC_STORE[] = "__spirv_AtomicSTORE"; +constexpr char SPIRV_PREFIX[] = "__spirv_"; +constexpr char SPIRV_CONTROL_BARRIER[] = "ControlBarrier"; +constexpr char SPIRV_GROUP_ALL[] = "GroupAll"; +constexpr char SPIRV_GROUP_ANY[] = "GroupAny"; +constexpr char SPIRV_GROUP_BROADCAST[] = "GroupBroadcast"; +constexpr char SPIRV_GROUP_IADD[] = "GroupIAdd"; +constexpr char SPIRV_GROUP_FADD[] = "GroupFAdd"; +constexpr char SPIRV_GROUP_FMIN[] = "GroupFMin"; +constexpr char SPIRV_GROUP_UMIN[] = "GroupUMin"; +constexpr char SPIRV_GROUP_SMIN[] = "GroupSMin"; +constexpr char SPIRV_GROUP_FMAX[] = "GroupFMax"; +constexpr char SPIRV_GROUP_UMAX[] = "GroupUMax"; +constexpr char SPIRV_GROUP_SMAX[] = "GroupSMax"; +constexpr char SPIRV_ATOMIC_INST[] = "Atomic"; +constexpr char SPIRV_ATOMIC_LOAD[] = "AtomicLoad"; +constexpr char SPIRV_ATOMIC_STORE[] = "AtomicStore"; constexpr char ITT_ANNOTATION_WI_START[] = "__itt_offload_wi_start_wrapper"; constexpr char ITT_ANNOTATION_WI_RESUME[] = "__itt_offload_wi_resume_wrapper"; constexpr char ITT_ANNOTATION_WI_FINISH[] = "__itt_offload_wi_finish_wrapper"; @@ -158,7 +159,7 @@ bool insertAtomicInstrumentationCall(Module &M, StringRef Name, // annotation instructions we need Pointer and Memory Semantic arguments // taken from the original Atomic instruction. Value *Ptr = dyn_cast(AtomicFun->getArgOperand(0)); - StringRef AtomicName = AtomicFun->getName(); + StringRef AtomicName = AtomicFun->getCalledFunction()->getName(); Value *AtomicOp; // Second parameter of Atomic Start/Finish annotation is an Op code of // the instruction, encoded into a value of enum, defined like this on user's/ @@ -175,18 +176,33 @@ bool insertAtomicInstrumentationCall(Module &M, StringRef Name, AtomicOp = ConstantInt::get(Int32Ty, 1); else AtomicOp = ConstantInt::get(Int32Ty, 2); - // TODO: Third parameter of Atomic Start/Finish annotation is an ordering + // Third parameter of Atomic Start/Finish annotation is an ordering // semantic of the instruction, encoded into a value of enum, defined like // this on user's/profiler's side: // enum __itt_atomic_mem_order_t // { - // __itt_mem_order_relaxed = 0, - // __itt_mem_order_acquire = 1, - // __itt_mem_order_release = 2 + // __itt_mem_order_relaxed = 0, // SPIR-V 0x0 + // __itt_mem_order_acquire = 1, // SPIR-V 0x2 + // __itt_mem_order_release = 2, // SPIR-V 0x4 + // __itt_mem_order_acquire_release = 3 // SPIR-V 0x8 // } - // which isn't 1:1 mapped on SPIR-V memory ordering mask, need to align it. - ConstantInt *MemSemantic = dyn_cast(AtomicFun->getArgOperand(2)); - Value *Args[] = {Ptr, AtomicOp, MemSemantic}; + // which isn't 1:1 mapped on SPIR-V memory ordering mask (aside of a + // differencies in values between SYCL mem order and SPIR-V mem order, SYCL RT + // also applies Memory Semantic mask, like WorkgroupMemory (0x100)), need to + // align it. + uint64_t MemFlag = dyn_cast( + AtomicFun->getArgOperand(2))->getValue().getZExtValue(); + uint64_t Order; + if (MemFlag & 0x2) + Order = 1; + else if (MemFlag & 0x4) + Order = 2; + else if (MemFlag & 0x8) + Order = 3; + else + Order = 0; + Value *MemOrder = ConstantInt::get(Int32Ty, Order); + Value *Args[] = {Ptr, AtomicOp, MemOrder}; Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position); assert(InstrumentationCall && "Instrumentation call creation failed"); @@ -227,18 +243,24 @@ PreservedAnalyses SYCLITTAnnotationsPass::run(Module &M, if (!Callee) continue; StringRef CalleeName = Callee->getName(); + // Process only calls to functions which names starts with __spirv_ + size_t PrefixPosFound = CalleeName.find(SPIRV_PREFIX); + if (PrefixPosFound == StringRef::npos) + continue; + CalleeName = CalleeName.drop_front( + PrefixPosFound + /*len of SPIR-V prefix*/ 8); // Annotate barrier and other cross WG calls if (std::any_of(SPIRVCrossWGInstuctions.begin(), SPIRVCrossWGInstuctions.end(), [&CalleeName](StringRef Name) { - return CalleeName.contains(Name); + return CalleeName.startswith(Name); })) { Instruction *InstAfterBarrier = CI->getNextNode(); IRModified |= insertSimpleInstrumentationCall( M, ITT_ANNOTATION_WG_BARRIER, CI); IRModified |= insertSimpleInstrumentationCall( M, ITT_ANNOTATION_WI_RESUME, InstAfterBarrier); - } else if (CalleeName.contains(SPIRV_ATOMIC_INST)) { + } else if (CalleeName.startswith(SPIRV_ATOMIC_INST)) { Instruction *InstAfterAtomic = CI->getNextNode(); IRModified |= insertAtomicInstrumentationCall( M, ITT_ANNOTATION_ATOMIC_START, CI, CI); diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll index 902417a7459a5..5370be4ccb356 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll @@ -1,5 +1,5 @@ ;; The test serves a purpose to check if Atomic load instruction is being -;; annotated during sycl-post-link +;; annotated by SYCLITTAnnotations pass ;; ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp ;; with following commands: @@ -45,9 +45,9 @@ if.end.i: ; preds = %entry %9 = addrspacecast i64* %8 to i64 addrspace(4)* %10 = load i64, i64 addrspace(4)* %9, align 8 %add.ptr.i34 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %10 -; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 0, i32 [[MEM_ORDER_1:[0-9]+]]) -; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 [[MEM_ORDER_1]]) -; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 0, i32 [[MEM_ORDER_1]]) +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 0, i32 0) +; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896 +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 0, i32 0) %call3.i.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i34, i32 1, i32 896) #2 %ptridx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4 %ptridx.ascast.i.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i.i to i32 addrspace(4)* @@ -79,9 +79,9 @@ entry: %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %5 %6 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !19 %7 = extractelement <3 x i64> %6, i64 0 -; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 0, i32 [[MEM_ORDER_2:[0-9]+]]) -; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 [[MEM_ORDER_2]]) -; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 0, i32 [[MEM_ORDER_2]]) +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 0, i32 0) +; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 896) +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 0, i32 0) %call3.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i32, i32 1, i32 896) #2 %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %7 %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll index d9d79024c72a5..2ec9de0598e6a 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll @@ -1,5 +1,5 @@ ;; The test serves a purpose to check if Atomic store instruction is being -;; annotated during sycl-post-link +;; annotated by SYCLITTAnnotations pass ;; ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp ;; with following commands: @@ -42,9 +42,9 @@ if.end.i: ; preds = %entry %7 = load i64, i64 addrspace(4)* %6, align 8 %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %7 %conv.i.i = trunc i64 %4 to i32 -; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 1, i32 [[MEM_ORDER_1:[0-9]+]]) -; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 [[MEM_ORDER_1]] -; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 1, i32 [[MEM_ORDER_1]]) +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 1, i32 0 +; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896 +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 1, i32 0 tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i.i) #2 br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit @@ -70,9 +70,9 @@ entry: %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15 %4 = extractelement <3 x i64> %3, i64 0 %conv.i = trunc i64 %4 to i32 -; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 1, i32 [[MEM_ORDER_2:[0-9]+]]) -; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 [[MEM_ORDER_2]] -; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 1, i32 [[MEM_ORDER_2]]) +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 1, i32 0) +; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 896 +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 1, i32 0) tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i) #2 ; CHECK: call void @__itt_offload_wi_finish_wrapper() ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll index 1455d4897342a..e7761a8f3dce4 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll @@ -1,5 +1,5 @@ ;; The test serves a purpose to check if barrier instruction is being annotated -;; during sycl-post-link +;; by SYCLITTAnnotations pass ;; ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/KernelAndProgram/kernel-and-program.cpp ;; with following commands: From c0809feefff0b68dc025d1d33db141cbabdc7194 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Thu, 11 Mar 2021 22:55:19 +0300 Subject: [PATCH 09/16] Small fixes Signed-off-by: Dmitry Sidorov --- clang/lib/CodeGen/BackendUtil.cpp | 2 ++ clang/lib/Driver/ToolChains/Clang.cpp | 3 ++- clang/lib/Frontend/CompilerInvocation.cpp | 2 -- clang/test/Driver/sycl-instrumentation.cpp | 6 +++--- llvm/tools/sycl-post-link/sycl-post-link.cpp | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e496db2e61564..e9a84f17341b9 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -840,6 +840,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.populateFunctionPassManager(FPM); PMBuilder.populateModulePassManager(MPM); + // Customize the tail of the module passes list for the ESIMD extension. if (LangOpts.SYCLIsDevice && LangOpts.SYCLExplicitSIMD && CodeGenOpts.OptimizationLevel != 0) { @@ -953,6 +954,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, legacy::FunctionPassManager PerFunctionPasses(TheModule); PerFunctionPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + // ESIMD extension always requires lowering of certain IR constructs, such as // ESIMD C++ intrinsics, as the last FE step. if (LangOpts.SYCLIsDevice && LangOpts.SYCLExplicitSIMD) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7c4bf3061e7a8..eb0e507245ed3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5868,7 +5868,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Forward -fsycl-instrument-device-code option to cc1 if (Args.hasArg(options::OPT_fsycl_instrument_device_code)) - CmdArgs.push_back("-fsycl-instrument-device-code"); + Args.AddLastArg(CmdArgs, options::OPT_sycl_std_EQ); +// CmdArgs.push_back("-fsycl-instrument-device-code"); if (IsHIP) { if (Args.hasFlag(options::OPT_fhip_new_launch_api, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index c47190acf803c..44bc3f82b94f0 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1050,8 +1050,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, (Args.hasArg(OPT_fsycl_is_device) && T.isSPIR() && Args.hasArg(OPT_fno_sycl_early_optimizations)); - Opts.SYCLITTAnnotations = Args.hasArg(OPT_fsycl_instrument_device_code); - const llvm::Triple::ArchType DebugEntryValueArchs[] = { llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64, llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips, diff --git a/clang/test/Driver/sycl-instrumentation.cpp b/clang/test/Driver/sycl-instrumentation.cpp index 8c1a1e80c5bef..dd11e2edcc3ab 100644 --- a/clang/test/Driver/sycl-instrumentation.cpp +++ b/clang/test/Driver/sycl-instrumentation.cpp @@ -1,10 +1,10 @@ /// Check that SYCL ITT instrumentation is disabled by default: // RUN: %clang -fsycl -### %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s -// CHECK-DEFAULT-NOT: "-add-instrumentation-calls" +// CHECK-DEFAULT-NOT: "fsycl-instrument-device-code" -/// Check "fsycl_device_code_add_instrumentation_calls" is passed to sycl post +/// Check if "fsycl_instrument_device_code" is passed to sycl post /// link tool: // RUN: %clang -fsycl -### -fsycl-instrument-device-code %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-ENABLED %s -// CHECK-ENABLED: "-add-instrumentation-calls" +// CHECK-ENABLED: "fsycl-instrument-device-code" diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index b0aa736d74142..ea1ae9ee535b3 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -785,7 +785,7 @@ static TableFiles processOneModule(std::unique_ptr M, bool IsEsimd, { // Reuse input module with only regular SYCL kernels if there were - // no spec constants, no splitting. + // no spec constants and no splitting. // We cannot reuse input module for ESIMD code since it was transformed. bool CanReuseInputModule = !SpecConstsMet && (ResultModules.size() == 1) && !SyclAndEsimdKernels && !IsEsimd; From 8bb5cdf61485726a3f4a08c22f81b586f54a2360 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Fri, 12 Mar 2021 11:09:15 +0300 Subject: [PATCH 10/16] Several fixes Signed-off-by: Dmitry Sidorov --- clang/lib/Driver/ToolChains/Clang.cpp | 3 +- clang/lib/Frontend/CompilerInvocation.cpp | 3 - .../kernel-simple-instrumentation.cpp | 2 +- clang/test/Driver/sycl-instrumentation.cpp | 11 ++-- llvm/include/llvm/LinkAllPasses.h | 8 +-- .../SYCLITTAnnotations/itt_atomic_load.ll | 4 +- .../SYCLITTAnnotations/itt_atomic_store.ll | 4 +- .../SYCLITTAnnotations/itt_barrier.ll | 4 +- .../SYCLITTAnnotations/itt_start_finish.ll | 55 +++++++++++++++++++ 9 files changed, 72 insertions(+), 22 deletions(-) create mode 100644 llvm/test/Transforms/SYCLITTAnnotations/itt_start_finish.ll diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 624349f8ae8cf..1d7a1455ecf5e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5885,8 +5885,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Forward -fsycl-instrument-device-code option to cc1 if (Args.hasArg(options::OPT_fsycl_instrument_device_code)) - Args.AddLastArg(CmdArgs, options::OPT_sycl_std_EQ); -// CmdArgs.push_back("-fsycl-instrument-device-code"); + CmdArgs.push_back("-fsycl-instrument-device-code"); if (IsHIP) { if (Args.hasFlag(options::OPT_fhip_new_launch_api, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0829678675366..ec6287a127365 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1657,9 +1657,6 @@ bool CompilerInvocation::ParseCodeGenArgsImpl(CodeGenOptions &Opts, if (!Opts.ProfileInstrumentUsePath.empty()) setPGOUseInstrumentor(Opts, Opts.ProfileInstrumentUsePath); - // Insert ITT annotations under the flag - Opts.SYCLITTAnnotations = Args.hasArg(OPT_fsycl_instrument_device_code); - if (const Arg *A = Args.getLastArg(OPT_ftime_report, OPT_ftime_report_EQ)) { Opts.TimePasses = true; diff --git a/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp b/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp index c68301c2ed469..407505dbf39f5 100644 --- a/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp +++ b/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-instrument-device-code -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-instrument-device-code -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s // CHECK: kernel_function // CHECK-NEXT: entry: diff --git a/clang/test/Driver/sycl-instrumentation.cpp b/clang/test/Driver/sycl-instrumentation.cpp index dd11e2edcc3ab..4dfd529dd7b1c 100644 --- a/clang/test/Driver/sycl-instrumentation.cpp +++ b/clang/test/Driver/sycl-instrumentation.cpp @@ -1,10 +1,9 @@ /// Check that SYCL ITT instrumentation is disabled by default: -// RUN: %clang -fsycl -### %s 2>&1 \ +// RUN: %clang -### %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s -// CHECK-DEFAULT-NOT: "fsycl-instrument-device-code" +// CHECK-DEFAULT-NOT: "-fsycl-instrument-device-code" -/// Check if "fsycl_instrument_device_code" is passed to sycl post -/// link tool: -// RUN: %clang -fsycl -### -fsycl-instrument-device-code %s 2>&1 \ +/// Check if "fsycl_instrument_device_code" is passed to -cc1: +// RUN: %clang -### -fsycl-instrument-device-code %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-ENABLED %s -// CHECK-ENABLED: "fsycl-instrument-device-code" +// CHECK-ENABLED: "-fsycl-instrument-device-code" diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 46f8e6ce729b3..52d7d5edc8d09 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -201,10 +201,10 @@ namespace { (void) llvm::createMergeFunctionsPass(); (void) llvm::createMergeICmpsLegacyPass(); (void) llvm::createExpandMemCmpPass(); - (void) llvm::createSYCLLowerWGScopePass(); - (void) llvm::createSYCLLowerESIMDPass(); - (void) llvm::createESIMDLowerLoadStorePass(); - (void) llvm::createESIMDLowerVecArgPass(); + (void)llvm::createSYCLLowerWGScopePass(); + (void)llvm::createSYCLLowerESIMDPass(); + (void)llvm::createESIMDLowerLoadStorePass(); + (void)llvm::createESIMDLowerVecArgPass(); (void) llvm::createSYCLITTAnnotationsPass(); std::string buf; llvm::raw_string_ostream os(buf); diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll index 5370be4ccb356..8eaf3fe14508d 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll @@ -8,7 +8,7 @@ ; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s ; ModuleID = 'load.bc' -source_filename = "/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/load.cpp" +source_filename = "llvm-test-suite/SYCL/AtomicRef/load.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown-sycldevice" @@ -96,7 +96,7 @@ entry: ; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32) ; CHECK: declare void @__itt_offload_wi_finish_wrapper() -attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/load.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/AtomicRef/load.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { convergent nounwind } diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll index 2ec9de0598e6a..dfcc9db21fc55 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll @@ -8,7 +8,7 @@ ; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s ; ModuleID = 'store.bc' -source_filename = "/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/store.cpp" +source_filename = "llvm-test-suite/SYCL/AtomicRef/store.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown-sycldevice" @@ -84,7 +84,7 @@ entry: ; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32) ; CHECK: declare void @__itt_offload_wi_finish_wrapper() -attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/AtomicRef/store.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/AtomicRef/store.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { convergent nounwind } diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll index e7761a8f3dce4..17d31ba7c8ca0 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll @@ -112,8 +112,8 @@ declare dso_local void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) local_unnam ; CHECK: declare void @__itt_offload_wg_barrier_wrapper() ; CHECK: declare void @__itt_offload_wi_resume_wrapper() -attributes #0 = { norecurse willreturn "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/localdisk2/sidorovd/SYCLTest/llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse willreturn "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { convergent } diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_start_finish.ll b/llvm/test/Transforms/SYCLITTAnnotations/itt_start_finish.ll new file mode 100644 index 0000000000000..09c1a73d8120d --- /dev/null +++ b/llvm/test/Transforms/SYCLITTAnnotations/itt_start_finish.ll @@ -0,0 +1,55 @@ +;; The test serves a purpose to check if work item start/finish annotations +;; are being added by SYCLITTAnnotations pass + +; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s + +; ModuleID = 'synthetic.bc' +source_filename = "synthetic.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +; Function Attrs: convergent norecurse nounwind mustprogress +define dso_local spir_kernel void @_ZTSZ4mainE15kernel_function() local_unnamed_addr #0 !kernel_arg_buffer_location !4 { +entry: +; CHECK: _ZTSZ4mainE15kernel_function( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() + %call.i = tail call spir_func i32 @_Z3foov() #2 + %cmp.i = icmp eq i32 %call.i, 42 + br i1 %cmp.i, label %"_ZZ4mainENK3$_0clEv.exit", label %if.end.i + +if.end.i: ; preds = %entry + tail call spir_func void @_Z3boov() #2 +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void + +"_ZZ4mainENK3$_0clEv.exit": ; preds = %entry, %if.end.i +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; CHECK: declare void @__itt_offload_wi_start_wrapper() +; CHECK: declare void @__itt_offload_wi_finish_wrapper() + +; Function Attrs: convergent +declare spir_func i32 @_Z3foov() local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z3boov() local_unnamed_addr #1 + +attributes #0 = { convergent norecurse nounwind mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../../llvm/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git f16527331b8cd18b3e45a4a7bc13a2460c8d0d84)"} +!4 = !{} From 33e610114735f8fa60d4edffd6cd6061cabed5d1 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Fri, 12 Mar 2021 14:55:45 +0300 Subject: [PATCH 11/16] Add diag + clang format Signed-off-by: Dmitry Sidorov --- clang/lib/CodeGen/BackendUtil.cpp | 12 ++++++-- clang/lib/Driver/ToolChains/Clang.cpp | 9 ++++-- ...rumentation.cpp => sycl-instrumentation.c} | 7 ++++- llvm/include/llvm/LinkAllPasses.h | 2 +- .../Instrumentation/SYCLITTAnnotations.cpp | 29 +++++++++---------- 5 files changed, 37 insertions(+), 22 deletions(-) rename clang/test/Driver/{sycl-instrumentation.cpp => sycl-instrumentation.c} (52%) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 859afb2c2c0d0..c47c17e3d2855 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -72,8 +72,8 @@ #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" -#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/SYCLITTAnnotations.h" +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" @@ -949,9 +949,15 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, LangOpts.EnableDAEInSpirKernels) PerModulePasses.add(createDeadArgEliminationSYCLPass()); - if (llvm::Triple(TheModule->getTargetTriple()).isSPIR() && - CodeGenOpts.SYCLITTAnnotations) + // Add SYCLITTAnnotations pass to the pass manager if + // -fsycl-instrument-device-code option was passed. This option can be + // used only with spir tripple. + if (CodeGenOpts.SYCLITTAnnotations) { + if (!llvm::Triple(TheModule->getTargetTriple()).isSPIR()) + llvm::report_fatal_error( + "ITT annotations can only by added to a module with spir target"); PerModulePasses.add(createSYCLITTAnnotationsPass()); + } switch (Action) { case Backend_EmitNothing: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1d7a1455ecf5e..f997d4a941dd6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5883,9 +5883,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false)) Args.AddLastArg(CmdArgs, options::OPT_sycl_std_EQ); - // Forward -fsycl-instrument-device-code option to cc1 - if (Args.hasArg(options::OPT_fsycl_instrument_device_code)) + // Forward -fsycl-instrument-device-code option to cc1. This option can only + // be used with spir triple. + if (Arg *A = Args.getLastArg(options::OPT_fsycl_instrument_device_code)) { + if (!Triple.isSPIR()) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getAsString(Args) << TripleStr; CmdArgs.push_back("-fsycl-instrument-device-code"); + } if (IsHIP) { if (Args.hasFlag(options::OPT_fhip_new_launch_api, diff --git a/clang/test/Driver/sycl-instrumentation.cpp b/clang/test/Driver/sycl-instrumentation.c similarity index 52% rename from clang/test/Driver/sycl-instrumentation.cpp rename to clang/test/Driver/sycl-instrumentation.c index 4dfd529dd7b1c..2edeaae118960 100644 --- a/clang/test/Driver/sycl-instrumentation.cpp +++ b/clang/test/Driver/sycl-instrumentation.c @@ -6,4 +6,9 @@ /// Check if "fsycl_instrument_device_code" is passed to -cc1: // RUN: %clang -### -fsycl-instrument-device-code %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-ENABLED %s -// CHECK-ENABLED: "-fsycl-instrument-device-code" +// CHECK-ENABLED: "-cc1"{{.*}} "-fsycl-instrument-device-code" + +/// Check if "fsycl_instrument_device_code" usage with a non-spirv target +/// results in an error. +// RUN: %clang -### -fsycl-instrument-device-code -target=x86 %s 2>&1 +// expected-error{{unsupported option '-fsycl-instrument-device-code' for target 'x86_64-unknown-linux-gnu'}} diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 52d7d5edc8d09..69f1fca201ece 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -205,7 +205,7 @@ namespace { (void)llvm::createSYCLLowerESIMDPass(); (void)llvm::createESIMDLowerLoadStorePass(); (void)llvm::createESIMDLowerVecArgPass(); - (void) llvm::createSYCLITTAnnotationsPass(); + (void)llvm::createSYCLITTAnnotationsPass(); std::string buf; llvm::raw_string_ostream os(buf); (void) llvm::createPrintModulePass(os); diff --git a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp index d7238765c50d1..b4b2574987084 100644 --- a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp +++ b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp @@ -12,12 +12,12 @@ #include "llvm/Transforms/Instrumentation/SYCLITTAnnotations.h" -#include "llvm/InitializePasses.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" -#include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" /** Following instrumentations will be linked from libdevice: * * * * * * * * * * * @@ -141,8 +141,7 @@ bool insertSimpleInstrumentationCall(Module &M, StringRef Name, Instruction *Position) { Type *VoidTy = Type::getVoidTy(M.getContext()); ArrayRef Args; - Instruction *InstrumentationCall = - emitCall(M, VoidTy, Name, Args, Position); + Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position); assert(InstrumentationCall && "Instrumentation call creation failed"); return true; } @@ -190,8 +189,9 @@ bool insertAtomicInstrumentationCall(Module &M, StringRef Name, // differencies in values between SYCL mem order and SPIR-V mem order, SYCL RT // also applies Memory Semantic mask, like WorkgroupMemory (0x100)), need to // align it. - uint64_t MemFlag = dyn_cast( - AtomicFun->getArgOperand(2))->getValue().getZExtValue(); + uint64_t MemFlag = dyn_cast(AtomicFun->getArgOperand(2)) + ->getValue() + .getZExtValue(); uint64_t Order; if (MemFlag & 0x2) Order = 1; @@ -203,8 +203,7 @@ bool insertAtomicInstrumentationCall(Module &M, StringRef Name, Order = 0; Value *MemOrder = ConstantInt::get(Int32Ty, Order); Value *Args[] = {Ptr, AtomicOp, MemOrder}; - Instruction *InstrumentationCall = - emitCall(M, VoidTy, Name, Args, Position); + Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position); assert(InstrumentationCall && "Instrumentation call creation failed"); return true; } @@ -215,10 +214,10 @@ PreservedAnalyses SYCLITTAnnotationsPass::run(Module &M, ModuleAnalysisManager &MAM) { bool IRModified = false; std::vector SPIRVCrossWGInstuctions = { - SPIRV_CONTROL_BARRIER, SPIRV_GROUP_ALL, SPIRV_GROUP_ANY, + SPIRV_CONTROL_BARRIER, SPIRV_GROUP_ALL, SPIRV_GROUP_ANY, SPIRV_GROUP_BROADCAST, SPIRV_GROUP_IADD, SPIRV_GROUP_FADD, - SPIRV_GROUP_FMIN, SPIRV_GROUP_UMIN, SPIRV_GROUP_SMIN, SPIRV_GROUP_FMAX, - SPIRV_GROUP_UMAX, SPIRV_GROUP_SMAX }; + SPIRV_GROUP_FMIN, SPIRV_GROUP_UMIN, SPIRV_GROUP_SMIN, + SPIRV_GROUP_FMAX, SPIRV_GROUP_UMAX, SPIRV_GROUP_SMAX}; for (Function &F : M) { // Annotate only SYCL kernels @@ -247,8 +246,8 @@ PreservedAnalyses SYCLITTAnnotationsPass::run(Module &M, size_t PrefixPosFound = CalleeName.find(SPIRV_PREFIX); if (PrefixPosFound == StringRef::npos) continue; - CalleeName = CalleeName.drop_front( - PrefixPosFound + /*len of SPIR-V prefix*/ 8); + CalleeName = + CalleeName.drop_front(PrefixPosFound + /*len of SPIR-V prefix*/ 8); // Annotate barrier and other cross WG calls if (std::any_of(SPIRVCrossWGInstuctions.begin(), SPIRVCrossWGInstuctions.end(), @@ -256,8 +255,8 @@ PreservedAnalyses SYCLITTAnnotationsPass::run(Module &M, return CalleeName.startswith(Name); })) { Instruction *InstAfterBarrier = CI->getNextNode(); - IRModified |= insertSimpleInstrumentationCall( - M, ITT_ANNOTATION_WG_BARRIER, CI); + IRModified |= + insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WG_BARRIER, CI); IRModified |= insertSimpleInstrumentationCall( M, ITT_ANNOTATION_WI_RESUME, InstAfterBarrier); } else if (CalleeName.startswith(SPIRV_ATOMIC_INST)) { From d525d3cb92ea68a3275fb20b346f68f4b74f7bad Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Fri, 12 Mar 2021 15:24:47 +0300 Subject: [PATCH 12/16] Fix test Signed-off-by: Dmitry Sidorov --- clang/test/Driver/sycl-instrumentation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/sycl-instrumentation.c b/clang/test/Driver/sycl-instrumentation.c index 2edeaae118960..525a59dc0fd5e 100644 --- a/clang/test/Driver/sycl-instrumentation.c +++ b/clang/test/Driver/sycl-instrumentation.c @@ -10,5 +10,5 @@ /// Check if "fsycl_instrument_device_code" usage with a non-spirv target /// results in an error. -// RUN: %clang -### -fsycl-instrument-device-code -target=x86 %s 2>&1 +// RUN: %clang -### -fsycl-instrument-device-code --target=x86 %s 2>&1 // expected-error{{unsupported option '-fsycl-instrument-device-code' for target 'x86_64-unknown-linux-gnu'}} From 6ab9c02fed6f70019cc6734d7c108e4dcfb2d2ea Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Fri, 12 Mar 2021 16:13:37 +0300 Subject: [PATCH 13/16] Update pass header Signed-off-by: Dmitry Sidorov --- .../Instrumentation/SYCLITTAnnotations.cpp | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp index b4b2574987084..010a1b26813ba 100644 --- a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp +++ b/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp @@ -47,7 +47,7 @@ * * void __itt_offload_wi_finish(size_t* group_id, size_t wi_id); * * * * * * * * * * * - * Notify tools work-item has reached a barier + * Notify tools work-item has reached a barrier * * /param[in] barrier_id Unique barrier id. If multi-barriers are not supported. * Pass 0 for barrier_id. Notify tools work-item has reached a barier. @@ -55,7 +55,28 @@ * void __itt_offload_wg_barrier(uintptr_t barrier_id); * * * * * * * * * * * * Purpose of this pass is to add wrapper calls to these instructions. - */ + * Also this pass adds annotations to atomic instructions: + * * * * * * * * * * * + * Atomic operation markup + * + * /param[in] object Memory location which is used in atomic operation + * /param[in] op_type Operation type + * /param[in] mem_order Memory ordering semantic + * + * void __itt_offload_atomic_op_start(void* object, + * __itt_atomic_mem_op_t op_type, + * __itt_atomic_mem_order_t mem_order); + * * * * * * * * * * * + * Atomic operation markup + * + * /param[in] object Memory location which is used in atomic operation + * /param[in] op_type Operation type + * /param[in] mem_order Memory ordering semantic + * + * void __itt_offload_atomic_op_finish(void* object, + * __itt_atomic_mem_op_t op_type, + * __itt_atomic_mem_order_t mem_order); + **/ using namespace llvm; From 457b1ecb9e67815e25fa5103735d77832dd18a41 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Mon, 15 Mar 2021 18:10:24 +0300 Subject: [PATCH 14/16] SYCLITT -> SPIRITT Signed-off-by: Dmitry Sidorov --- .github/CODEOWNERS | 5 +++ clang/include/clang/Basic/CodeGenOptions.def | 4 +-- clang/include/clang/Driver/Options.td | 2 +- clang/lib/CodeGen/BackendUtil.cpp | 8 ++--- clang/test/Driver/sycl-instrumentation.c | 2 +- llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/LinkAllPasses.h | 4 +-- ...LITTAnnotations.h => SPIRITTAnnotations.h} | 8 ++--- .../Transforms/Instrumentation/CMakeLists.txt | 2 +- ...Annotations.cpp => SPIRITTAnnotations.cpp} | 32 +++++++++---------- .../itt_atomic_load.ll | 4 +-- .../itt_atomic_store.ll | 4 +-- .../itt_barrier.ll | 4 +-- .../itt_start_finish.ll | 4 +-- llvm/tools/opt/opt.cpp | 2 +- 15 files changed, 46 insertions(+), 41 deletions(-) rename llvm/include/llvm/Transforms/Instrumentation/{SYCLITTAnnotations.h => SPIRITTAnnotations.h} (73%) rename llvm/lib/Transforms/Instrumentation/{SYCLITTAnnotations.cpp => SPIRITTAnnotations.cpp} (93%) rename llvm/test/Transforms/{SYCLITTAnnotations => SPIRITTAnnotations}/itt_atomic_load.ll (99%) rename llvm/test/Transforms/{SYCLITTAnnotations => SPIRITTAnnotations}/itt_atomic_store.ll (99%) rename llvm/test/Transforms/{SYCLITTAnnotations => SPIRITTAnnotations}/itt_barrier.ll (99%) rename llvm/test/Transforms/{SYCLITTAnnotations => SPIRITTAnnotations}/itt_start_finish.ll (96%) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 24f313c45537f..01ff6d556dfbf 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -114,3 +114,8 @@ SYCLLowerIR/ @kbobrovs @DenisBakhvalov esimd/ @kbobrovs @DenisBakhvalov sycl/include/CL/sycl/INTEL/esimd.hpp @kbobrovs @DenisBakhvalov sycl/doc/extensions/ExplicitSIMD/ @kbobrovs + +# ITT annotations +llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp @MrSidims +llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h @MrSidims + diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 9443ce42d3c45..d566a5cd7169b 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -417,8 +417,8 @@ CODEGENOPT(PassByValueIsNoAlias, 1, 0) /// according to the field declaring type width. CODEGENOPT(AAPCSBitfieldWidth, 1, 1) -// Whether to instrument SYCL device code with ITT annotations -CODEGENOPT(SYCLITTAnnotations, 1, 0) +// Whether to instrument SPIR device code with ITT annotations +CODEGENOPT(SPIRITTAnnotations, 1, 0) #undef CODEGENOPT #undef ENUM_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bf76cbfae74d0..d05d0397e0f17 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2421,7 +2421,7 @@ def fno_sycl_device_code_lower_esimd : Flag<["-"], "fno-sycl-device-code-lower-e def fsycl_instrument_device_code : Flag<["-"], "fsycl-instrument-device-code">, Group, Flags<[CC1Option, CoreOption]>, HelpText<"Add ITT instrumentation intrinsics calls">, - MarshallingInfoFlag>; + MarshallingInfoFlag>; defm sycl_id_queries_fit_in_int: OptInFFlag<"sycl-id-queries-fit-in-int", "Assume", "Do not assume", " that SYCL ID queries fit within MAX_INT.", [CC1Option,CoreOption], LangOpts<"SYCLValueFitInMaxInt">>; def fsycl_use_bitcode : Flag<["-"], "fsycl-use-bitcode">, Flags<[CC1Option, CoreOption]>, HelpText<"Use LLVM bitcode instead of SPIR-V in fat objects">; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index c47c17e3d2855..dcfc74d54ed49 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -72,7 +72,7 @@ #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" -#include "llvm/Transforms/Instrumentation/SYCLITTAnnotations.h" +#include "llvm/Transforms/Instrumentation/SPIRITTAnnotations.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" @@ -949,14 +949,14 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, LangOpts.EnableDAEInSpirKernels) PerModulePasses.add(createDeadArgEliminationSYCLPass()); - // Add SYCLITTAnnotations pass to the pass manager if + // Add SPIRITTAnnotations pass to the pass manager if // -fsycl-instrument-device-code option was passed. This option can be // used only with spir tripple. - if (CodeGenOpts.SYCLITTAnnotations) { + if (CodeGenOpts.SPIRITTAnnotations) { if (!llvm::Triple(TheModule->getTargetTriple()).isSPIR()) llvm::report_fatal_error( "ITT annotations can only by added to a module with spir target"); - PerModulePasses.add(createSYCLITTAnnotationsPass()); + PerModulePasses.add(createSPIRITTAnnotationsPass()); } switch (Action) { diff --git a/clang/test/Driver/sycl-instrumentation.c b/clang/test/Driver/sycl-instrumentation.c index 525a59dc0fd5e..df433c425e0f0 100644 --- a/clang/test/Driver/sycl-instrumentation.c +++ b/clang/test/Driver/sycl-instrumentation.c @@ -1,4 +1,4 @@ -/// Check that SYCL ITT instrumentation is disabled by default: +/// Check that SPIR ITT instrumentation is disabled by default: // RUN: %clang -### %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s // CHECK-DEFAULT-NOT: "-fsycl-instrument-device-code" diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 86dd063c8d14d..8340c62a1c5b7 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -429,7 +429,7 @@ void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGLegacyPassPass(PassRegistry &); void initializeSYCLLowerWGScopeLegacyPassPass(PassRegistry &); void initializeSYCLLowerESIMDLegacyPassPass(PassRegistry &); -void initializeSYCLITTAnnotationsLegacyPassPass(PassRegistry &); +void initializeSPIRITTAnnotationsLegacyPassPass(PassRegistry &); void initializeESIMDLowerLoadStorePass(PassRegistry &); void initializeESIMDLowerVecArgLegacyPassPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 69f1fca201ece..25a481e541be7 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -48,7 +48,7 @@ #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" -#include "llvm/Transforms/Instrumentation/SYCLITTAnnotations.h" +#include "llvm/Transforms/Instrumentation/SPIRITTAnnotations.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" @@ -205,7 +205,7 @@ namespace { (void)llvm::createSYCLLowerESIMDPass(); (void)llvm::createESIMDLowerLoadStorePass(); (void)llvm::createESIMDLowerVecArgPass(); - (void)llvm::createSYCLITTAnnotationsPass(); + (void)llvm::createSPIRITTAnnotationsPass(); std::string buf; llvm::raw_string_ostream os(buf); (void) llvm::createPrintModulePass(os); diff --git a/llvm/include/llvm/Transforms/Instrumentation/SYCLITTAnnotations.h b/llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h similarity index 73% rename from llvm/include/llvm/Transforms/Instrumentation/SYCLITTAnnotations.h rename to llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h index 99d778981c80b..36c5ebdbab4fd 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/SYCLITTAnnotations.h +++ b/llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h @@ -1,4 +1,4 @@ -//===----- SYCLITTAnnotations.h - SYCL Instrumental Annotations Pass ------===// +//===----- SPIRITTAnnotations.h - SYCL Instrumental Annotations Pass ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // A transformation pass which adds instrumental calls to annotate SYCL -// synchronization instrucations. This can be used for kernel profiling. +// synchronization instructions. This can be used for kernel profiling. //===----------------------------------------------------------------------===// #pragma once @@ -17,11 +17,11 @@ namespace llvm { -class SYCLITTAnnotationsPass : public PassInfoMixin { +class SPIRITTAnnotationsPass : public PassInfoMixin { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); }; -ModulePass *createSYCLITTAnnotationsPass(); +ModulePass *createSPIRITTAnnotationsPass(); } // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 28abc7523210b..85485a2b98d51 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -15,7 +15,7 @@ add_llvm_component_library(LLVMInstrumentation PGOMemOPSizeOpt.cpp PoisonChecking.cpp SanitizerCoverage.cpp - SYCLITTAnnotations.cpp + SPIRITTAnnotations.cpp ValueProfileCollector.cpp ThreadSanitizer.cpp HWAddressSanitizer.cpp diff --git a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp b/llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp similarity index 93% rename from llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp rename to llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp index 010a1b26813ba..f7ded211a6adb 100644 --- a/llvm/lib/Transforms/Instrumentation/SYCLITTAnnotations.cpp +++ b/llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp @@ -1,4 +1,4 @@ -//===---- SYCLITTAnnotations.cpp - SYCL Instrumental Annotations Pass -----===// +//===---- SPIRITTAnnotations.cpp - SYCL Instrumental Annotations Pass -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,7 +10,7 @@ // synchronization instructions. This can be used for kernel profiling. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation/SYCLITTAnnotations.h" +#include "llvm/Transforms/Instrumentation/SPIRITTAnnotations.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" @@ -19,7 +19,7 @@ #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" -/** Following instrumentations will be linked from libdevice: +/** Following functions are used for ITT instrumentation: * * * * * * * * * * * * Notify tools work-item execution has started * @@ -106,15 +106,15 @@ constexpr char ITT_ANNOTATION_ATOMIC_FINISH[] = "__itt_offload_atomic_op_finish"; // Wrapper for the pass to make it working with the old pass manager -class SYCLITTAnnotationsLegacyPass : public ModulePass { +class SPIRITTAnnotationsLegacyPass : public ModulePass { public: static char ID; - SYCLITTAnnotationsLegacyPass() : ModulePass(ID) { - initializeSYCLITTAnnotationsLegacyPassPass( + SPIRITTAnnotationsLegacyPass() : ModulePass(ID) { + initializeSPIRITTAnnotationsLegacyPassPass( *PassRegistry::getPassRegistry()); } - // run the SYCLITTAnnotations pass on the specified module + // run the SPIRITTAnnotations pass on the specified module bool runOnModule(Module &M) override { ModuleAnalysisManager MAM; auto PA = Impl.run(M, MAM); @@ -122,25 +122,25 @@ class SYCLITTAnnotationsLegacyPass : public ModulePass { } private: - SYCLITTAnnotationsPass Impl; + SPIRITTAnnotationsPass Impl; }; } // namespace -char SYCLITTAnnotationsLegacyPass::ID = 0; -INITIALIZE_PASS(SYCLITTAnnotationsLegacyPass, "SYCLITTAnnotations", +char SPIRITTAnnotationsLegacyPass::ID = 0; +INITIALIZE_PASS(SPIRITTAnnotationsLegacyPass, "SPIRITTAnnotations", "Insert ITT annotations in SYCL code", false, false) -// Public interface to the SYCLITTAnnotationsPass. -ModulePass *llvm::createSYCLITTAnnotationsPass() { - return new SYCLITTAnnotationsLegacyPass(); +// Public interface to the SPIRITTAnnotationsPass. +ModulePass *llvm::createSPIRITTAnnotationsPass() { + return new SPIRITTAnnotationsLegacyPass(); } namespace { // Check for calling convention of a function. If it's spir_kernel - consider // the function to be a SYCL kernel. -bool isSyclKernel(Function &F) { +bool isSPIRKernel(Function &F) { return F.getCallingConv() == CallingConv::SPIR_KERNEL; } @@ -231,7 +231,7 @@ bool insertAtomicInstrumentationCall(Module &M, StringRef Name, } // namespace -PreservedAnalyses SYCLITTAnnotationsPass::run(Module &M, +PreservedAnalyses SPIRITTAnnotationsPass::run(Module &M, ModuleAnalysisManager &MAM) { bool IRModified = false; std::vector SPIRVCrossWGInstuctions = { @@ -242,7 +242,7 @@ PreservedAnalyses SYCLITTAnnotationsPass::run(Module &M, for (Function &F : M) { // Annotate only SYCL kernels - if (F.isDeclaration() || !isSyclKernel(F)) + if (F.isDeclaration() || !isSPIRKernel(F)) continue; // At the beggining of a kernel insert work item start annotation diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll b/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_load.ll similarity index 99% rename from llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll rename to llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_load.ll index 8eaf3fe14508d..7c901d9973682 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_load.ll +++ b/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_load.ll @@ -1,11 +1,11 @@ ;; The test serves a purpose to check if Atomic load instruction is being -;; annotated by SYCLITTAnnotations pass +;; annotated by SPIRITTAnnotations pass ;; ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp ;; with following commands: ;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc -; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s +; RUN: opt < %s --SPIRITTAnnotations -S | FileCheck %s ; ModuleID = 'load.bc' source_filename = "llvm-test-suite/SYCL/AtomicRef/load.cpp" diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll b/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_store.ll similarity index 99% rename from llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll rename to llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_store.ll index dfcc9db21fc55..7387ce3c2b8ac 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_atomic_store.ll +++ b/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_store.ll @@ -1,11 +1,11 @@ ;; The test serves a purpose to check if Atomic store instruction is being -;; annotated by SYCLITTAnnotations pass +;; annotated by SPIRITTAnnotations pass ;; ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp ;; with following commands: ;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc -; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s +; RUN: opt < %s --SPIRITTAnnotations -S | FileCheck %s ; ModuleID = 'store.bc' source_filename = "llvm-test-suite/SYCL/AtomicRef/store.cpp" diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll b/llvm/test/Transforms/SPIRITTAnnotations/itt_barrier.ll similarity index 99% rename from llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll rename to llvm/test/Transforms/SPIRITTAnnotations/itt_barrier.ll index 17d31ba7c8ca0..127af4562082a 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_barrier.ll +++ b/llvm/test/Transforms/SPIRITTAnnotations/itt_barrier.ll @@ -1,11 +1,11 @@ ;; The test serves a purpose to check if barrier instruction is being annotated -;; by SYCLITTAnnotations pass +;; by SPIRITTAnnotations pass ;; ;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/KernelAndProgram/kernel-and-program.cpp ;; with following commands: ;; clang++ -fsycl -fsycl-device-only kernel-and-program.cpp -o kernel_and_program_optimized.bc -; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s +; RUN: opt < %s --SPIRITTAnnotations -S | FileCheck %s ; ModuleID = 'kernel_and_program_optimized.bc' source_filename = "llvm-link" diff --git a/llvm/test/Transforms/SYCLITTAnnotations/itt_start_finish.ll b/llvm/test/Transforms/SPIRITTAnnotations/itt_start_finish.ll similarity index 96% rename from llvm/test/Transforms/SYCLITTAnnotations/itt_start_finish.ll rename to llvm/test/Transforms/SPIRITTAnnotations/itt_start_finish.ll index 09c1a73d8120d..0999a31042207 100644 --- a/llvm/test/Transforms/SYCLITTAnnotations/itt_start_finish.ll +++ b/llvm/test/Transforms/SPIRITTAnnotations/itt_start_finish.ll @@ -1,7 +1,7 @@ ;; The test serves a purpose to check if work item start/finish annotations -;; are being added by SYCLITTAnnotations pass +;; are being added by SPIRITTAnnotations pass -; RUN: opt < %s --SYCLITTAnnotations -S | FileCheck %s +; RUN: opt < %s --SPIRITTAnnotations -S | FileCheck %s ; ModuleID = 'synthetic.bc' source_filename = "synthetic.cpp" diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 018083e755a21..3a3cc8857a612 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -582,7 +582,7 @@ int main(int argc, char **argv) { initializeReplaceWithVeclibLegacyPass(Registry); initializeSYCLLowerWGScopeLegacyPassPass(Registry); initializeSYCLLowerESIMDLegacyPassPass(Registry); - initializeSYCLITTAnnotationsLegacyPassPass(Registry); + initializeSPIRITTAnnotationsLegacyPassPass(Registry); initializeESIMDLowerLoadStorePass(Registry); initializeESIMDLowerVecArgLegacyPassPass(Registry); From e1aa264e6f513edb75b6df175dde6ac4ae9e03ba Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Mon, 15 Mar 2021 19:59:13 +0300 Subject: [PATCH 15/16] Apply suggestions Signed-off-by: Dmitry Sidorov --- .../Transforms/Instrumentation/SPIRITTAnnotations.h | 4 ++-- .../Transforms/Instrumentation/SPIRITTAnnotations.cpp | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h b/llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h index 36c5ebdbab4fd..547b3c374abbd 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h +++ b/llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h @@ -1,4 +1,4 @@ -//===----- SPIRITTAnnotations.h - SYCL Instrumental Annotations Pass ------===// +//===----- SPIRITTAnnotations.h - SPIR Instrumental Annotations Pass ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// A transformation pass which adds instrumental calls to annotate SYCL +// A transformation pass which adds instrumental calls to annotate SPIR // synchronization instructions. This can be used for kernel profiling. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp b/llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp index f7ded211a6adb..1c62285f68895 100644 --- a/llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp +++ b/llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp @@ -1,4 +1,4 @@ -//===---- SPIRITTAnnotations.cpp - SYCL Instrumental Annotations Pass -----===// +//===---- SPIRITTAnnotations.cpp - SPIR Instrumental Annotations Pass -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// A transformation pass which adds instrumental calls to annotate SYCL +// A transformation pass which adds instrumental calls to annotate SPIR // synchronization instructions. This can be used for kernel profiling. //===----------------------------------------------------------------------===// @@ -129,7 +129,7 @@ class SPIRITTAnnotationsLegacyPass : public ModulePass { char SPIRITTAnnotationsLegacyPass::ID = 0; INITIALIZE_PASS(SPIRITTAnnotationsLegacyPass, "SPIRITTAnnotations", - "Insert ITT annotations in SYCL code", false, false) + "Insert ITT annotations in SPIR code", false, false) // Public interface to the SPIRITTAnnotationsPass. ModulePass *llvm::createSPIRITTAnnotationsPass() { @@ -138,8 +138,7 @@ ModulePass *llvm::createSPIRITTAnnotationsPass() { namespace { -// Check for calling convention of a function. If it's spir_kernel - consider -// the function to be a SYCL kernel. +// Check for calling convention of a function. bool isSPIRKernel(Function &F) { return F.getCallingConv() == CallingConv::SPIR_KERNEL; } @@ -241,7 +240,7 @@ PreservedAnalyses SPIRITTAnnotationsPass::run(Module &M, SPIRV_GROUP_FMAX, SPIRV_GROUP_UMAX, SPIRV_GROUP_SMAX}; for (Function &F : M) { - // Annotate only SYCL kernels + // Annotate only SPIR kernels if (F.isDeclaration() || !isSPIRKernel(F)) continue; From a41f48a371c6581056ec2f85d2a926f8786023b2 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Mon, 15 Mar 2021 23:35:05 +0300 Subject: [PATCH 16/16] Apply driver suggestions Signed-off-by: Dmitry Sidorov --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/CodeGen/BackendUtil.cpp | 2 +- clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d05d0397e0f17..868453b1b8c93 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2419,7 +2419,7 @@ def fsycl_device_code_lower_esimd : Flag<["-"], "fsycl-device-code-lower-esimd"> def fno_sycl_device_code_lower_esimd : Flag<["-"], "fno-sycl-device-code-lower-esimd">, Flags<[CC1Option, CoreOption]>, HelpText<"Do not lower ESIMD-specific constructs">; def fsycl_instrument_device_code : Flag<["-"], "fsycl-instrument-device-code">, - Group, Flags<[CC1Option, CoreOption]>, + Group, Flags<[CC1Option, CoreOption]>, HelpText<"Add ITT instrumentation intrinsics calls">, MarshallingInfoFlag>; defm sycl_id_queries_fit_in_int: OptInFFlag<"sycl-id-queries-fit-in-int", "Assume", "Do not assume", " that SYCL ID queries fit within MAX_INT.", [CC1Option,CoreOption], LangOpts<"SYCLValueFitInMaxInt">>; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index dcfc74d54ed49..17e37df253e8e 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -951,7 +951,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, // Add SPIRITTAnnotations pass to the pass manager if // -fsycl-instrument-device-code option was passed. This option can be - // used only with spir tripple. + // used only with spir triple. if (CodeGenOpts.SPIRITTAnnotations) { if (!llvm::Triple(TheModule->getTargetTriple()).isSPIR()) llvm::report_fatal_error( diff --git a/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp b/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp index 407505dbf39f5..d88ef1abc3fc7 100644 --- a/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp +++ b/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp @@ -1,3 +1,6 @@ +/// Check if start/finish ITT annotations are being added during compilation of +/// SYCL device code + // RUN: %clang_cc1 -fsycl-is-device -fsycl-instrument-device-code -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s // CHECK: kernel_function