diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll new file mode 100644 index 0000000000000..856461f5632df --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll @@ -0,0 +1,133 @@ +; This test emulates two translation units with 3 kernels: +; TU0_kernel0 - 1st translation unit, no reqd_work_group_size attribute used +; TU0_kernel1 - 1st translation unit, reqd_work_group_size attribute is used +; TU1_kernel2 - 2nd translation unit, no reqd_work_group_size attribute used + +; The test is intended to check that sycl-post-link correctly separates kernels +; that use reqd_work_group_size attributes from kernels which doesn't use them +; regardless of device code split mode + +; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \ +; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2 +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 + +; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2 +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \ +; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2 +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 + +; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2 +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ +; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2 +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 + +; Regardless of device code split mode, each kernel should go into a separate +; device image + +; CHECK-M2-IR: define {{.*}} @TU0_kernel0 +; CHECK-M2-SYMS: TU0_kernel0 + +; CHECK-M1-IR: define {{.*}} @TU0_kernel1 +; CHECK-M1-SYMS: TU0_kernel1 + +; CHECK-M0-IR: define {{.*}} @TU1_kernel2 +; CHECK-M0-SYMS: TU1_kernel2 + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux" + +; FIXME: device globals should also be properly distributed across device images +; if they are of optional type +@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4 + +define dso_local spir_kernel void @TU0_kernel0() #0 { +entry: + call spir_func void @foo() + ret void +} + +define dso_local spir_func void @foo() { +entry: + %a = alloca i32, align 4 + %call = call spir_func i32 @bar(i32 1) + %add = add nsw i32 2, %call + store i32 %add, i32* %a, align 4 + ret void +} + +; Function Attrs: nounwind +define linkonce_odr dso_local spir_func i32 @bar(i32 %arg) { +entry: + %arg.addr = alloca i32, align 4 + store i32 %arg, i32* %arg.addr, align 4 + %0 = load i32, i32* %arg.addr, align 4 + ret i32 %0 +} + +define dso_local spir_kernel void @TU0_kernel1() #0 !reqd_work_group_size !2 { +entry: + call spir_func void @foo1() + ret void +} + +; Function Attrs: nounwind +define dso_local spir_func void @foo1() { +entry: + %a = alloca i32, align 4 + store i32 2, i32* %a, align 4 + ret void +} + +define dso_local spir_kernel void @TU1_kernel2() #1 { +entry: + call spir_func void @foo2() + ret void +} + +; Function Attrs: nounwind +define dso_local spir_func void @foo2() { +entry: + %a = alloca i32, align 4 + %0 = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @_ZL2GV to [1 x i32] addrspace(4)*), i64 0, i64 0), align 4 + %add = add nsw i32 4, %0 + store i32 %add, i32* %a, align 4 + ret void +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } +attributes #1 = { "sycl-module-id"="TU2.cpp" } + +!opencl.spir.version = !{!0, !0} +!spirv.Source = !{!1, !1} + +!0 = !{i32 1, i32 2} +!1 = !{i32 4, i32 100000} +!2 = !{i32 32} diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll new file mode 100644 index 0000000000000..02bd44c53dcb4 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll @@ -0,0 +1,59 @@ +; The test is intended to check that sycl-post-link correctly groups kernels +; by unique reqd_work_group_size values used in them + +; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE +; +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 \ +; RUN: --implicit-check-not kernel2 +; +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M2-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel3 +; +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M1-SYMS \ +; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 \ +; RUN: --implicit-check-not kernel3 + +; CHECK-TABLE: Code +; CHECK-TABLE-NEXT: _0.sym +; CHECK-TABLE-NEXT: _1.sym +; CHECK-TABLE-NEXT: _2.sym +; CHECK-TABLE-EMPTY: + +; CHECK-M0-SYMS: kernel3 + +; CHECK-M1-SYMS: kernel0 + +; CHECK-M2-SYMS: kernel1 +; CHECK-M2-SYMS: kernel2 + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux" + +define dso_local spir_kernel void @kernel0() #0 !reqd_work_group_size !1 { +entry: + ret void +} + +define dso_local spir_kernel void @kernel1() #0 !reqd_work_group_size !2 { +entry: + ret void +} + +define dso_local spir_kernel void @kernel2() #0 !reqd_work_group_size !3 { +entry: + ret void +} + +define dso_local spir_kernel void @kernel3() #0 !reqd_work_group_size !4 { +entry: + ret void +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } + +!1 = !{i32 32} +!2 = !{i32 64, i32 64} +!3 = !{i32 64, i32 64} +!4 = !{i32 16, i32 16, i32 16} diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-3.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-3.ll new file mode 100644 index 0000000000000..cae5d9a78322f --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-3.ll @@ -0,0 +1,54 @@ +; This test is intended to check that we do not perform per-reqd_work_group_size +; split if it was disabled through one or another sycl-post-link option + +; RUN: sycl-post-link -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK-IR +; +; -lower-esimd is needed so sycl-post-link does not complain about no actions +; specified +; RUN: sycl-post-link -lower-esimd -ir-output-only -S %s -o %t.ll +; RUN: FileCheck %s -input-file=%t.ll --check-prefix CHECK-IR + +; We expect to see only one module generated: +; +; CHECK-TABLE: Code +; CHECK-TABLE-NEXT: _0.ll +; CHECK-TABLE-EMPTY: + +; Regardless of used reqd_work_group_size and sycl-module-id metadata, all +; kernel and functions should still be present. + +; CHECK-IR-DAG: define spir_func void @foo +; CHECK-IR-DAG: define spir_func void @bar +; CHECK-IR-DAG: define spir_kernel void @kernel0 +; CHECK-IR-DAG: define spir_kernel void @kernel1 + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux" + +define spir_func void @foo() #0 !reqd_work_group_size !1 { + ret void +} + +define spir_func void @bar() #1 !reqd_work_group_size !2 { + ret void +} + +define spir_kernel void @kernel0() #1 !reqd_work_group_size !2 { +entry: + ret void +} + +define spir_kernel void @kernel1() #0 !reqd_work_group_size !3 { +entry: + call void @foo() + ret void +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } +attributes #1 = { "sycl-module-id"="TU2.cpp" } + +!1 = !{i32 32} +!2 = !{i32 64} +!3 = !{i32 16, i32 16} diff --git a/llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll b/llvm/test/tools/sycl-post-link/device-requirements/aspects.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll rename to llvm/test/tools/sycl-post-link/device-requirements/aspects.ll diff --git a/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll b/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll new file mode 100644 index 0000000000000..4b92ceb623630 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll @@ -0,0 +1,196 @@ +; Original code: +; #include + +; int main() { +; sycl::queue q; +; q.submit([&](sycl::handler &h) { +; h.parallel_for( +; sycl::range<1>(32), +; [=](sycl::item<1> it) [[sycl::reqd_work_group_size(32)]] {}); +; }); +; q.submit([&](sycl::handler &h) { +; h.parallel_for( +; sycl::range<1>(32), +; [=](sycl::item<1> it) [[sycl::reqd_work_group_size(64)]] {}); +; }); +; q.submit([&](sycl::handler &h) { +; h.parallel_for( +; sycl::range<1>(32), +; [=](sycl::item<1> it) [[sycl::reqd_work_group_size(32)]] {}); +; }); +; return 0; +; } + +; RUN: sycl-post-link -split=auto %s -o %t.table +; RUN: FileCheck %s -input-file=%t_0.prop --check-prefix CHECK-PROP-AUTO-SPLIT-0 +; RUN: FileCheck %s -input-file=%t_1.prop --check-prefix CHECK-PROP-AUTO-SPLIT-1 + +; CHECK-PROP-AUTO-SPLIT-0: [SYCL/device requirements] +; CHECK-PROP-AUTO-SPLIT-0-NEXT: aspects=2|AAAAAAAAAAA +; CHECK-PROP-AUTO-SPLIT-0-NEXT: reqd_work_group_size=2|gAAAAAAAAAAQAAAA + +; CHECK-PROP-AUTO-SPLIT-1: [SYCL/device requirements] +; CHECK-PROP-AUTO-SPLIT-1-NEXT: aspects=2|AAAAAAAAAAA +; CHECK-PROP-AUTO-SPLIT-1-NEXT: reqd_work_group_size=2|gAAAAAAAAAAIAAAA + +; ModuleID = '/tmp/source-5f7d0d.bc' +source_filename = "llvm-link" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +$_ZTSN4sycl3_V16detail19__pf_kernel_wrapperIZZ4mainENKUlRNS0_7handlerEE_clES4_E7KernelAEE = comdat any + +$_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E7KernelA = comdat any + +$_ZTSN4sycl3_V16detail19__pf_kernel_wrapperIZZ4mainENKUlRNS0_7handlerEE0_clES4_E7KernelBEE = comdat any + +$_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E7KernelB = comdat any + +$_ZTSN4sycl3_V16detail19__pf_kernel_wrapperIZZ4mainENKUlRNS0_7handlerEE1_clES4_E7KernelCEE = comdat any + +$_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_E7KernelC = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: norecurse +define weak_odr dso_local spir_kernel void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperIZZ4mainENKUlRNS0_7handlerEE_clES4_E7KernelAEE() local_unnamed_addr #0 comdat !srcloc !46 !kernel_arg_buffer_location !47 !reqd_work_group_size !48 !sycl_fixed_targets !49 !sycl_kernel_omit_args !50 { +entry: + %0 = load <3 x i64>, <3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !51 + %1 = extractelement <3 x i64> %0, i64 0 + %cmp.i.i = icmp ult i64 %1, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #1 + +; Function Attrs: norecurse +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E7KernelA() local_unnamed_addr #0 comdat !srcloc !60 !kernel_arg_buffer_location !49 !reqd_work_group_size !48 !sycl_fixed_targets !49 !sycl_kernel_omit_args !49 { +entry: + ret void +} + +; Function Attrs: norecurse +define weak_odr dso_local spir_kernel void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperIZZ4mainENKUlRNS0_7handlerEE0_clES4_E7KernelBEE() local_unnamed_addr #0 comdat !srcloc !46 !kernel_arg_buffer_location !47 !reqd_work_group_size !61 !sycl_fixed_targets !49 !sycl_kernel_omit_args !50 { +entry: + %0 = load <3 x i64>, <3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !62 + %1 = extractelement <3 x i64> %0, i64 0 + %cmp.i.i = icmp ult i64 %1, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + ret void +} + +; Function Attrs: norecurse +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E7KernelB() local_unnamed_addr #0 comdat !srcloc !71 !kernel_arg_buffer_location !49 !reqd_work_group_size !61 !sycl_fixed_targets !49 !sycl_kernel_omit_args !49 { +entry: + ret void +} + +; Function Attrs: norecurse +define weak_odr dso_local spir_kernel void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperIZZ4mainENKUlRNS0_7handlerEE1_clES4_E7KernelCEE() local_unnamed_addr #0 comdat !srcloc !46 !kernel_arg_buffer_location !47 !reqd_work_group_size !48 !sycl_fixed_targets !49 !sycl_kernel_omit_args !50 { +entry: + %0 = load <3 x i64>, <3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !72 + %1 = extractelement <3 x i64> %0, i64 0 + %cmp.i.i = icmp ult i64 %1, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + ret void +} + +; Function Attrs: norecurse +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_E7KernelC() local_unnamed_addr #0 comdat !srcloc !81 !kernel_arg_buffer_location !49 !reqd_work_group_size !48 !sycl_fixed_targets !49 !sycl_kernel_omit_args !49 { +entry: + ret void +} + +attributes #0 = { norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="source.cpp" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } + +!opencl.spir.version = !{!0} +!spirv.Source = !{!1} +!sycl_aspects = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42} +!llvm.ident = !{!43} +!llvm.module.flags = !{!44, !45} + +!0 = !{i32 1, i32 2} +!1 = !{i32 4, i32 100000} +!2 = !{!"host", i32 0} +!3 = !{!"cpu", i32 1} +!4 = !{!"gpu", i32 2} +!5 = !{!"accelerator", i32 3} +!6 = !{!"custom", i32 4} +!7 = !{!"fp16", i32 5} +!8 = !{!"fp64", i32 6} +!9 = !{!"image", i32 9} +!10 = !{!"online_compiler", i32 10} +!11 = !{!"online_linker", i32 11} +!12 = !{!"queue_profiling", i32 12} +!13 = !{!"usm_device_allocations", i32 13} +!14 = !{!"usm_host_allocations", i32 14} +!15 = !{!"usm_shared_allocations", i32 15} +!16 = !{!"usm_restricted_shared_allocations", i32 16} +!17 = !{!"usm_system_allocations", i32 17} +!18 = !{!"ext_intel_pci_address", i32 18} +!19 = !{!"ext_intel_gpu_eu_count", i32 19} +!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!21 = !{!"ext_intel_gpu_slices", i32 21} +!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!23 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!24 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!25 = !{!"ext_intel_mem_channel", i32 25} +!26 = !{!"usm_atomic_host_allocations", i32 26} +!27 = !{!"usm_atomic_shared_allocations", i32 27} +!28 = !{!"atomic64", i32 28} +!29 = !{!"ext_intel_device_info_uuid", i32 29} +!30 = !{!"ext_oneapi_srgb", i32 30} +!31 = !{!"ext_oneapi_native_assert", i32 31} +!32 = !{!"host_debuggable", i32 32} +!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"int64_base_atomics", i32 7} +!41 = !{!"int64_extended_atomics", i32 8} +!42 = !{!"usm_system_allocator", i32 17} +!43 = !{!"clang version 16.0.0"} +!44 = !{i32 1, !"wchar_size", i32 4} +!45 = !{i32 7, !"frame-pointer", i32 2} +!46 = !{i32 8347054} +!47 = !{i32 -1, i32 -1} +!48 = !{i32 32} +!49 = !{} +!50 = !{i1 true, i1 true} +!51 = !{!52, !54, !56, !58} +!52 = distinct !{!52, !53, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: %agg.result"} +!53 = distinct !{!53, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!54 = distinct !{!54, !55, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: %agg.result"} +!55 = distinct !{!55, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} +!56 = distinct !{!56, !57, !"_ZN4sycl3_V16detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!57 = distinct !{!57, !"_ZN4sycl3_V16detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!58 = distinct !{!58, !59, !"_ZN4sycl3_V16detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!59 = distinct !{!59, !"_ZN4sycl3_V16detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} +!60 = !{i32 170} +!61 = !{i32 64} +!62 = !{!63, !65, !67, !69} +!63 = distinct !{!63, !64, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: %agg.result"} +!64 = distinct !{!64, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!65 = distinct !{!65, !66, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: %agg.result"} +!66 = distinct !{!66, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} +!67 = distinct !{!67, !68, !"_ZN4sycl3_V16detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!68 = distinct !{!68, !"_ZN4sycl3_V16detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!69 = distinct !{!69, !70, !"_ZN4sycl3_V16detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!70 = distinct !{!70, !"_ZN4sycl3_V16detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} +!71 = !{i32 352} +!72 = !{!73, !75, !77, !79} +!73 = distinct !{!73, !74, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: %agg.result"} +!74 = distinct !{!74, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!75 = distinct !{!75, !76, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: %agg.result"} +!76 = distinct !{!76, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} +!77 = distinct !{!77, !78, !"_ZN4sycl3_V16detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!78 = distinct !{!78, !"_ZN4sycl3_V16detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!79 = distinct !{!79, !80, !"_ZN4sycl3_V16detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!80 = distinct !{!80, !"_ZN4sycl3_V16detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} +!81 = !{i32 534} diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll b/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll index e464d38708055..7506268f3b75a 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll @@ -9,16 +9,16 @@ ; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR -; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP -; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-SYM -; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM +; RUN: FileCheck %s -input-file=%t_esimd_large_grf_0.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR +; RUN: FileCheck %s -input-file=%t_esimd_large_grf_0.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM +; RUN: FileCheck %s -input-file=%t_esimd_1.sym --check-prefixes CHECK-ESIMD-SYM +; RUN: FileCheck %s -input-file=%t_esimd_large_grf_0.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM ; CHECK: [Code|Properties|Symbols] ; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym -; CHECK: {{.*}}esimd_0.ll|{{.*}}esimd_0.prop|{{.*}}esimd_0.sym -; CHECK: {{.*}}esimd_large_grf_1.ll|{{.*}}esimd_large_grf_1.prop|{{.*}}esimd_large_grf_1.sym +; CHECK: {{.*}}esimd-large-grf.ll.tmp_1.ll|{{.*}}esimd-large-grf.ll.tmp_1.prop|{{.*}}esimd-large-grf.ll.tmp_1.sym +; CHECK: {{.*}}esimd-large-grf.ll.tmp_esimd_1.ll|{{.*}}esimd-large-grf.ll.tmp_esimd_1.prop|{{.*}}esimd-large-grf.ll.tmp_esimd_1.sym ; CHECK-ESIMD-LargeGRF-PROP: isEsimdImage=1|1 ; CHECK-ESIMD-LargeGRF-PROP: isLargeGRF=1|1 diff --git a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll b/llvm/test/tools/sycl-post-link/sycl-large-grf.ll index 35b10716f3e0b..8c140d0c3823a 100644 --- a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll +++ b/llvm/test/tools/sycl-post-link/sycl-large-grf.ll @@ -9,14 +9,14 @@ ; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_large_grf_1.ll --check-prefixes CHECK-LARGE-GRF-IR -; RUN: FileCheck %s -input-file=%t_large_grf_1.prop --check-prefixes CHECK-LARGE-GRF-PROP -; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_large_grf_1.sym --check-prefixes CHECK-LARGE-GRF-SYM +; RUN: FileCheck %s -input-file=%t_large_grf_0.ll --check-prefixes CHECK-LARGE-GRF-IR +; RUN: FileCheck %s -input-file=%t_large_grf_0.prop --check-prefixes CHECK-LARGE-GRF-PROP +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM +; RUN: FileCheck %s -input-file=%t_large_grf_0.sym --check-prefixes CHECK-LARGE-GRF-SYM ; CHECK: [Code|Properties|Symbols] ; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym -; CHECK: {{.*}}_large_grf_1.ll|{{.*}}_large_grf_1.prop|{{.*}}_large_grf_1.sym +; CHECK: {{.*}}-large-grf.ll.tmp_1.ll|{{.*}}-large-grf.ll.tmp_1.prop|{{.*}}-large-grf.ll.tmp_1.sym ; CHECK-LARGE-GRF-PROP: isLargeGRF=1|1 diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index a82d0f54284a1..c569ed2f9dab1 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -13,6 +13,7 @@ #include "Support.h" #include "llvm/ADT/SetVector.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -712,8 +713,8 @@ namespace { struct UsedOptionalFeatures { SmallVector Aspects; bool UsesLargeGRF = false; - // TODO: extend this further with reqd-sub-group-size, reqd-work-group-size - // and other properties + SmallVector ReqdWorkGroupSize; + // TODO: extend this further with reqd-sub-group-size and other properties UsedOptionalFeatures() = default; @@ -734,17 +735,37 @@ struct UsedOptionalFeatures { if (F->hasFnAttribute(::sycl::kernel_props::ATTR_LARGE_GRF)) UsesLargeGRF = true; + if (const MDNode *MDN = F->getMetadata("reqd_work_group_size")) { + size_t NumOperands = MDN->getNumOperands(); + assert(NumOperands >= 1 && NumOperands <= 3 && + "reqd_work_group_size does not have between 1 and 3 operands."); + ReqdWorkGroupSize.reserve(NumOperands); + for (const MDOperand &MDOp : MDN->operands()) + ReqdWorkGroupSize.push_back( + mdconst::extract(MDOp)->getZExtValue()); + } + llvm::hash_code AspectsHash = llvm::hash_combine_range(Aspects.begin(), Aspects.end()); llvm::hash_code LargeGRFHash = llvm::hash_value(UsesLargeGRF); - Hash = static_cast(llvm::hash_combine(AspectsHash, LargeGRFHash)); + llvm::hash_code ReqdWorkGroupSizeHash = llvm::hash_combine_range( + ReqdWorkGroupSize.begin(), ReqdWorkGroupSize.end()); + Hash = static_cast( + llvm::hash_combine(AspectsHash, LargeGRFHash, ReqdWorkGroupSizeHash)); } std::string generateModuleName(StringRef BaseName) const { + std::string Ret = BaseName.str(); + if (!ReqdWorkGroupSize.empty()) { + Ret += "-reqd-wg-size"; + for (int V : ReqdWorkGroupSize) + Ret += "-" + std::to_string(V); + } + if (Aspects.empty()) - return BaseName.str() + "-no-aspects"; + return Ret + "-no-aspects"; - std::string Ret = BaseName.str() + "-aspects"; + Ret += "-aspects"; for (int A : Aspects) { Ret += "-" + std::to_string(A); } diff --git a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp index c388456b04805..8d30db1a2522e 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp @@ -30,20 +30,22 @@ void llvm::getSYCLDeviceRequirements( // Scan the module and if the metadata is present fill the corresponing // property with metadata's aspects constexpr std::pair ReqdMDs[] = { - {"sycl_used_aspects", "aspects"}, {"sycl_fixed_targets", "fixed_target"}}; + {"sycl_used_aspects", "aspects"}, + {"sycl_fixed_targets", "fixed_target"}, + {"reqd_work_group_size", "reqd_work_group_size"}}; for (const auto &MD : ReqdMDs) { - std::set Aspects; + std::set Values; for (const Function &F : M) { if (const MDNode *MDN = F.getMetadata(MD.first)) { for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) - Aspects.insert(ExtractIntegerFromMDNodeOperand(MDN, I)); + Values.insert(ExtractIntegerFromMDNodeOperand(MDN, I)); } } // We don't need the "fixed_target" property if it's empty - if (std::string(MD.first) == "sycl_fixed_targets" && Aspects.empty()) + if (std::string(MD.first) == "sycl_fixed_targets" && Values.empty()) continue; Requirements[MD.second] = - std::vector(Aspects.begin(), Aspects.end()); + std::vector(Values.begin(), Values.end()); } } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 2cd4ee00899d9..c521af4d2bb5d 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -37,6 +37,7 @@ #include #include #include +#include namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { @@ -1711,7 +1712,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( for (const sycl::device &Dev : Devs) { if (!compatibleWithDevice(BinImage, Dev) || - !doesDevSupportImgAspects(Dev, *BinImage)) + !doesDevSupportDeviceRequirements(Dev, *BinImage)) continue; std::shared_ptr> KernelIDs; @@ -2178,25 +2179,88 @@ std::pair ProgramManager::getOrCreateKernel( &(BuildResult->MBuildResultMutex)); } -bool doesDevSupportImgAspects(const device &Dev, - const RTDeviceBinaryImage &Img) { - const RTDeviceBinaryImage::PropertyRange &PropRange = - Img.getDeviceRequirements(); - RTDeviceBinaryImage::PropertyRange::ConstIterator PropIt = std::find_if( - PropRange.begin(), PropRange.end(), - [](RTDeviceBinaryImage::PropertyRange::ConstIterator &&Prop) { - using namespace std::literals; - return (*Prop)->Name == "aspects"sv; - }); - if (PropIt == PropRange.end()) +bool doesDevSupportDeviceRequirements(const device &Dev, + const RTDeviceBinaryImage &Img) { + auto getPropIt = [&Img](const std::string &PropName) { + const RTDeviceBinaryImage::PropertyRange &PropRange = + Img.getDeviceRequirements(); + RTDeviceBinaryImage::PropertyRange::ConstIterator PropIt = std::find_if( + PropRange.begin(), PropRange.end(), + [&PropName](RTDeviceBinaryImage::PropertyRange::ConstIterator &&Prop) { + return (*Prop)->Name == PropName; + }); + return (PropIt == PropRange.end()) + ? std::nullopt + : std::optional< + RTDeviceBinaryImage::PropertyRange::ConstIterator>{PropIt}; + }; + + auto AspectsPropIt = getPropIt("aspects"); + auto ReqdWGSizePropIt = getPropIt("reqd_work_group_size"); + + if (!AspectsPropIt && !ReqdWGSizePropIt) return true; - ByteArray Aspects = DeviceBinaryProperty(*PropIt).asByteArray(); - // Drop 8 bytes describing the size of the byte array. - Aspects.dropBytes(8); - while (!Aspects.empty()) { - aspect Aspect = Aspects.consume(); - if (!Dev.has(Aspect)) + + // Checking if device supports defined aspects + if (AspectsPropIt) { + ByteArray Aspects = + DeviceBinaryProperty(*(AspectsPropIt.value())).asByteArray(); + // Drop 8 bytes describing the size of the byte array. + Aspects.dropBytes(8); + while (!Aspects.empty()) { + aspect Aspect = Aspects.consume(); + if (!Dev.has(Aspect)) + return false; + } + } + + // Checking if device supports defined required work group size + if (ReqdWGSizePropIt) { + ByteArray ReqdWGSize = + DeviceBinaryProperty(*(ReqdWGSizePropIt.value())).asByteArray(); + // Drop 8 bytes describing the size of the byte array. + ReqdWGSize.dropBytes(8); + int ReqdWGSizeAllDimsTotal = 1; + std::vector ReqdWGSizeVec; + int Dims = 0; + while (!ReqdWGSize.empty()) { + int SingleDimSize = ReqdWGSize.consume(); + ReqdWGSizeAllDimsTotal *= SingleDimSize; + ReqdWGSizeVec.push_back(SingleDimSize); + Dims++; + } + if (static_cast(ReqdWGSizeAllDimsTotal) > + Dev.get_info()) return false; + // Creating std::variant to call max_work_item_sizes one time to avoid + // performance drop + std::variant, id<2>, id<3>> MaxWorkItemSizesVariant; + if (Dims == 1) + MaxWorkItemSizesVariant = + Dev.get_info>(); + else if (Dims == 2) + MaxWorkItemSizesVariant = + Dev.get_info>(); + else // (Dims == 3) + MaxWorkItemSizesVariant = + Dev.get_info>(); + for (int i = 0; i < Dims; i++) { + // Extracting value from std::variant to avoid dealing with type-safety + // issues after that + if (Dims == 1) { + // ReqdWGSizeVec is in reverse order compared to MaxWorkItemSizes + if (static_cast(ReqdWGSizeVec[i]) > + std::get>(MaxWorkItemSizesVariant)[Dims - i - 1]) + return false; + } else if (Dims == 2) { + if (static_cast(ReqdWGSizeVec[i]) > + std::get>(MaxWorkItemSizesVariant)[Dims - i - 1]) + return false; + } else // (Dims == 3) + if (static_cast(ReqdWGSizeVec[i]) > + std::get>(MaxWorkItemSizesVariant)[Dims - i - 1]) + return false; + } } return true; } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index d03042ea31f83..abb0b6ddd5513 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -46,8 +46,8 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { class context; namespace detail { -bool doesDevSupportImgAspects(const device &Dev, - const RTDeviceBinaryImage &BinImages); +bool doesDevSupportDeviceRequirements(const device &Dev, + const RTDeviceBinaryImage &BinImages); // This value must be the same as in libdevice/device_itt.h. // See sycl/doc/design/ITTAnnotations.md for more info. diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index e8244197e25f2..e124f0316932a 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -294,7 +294,7 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { detail::ProgramManager::getInstance().getRawDeviceImages(KernelIDs); return std::all_of(BinImages.begin(), BinImages.end(), [&Dev](const detail::RTDeviceBinaryImage *Img) { - return doesDevSupportImgAspects(Dev, *Img); + return doesDevSupportDeviceRequirements(Dev, *Img); }); } diff --git a/sycl/unittests/SYCL2020/IsCompatible.cpp b/sycl/unittests/SYCL2020/IsCompatible.cpp index 8bef0a0c9cafe..19a4d738af4f5 100644 --- a/sycl/unittests/SYCL2020/IsCompatible.cpp +++ b/sycl/unittests/SYCL2020/IsCompatible.cpp @@ -6,6 +6,10 @@ #include class TestKernelCPU; +class TestKernelCPUInvalidReqdWGSize1D; +class TestKernelCPUInvalidReqdWGSize2D; +class TestKernelCPUInvalidReqdWGSize3D; +class TestKernelCPUValidReqdWGSize3D; class TestKernelGPU; class TestKernelACC; @@ -29,6 +33,90 @@ template <> struct KernelInfo { } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace detail { +template <> struct KernelInfo { + static constexpr unsigned getNumParams() { return 0; } + static const kernel_param_desc_t &getParamDesc(int) { + static kernel_param_desc_t Dummy; + return Dummy; + } + static constexpr const char *getName() { return "TestKernelCPUInvalidReqdWGSize1D"; } + static constexpr bool isESIMD() { return false; } + static constexpr bool callsThisItem() { return false; } + static constexpr bool callsAnyThisFreeFunction() { return false; } + static constexpr int64_t getKernelSize() { return 1; } +}; + +} // namespace detail +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace detail { +template <> struct KernelInfo { + static constexpr unsigned getNumParams() { return 0; } + static const kernel_param_desc_t &getParamDesc(int) { + static kernel_param_desc_t Dummy; + return Dummy; + } + static constexpr const char *getName() { return "TestKernelCPUInvalidReqdWGSize2D"; } + static constexpr bool isESIMD() { return false; } + static constexpr bool callsThisItem() { return false; } + static constexpr bool callsAnyThisFreeFunction() { return false; } + static constexpr int64_t getKernelSize() { return 1; } +}; + +} // namespace detail +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace detail { +template <> struct KernelInfo { + static constexpr unsigned getNumParams() { return 0; } + static const kernel_param_desc_t &getParamDesc(int) { + static kernel_param_desc_t Dummy; + return Dummy; + } + static constexpr const char *getName() { + return "TestKernelCPUInvalidReqdWGSize3D"; + } + static constexpr bool isESIMD() { return false; } + static constexpr bool callsThisItem() { return false; } + static constexpr bool callsAnyThisFreeFunction() { return false; } + static constexpr int64_t getKernelSize() { return 1; } +}; + +} // namespace detail +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace detail { +template <> struct KernelInfo { + static constexpr unsigned getNumParams() { return 0; } + static const kernel_param_desc_t &getParamDesc(int) { + static kernel_param_desc_t Dummy; + return Dummy; + } + static constexpr const char *getName() { + return "TestKernelCPUValidReqdWGSize3D"; + } + static constexpr bool isESIMD() { return false; } + static constexpr bool callsThisItem() { return false; } + static constexpr bool callsAnyThisFreeFunction() { return false; } + static constexpr int64_t getKernelSize() { return 1; } +}; + +} // namespace detail +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl + namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { @@ -71,11 +159,11 @@ template <> struct KernelInfo { static sycl::unittest::PiImage generateDefaultImage(std::initializer_list KernelNames, - const std::vector &Aspects) { + const std::vector &Aspects, const std::vector &ReqdWGSize = {}) { using namespace sycl::unittest; PiPropertySet PropSet; - addAspects(PropSet, Aspects); + addDeviceRequirementsProps(PropSet, Aspects, ReqdWGSize); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data @@ -92,12 +180,26 @@ generateDefaultImage(std::initializer_list KernelNames, return Img; } -static sycl::unittest::PiImage Imgs[3] = { - generateDefaultImage({"TestKernelCPU"}, {sycl::aspect::cpu}), +static sycl::unittest::PiImage Imgs[7] = { + // Images for validating checks based on max_work_group_size + aspects + generateDefaultImage({"TestKernelCPU"}, {sycl::aspect::cpu}, + {32}), // 32 <= 256 (OK) + generateDefaultImage({"TestKernelCPUInvalidReqdWGSize1D"}, + {sycl::aspect::cpu}, {257}), // 257 > 256 (FAIL) + generateDefaultImage({"TestKernelCPUInvalidReqdWGSize2D"}, + {sycl::aspect::cpu}, {32, 9}), // 32*9=288 > 256 (FAIL) + // Images for validating checks based on max_work_item_sizes + aspects + generateDefaultImage( + {"TestKernelCPUInvalidReqdWGSize3D"}, {sycl::aspect::cpu}, + {4, 256, 6}), // 4 <= 254 (OK), 256 > 255 (FAIL), 6 <= 256 (OK) + generateDefaultImage( + {"TestKernelCPUValidReqdWGSize3D"}, {sycl::aspect::cpu}, + {2, 4, 5}), // 2 <= 254 (OK), 4 <= 255 (OK), 5 <= 256 (OK) + // Images for validating checks for aspects generateDefaultImage({"TestKernelGPU"}, {sycl::aspect::gpu}), generateDefaultImage({"TestKernelACC"}, {sycl::aspect::accelerator})}; -static sycl::unittest::PiImageArray<3> ImgArray{Imgs}; +static sycl::unittest::PiImageArray<7> ImgArray{Imgs}; static pi_result redefinedDeviceGetInfoCPU(pi_device device, pi_device_info param_name, @@ -108,6 +210,36 @@ static pi_result redefinedDeviceGetInfoCPU(pi_device device, auto *Result = reinterpret_cast<_pi_device_type *>(param_value); *Result = PI_DEVICE_TYPE_CPU; } + if (param_name == PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE) { + auto *Result = static_cast(param_value); + *Result = 256; + } + if (param_name == PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES) { + auto *Result = static_cast(param_value); + *Result = 256; + } + return PI_SUCCESS; +} + +static pi_result redefinedDeviceGetInfoCPU3D(pi_device device, + pi_device_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast<_pi_device_type *>(param_value); + *Result = PI_DEVICE_TYPE_CPU; + } + if (param_name == PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE) { + auto *Result = static_cast(param_value); + *Result = 256; + } + if (param_name == PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES) { + auto *Result = static_cast(param_value); + Result[0] = 256; + Result[1] = 255; + Result[2] = 254; + } return PI_SUCCESS; } @@ -150,6 +282,46 @@ TEST(IsCompatible, CPU) { EXPECT_FALSE(sycl::is_compatible(Dev)); } +TEST(IsCompatible, CPUInvalidReqdWGSize1D) { + sycl::unittest::PiMock Mock; + Mock.redefineAfter( + redefinedDeviceGetInfoCPU); + sycl::platform Plt = Mock.getPlatform(); + const sycl::device Dev = Plt.get_devices()[0]; + + EXPECT_FALSE(sycl::is_compatible(Dev)); +} + +TEST(IsCompatible, CPUInvalidReqdWGSize2D) { + sycl::unittest::PiMock Mock; + Mock.redefineAfter( + redefinedDeviceGetInfoCPU); + sycl::platform Plt = Mock.getPlatform(); + const sycl::device Dev = Plt.get_devices()[0]; + + EXPECT_FALSE(sycl::is_compatible(Dev)); +} + +TEST(IsCompatible, CPUInvalidReqdWGSize3D) { + sycl::unittest::PiMock Mock; + Mock.redefineAfter( + redefinedDeviceGetInfoCPU3D); + sycl::platform Plt = Mock.getPlatform(); + const sycl::device Dev = Plt.get_devices()[0]; + + EXPECT_FALSE(sycl::is_compatible(Dev)); +} + +TEST(IsCompatible, CPUValidReqdWGSize3D) { + sycl::unittest::PiMock Mock; + Mock.redefineAfter( + redefinedDeviceGetInfoCPU3D); + sycl::platform Plt = Mock.getPlatform(); + const sycl::device Dev = Plt.get_devices()[0]; + + EXPECT_TRUE(sycl::is_compatible(Dev)); +} + TEST(IsCompatible, GPU) { sycl::unittest::PiMock Mock; Mock.redefineAfter( diff --git a/sycl/unittests/SYCL2020/KernelBundle.cpp b/sycl/unittests/SYCL2020/KernelBundle.cpp index 09a227a1aa483..28755557ba5ae 100644 --- a/sycl/unittests/SYCL2020/KernelBundle.cpp +++ b/sycl/unittests/SYCL2020/KernelBundle.cpp @@ -74,7 +74,7 @@ generateDefaultImage(std::initializer_list KernelNames, PiPropertySet PropSet; if (!Aspects.empty()) - addAspects(PropSet, Aspects); + addDeviceRequirementsProps(PropSet, Aspects); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data diff --git a/sycl/unittests/helpers/PiImage.hpp b/sycl/unittests/helpers/PiImage.hpp index d21cc38d5e116..38b06eef6d242 100644 --- a/sycl/unittests/helpers/PiImage.hpp +++ b/sycl/unittests/helpers/PiImage.hpp @@ -479,8 +479,7 @@ inline PiProperty makeDeviceGlobalInfo(const std::string &Name, } /// Utility function to add aspects to property set. -inline void addAspects(PiPropertySet &Props, - const std::vector &Aspects) { +inline PiProperty makeAspectsProp(const std::vector &Aspects) { const size_t BYTES_FOR_SIZE = 8; std::vector ValData(BYTES_FOR_SIZE + Aspects.size() * sizeof(sycl::aspect)); @@ -490,8 +489,29 @@ inline void addAspects(PiPropertySet &Props, auto *AspectsPtr = reinterpret_cast(&Aspects[0]); std::uninitialized_copy(AspectsPtr, AspectsPtr + Aspects.size(), ValData.data() + BYTES_FOR_SIZE); - PiProperty Prop{"aspects", ValData, PI_PROPERTY_TYPE_BYTE_ARRAY}; - PiArray Value{std::move(Prop)}; + return {"aspects", ValData, PI_PROPERTY_TYPE_BYTE_ARRAY}; +} + +inline PiProperty makeReqdWGSizeProp(const std::vector &ReqdWGSize) { + const size_t BYTES_FOR_SIZE = 8; + std::vector ValData(BYTES_FOR_SIZE + ReqdWGSize.size() * sizeof(int)); + uint64_t ValDataSize = ValData.size(); + std::uninitialized_copy(&ValDataSize, &ValDataSize + sizeof(uint64_t), + ValData.data()); + auto *ReqdWGSizePtr = reinterpret_cast(&ReqdWGSize[0]); + std::uninitialized_copy(ReqdWGSizePtr, + ReqdWGSizePtr + ReqdWGSize.size() * sizeof(int), + ValData.data() + BYTES_FOR_SIZE); + return {"reqd_work_group_size", ValData, PI_PROPERTY_TYPE_BYTE_ARRAY}; +} + +inline void +addDeviceRequirementsProps(PiPropertySet &Props, + const std::vector &Aspects, + const std::vector &ReqdWGSize = {}) { + PiArray Value{makeAspectsProp(Aspects)}; + if (!ReqdWGSize.empty()) + Value.push_back(makeReqdWGSizeProp(ReqdWGSize)); Props.insert(__SYCL_PI_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS, std::move(Value)); }