diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp new file mode 100644 index 0000000000000..429792f94eb1d --- /dev/null +++ b/libdevice/atomic.hpp @@ -0,0 +1,93 @@ +//==-------------- atomic.hpp - support of atomic operations ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include + +#include "device.h" + +#ifdef __SPIR__ + +#define SPIR_GLOBAL __attribute__((opencl_global)) + +namespace __spv { +struct Scope { + + enum Flag : uint32_t { + CrossDevice = 0, + Device = 1, + Workgroup = 2, + Subgroup = 3, + Invocation = 4, + }; + + constexpr Scope(Flag flag) : flag_value(flag) {} + + constexpr operator uint32_t() const { return flag_value; } + + Flag flag_value; +}; + +struct MemorySemanticsMask { + + enum Flag : uint32_t { + None = 0x0, + Acquire = 0x2, + Release = 0x4, + AcquireRelease = 0x8, + SequentiallyConsistent = 0x10, + UniformMemory = 0x40, + SubgroupMemory = 0x80, + WorkgroupMemory = 0x100, + CrossWorkgroupMemory = 0x200, + AtomicCounterMemory = 0x400, + ImageMemory = 0x800, + }; + + constexpr MemorySemanticsMask(Flag flag) : flag_value(flag) {} + + constexpr operator uint32_t() const { return flag_value; } + + Flag flag_value; +}; +} // namespace __spv + +extern DEVICE_EXTERNAL int +__spirv_AtomicCompareExchange(int SPIR_GLOBAL *, __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag, + __spv::MemorySemanticsMask::Flag, int, int); + +extern DEVICE_EXTERNAL int __spirv_AtomicLoad(const int SPIR_GLOBAL *, + __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag); + +extern DEVICE_EXTERNAL void +__spirv_AtomicStore(int SPIR_GLOBAL *, __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag, int); + +/// Atomically set the value in *Ptr with Desired if and only if it is Expected +/// Return the value which already was in *Ptr +static inline int atomicCompareAndSet(SPIR_GLOBAL int *Ptr, int Desired, + int Expected) { + return __spirv_AtomicCompareExchange( + Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent, + __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected); +} + +static inline int atomicLoad(SPIR_GLOBAL int *Ptr) { + return __spirv_AtomicLoad(Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent); +} + +static inline void atomicStore(SPIR_GLOBAL int *Ptr, int V) { + __spirv_AtomicStore(Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent, V); +} + +#endif // __SPIR__ diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 724d4635fb0b5..b03a3409b7bf8 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -6,26 +6,93 @@ // //===----------------------------------------------------------------------===// +#include "atomic.hpp" +#include "include/assert-happened.hpp" #include "wrapper.h" #ifdef __SPIR__ -static const __attribute__((opencl_constant)) char assert_fmt[] = - "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " - "Assertion `%s` failed.\n"; + +#define ASSERT_NONE 0 +#define ASSERT_START 1 +#define ASSERT_FINISH 2 + +// definition +SPIR_GLOBAL AssertHappened SPIR_AssertHappenedMem; + +DEVICE_EXTERN_C void __devicelib_assert_read(void *_Dst) { + AssertHappened *Dst = (AssertHappened *)_Dst; + int Flag = atomicLoad(&SPIR_AssertHappenedMem.Flag); + + if (ASSERT_NONE == Flag) { + Dst->Flag = Flag; + return; + } + + if (Flag != ASSERT_FINISH) + while (ASSERT_START == atomicLoad(&SPIR_AssertHappenedMem.Flag)) + ; + + *Dst = SPIR_AssertHappenedMem; +} DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, int32_t line, const char *func, uint64_t gid0, uint64_t gid1, uint64_t gid2, uint64_t lid0, uint64_t lid1, uint64_t lid2) { - // intX_t types are used instead of `int' and `long' because the format string - // is defined in terms of *device* types (OpenCL types): %d matches a 32 bit - // integer, %lu matches a 64 bit unsigned integer. Host `int' and - // `long' types may be different, so we cannot use them. - __spirv_ocl_printf(assert_fmt, file, (int32_t)line, - // WORKAROUND: IGC does not handle this well - // (func) ? func : "", - func, gid0, gid1, gid2, lid0, lid1, lid2, expr); + int Expected = ASSERT_NONE; + int Desired = ASSERT_START; + + if (atomicCompareAndSet(&SPIR_AssertHappenedMem.Flag, Desired, Expected) == + Expected) { + SPIR_AssertHappenedMem.Line = line; + SPIR_AssertHappenedMem.GID0 = gid0; + SPIR_AssertHappenedMem.GID1 = gid1; + SPIR_AssertHappenedMem.GID2 = gid2; + SPIR_AssertHappenedMem.LID0 = lid0; + SPIR_AssertHappenedMem.LID1 = lid1; + SPIR_AssertHappenedMem.LID2 = lid2; + + int ExprLength = 0; + int FileLength = 0; + int FuncLength = 0; + + if (expr) + for (const char *C = expr; *C != '\0'; ++C, ++ExprLength) + ; + if (file) + for (const char *C = file; *C != '\0'; ++C, ++FileLength) + ; + if (func) + for (const char *C = func; *C != '\0'; ++C, ++FuncLength) + ; + + int MaxExprIdx = sizeof(SPIR_AssertHappenedMem.Expr) - 1; + int MaxFileIdx = sizeof(SPIR_AssertHappenedMem.File) - 1; + int MaxFuncIdx = sizeof(SPIR_AssertHappenedMem.Func) - 1; + + if (ExprLength < MaxExprIdx) + MaxExprIdx = ExprLength; + if (FileLength < MaxFileIdx) + MaxFileIdx = FileLength; + if (FuncLength < MaxFuncIdx) + MaxFuncIdx = FuncLength; + + for (int Idx = 0; Idx < MaxExprIdx; ++Idx) + SPIR_AssertHappenedMem.Expr[Idx] = expr[Idx]; + SPIR_AssertHappenedMem.Expr[MaxExprIdx] = '\0'; + + for (int Idx = 0; Idx < MaxFileIdx; ++Idx) + SPIR_AssertHappenedMem.File[Idx] = file[Idx]; + SPIR_AssertHappenedMem.File[MaxFileIdx] = '\0'; + + for (int Idx = 0; Idx < MaxFuncIdx; ++Idx) + SPIR_AssertHappenedMem.Func[Idx] = func[Idx]; + SPIR_AssertHappenedMem.Func[MaxFuncIdx] = '\0'; + + // Show we've done copying + atomicStore(&SPIR_AssertHappenedMem.Flag, ASSERT_FINISH); + } // FIXME: call SPIR-V unreachable instead // volatile int *die = (int *)0x0; diff --git a/libdevice/include/assert-happened.hpp b/libdevice/include/assert-happened.hpp new file mode 100644 index 0000000000000..8b50f5ef216b3 --- /dev/null +++ b/libdevice/include/assert-happened.hpp @@ -0,0 +1,48 @@ +//==-- assert-happened.hpp - Structure and declaration for assert support --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +// Treat this header as system one to workaround frontend's restriction +#pragma clang system_header + +#ifdef __SPIR__ + +// NOTE Layout of this structure should be aligned with the one in +// sycl/include/CL/sycl/detail/assert_happened.hpp +struct AssertHappened { + int Flag = 0; + char Expr[256 + 1] = ""; + char File[256 + 1] = ""; + char Func[128 + 1] = ""; + + int32_t Line = 0; + + uint64_t GID0 = 0; + uint64_t GID1 = 0; + uint64_t GID2 = 0; + + uint64_t LID0 = 0; + uint64_t LID1 = 0; + uint64_t LID2 = 0; +}; + +#ifndef SPIR_GLOBAL_VAR +#ifdef __SYCL_DEVICE_ONLY__ +#define SPIR_GLOBAL_VAR __attribute__((sycl_global_var)) +#else +#warning "SPIR_GLOBAL_VAR not defined in host mode. Defining as empty macro." +#define SPIR_GLOBAL_VAR +#endif +#endif + +#define __SYCL_GLOBAL__ __attribute__((opencl_global)) + +// declaration +extern SPIR_GLOBAL_VAR __SYCL_GLOBAL__ AssertHappened SPIR_AssertHappenedMem; + +#endif diff --git a/sycl/cmake/modules/AddSYCLUnitTest.cmake b/sycl/cmake/modules/AddSYCLUnitTest.cmake index 3d6bb6301d77d..f045d4e23f674 100644 --- a/sycl/cmake/modules/AddSYCLUnitTest.cmake +++ b/sycl/cmake/modules/AddSYCLUnitTest.cmake @@ -21,7 +21,8 @@ macro(add_sycl_unittest test_dirname link_variant) else() add_unittest(SYCLUnitTests ${test_dirname} $ ${ARGN}) - target_compile_definitions(${test_dirname} PRIVATE __SYCL_BUILD_SYCL_DLL) + target_compile_definitions(${test_dirname} + PRIVATE __SYCL_BUILD_SYCL_DLL) get_target_property(SYCL_LINK_LIBS ${sycl_so_target} LINK_LIBRARIES) endif() diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 12b074c258665..43d4231a217cd 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -149,10 +149,9 @@ The following sequence of events describes how user code gets notified: 2. A host-task is enqueued to check value of assert failure flag. 3. The host task calls abort whenever assert failure flag is set. -DPCPP Runtime will automatically check if assertions are enabled in the kernel +DPCPP Runtime will automatically check if assertions are used in the kernel being run, and won't enqueue the auxiliary kernels if assertions are not -enabled. So there is no host-side runtime overhead when assertion are not -enabled. +used. So there is no host-side runtime overhead when assertion are not used. Illustrating this with an example, lets assume the user enqueues three kernels: - `Kernel #1`, uses assert @@ -172,18 +171,25 @@ same binary image where fallback `__devicelib_assert_fail` resides. declaration: ```c++ -namespace cl { -namespace sycl { -namespace detail { -struct AssertHappened { +struct __SYCL_AssertHappened { int Flag = 0; + char Expr[256 + 1] = ""; + char File[256 + 1] = ""; + char Func[128 + 1] = ""; + + int32_t Line = 0; + + uint64_t GID0 = 0; + uint64_t GID1 = 0; + uint64_t GID2 = 0; + + uint64_t LID0 = 0; + uint64_t LID1 = 0; + uint64_t LID2 = 0; }; -} -} -} #ifdef __SYCL_DEVICE_ONLY__ -extern SYCL_GLOBAL_VAR AssertHappened AssertHappenedMem; +extern SYCL_GLOBAL_VAR __SYCL_AssertHappened __SYCL_AssertHappenedMem; #endif ``` @@ -193,6 +199,28 @@ mutable program-scope variable. The reference to extern variable is resolved within online-linking against fallback devicelib. +#### Description of fields + +The value stored here denotes if assert happened at all. There are two valid +values at host: + +| Value | Meaning | +| ----- | ------- | +| 0 | No assert failure detected | +| 2 | Assert failure detected and reported within this instance of struct | + +At device-side, there's another valid value: 1, which means that assert failure +is detected and the structure is filling up at the moment. This value is for +device-side only and should never be reported to host. Otherwise, it means, that +atomic operation malfunctioned. + +`Expr`, `File`, `Func`, `Line` are to describe the assert message itself and +contain the expression, file name, function name, line in the file where assert +failure had happened respectively. + +`GID*` and `LID*` fields describe the global and local ID respectively of a +work-item in which assert had failed. + ### Online-linking fallback `__devicelib_assert_fail` Online linking against fallback implementation of `__devicelib_assert_fail` is diff --git a/sycl/doc/PreprocessorMacros.md b/sycl/doc/PreprocessorMacros.md index d15f0525d8bed..9bf842388116c 100644 --- a/sycl/doc/PreprocessorMacros.md +++ b/sycl/doc/PreprocessorMacros.md @@ -33,6 +33,17 @@ This file describes macros that have effect on SYCL compiler and run-time. Disables all deprecation warnings in SYCL runtime headers, including SYCL 1.2.1 deprecations. +- **SYCL_DISABLE_FALLBACK_ASSERT** + + Defining this macro eliminates some overhead that is associated with + submitting kernels that call `assert()`. When this macro is defined, the logic + for detecting assertion failures in kernels is disabled, so a failed assert + will not cause a message to be printed and will not cause the program to + abort. However, this macro only affects kernels that are submitted to devices + that do **not** have native support for `assert()` because devices with native + support do not impose any extra overhead. One can check to see if a device has + native support for `assert()` via `aspect::ext_oneapi_native_assert`. + ## Version macros - `__LIBSYCL_MAJOR_VERSION` is set to SYCL runtime library major version. diff --git a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst index 5e50f9a0676b2..76487b31358be 100644 --- a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst +++ b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst @@ -14,6 +14,7 @@ cl_intel_devicelib_cassert __generic const char *func, size_t gid0, size_t gid1, size_t gid2, size_t lid0, size_t lid1, size_t lid2); + Semantic: the function is called when an assertion expression `expr` is false, and it indicates that a program does not execute as expected. @@ -21,6 +22,9 @@ The function should print a message containing the information provided in the arguments. In addition to that, the function is free to terminate the current kernel invocation. +Fallback implementation of the function raises a flag to be read later by `__devicelib_assert_read`. +The flag remains raised until the program finishes. + Arguments: - `expr` is a string representation of the assertion condition @@ -33,6 +37,16 @@ Example of a message: .. code: foo.cpp:42: void foo(int): global id: [0,0,0], local id: [0,0,0] Assertion `buf[wiID] == 0 && "Invalid value"` failed. +.. code: + int __devicelib_assert_read(); + +Semantic: +the function is called to read assert failure flag raised by +`__devicelib_assert_fail`. +The function is only used in fallback implementation. +Invoking `__devicelib_assert_read` after a kernel doesn't imply the kernel has +assertion failed. + See also: assert_extension_. .. _assert_extension: ../Assert/SYCL_ONEAPI_ASSERT.asciidoc) diff --git a/sycl/include/CL/sycl/aspects.hpp b/sycl/include/CL/sycl/aspects.hpp index 80be943383fc1..0edce7775caa1 100644 --- a/sycl/include/CL/sycl/aspects.hpp +++ b/sycl/include/CL/sycl/aspects.hpp @@ -46,6 +46,7 @@ enum class aspect { atomic64 = 28, ext_intel_device_info_uuid = 29, ext_oneapi_srgb = 30, + ext_oneapi_native_assert = 31, }; } // namespace sycl diff --git a/sycl/include/CL/sycl/detail/assert_happened.hpp b/sycl/include/CL/sycl/detail/assert_happened.hpp new file mode 100644 index 0000000000000..d4172efcf3f78 --- /dev/null +++ b/sycl/include/CL/sycl/detail/assert_happened.hpp @@ -0,0 +1,42 @@ +//==------- assert_happened.hpp - Assert signalling structure --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#ifdef __SYCL_DEVICE_ONLY__ +// Reads Flag of AssertHappened on device +SYCL_EXTERNAL __attribute__((weak)) extern "C" void +__devicelib_assert_read(void *); +#endif + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { +// NOTE Layout of this structure should be aligned with the one in +// libdevice/include/assert-happened.hpp +struct AssertHappened { + int Flag = 0; // set to non-zero upon assert failure + char Expr[256 + 1] = ""; + char File[256 + 1] = ""; + char Func[128 + 1] = ""; + + int32_t Line = 0; + + uint64_t GID0 = 0; + uint64_t GID1 = 0; + uint64_t GID2 = 0; + + uint64_t LID0 = 0; + uint64_t LID1 = 0; + uint64_t LID2 = 0; +}; +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 8349823df54ba..3ab941823e603 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -690,6 +690,12 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; #define __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64 "nvptx64" #define __SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN "amdgcn" +/// Extension to denote native support of assert feature by an arbitrary device +/// piDeviceGetInfo call should return this extension when the device supports +/// native asserts if supported extensions' names are requested +#define PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT \ + "pi_ext_intel_devicelib_assert" + /// Device binary image property set names recognized by the SYCL runtime. /// Name must be consistent with /// PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS defined in @@ -707,6 +713,7 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; #define __SYCL_PI_PROPERTY_SET_PROGRAM_METADATA "SYCL/program metadata" /// PropertySetRegistry::SYCL_MISC_PROP defined in PropertySetIO.h #define __SYCL_PI_PROPERTY_SET_SYCL_MISC_PROP "SYCL/misc properties" +#define __SYCL_PI_PROPERTY_SET_SYCL_ASSERT_USED "SYCL/assert used" /// Program metadata tags recognized by the PI backends. For kernels the tag /// must appear after the kernel name. @@ -927,6 +934,9 @@ __SYCL_EXPORT pi_result piDevicesGet(pi_platform platform, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices); +/// Returns requested info for provided native device +/// Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for +/// PI_DEVICE_INFO_EXTENSIONS query when the device supports native asserts __SYCL_EXPORT pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index 3651c67cf0e56..dcde4e1d807d7 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -361,6 +361,13 @@ class DeviceBinaryImage { const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; } + const PropertyRange getAssertUsed() const { + // We can't have this variable as a class member, since it would break + // the ABI backwards compatibility. + PropertyRange AssertUsed; + AssertUsed.init(Bin, __SYCL_PI_PROPERTY_SET_SYCL_ASSERT_USED); + return AssertUsed; + } const PropertyRange &getProgramMetadata() const { return ProgramMetadata; } virtual ~DeviceBinaryImage() {} diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index 641d25f83ee02..70e70d13ad28b 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -133,7 +133,6 @@ class __SYCL_EXPORT event { return reinterpret_cast::type>( getNative()); } - private: event(std::shared_ptr EventImpl); diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index b5881d3a2b4d4..67008e57a4a82 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -2432,6 +2432,7 @@ class __SYCL_EXPORT handler { access::target); friend class ::MockHandler; + friend class detail::queue_impl; bool DisableRangeRounding(); diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 413bc49f29186..d50840e8f345e 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include +#include #include // having _TWO_ mid-param #ifdefs makes the functions very difficult to read. @@ -58,14 +60,29 @@ #define _KERNELFUNCPARAM(a) const KernelType &a #endif +// Helper macro to identify if fallback assert is needed +// FIXME remove __NVPTX__ condition once devicelib supports CUDA +#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) +#define __SYCL_USE_FALLBACK_ASSERT 1 +#else +#define __SYCL_USE_FALLBACK_ASSERT 0 +#endif + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { // Forward declaration class context; class device; +class queue; + namespace detail { class queue_impl; +#if __SYCL_USE_FALLBACK_ASSERT +class AssertInfoCopier; +static event submitAssertCapture(queue &, event &, queue *, + const detail::code_location &); +#endif } /// Encapsulates a single SYCL queue which schedules kernels on a SYCL device. @@ -214,6 +231,7 @@ class __SYCL_EXPORT queue { template typename info::param_traits::return_type get_info() const; +public: /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. /// @@ -223,7 +241,30 @@ class __SYCL_EXPORT queue { template event submit(T CGF _CODELOCPARAM(&CodeLoc)) { _CODELOCARG(&CodeLoc); - return submit_impl(CGF, CodeLoc); + event Event; + +#if __SYCL_USE_FALLBACK_ASSERT + if (!is_host()) { + auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert, + event &E) { + if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && + KernelUsesAssert) { + // __devicelib_assert_fail isn't supported by Device-side Runtime + // Linking against fallback impl of __devicelib_assert_fail is + // performed by program manager class + submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, + CodeLoc); + } + }; + + Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess); + } else +#endif // __SYCL_USE_FALLBACK_ASSERT + { + Event = submit_impl(CGF, CodeLoc); + } + + return Event; } /// Submits a command group function object to the queue, in order to be @@ -241,7 +282,27 @@ class __SYCL_EXPORT queue { event submit(T CGF, queue &SecondaryQueue _CODELOCPARAM(&CodeLoc)) { _CODELOCARG(&CodeLoc); - return submit_impl(CGF, SecondaryQueue, CodeLoc); + event Event; + +#if __SYCL_USE_FALLBACK_ASSERT + auto PostProcess = [this, &SecondaryQueue, &CodeLoc]( + bool IsKernel, bool KernelUsesAssert, event &E) { + if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && + KernelUsesAssert) { + // __devicelib_assert_fail isn't supported by Device-side Runtime + // Linking against fallback impl of __devicelib_assert_fail is performed + // by program manager class + submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc); + } + }; + + Event = + submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc, PostProcess); +#else + Event = submit_impl(CGF, SecondaryQueue, CodeLoc); +#endif // __SYCL_USE_FALLBACK_ASSERT + + return Event; } /// Prevents any commands submitted afterward to this queue from executing @@ -944,6 +1005,11 @@ class __SYCL_EXPORT queue { template friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); +#if __SYCL_USE_FALLBACK_ASSERT + friend event detail::submitAssertCapture(queue &, event &, queue *, + const detail::code_location &); +#endif + /// A template-free version of submit. event submit_impl(std::function CGH, const detail::code_location &CodeLoc); @@ -951,6 +1017,33 @@ class __SYCL_EXPORT queue { event submit_impl(std::function CGH, queue secondQueue, const detail::code_location &CodeLoc); + // Function to postprocess submitted command + // Arguments: + // bool IsKernel - true if the submitted command was kernel, false otherwise + // bool KernelUsesAssert - true if submitted kernel uses assert, only + // meaningful when IsKernel is true + // event &Event - event after which post processing should be executed + using SubmitPostProcessF = std::function; + + /// A template-free version of submit. + /// \param CGH command group function/handler + /// \param CodeLoc code location + /// + /// This method stores additional information within event_impl class instance + event submit_impl_and_postprocess(function_class CGH, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess); + /// A template-free version of submit. + /// \param CGH command group function/handler + /// \param secondQueue fallback queue + /// \param CodeLoc code location + /// + /// This method stores additional information within event_impl class instance + event submit_impl_and_postprocess(function_class CGH, + queue secondQueue, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess); + /// parallel_for_impl with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -1013,8 +1106,93 @@ class __SYCL_EXPORT queue { }, CodeLoc); } + + buffer &getAssertHappenedBuffer(); }; +namespace detail { +#if __SYCL_USE_FALLBACK_ASSERT +#define __SYCL_ASSERT_START 1 +/** + * Submit copy task for assert failure flag and host-task to check the flag + * \param Event kernel's event to depend on i.e. the event represents the + * kernel to check for assertion failure + * \param SecondaryQueue secondary queue for submit process, null if not used + * \returns host tasks event + * + * This method doesn't belong to queue class to overcome msvc behaviour due to + * which it gets compiled and exported without any integration header and, thus, + * with no proper KernelInfo instance. + */ +event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue, + const detail::code_location &CodeLoc) { + using AHBufT = buffer; + + AHBufT &Buffer = Self.getAssertHappenedBuffer(); + + event CopierEv, CheckerEv, PostCheckerEv; + auto CopierCGF = [&](handler &CGH) { + CGH.depends_on(Event); + + auto Acc = Buffer.get_access(CGH); + + CGH.single_task([Acc] { +#ifdef __SYCL_DEVICE_ONLY__ + __devicelib_assert_read(&Acc[0]); +#else + (void)Acc; +#endif // __SYCL_DEVICE_ONLY__ + }); + }; + auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) { + CGH.depends_on(CopierEv); + using mode = access::mode; + using target = access::target; + + auto Acc = Buffer.get_access(CGH); + + CGH.codeplay_host_task([=] { + const detail::AssertHappened *AH = &Acc[0]; + + // Don't use assert here as msvc will insert reference to __imp__wassert + // which won't be properly resolved in separate compile use-case +#ifndef NDEBUG + if (AH->Flag == __SYCL_ASSERT_START) + throw sycl::runtime_error( + "Internal Error. Invalid value in assert description.", + PI_INVALID_VALUE); +#endif + + if (AH->Flag) { + const char *Expr = AH->Expr[0] ? AH->Expr : ""; + const char *File = AH->File[0] ? AH->File : ""; + const char *Func = AH->Func[0] ? AH->Func : ""; + + fprintf(stderr, + "%s:%d: %s: global id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 + "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] " + "Assertion `%s` failed.\n", + File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0, + AH->LID1, AH->LID2, Expr); + abort(); // no need to release memory as it's abort anyway + } + }); + }; + + if (SecondaryQueue) { + CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc); + CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc); + } else { + CopierEv = Self.submit_impl(CopierCGF, CodeLoc); + CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc); + } + + return CheckerEv; +} +#undef __SYCL_ASSERT_START +#endif // __SYCL_USE_FALLBACK_ASSERT +} // namespace detail + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) @@ -1026,3 +1204,5 @@ template <> struct hash { } }; } // namespace std + +#undef __SYCL_USE_FALLBACK_ASSERT diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 8fccf15f647b2..31b7266323559 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1453,6 +1453,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_EXTENSIONS: { std::string SupportedExtensions = "cl_khr_fp64 "; + SupportedExtensions += PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT; + SupportedExtensions += " "; int major = 0; int minor = 0; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 8446dbe7127a4..d988b0c291b6e 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -317,6 +317,8 @@ bool device_impl::has(aspect Aspect) const { return false; case aspect::ext_oneapi_srgb: return get_info(); + case aspect::ext_oneapi_native_assert: + return isAssertFailSupported(); default: throw runtime_error("This device aspect has not been implemented yet.", @@ -331,6 +333,14 @@ std::shared_ptr device_impl::getHostDeviceImpl() { return HostImpl; } +bool device_impl::isAssertFailSupported() const { + // assert is sort of natively supported by host + if (MIsHostDevice) + return true; + + return has_extension(PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT); +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 8de4bb1435229..51ea4f593de51 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -222,6 +222,8 @@ class device_impl { /// \return the host device_impl singleton static std::shared_ptr getHostDeviceImpl(); + bool isAssertFailSupported() const; + private: explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 40d30f7f7acc0..41880d4d2870a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -351,6 +351,20 @@ pi_native_handle queue_impl::getNative() const { return Handle; } +bool queue_impl::kernelUsesAssert(const std::string &KernelName, + OSModuleHandle Handle) const { + RTDeviceBinaryImage &BinImg = ProgramManager::getInstance().getDeviceImage( + Handle, KernelName, get_context(), get_device()); + + const pi::DeviceBinaryImage::PropertyRange &AssertUsedRange = + BinImg.getAssertUsed(); + if (AssertUsedRange.isAvailable()) + for (const auto &Prop : AssertUsedRange) + if (Prop->Name == KernelName) + return true; + + return false; +} } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 031b893fd05bd..4b5fa9f494222 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -79,7 +80,8 @@ class queue_impl { queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), - MPropList(PropList), MHostQueue(MDevice->is_host()) { + MPropList(PropList), MHostQueue(MDevice->is_host()), + MAssertHappenedBuffer(range<1>{1}) { if (!Context->hasDevice(Device)) throw cl::sycl::invalid_parameter_error( "Queue cannot be constructed with the given context and device " @@ -102,7 +104,8 @@ class queue_impl { /// \param AsyncHandler is a SYCL asynchronous exception handler. queue_impl(RT::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler) - : MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false) { + : MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false), + MAssertHappenedBuffer(range<1>{1}) { MQueues.push_back(pi::cast(PiQueue)); @@ -159,6 +162,8 @@ class queue_impl { template typename info::param_traits::return_type get_info() const; + using SubmitPostProcessF = std::function; + /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. /// @@ -169,20 +174,22 @@ class queue_impl { /// \param Self is a shared_ptr to this queue. /// \param SecondQueue is a shared_ptr to the secondary queue. /// \param Loc is the code location of the submit call (default argument) + /// \param StoreAdditionalInfo makes additional info be stored in event_impl /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. event submit(const std::function &CGF, const std::shared_ptr &Self, const std::shared_ptr &SecondQueue, - const detail::code_location &Loc) { + const detail::code_location &Loc, + const SubmitPostProcessF *PostProcess = nullptr) { try { - return submit_impl(CGF, Self, Loc); + return submit_impl(CGF, Self, Loc, PostProcess); } catch (...) { { std::lock_guard Lock(MMutex); MExceptions.PushBack(std::current_exception()); } - return SecondQueue->submit(CGF, SecondQueue, Loc); + return SecondQueue->submit(CGF, SecondQueue, Loc, PostProcess); } } @@ -192,11 +199,13 @@ class queue_impl { /// \param CGF is a function object containing command group. /// \param Self is a shared_ptr to this queue. /// \param Loc is the code location of the submit call (default argument) + /// \param StoreAdditionalInfo makes additional info be stored in event_impl /// \return a SYCL event object for the submitted command group. event submit(const std::function &CGF, const std::shared_ptr &Self, - const detail::code_location &Loc) { - return submit_impl(CGF, Self, Loc); + const detail::code_location &Loc, + const SubmitPostProcessF *PostProcess = nullptr) { + return submit_impl(CGF, Self, Loc, PostProcess); } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -393,6 +402,13 @@ class queue_impl { /// \return a native handle. pi_native_handle getNative() const; + bool kernelUsesAssert(const std::string &KernelName, + OSModuleHandle Handle) const; + + buffer &getAssertHappenedBuffer() { + return MAssertHappenedBuffer; + } + private: /// Performs command group submission to the queue. /// @@ -402,10 +418,12 @@ class queue_impl { /// \return a SYCL event representing submitted command group. event submit_impl(const std::function &CGF, const std::shared_ptr &Self, - const detail::code_location &Loc) { + const detail::code_location &Loc, + const SubmitPostProcessF *PostProcess) { handler Handler(Self, MHostQueue); Handler.saveCodeLoc(Loc); CGF(Handler); + // Scheduler will later omit events, that are not required to execute tasks. // Host and interop tasks, however, are not submitted to low-level runtimes // and require separate dependency management. @@ -413,9 +431,27 @@ class queue_impl { (Handler.getType() == CG::CGTYPE::CodeplayHostTask || Handler.getType() == CG::CGTYPE::CodeplayInteropTask)) Handler.depends_on(MLastEvent); - event Event = Handler.finalize(); + + event Event; + + if (PostProcess) { + bool IsKernel = Handler.getType() == CG::Kernel; + bool KernelUsesAssert = false; + if (IsKernel) + KernelUsesAssert = Handler.MKernel + ? true + : kernelUsesAssert(Handler.MKernelName, + Handler.MOSModuleHandle); + + Event = Handler.finalize(); + + (*PostProcess)(IsKernel, KernelUsesAssert, Event); + } else + Event = Handler.finalize(); + if (has_property()) MLastEvent = Event; + addEvent(Event); return Event; } @@ -473,6 +509,9 @@ class queue_impl { // The thread pool is instantiated upon the very first call to getThreadPool() std::unique_ptr MHostTaskThreadPool; + // Buffer to store assert failure descriptor + buffer MAssertHappenedBuffer; + event MLastEvent; }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 4a6630c2a672e..c412a1ad547d5 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -173,7 +173,7 @@ class DispatchHostTask { ExecCGCommand *MThisCmd; std::vector MReqToMem; - void waitForEvents() const { + pi_result waitForEvents() const { std::map> RequiredEventsPerPlugin; @@ -189,14 +189,27 @@ class DispatchHostTask { // other available job and resume once all required events are ready. for (auto &PluginWithEvents : RequiredEventsPerPlugin) { std::vector RawEvents = getPiEvents(PluginWithEvents.second); - PluginWithEvents.first->call(RawEvents.size(), - RawEvents.data()); + try { + PluginWithEvents.first->call(RawEvents.size(), + RawEvents.data()); + } catch (const sycl::exception &E) { + CGHostTask &HostTask = static_cast(MThisCmd->getCG()); + HostTask.MQueue->reportAsyncException(std::current_exception()); + return (pi_result)E.get_cl_code(); + } catch (...) { + CGHostTask &HostTask = static_cast(MThisCmd->getCG()); + HostTask.MQueue->reportAsyncException(std::current_exception()); + return PI_ERROR_UNKNOWN; + } } - // wait for dependency host events + // Wait for dependency host events. + // Host events can't throw exceptions so don't try to catch it. for (const EventImplPtr &Event : MThisCmd->MPreparedHostDepsEvents) { Event->waitInternal(); } + + return PI_SUCCESS; } public: @@ -205,12 +218,22 @@ class DispatchHostTask { : MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)) {} void operator()() const { - waitForEvents(); - assert(MThisCmd->getCG().getType() == CG::CGTYPE::CodeplayHostTask); CGHostTask &HostTask = static_cast(MThisCmd->getCG()); + pi_result WaitResult = waitForEvents(); + if (WaitResult != PI_SUCCESS) { + std::exception_ptr EPtr = std::make_exception_ptr(sycl::runtime_error( + std::string("Couldn't wait for host-task's dependencies"), + WaitResult)); + HostTask.MQueue->reportAsyncException(EPtr); + + // reset host-task's lambda and quit + HostTask.MHostTask.reset(); + return; + } + try { // we're ready to call the user-defined lambda now if (HostTask.MHostTask->isInteropTask()) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index a8010990d6c1d..4edd0a9eebd25 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -763,6 +763,7 @@ class Scheduler { friend class Command; friend class DispatchHostTask; + friend class queue_impl; /// Stream buffers structure. /// diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index a777a1c66d088..a6fa29fe6a7f3 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -89,6 +89,5 @@ event::event(std::shared_ptr event_impl) backend event::get_backend() const noexcept { return getImplBackend(impl); } pi_native_handle event::getNative() const { return impl->getNative(); } - } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 72e9cf42e0b27..202c4ec7ea8d6 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -136,13 +136,15 @@ event handler::finalize() { switch (getType()) { case detail::CG::Kernel: case detail::CG::RunOnHostIntel: { + // Copy kernel name here instead of move so that it's available after + // running of this method by reductions implementation. This allows for + // assert feature to check if kernel uses assertions CommandGroup.reset(new detail::CGExecKernel( std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel), std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), std::move(MRequirements), - std::move(MEvents), std::move(MArgs), std::move(MKernelName), - std::move(MOSModuleHandle), std::move(MStreamStorage), MCGType, - MCodeLoc)); + std::move(MEvents), std::move(MArgs), MKernelName, MOSModuleHandle, + std::move(MStreamStorage), MCGType, MCodeLoc)); break; } case detail::CG::CodeplayInteropTask: diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 298068ee45201..50031313412a9 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -132,6 +132,19 @@ event queue::submit_impl(std::function CGH, queue SecondQueue, return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); } +event queue::submit_impl_and_postprocess( + function_class CGH, const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess) { + return impl->submit(CGH, impl, CodeLoc, &PostProcess); +} + +event queue::submit_impl_and_postprocess( + function_class CGH, queue SecondQueue, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess) { + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, &PostProcess); +} + void queue::wait_proxy(const detail::code_location &CodeLoc) { impl->wait(CodeLoc); } @@ -175,5 +188,8 @@ backend queue::get_backend() const noexcept { return getImplBackend(impl); } pi_native_handle queue::getNative() const { return impl->getNative(); } +buffer &queue::getAssertHappenedBuffer() { + return impl->getAssertHappenedBuffer(); +} } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 02150506d70af..48df1d6dd9ff1 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3665,6 +3665,9 @@ _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationE _ZN2cl4sycl5queue18throw_asynchronousEv _ZN2cl4sycl5queue20wait_and_throw_proxyERKNS0_6detail13code_locationE +_ZN2cl4sycl5queue23getAssertHappenedBufferEv +_ZN2cl4sycl5queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE +_ZN2cl4sycl5queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE _ZN2cl4sycl5queue6memcpyEPvPKvm _ZN2cl4sycl5queue6memcpyEPvPKvmNS0_5eventE _ZN2cl4sycl5queue6memcpyEPvPKvmRKSt6vectorINS0_5eventESaIS6_EE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 05058379b09d2..dc854bf1395eb 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -2022,6 +2022,7 @@ ?get@platform@sycl@cl@@QEBAPEAU_cl_platform_id@@XZ ?get@program@sycl@cl@@QEBAPEAU_cl_program@@XZ ?get@queue@sycl@cl@@QEBAPEAU_cl_command_queue@@XZ +?getAssertHappenedBuffer@queue@sycl@cl@@AEAAAEAV?$buffer@UAssertHappened@detail@sycl@cl@@$00V?$aligned_allocator@D@234@X@23@XZ ?getBorderColor@detail@sycl@cl@@YA?AV?$vec@M$03@23@W4image_channel_order@23@@Z ?getBufSizeForContext@SYCLMemObjT@detail@sycl@cl@@SA_KAEBV?$shared_ptr@Vcontext_impl@detail@sycl@cl@@@std@@PEAU_cl_mem@@@Z ?getChannelOrder@?$image_impl@$00@detail@sycl@cl@@QEBA?AW4image_channel_order@34@XZ @@ -3981,6 +3982,8 @@ ?submit_barrier@queue@sycl@cl@@QEAA?AVevent@23@AEBV?$vector@Vevent@sycl@cl@@V?$allocator@Vevent@sycl@cl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?submit_impl@queue@sycl@cl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@sycl@cl@@@Z@std@@AEBUcode_location@detail@23@@Z ?submit_impl@queue@sycl@cl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@sycl@cl@@@Z@std@@V123@AEBUcode_location@detail@23@@Z +?submit_impl_and_postprocess@queue@sycl@cl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@sycl@cl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@sycl@cl@@@Z@6@@Z +?submit_impl_and_postprocess@queue@sycl@cl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@sycl@cl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@sycl@cl@@@Z@6@@Z ?sycl_category@sycl@cl@@YAAEBVerror_category@std@@XZ ?tan@__host_std@cl@@YA?AV?$vec@M$00@sycl@2@V342@@Z ?tan@__host_std@cl@@YA?AV?$vec@M$01@sycl@2@V342@@Z diff --git a/sycl/test/check_device_code/atomic_fence.cpp b/sycl/test/check_device_code/atomic_fence.cpp index df4b1a1a4c190..0dc46e11072cc 100644 --- a/sycl/test/check_device_code/atomic_fence.cpp +++ b/sycl/test/check_device_code/atomic_fence.cpp @@ -6,19 +6,19 @@ int main() { sycl::queue Q; Q.single_task([] { - // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 896) #2 + // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 896) #{{.*}} sycl::atomic_fence(sycl::memory_order::relaxed, sycl::memory_scope::work_group); - // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 898) #2 + // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 898) #{{.*}} sycl::atomic_fence(sycl::memory_order::acquire, sycl::memory_scope::work_group); - // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 900) #2 + // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 900) #{{.*}} sycl::atomic_fence(sycl::memory_order::release, sycl::memory_scope::work_group); - // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 904) #2 + // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 904) #{{.*}} sycl::atomic_fence(sycl::memory_order::acq_rel, sycl::memory_scope::work_group); - // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 912) #2 + // CHECK: tail call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 2, i32 912) #{{.*}} sycl::atomic_fence(sycl::memory_order::seq_cst, sycl::memory_scope::work_group); }); diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index 8120f8ee44599..61aa3fae3e19c 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -7,6 +7,8 @@ foreach(flag_var string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}") endforeach() +string(TOLOWER "${CMAKE_BUILD_TYPE}" build_type_lower) + include(AddSYCLUnitTest) add_subdirectory(allowlist) @@ -18,3 +20,4 @@ add_subdirectory(scheduler) add_subdirectory(SYCL2020) add_subdirectory(thread_safety) add_subdirectory(program_manager) +add_subdirectory(assert) diff --git a/sycl/unittests/assert/CMakeLists.txt b/sycl/unittests/assert/CMakeLists.txt new file mode 100644 index 0000000000000..ebed0c40378ea --- /dev/null +++ b/sycl/unittests/assert/CMakeLists.txt @@ -0,0 +1,4 @@ +add_sycl_unittest(AssertTests OBJECT + assert.cpp +) + diff --git a/sycl/unittests/assert/assert.cpp b/sycl/unittests/assert/assert.cpp new file mode 100644 index 0000000000000..4d583d7588610 --- /dev/null +++ b/sycl/unittests/assert/assert.cpp @@ -0,0 +1,393 @@ +//==---------- assert.cpp --- Check assert helpers enqueue -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/* + * This test checks that assert fallback assert feature works well. + * According to the doc, when assert is triggered on device host application + * should abort. That said, a standard `abort()` function is to be called. The + * function makes sure the app terminates due `SIGABRT` signal. This makes it + * impossible to verify the feature in uni-process environment. Hence, we employ + * multi-process envirnment i.e. we call a `fork()`. The child process is should + * abort and the parent process verifies it and checks that child prints correct + * error message to `stderr`. Verification of `stderr` output is performed via + * pipe. + */ + +#include + +#include +#include +#include + +#include + +#ifndef _WIN32 +#include +#include +#endif // _WIN32 + +class TestKernel; + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { +template <> struct KernelInfo { + static constexpr unsigned getNumParams() { return 0; } + static const kernel_param_desc_t &getParamDesc(int) { + static kernel_param_desc_t Dummy; + return Dummy; + } + static constexpr const char *getName() { return "TestKernel"; } + static constexpr bool isESIMD() { return false; } + static constexpr bool callsThisItem() { return false; } + static constexpr bool callsAnyThisFreeFunction() { return false; } +}; + +static constexpr const kernel_param_desc_t Signatures[] = { + {kernel_param_kind_t::kind_accessor, 4062, 0}}; + +template <> struct KernelInfo<::sycl::detail::AssertInfoCopier> { + static constexpr const char *getName() { + return "_ZTSN2cl4sycl6detail16AssertInfoCopierE"; + } + static constexpr unsigned getNumParams() { return 1; } + static constexpr const kernel_param_desc_t &getParamDesc(unsigned Idx) { + assert(!Idx); + return Signatures[Idx]; + } + static constexpr bool isESIMD() { return 0; } + static constexpr bool callsThisItem() { return 0; } + static constexpr bool callsAnyThisFreeFunction() { return 0; } +}; +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) + +static sycl::unittest::PiImage generateDefaultImage() { + using namespace sycl::unittest; + + static const std::string KernelName = "TestKernel"; + static const std::string CopierKernelName = + "_ZTSN2cl4sycl6detail16AssertInfoCopierE"; + + PiPropertySet PropSet; + + setKernelUsesAssert({KernelName}, PropSet); + + std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data + + PiArray Entries = makeEmptyKernels({KernelName}); + + PiImage Img{PI_DEVICE_BINARY_TYPE_SPIRV, // Format + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + + return Img; +} + +static sycl::unittest::PiImage generateCopierKernelImage() { + using namespace sycl::unittest; + + static const std::string CopierKernelName = + "_ZTSN2cl4sycl6detail16AssertInfoCopierE"; + + PiPropertySet PropSet; + + std::vector Bin{10, 11, 12, 13, 14, 15}; // Random data + + PiArray Entries = makeEmptyKernels({CopierKernelName}); + + PiImage Img{PI_DEVICE_BINARY_TYPE_SPIRV, // Format + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + + return Img; +} + +sycl::unittest::PiImage Imgs[] = {generateDefaultImage(), + generateCopierKernelImage()}; +sycl::unittest::PiImageArray<2> ImgArray{Imgs}; + +struct AssertHappened { + int Flag = 0; + char Expr[256 + 1] = ""; + char File[256 + 1] = ""; + char Func[128 + 1] = ""; + + int32_t Line = 0; + + uint64_t GID0 = 0; + uint64_t GID1 = 0; + uint64_t GID2 = 0; + + uint64_t LID0 = 0; + uint64_t LID1 = 0; + uint64_t LID2 = 0; +}; + +// This should not be modified. +// Substituted in memory map operation. +static AssertHappened ExpectedToOutput = { + 2, // assert copying done + "TestExpression", + "TestFile", + "TestFunc", + 123, // line + + 0, // global id + 1, // global id + 2, // global id + 3, // local id + 4, // local id + 5 // local id +}; + +static constexpr int KernelLaunchCounterBase = 0; +static int KernelLaunchCounter = KernelLaunchCounterBase; +static constexpr int MemoryMapCounterBase = 1000; +static int MemoryMapCounter = MemoryMapCounterBase; +static constexpr int PauseWaitOnIdx = KernelLaunchCounterBase + 1; + +// Mock redifinitions +static pi_result redefinedKernelGetGroupInfo(pi_kernel kernel, pi_device device, + pi_kernel_group_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE) { + if (param_value_size_ret) { + *param_value_size_ret = 3 * sizeof(size_t); + } else if (param_value) { + auto size = static_cast(param_value); + size[0] = 1; + size[1] = 1; + size[2] = 1; + } + } + + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *LocalSize, + pi_uint32 N, const pi_event *Deps, + pi_event *RetEvent) { + int *Ret = new int[1]; + *Ret = KernelLaunchCounter++; + // This output here is to reduce amount of time requried to debug/reproduce a + // failing test upon feature break + printf("Enqueued %i\n", *Ret); + + if (PauseWaitOnIdx == *Ret) { + // It should be copier kernel. Check if it depends on user's one. + EXPECT_EQ(N, 1U); + int EventIdx = reinterpret_cast(Deps[0])[0]; + EXPECT_EQ(EventIdx, 0); + } + + *RetEvent = reinterpret_cast(Ret); + return PI_SUCCESS; +} + +static pi_result redefinedEventsWait(pi_uint32 num_events, + const pi_event *event_list) { + // there should be two events: one is for memory map and the other is for + // copier kernel + assert(num_events == 2); + + int EventIdx1 = reinterpret_cast(event_list[0])[0]; + int EventIdx2 = reinterpret_cast(event_list[1])[0]; + // This output here is to reduce amount of time requried to debug/reproduce a + // failing test upon feature break + printf("Waiting for events %i, %i\n", EventIdx1, EventIdx2); + return PI_SUCCESS; +} + +static pi_result +redefinedMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, + void *host_ptr, pi_mem *ret_mem, + const pi_mem_properties *properties = nullptr) { + *ret_mem = nullptr; + return PI_SUCCESS; +} + +static pi_result redefinedMemRelease(pi_mem mem) { return PI_SUCCESS; } + +static pi_result redefinedKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, + size_t arg_size, const void *arg_value) { + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferMap( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, + pi_map_flags map_flags, size_t offset, size_t size, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *RetEvent, void **RetMap) { + int *Ret = new int[1]; + *Ret = MemoryMapCounter++; + // This output here is to reduce amount of time requried to debug/reproduce a + // failing test upon feature break + printf("Memory map %i\n", *Ret); + *RetEvent = reinterpret_cast(Ret); + + *RetMap = (void *)&ExpectedToOutput; + + return PI_SUCCESS; +} + +static pi_result redefinedExtKernelSetArgMemObj(pi_kernel kernel, + pi_uint32 arg_index, + const pi_mem *arg_value) { + return PI_SUCCESS; +} + +static void setupMock(sycl::unittest::PiMock &Mock) { + using namespace sycl::detail; + setupDefaultMockAPIs(Mock); + + Mock.redefine(redefinedKernelGetGroupInfo); + Mock.redefine(redefinedEnqueueKernelLaunch); + Mock.redefine(redefinedMemBufferCreate); + Mock.redefine(redefinedMemRelease); + Mock.redefine(redefinedKernelSetArg); + Mock.redefine(redefinedEnqueueMemBufferMap); + Mock.redefine(redefinedEventsWait); + Mock.redefine( + redefinedExtKernelSetArgMemObj); +} + +#ifndef _WIN32 +void ChildProcess(int StdErrFD) { + static constexpr int StandardStdErrFD = 2; + if (dup2(StdErrFD, StandardStdErrFD) < 0) { + printf("Can't duplicate stderr fd for %i: %s\n", StdErrFD, strerror(errno)); + exit(1); + } + + sycl::platform Plt{sycl::default_selector()}; + + sycl::unittest::PiMock Mock{Plt}; + + setupMock(Mock); + + const sycl::device Dev = Plt.get_devices()[0]; + sycl::queue Queue{Dev}; + + const sycl::context Ctx = Queue.get_context(); + + sycl::kernel_bundle KernelBundle = + sycl::get_kernel_bundle(Ctx, {Dev}); + auto ExecBundle = sycl::build(KernelBundle); + printf("Child process launching kernel\n"); + Queue.submit([&](sycl::handler &H) { + H.use_kernel_bundle(ExecBundle); + H.single_task([] {}); + }); + printf("Child process waiting on the queue\n"); + Queue.wait(); + printf("Child process done waiting on the queue. That's unexpected\n"); + exit(1); +} + +void ParentProcess(int ChildPID, int ChildStdErrFD) { + static constexpr char StandardMessage[] = + "TestFile:123: TestFunc: global id:" + " [0,1,2], local id: [3,4,5] Assertion `TestExpression` failed."; + + int Status = 0; + + printf("Parent process waiting for child %i\n", ChildPID); + + waitpid(ChildPID, &Status, /*options = */ 0); + + int SigNum = WTERMSIG(Status); + + // Fetch number of unread bytes in pipe + int PipeUnread = 0; + if (ioctl(ChildStdErrFD, FIONREAD, &PipeUnread) < 0) { + perror("Couldn't fetch pipe size: "); + exit(1); + } + + std::vector Buf(PipeUnread + 1, '\0'); + + // Read the pipe contents + { + size_t TotalReadCnt = 0; + + while (TotalReadCnt < static_cast(PipeUnread)) { + ssize_t ReadCnt = read(ChildStdErrFD, Buf.data() + TotalReadCnt, + PipeUnread - TotalReadCnt); + + if (ReadCnt < 0) { + perror("Couldn't read from pipe"); + exit(1); + } + + TotalReadCnt += ReadCnt; + } + } + + std::string BufStr(Buf.data()); + + printf("Status: %i, Signal: %i, Buffer: >>> %s <<<\n", Status, SigNum, + Buf.data()); + + EXPECT_EQ(!!WIFSIGNALED(Status), true); + EXPECT_EQ(SigNum, SIGABRT); + EXPECT_NE(BufStr.find(StandardMessage), std::string::npos); +} +#endif // _WIN32 + +TEST(Assert, TestPositive) { + // Preliminary checks + { + sycl::platform Plt{sycl::default_selector()}; + if (Plt.is_host()) { + printf("Test is not supported on host, skipping\n"); + return; + } + + if (Plt.get_backend() == sycl::backend::cuda) { + printf("Test is not supported on CUDA platform, skipping\n"); + return; + } + } + +#ifndef _WIN32 + static constexpr int ReadFDIdx = 0; + static constexpr int WriteFDIdx = 1; + int PipeFD[2]; + + if (pipe(PipeFD) < 0) { + perror("Failed to create pipe for stderr: "); + exit(1); + } + + int ChildPID = fork(); + + if (ChildPID) { + close(PipeFD[WriteFDIdx]); + ParentProcess(ChildPID, PipeFD[ReadFDIdx]); + close(PipeFD[ReadFDIdx]); + } else { + close(PipeFD[ReadFDIdx]); + ChildProcess(PipeFD[WriteFDIdx]); + close(PipeFD[WriteFDIdx]); + } +#endif // _WIN32 +} diff --git a/sycl/unittests/helpers/PiImage.hpp b/sycl/unittests/helpers/PiImage.hpp index e996e4cbe2d6e..56101a7059d56 100644 --- a/sycl/unittests/helpers/PiImage.hpp +++ b/sycl/unittests/helpers/PiImage.hpp @@ -355,6 +355,15 @@ inline PiProperty makeSpecConstant(std::vector &ValData, return Prop; } +/// Utility function to mark kernel as the one using assert +inline void setKernelUsesAssert(const std::vector &Names, + PiPropertySet &Set) { + PiArray Value; + for (const std::string &N : Names) + Value.push_back({N, {4, 0}, PI_PROPERTY_TYPE_UINT32}); + Set.insert(__SYCL_PI_PROPERTY_SET_SYCL_ASSERT_USED, std::move(Value)); +} + /// Utility function to add specialization constants to property set. /// /// This function overrides the default spec constant values.