From 7b3873c7b08f06b2c13190dd027c79392a46607f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 2 May 2025 07:34:47 -0700 Subject: [PATCH 01/25] [SYCL] Implement loading SYCLBIN into kernel_bundle This commit implements the functionality for loading SYCLBIN files into kernel bundles. This is done by mimicing the structure of regular device binaries, then letting the existing functionality handle compiling and linking. This implements part of the sycl_ext_oneapi_syclbin extension. Signed-off-by: Larsen, Steffen --- .../experimental/syclbin_kernel_bundle.hpp | 78 ++++ sycl/include/sycl/kernel_bundle.hpp | 7 +- sycl/include/sycl/sycl.hpp | 1 + sycl/source/CMakeLists.txt | 1 + sycl/source/detail/base64.hpp | 121 ++++++ sycl/source/detail/device_binary_image.cpp | 32 +- sycl/source/detail/device_binary_image.hpp | 4 +- sycl/source/detail/device_image_impl.hpp | 92 ++-- sycl/source/detail/kernel_bundle_impl.hpp | 92 +++- .../program_manager/program_manager.cpp | 60 ++- .../program_manager/program_manager.hpp | 6 + sycl/source/detail/property_set_io.hpp | 360 ++++++++++++++++ sycl/source/detail/syclbin.cpp | 392 ++++++++++++++++++ sycl/source/detail/syclbin.hpp | 161 +++++++ sycl/source/kernel_bundle.cpp | 6 + sycl/test-e2e/SYCLBIN/Inputs/basic.hpp | 47 +++ sycl/test-e2e/SYCLBIN/Inputs/basic_kernel.cpp | 10 + sycl/test-e2e/SYCLBIN/Inputs/common.hpp | 54 +++ .../SYCLBIN/Inputs/exporting_function.cpp | 6 + .../SYCLBIN/Inputs/importing_kernel.cpp | 11 + sycl/test-e2e/SYCLBIN/Inputs/link.hpp | 60 +++ sycl/test-e2e/SYCLBIN/Inputs/link_rtc.hpp | 79 ++++ sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py | 2 + .../Inputs/optional_kernel_features.cpp | 15 + .../Inputs/optional_kernel_features.hpp | 68 +++ sycl/test-e2e/SYCLBIN/basic_executable.cpp | 20 + sycl/test-e2e/SYCLBIN/basic_input.cpp | 20 + sycl/test-e2e/SYCLBIN/basic_object.cpp | 20 + sycl/test-e2e/SYCLBIN/link_input.cpp | 24 ++ sycl/test-e2e/SYCLBIN/link_object.cpp | 24 ++ sycl/test-e2e/SYCLBIN/link_rtc_input.cpp | 24 ++ sycl/test-e2e/SYCLBIN/link_rtc_object.cpp | 24 ++ .../optional_kernel_features_executable.cpp | 20 + .../optional_kernel_features_input.cpp | 20 + .../optional_kernel_features_object.cpp | 20 + sycl/test/abi/sycl_symbols_linux.dump | 1 + .../no_sycl_hpp_in_e2e_tests.cpp | 2 +- 37 files changed, 1886 insertions(+), 98 deletions(-) create mode 100644 sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp create mode 100644 sycl/source/detail/base64.hpp create mode 100644 sycl/source/detail/property_set_io.hpp create mode 100644 sycl/source/detail/syclbin.cpp create mode 100644 sycl/source/detail/syclbin.hpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/basic.hpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/basic_kernel.cpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/common.hpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/exporting_function.cpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/importing_kernel.cpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/link.hpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/link_rtc.hpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.cpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.hpp create mode 100644 sycl/test-e2e/SYCLBIN/basic_executable.cpp create mode 100644 sycl/test-e2e/SYCLBIN/basic_input.cpp create mode 100644 sycl/test-e2e/SYCLBIN/basic_object.cpp create mode 100644 sycl/test-e2e/SYCLBIN/link_input.cpp create mode 100644 sycl/test-e2e/SYCLBIN/link_object.cpp create mode 100644 sycl/test-e2e/SYCLBIN/link_rtc_input.cpp create mode 100644 sycl/test-e2e/SYCLBIN/link_rtc_object.cpp create mode 100644 sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp create mode 100644 sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp create mode 100644 sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp new file mode 100644 index 0000000000000..eb1bbf099d96d --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp @@ -0,0 +1,78 @@ +//==---- syclbin_kernel_bundle.hpp - SYCLBIN-based kernel_bundle tooling ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include +#include +#include + +#if __has_include() +#include +#endif + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +template +std::enable_if_t> +get_kernel_bundle(const context &Ctxt, const std::vector &Devs, + const sycl::span &Bytes, PropertyListT = {}) { + std::vector UniqueDevices = + sycl::detail::removeDuplicateDevices(Devs); + + sycl::detail::KernelBundleImplPtr Impl = + sycl::detail::get_kernel_bundle_impl(Ctxt, UniqueDevices, Bytes, State); + return sycl::detail::createSyclObjFromImpl>(Impl); +} + +#if __cpp_lib_span +template +std::enable_if_t> +get_kernel_bundle(const context &Ctxt, const std::vector &Devs, + const std::span &Bytes, PropertyListT Props = {}) { + return experimental::get_kernel_bundle( + Ctxt, Devs, sycl::span(Bytes.data(), Bytes.size()), Props); +} +#endif + +template +std::enable_if_t> +get_kernel_bundle(const context &Ctxt, const std::vector &Devs, + const std::filesystem::path &Filename, + PropertyListT Props = {}) { + std::vector RawSYCLBINData; + { + std::ifstream FileStream{Filename, std::ios::binary}; + if (!FileStream.is_open()) + throw sycl::exception(make_error_code(errc::invalid), + "Failed to open SYCLBIN file: " + + static_cast(Filename)); + RawSYCLBINData = + std::vector{std::istreambuf_iterator(FileStream), + std::istreambuf_iterator()}; + } + return experimental::get_kernel_bundle( + Ctxt, Devs, sycl::span{RawSYCLBINData}, Props); +} + +template +std::enable_if_t> +get_kernel_bundle(const context &Ctxt, const std::filesystem::path &Filename, + PropertyListT Props = {}) { + return experimental::get_kernel_bundle(Ctxt, Ctxt.get_devices(), + Filename, Props); +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 803504d21f585..eece0db7e17bc 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -19,7 +19,8 @@ #include // for kernel, kernel_bundle #include // for bundle_state #include // for property_list -#include // for ur_native_handle_t +#include +#include #include #include // PropertyT @@ -639,6 +640,10 @@ __SYCL_EXPORT detail::KernelBundleImplPtr get_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, bundle_state State); +__SYCL_EXPORT detail::KernelBundleImplPtr +get_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, + const sycl::span &Bytes, bundle_state State); + __SYCL_EXPORT const std::vector removeDuplicateDevices(const std::vector &Devs); diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 2d0ca6f9183aa..c5806e591b6dd 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -104,6 +104,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 94d694b71e31e..52011bd88c4b2 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -300,6 +300,7 @@ set(SYCL_COMMON_SOURCES "detail/reduction.cpp" "detail/sampler_impl.cpp" "detail/stream_impl.cpp" + "detail/syclbin.cpp" "detail/scheduler/commands.cpp" "detail/scheduler/leaves_collection.cpp" "detail/scheduler/scheduler.cpp" diff --git a/sycl/source/detail/base64.hpp b/sycl/source/detail/base64.hpp new file mode 100644 index 0000000000000..af729b27afaf6 --- /dev/null +++ b/sycl/source/detail/base64.hpp @@ -0,0 +1,121 @@ +//===--- Base64.h - Base64 Encoder/Decoder ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Adjusted copy of llvm/include/llvm/Support/Base64.h. +// TODO: Remove once we can consistently link the SYCL runtime library with +// LLVMSupport. + +#pragma once + +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +class Base64 { +private: + // Decode a single character. + static inline int decode(char Ch) { + if (Ch >= 'A' && Ch <= 'Z') // 0..25 + return Ch - 'A'; + else if (Ch >= 'a' && Ch <= 'z') // 26..51 + return Ch - 'a' + 26; + else if (Ch >= '0' && Ch <= '9') // 52..61 + return Ch - '0' + 52; + else if (Ch == '+') // 62 + return 62; + else if (Ch == '/') // 63 + return 63; + return -1; + } + + // Decode a quadruple of characters. + static inline void decode4(const char *Src, byte *Dst) { + int BadCh = -1; + + for (auto I = 0; I < 4; ++I) { + char Ch = Src[I]; + int Byte = decode(Ch); + + if (Byte < 0) { + BadCh = Ch; + break; + } + Dst[I] = (byte)Byte; + } + if (BadCh != -1) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid char in base 64 encoding."); + } + +public: + using byte = uint8_t; + + // Get the size of the encoded byte sequence of given size. + static size_t getDecodedSize(size_t SrcSize) { return (SrcSize * 3 + 3) / 4; } + + // Decode a sequence of given size into a pre-allocated memory. + // Returns the number of bytes in the decoded result or 0 in case of error. + static size_t decode(const char *Src, byte *Dst, size_t SrcSize) { + size_t SrcOff = 0; + size_t DstOff = 0; + + // decode full quads + for (size_t Qch = 0; Qch < SrcSize / 4; ++Qch, SrcOff += 4, DstOff += 3) { + byte Ch[4]; + decode4(Src + SrcOff, Ch); + + // each quad of chars produces three bytes of output + Dst[DstOff + 0] = Ch[0] | (Ch[1] << 6); + Dst[DstOff + 1] = (Ch[1] >> 2) | (Ch[2] << 4); + Dst[DstOff + 2] = (Ch[2] >> 4) | (Ch[3] << 2); + } + auto RemChars = SrcSize - SrcOff; + + if (RemChars == 0) + return DstOff; + // decode the remainder; variants: + // 2 chars remain - produces single byte + // 3 chars remain - produces two bytes + + if (RemChars != 2 && RemChars != 3) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid encoded sequence length."); + + int Ch0 = decode(Src[SrcOff++]); + int Ch1 = decode(Src[SrcOff++]); + int Ch2 = RemChars == 3 ? decode(Src[SrcOff]) : 0; + + if (Ch0 < 0 || Ch1 < 0 || Ch2 < 0) + throw sycl::exception( + make_error_code(errc::invalid), + "Invalid characters in the encoded sequence remainder."); + Dst[DstOff++] = Ch0 | (Ch1 << 6); + + if (RemChars == 3) + Dst[DstOff++] = (Ch1 >> 2) | (Ch2 << 4); + return DstOff; + } + + // Allocate minimum required amount of memory and decode a sequence of given + // size into it. + // Returns the decoded result. The size can be obtained via getDecodedSize. + static std::unique_ptr decode(const char *Src, size_t SrcSize) { + size_t DstSize = getDecodedSize(SrcSize); + std::unique_ptr Dst(new byte[DstSize]); + decode(Src, Dst.get(), SrcSize); + return std::move(Dst); + } +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index ec8ff0d895ff0..8c793832abe12 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -162,7 +162,7 @@ RTDeviceBinaryImage::getProperty(const char *PropName) const { return *It; } -void RTDeviceBinaryImage::init(sycl_device_binary Bin) { +RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) { ImageId = ImageCounter++; // If there was no binary, we let the owner handle initialization as they see @@ -227,12 +227,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() { Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; } -DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( - std::unique_ptr> &&DataPtr, - size_t DataSize) - : DynRTDeviceBinaryImage() { - Data = std::move(DataPtr); - Bin->BinaryStart = reinterpret_cast(Data.get()); +std::unique_ptr CreateDefaultDynBinary( + const std::unique_ptr> &DataPtr, + size_t DataSize) { + auto Bin = std::make_unique(); + Bin->BinaryStart = reinterpret_cast(DataPtr.get()); Bin->BinaryEnd = Bin->BinaryStart + DataSize; Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, DataSize); switch (Bin->Format) { @@ -242,9 +241,15 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( default: Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; } - init(Bin); + return Bin; } +DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( + std::unique_ptr> &&DataPtr, + size_t DataSize) + : RTDeviceBinaryImage(CreateDefaultDynBinary(DataPtr, DataSize).release()), + Data{std::move(DataPtr)} {} + DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { delete Bin; Bin = nullptr; @@ -479,8 +484,6 @@ static void copyProperty(sycl_device_binary_property &NextFreeProperty, DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( const std::vector &Imgs) : DynRTDeviceBinaryImage() { - init(nullptr); - // Naive merges. auto MergedSpecConstants = naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { @@ -675,18 +678,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) - : RTDeviceBinaryImage() { - - // 'CompressedBin' is part of the executable image loaded into memory - // which can't be modified easily. So, we need to make a copy of it. - Bin = new sycl_device_binary_struct(*CompressedBin); - + : RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) { // Get the decompressed size of the binary image. m_ImageSize = ZSTDCompressor::GetDecompressedSize( reinterpret_cast(Bin->BinaryStart), static_cast(Bin->BinaryEnd - Bin->BinaryStart)); - - init(Bin); } void CompressedRTDeviceBinaryImage::Decompress() { diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 0069f35fd11df..d17e981fa76bc 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -140,7 +140,7 @@ class RTDeviceBinaryImage { public: RTDeviceBinaryImage() : Bin(nullptr) {} - RTDeviceBinaryImage(sycl_device_binary Bin) { init(Bin); } + RTDeviceBinaryImage(sycl_device_binary Bin); // Explicitly delete copy constructor/operator= to avoid unintentional copies RTDeviceBinaryImage(const RTDeviceBinaryImage &) = delete; RTDeviceBinaryImage &operator=(const RTDeviceBinaryImage &) = delete; @@ -247,8 +247,6 @@ class RTDeviceBinaryImage { } protected: - void init(); - void init(sycl_device_binary Bin); sycl_device_binary get() const { return Bin; } sycl_device_binary Bin; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index dc7fc89f23cd9..41429e32e8050 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,7 @@ using include_pairs_t = constexpr uint8_t ImageOriginSYCLOffline = 1; constexpr uint8_t ImageOriginInterop = 1 << 1; constexpr uint8_t ImageOriginKernelCompiler = 1 << 2; +constexpr uint8_t ImageOriginSYCLBIN = 1 << 3; // Helper class to track and unregister shared SYCL device_globals. class ManagedDeviceGlobalsRegistry { @@ -160,16 +162,11 @@ struct KernelCompilerBinaryInfo { include_pairs_t &&IncludePairsVec) : MLanguage{Lang}, MIncludePairs{std::move(IncludePairsVec)} {} - KernelCompilerBinaryInfo(syclex::source_language Lang, - KernelNameSetT &&KernelNames) - : MLanguage{Lang}, MKernelNames{std::move(KernelNames)} {} - KernelCompilerBinaryInfo( - syclex::source_language Lang, KernelNameSetT &&KernelNames, - MangledKernelNameMapT &&MangledKernelNames, std::string &&Prefix, + syclex::source_language Lang, MangledKernelNameMapT &&MangledKernelNames, + std::string &&Prefix, std::shared_ptr &&DeviceGlobalRegistry) - : MLanguage{Lang}, MKernelNames{std::move(KernelNames)}, - MMangledKernelNames{std::move(MangledKernelNames)}, + : MLanguage{Lang}, MMangledKernelNames{std::move(MangledKernelNames)}, MPrefixes{std::move(Prefix)}, MDeviceGlobalRegistries{std::move(DeviceGlobalRegistry)} {} @@ -193,9 +190,6 @@ struct KernelCompilerBinaryInfo { "Linking binaries with different source " "languages is not currently supported."); - for (const std::string &KernelName : RTCInfo->MKernelNames) - Result->MKernelNames.insert(KernelName); - Result->MMangledKernelNames.insert(RTCInfo->MMangledKernelNames.begin(), RTCInfo->MMangledKernelNames.end()); @@ -226,7 +220,6 @@ struct KernelCompilerBinaryInfo { } syclex::source_language MLanguage; - KernelNameSetT MKernelNames; MangledKernelNameMapT MMangledKernelNames; std::set MPrefixes; include_pairs_t MIncludePairs; @@ -272,10 +265,12 @@ class device_image_impl { ur_program_handle_t Program, const SpecConstMapT &SpecConstMap, const std::vector &SpecConstsBlob, uint8_t Origins, std::optional &&RTCInfo, + KernelNameSetT &&KernelNames, std::unique_ptr &&MergedImageStorage = nullptr) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), - MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob), + MKernelIDs(std::move(KernelIDs)), MKernelNames{std::move(KernelNames)}, + MSpecConstsBlob(SpecConstsBlob), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MSpecConstSymMap(SpecConstMap), MOrigins(Origins), MRTCBinInfo(std::move(RTCInfo)), @@ -288,9 +283,10 @@ class device_image_impl { : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::make_shared>()), + MKernelNames{std::move(KernelNames)}, MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), - MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::move(KernelNames)}) { + MRTCBinInfo(KernelCompilerBinaryInfo{Lang}) { updateSpecConstSymMap(); } @@ -303,12 +299,12 @@ class device_image_impl { std::shared_ptr &&DeviceGlobalRegistry) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(nullptr), - MKernelIDs(std::move(KernelIDs)), + MKernelIDs(std::move(KernelIDs)), MKernelNames{std::move(KernelNames)}, MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), MRTCBinInfo(KernelCompilerBinaryInfo{ - Lang, std::move(KernelNames), std::move(MangledKernelNames), - std::move(Prefix), std::move(DeviceGlobalRegistry)}) { + Lang, std::move(MangledKernelNames), std::move(Prefix), + std::move(DeviceGlobalRegistry)}) { updateSpecConstSymMap(); } @@ -347,9 +343,10 @@ class device_image_impl { MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::make_shared>()), + MKernelNames{std::move(KernelNames)}, MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), - MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::move(KernelNames)}) {} + MRTCBinInfo(KernelCompilerBinaryInfo{Lang}) {} bool has_kernel(const kernel_id &KernelIDCand) const noexcept { return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(), @@ -599,6 +596,16 @@ class device_image_impl { } std::string adjustKernelName(std::string_view Name) const { + if (MOrigins & ImageOriginSYCLBIN) { + constexpr const char KernelPrefix[] = "__sycl_kernel_"; + constexpr size_t KernelPrefixLen = + sizeof(KernelPrefix) / sizeof(char) - 1; + if (Name.size() > KernelPrefixLen && + Name.substr(0, KernelPrefixLen) == std::string_view{KernelPrefix}) + return Name.data(); + return std::string{KernelPrefix} + Name.data(); + } + if (!MRTCBinInfo.has_value()) return Name.data(); @@ -611,22 +618,24 @@ class device_image_impl { return Name.data(); } - bool hasKernelName(const std::string &Name) const { - return MRTCBinInfo.has_value() && !Name.empty() && - MRTCBinInfo->MKernelNames.find(adjustKernelName(Name)) != - MRTCBinInfo->MKernelNames.end(); + bool hasKernelName(std::string_view Name) const { + return (getOriginMask() & + (ImageOriginKernelCompiler | ImageOriginSYCLBIN)) && + !Name.empty() && + MKernelNames.find(adjustKernelName(Name)) != MKernelNames.end(); } - std::shared_ptr tryGetSourceBasedKernel( - std::string_view Name, const context &Context, - const std::shared_ptr &OwnerBundle, - const std::shared_ptr &Self) const { - if (!(getOriginMask() & ImageOriginKernelCompiler)) + std::shared_ptr + tryGetExtensionKernel(std::string_view Name, const context &Context, + const std::shared_ptr &OwnerBundle, + const std::shared_ptr &Self) const { + if (!(getOriginMask() & ImageOriginKernelCompiler) && + !((getOriginMask() & ImageOriginSYCLBIN) && hasKernelName(Name))) return nullptr; - assert(MRTCBinInfo); std::string AdjustedName = adjustKernelName(Name); - if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { + if (MRTCBinInfo && + MRTCBinInfo->MLanguage == syclex::source_language::sycl) { auto &PM = ProgramManager::getInstance(); for (const std::string &Prefix : MRTCBinInfo->MPrefixes) { auto KID = PM.tryGetSYCLKernelID(Prefix + AdjustedName); @@ -684,6 +693,8 @@ class device_image_impl { return MRTCBinInfo; } + const KernelNameSetT &getKernelNames() const noexcept { return MKernelNames; } + bool isNonSYCLSourceBased() const noexcept { return (getOriginMask() & ImageOriginKernelCompiler) && !isFromSourceLanguage(syclex::source_language::sycl); @@ -763,7 +774,7 @@ class device_image_impl { nullptr); std::vector KernelNames = - getKernelNamesFromURProgram(Adapter, UrProgram); + ProgramManager::getKernelNamesFromURProgram(Adapter, UrProgram); KernelNameSetT KernelNameSet{KernelNames.begin(), KernelNames.end()}; // If caching enabled and kernel not fetched from cache, cache. @@ -1241,24 +1252,8 @@ class device_image_impl { return UrProgram; } - static std::vector - getKernelNamesFromURProgram(const AdapterPtr &Adapter, - ur_program_handle_t UrProgram) { - // Get the kernel names. - size_t KernelNamesSize; - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); - - // semi-colon delimited list of kernel names. - std::string KernelNamesStr(KernelNamesSize, ' '); - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), - &KernelNamesStr[0], nullptr); - return detail::split_string(KernelNamesStr, ';'); - } - const std::variant, - const RTDeviceBinaryImage *> + const RTDeviceBinaryImage *, SYCLBINBinaries> MBinImage = static_cast(nullptr); context MContext; std::vector MDevices; @@ -1270,6 +1265,9 @@ class device_image_impl { // according to LessByNameComp std::shared_ptr> MKernelIDs; + // List of known kernel names. + KernelNameSetT MKernelNames; + // A mutex for sycnhronizing access to spec constants blob. Mutable because // needs to be locked in the const method for getting spec constant value. mutable std::mutex MSpecConstAccessMtx; diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index b0747f22c725d..b2e55b316ff55 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -148,6 +149,10 @@ class kernel_bundle_impl { "Not all devices are in the set of associated " "devices for input bundle or vector of devices is empty"); + // Copy SYCLBINs to ensure lifetime is preserved by the executable bundle. + MSYCLBINs.insert(MSYCLBINs.end(), InputBundleImpl->MSYCLBINs.begin(), + InputBundleImpl->MSYCLBINs.end()); + for (const DevImgPlainWithDeps &DevImgWithDeps : InputBundleImpl->MDeviceImages) { // Skip images which are not compatible with devices provided @@ -275,17 +280,13 @@ class kernel_bundle_impl { std::set> SeenKernelNames; std::set> Conflicts; for (const device_image_plain &DevImage : DevImages) { - const std::optional &RTCInfo = - getSyclObjImpl(DevImage)->getRTCInfo(); - if (!RTCInfo.has_value()) - continue; + const KernelNameSetT &KernelNames = + getSyclObjImpl(DevImage)->getKernelNames(); std::vector Intersect; std::set_intersection(SeenKernelNames.begin(), SeenKernelNames.end(), - RTCInfo->MKernelNames.begin(), - RTCInfo->MKernelNames.end(), + KernelNames.begin(), KernelNames.end(), std::inserter(Conflicts, Conflicts.begin())); - SeenKernelNames.insert(RTCInfo->MKernelNames.begin(), - RTCInfo->MKernelNames.end()); + SeenKernelNames.insert(KernelNames.begin(), KernelNames.end()); } if (!Conflicts.empty()) { @@ -455,12 +456,27 @@ class kernel_bundle_impl { "Not all input bundles have the same set of associated devices."); } + // Pre-count and reserve space in vectors. + { + size_t NumDevImgs = 0, NumSharedDevBins = 0, NumSYCLBINs = 0; + for (const detail::KernelBundleImplPtr &Bundle : Bundles) { + NumDevImgs += Bundle->MDeviceImages.size(); + NumSharedDevBins += Bundle->MSharedDeviceBinaries.size(); + NumSYCLBINs += Bundle->MSYCLBINs.size(); + } + MDeviceImages.reserve(NumDevImgs); + MSharedDeviceBinaries.reserve(NumSharedDevBins); + MSYCLBINs.reserve(NumSYCLBINs); + } + for (const detail::KernelBundleImplPtr &Bundle : Bundles) { MDeviceImages.insert(MDeviceImages.end(), Bundle->MDeviceImages.begin(), Bundle->MDeviceImages.end()); MSharedDeviceBinaries.insert(MSharedDeviceBinaries.end(), Bundle->MSharedDeviceBinaries.begin(), Bundle->MSharedDeviceBinaries.end()); + MSYCLBINs.insert(MSYCLBINs.end(), Bundle->MSYCLBINs.begin(), + Bundle->MSYCLBINs.end()); } fillUniqueDeviceImages(); @@ -540,6 +556,32 @@ class kernel_bundle_impl { MDeviceImages.emplace_back(DevImg); } + // SYCLBIN constructor + kernel_bundle_impl(const context &Context, const std::vector &Devs, + const sycl::span &Bytes, bundle_state State) + : MContext(Context), MDevices(Devs), MState(State) { + common_ctor_checks(); + + auto &SYCLBIN = MSYCLBINs.emplace_back( + std::make_shared(Bytes.data(), Bytes.size())); + + if (SYCLBIN->getState() != static_cast(State)) + throw sycl::exception( + make_error_code(errc::invalid), + "kernel_bundle state does not match the state of the SYCLBIN file."); + + std::vector BestImages = + SYCLBIN->getBestCompatibleImages(Devs); + MDeviceImages.reserve(BestImages.size()); + for (const detail::RTDeviceBinaryImage *Image : BestImages) + MDeviceImages.emplace_back(std::make_shared( + Image, Context, Devs, ProgramManager::getBinImageState(Image), + /*KernelIDs=*/nullptr, /*URProgram=*/nullptr, ImageOriginSYCLBIN)); + ProgramManager::getInstance().bringSYCLDeviceImagesToState(MDeviceImages, + State); + fillUniqueDeviceImages(); + } + std::shared_ptr build_from_source( const std::vector Devices, const std::vector &BuildOptions, @@ -600,9 +642,10 @@ class kernel_bundle_impl { kernel ext_oneapi_get_kernel(const std::string &Name, const std::shared_ptr &Self) const { - if (!hasSourceBasedImages()) + if (!hasSourceBasedImages() && !hasSYCLBINImages()) throw sycl::exception(make_error_code(errc::invalid), "'ext_oneapi_get_kernel' is only available in " + "kernel_bundles created from SYCLBIN files and " "kernel_bundles successfully built from " "kernel_bundle."); @@ -615,8 +658,8 @@ class kernel_bundle_impl { const std::shared_ptr &DevImgImpl = getSyclObjImpl(DevImg); if (std::shared_ptr PotentialKernelImpl = - DevImgImpl->tryGetSourceBasedKernel(Name, MContext, Self, - DevImgImpl)) + DevImgImpl->tryGetExtensionKernel(Name, MContext, Self, + DevImgImpl)) return detail::createSyclObjFromImpl( std::move(PotentialKernelImpl)); } @@ -625,12 +668,12 @@ class kernel_bundle_impl { } std::string ext_oneapi_get_raw_kernel_name(const std::string &Name) { - if (!hasSourceBasedImages()) - throw sycl::exception( - make_error_code(errc::invalid), - "'ext_oneapi_get_raw_kernel_name' is only available in " - "kernel_bundles successfully built from " - "kernel_bundle."); + if (!hasSourceBasedImages() && !hasSYCLBINImages()) + throw sycl::exception(make_error_code(errc::invalid), + "'ext_oneapi_get_raw_kernel_name' is only " + "available in kernel_bundles created from SYCLBIN " + "files and kernel_bundles successfully built from " + "kernel_bundle."); auto It = std::find_if(begin(), end(), [&Name](const device_image_plain &DevImg) { @@ -865,6 +908,12 @@ class kernel_bundle_impl { }); } + bool hasSYCLBINImages() const noexcept { + return std::any_of(begin(), end(), [](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->getOriginMask() & ImageOriginSYCLBIN; + }); + } + bool hasSYCLOfflineImages() const noexcept { return std::any_of(begin(), end(), [](const device_image_plain &DevImg) { return getSyclObjImpl(DevImg)->getOriginMask() & ImageOriginSYCLOffline; @@ -954,8 +1003,8 @@ class kernel_bundle_impl { const std::shared_ptr &DevImgImpl = getSyclObjImpl(DevImg); if (std::shared_ptr SourceBasedKernel = - DevImgImpl->tryGetSourceBasedKernel(Name, MContext, Self, - DevImgImpl)) + DevImgImpl->tryGetExtensionKernel(Name, MContext, Self, + DevImgImpl)) return SourceBasedKernel; } @@ -1015,6 +1064,11 @@ class kernel_bundle_impl { // device globals prior to unregistering the binaries. std::vector> MSharedDeviceBinaries; + // SYCLBINs manage their own binary information, so if we have any we store + // them. These are stored as shared_ptr to ensure they stay alive across + // kernel_bundles that use them. + std::vector> MSYCLBINs; + std::vector MDeviceImages; std::vector MUniqueDeviceImages; // This map stores values for specialization constants, that are missing diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index d2abdbe29fb30..76afba4f79a98 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2377,7 +2377,8 @@ ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg, return nullptr; } -static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage) { +bundle_state +ProgramManager::getBinImageState(const RTDeviceBinaryImage *BinImage) { auto IsAOTBinary = [](const char *Format) { return ((strcmp(Format, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) || (strcmp(Format, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) || @@ -2397,6 +2398,22 @@ static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage) { : sycl::bundle_state::object; } +std::vector +ProgramManager::getKernelNamesFromURProgram(const AdapterPtr &Adapter, + ur_program_handle_t UrProgram) { + // Get the kernel names. + size_t KernelNamesSize; + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); + + // semi-colon delimited list of kernel names. + std::string KernelNamesStr(KernelNamesSize, ' '); + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), + &KernelNamesStr[0], nullptr); + return detail::split_string(KernelNamesStr, ';'); +} + std::optional ProgramManager::tryGetSYCLKernelID(KernelNameStrRefT KernelName) { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); @@ -2924,6 +2941,8 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, if (InputImpl->get_bin_image_ref()->supportsSpecConstants()) setSpecializationConstants(InputImpl, Prog, Adapter); + KernelNameSetT KernelNames = InputImpl->getKernelNames(); + std::optional RTCInfo = InputImpl->getRTCInfo(); DeviceImageImplPtr ObjectImpl = std::make_shared( @@ -2932,7 +2951,7 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, InputImpl->get_kernel_ids_ptr(), Prog, InputImpl->get_spec_const_data_ref(), InputImpl->get_spec_const_blob_ref(), InputImpl->getOriginMask(), - std::move(RTCInfo)); + std::move(RTCInfo), std::move(KernelNames)); std::string CompileOptions; applyCompileOptionsFromEnvironment(CompileOptions); @@ -3115,10 +3134,20 @@ ProgramManager::link(const std::vector &Imgs, std::vector *> RTCInfoPtrs; RTCInfoPtrs.reserve(Imgs.size()); + KernelNameSetT MergedKernelNames; for (const device_image_plain &DevImg : Imgs) { const DeviceImageImplPtr &DevImgImpl = getSyclObjImpl(DevImg); CombinedOrigins |= DevImgImpl->getOriginMask(); RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); + MergedKernelNames.insert(DevImgImpl->getKernelNames().begin(), + DevImgImpl->getKernelNames().end()); + if (DevImgImpl->getOriginMask() & ImageOriginSYCLBIN) { + // SYCLBIN binaries should gather their kernels from the backend. + std::vector GatheredKernelNames = + getKernelNamesFromURProgram(Adapter, LinkedProg); + MergedKernelNames.insert(GatheredKernelNames.begin(), + GatheredKernelNames.end()); + } } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); @@ -3128,7 +3157,7 @@ ProgramManager::link(const std::vector &Imgs, bundle_state::executable, std::move(KernelIDs), LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob), CombinedOrigins, std::move(MergedRTCInfo), - std::move(MergedImageStorage)); + std::move(MergedKernelNames), std::move(MergedImageStorage)); // TODO: Make multiple sets of device images organized by devices they are // compiled for. @@ -3184,6 +3213,9 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, SpecConstMap = MainInputImpl->get_spec_const_data_ref(); } + ur_program_handle_t ResProgram = getBuiltURProgram( + std::move(BinImgs), ContextImpl, Devs, &DevImgWithDeps, SpecConstBlob); + // The origin becomes the combination of all the origins. uint8_t CombinedOrigins = 0; for (const device_image_plain &DevImg : DevImgWithDeps) @@ -3192,18 +3224,28 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, std::vector *> RTCInfoPtrs; RTCInfoPtrs.reserve(DevImgWithDeps.size()); - for (const device_image_plain &DevImg : DevImgWithDeps) - RTCInfoPtrs.emplace_back(&(getSyclObjImpl(DevImg)->getRTCInfo())); + KernelNameSetT MergedKernelNames; + for (const device_image_plain &DevImg : DevImgWithDeps) { + const auto &DevImgImpl = getSyclObjImpl(DevImg); + RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); + MergedKernelNames.insert(DevImgImpl->getKernelNames().begin(), + DevImgImpl->getKernelNames().end()); + if (DevImgImpl->getOriginMask() & ImageOriginSYCLBIN) { + // SYCLBIN binaries should gather their kernels from the backend. + std::vector GatheredKernelNames = + getKernelNamesFromURProgram(ContextImpl->getAdapter(), ResProgram); + MergedKernelNames.insert(GatheredKernelNames.begin(), + GatheredKernelNames.end()); + } + } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); - ur_program_handle_t ResProgram = getBuiltURProgram( - std::move(BinImgs), ContextImpl, Devs, &DevImgWithDeps, SpecConstBlob); - DeviceImageImplPtr ExecImpl = std::make_shared( ResultBinImg, Context, std::vector{Devs}, bundle_state::executable, std::move(KernelIDs), ResProgram, std::move(SpecConstMap), std::move(SpecConstBlob), CombinedOrigins, - std::move(MergedRTCInfo), std::move(MergedImageStorage)); + std::move(MergedRTCInfo), std::move(MergedKernelNames), + std::move(MergedImageStorage)); return createSyclObjFromImpl(std::move(ExecImpl)); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index ceb48a2e57d66..77814bde63601 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -388,6 +388,12 @@ class ProgramManager { const device &Dev, bool ErrorOnUnresolvableImport); + static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage); + + static std::vector + getKernelNamesFromURProgram(const AdapterPtr &Adapter, + ur_program_handle_t UrProgram); + private: ProgramManager(ProgramManager const &) = delete; ProgramManager &operator=(ProgramManager const &) = delete; diff --git a/sycl/source/detail/property_set_io.hpp b/sycl/source/detail/property_set_io.hpp new file mode 100644 index 0000000000000..3d9de09f96060 --- /dev/null +++ b/sycl/source/detail/property_set_io.hpp @@ -0,0 +1,360 @@ +//==-- PropertySetIO.h -- models a sequence of property sets and their I/O -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Adjusted copy of llvm/include/llvm/Support/PropertySetIO.h. +// TODO: Remove once we can consistently link the SYCL runtime library with +// LLVMSupport. + +#pragma once + +#include "detail/base64.hpp" +#include "sycl/exception.hpp" + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +// Represents a property value. PropertyValue name is stored in the encompassing +// container. +class PropertyValue { +public: + // Type of the size of the value. Value size gets serialized along with the + // value data in some cases for later reading at runtime, so size_t is not + // suitable as its size varies. + using SizeTy = uint64_t; + using byte = uint8_t; + + // Defines supported property types + enum Type { first = 0, NONE = first, UINT32, BYTE_ARRAY, last = BYTE_ARRAY }; + + // Translates C++ type to the corresponding type tag. + template static Type getTypeTag() { + static_assert(std::is_same_v || std::is_same_v); + if constexpr (std::is_same_v) { + return UINT32; + } else { + return BYTE_ARRAY; + } + } + + // Casts from int value to a type tag. + static Type getTypeTag(int T) { + if (T < first || T > last) + throw sycl::exception(make_error_code(errc::invalid), + "Bad property type."); + return static_cast(T); + } + + ~PropertyValue() { + if ((getType() == BYTE_ARRAY) && Val.ByteArrayVal) + delete[] Val.ByteArrayVal; + } + + PropertyValue() = default; + PropertyValue(Type T) : Ty(T) {} + + PropertyValue(uint32_t Val) : Ty(UINT32), Val({Val}) {} + PropertyValue(const byte *Data, SizeTy DataBitSize) { + constexpr int ByteSizeInBits = 8; + Ty = BYTE_ARRAY; + SizeTy DataSize = (DataBitSize + (ByteSizeInBits - 1)) / ByteSizeInBits; + constexpr size_t SizeFieldSize = sizeof(SizeTy); + + // Allocate space for size and data. + Val.ByteArrayVal = new byte[SizeFieldSize + DataSize]; + + // Write the size into first bytes. + for (size_t I = 0; I < SizeFieldSize; ++I) { + Val.ByteArrayVal[I] = (byte)DataBitSize; + DataBitSize >>= ByteSizeInBits; + } + // Append data. + std::memcpy(Val.ByteArrayVal + SizeFieldSize, Data, DataSize); + } + template + PropertyValue(const C &Data) + : PropertyValue(reinterpret_cast(Data.data()), + Data.size() * sizeof(T) * /* bits in one byte */ 8) {} + PropertyValue(const std::string_view Str) + : PropertyValue(reinterpret_cast(Str.data()), + Str.size() * sizeof(char) * /* bits in one byte */ 8) {} + PropertyValue(const PropertyValue &P) { *this = P; } + PropertyValue(PropertyValue &&P) { *this = std::move(P); } + + PropertyValue &operator=(PropertyValue &&P) { + copy(P); + + if (P.getType() == BYTE_ARRAY) + P.Val.ByteArrayVal = nullptr; + P.Ty = NONE; + return *this; + } + + PropertyValue &operator=(const PropertyValue &P) { + if (P.getType() == BYTE_ARRAY) + *this = PropertyValue(P.asByteArray(), P.getByteArraySizeInBits()); + else + copy(P); + return *this; + } + + // get property value as unsigned 32-bit integer + uint32_t asUint32() const { + if (Ty != UINT32) + throw sycl::exception(make_error_code(errc::invalid), + "Must be UINT32 value."); + return Val.UInt32Val; + } + + // Get raw data size in bits. + SizeTy getByteArraySizeInBits() const { + if (Ty != BYTE_ARRAY) + throw sycl::exception(make_error_code(errc::invalid), + "Must be BYTE_ARRAY value."); + SizeTy Res = 0; + + for (size_t I = 0; I < sizeof(SizeTy); ++I) + Res |= (SizeTy)Val.ByteArrayVal[I] << (8 * I); + return Res; + } + + // Get byte array data size in bytes. + SizeTy getByteArraySize() const { + SizeTy SizeInBits = getByteArraySizeInBits(); + constexpr unsigned int MASK = 0x7; + return ((SizeInBits + MASK) & ~MASK) / 8; + } + + // Get byte array data size in bytes, including the leading bytes encoding the + // size. + SizeTy getRawByteArraySize() const { + return getByteArraySize() + sizeof(SizeTy); + } + + // Get byte array data including the leading bytes encoding the size. + const byte *asRawByteArray() const { + if (Ty != BYTE_ARRAY) + throw sycl::exception(make_error_code(errc::invalid), + "Must be BYTE_ARRAY value."); + return Val.ByteArrayVal; + } + + // Get byte array data excluding the leading bytes encoding the size. + const byte *asByteArray() const { + if (Ty != BYTE_ARRAY) + throw sycl::exception(make_error_code(errc::invalid), + "Must be BYTE_ARRAY value."); + return Val.ByteArrayVal + sizeof(SizeTy); + } + + bool isValid() const { return getType() != NONE; } + + // set property value; the 'T' type must be convertible to a property type tag + template void set(T V) { + if (getTypeTag() != Ty) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid type tag for this operation."); + getValueRef() = V; + } + + Type getType() const { return Ty; } + + SizeTy size() const { + switch (Ty) { + case UINT32: + return sizeof(Val.UInt32Val); + case BYTE_ARRAY: + return getRawByteArraySize(); + default: + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported property type."); + } + } + + const char *data() const { + switch (Ty) { + case UINT32: + return reinterpret_cast(&Val.UInt32Val); + case BYTE_ARRAY: + return reinterpret_cast(Val.ByteArrayVal); + default: + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported property type."); + } + } + +private: + template T &getValueRef() { + static_assert(std::is_same_v || std::is_same_v); + if constexpr (std::is_same_v) { + return Val.UInt32Val; + } else { + return Val.ByteArrayVal; + } + } + + void copy(const PropertyValue &P) { + Ty = P.Ty; + Val = P.Val; + } + + Type Ty = NONE; + // TODO: replace this union with std::variant when uplifting to C++17 + union { + uint32_t UInt32Val; + // Holds first sizeof(size_t) bytes of size followed by actual raw data. + byte *ByteArrayVal; + } Val; +}; + +using PropertySet = std::unordered_map; + +/// A registry of property sets. Maps a property set name to its +/// content. +/// +/// The order of keys is preserved and corresponds to the order of insertion. +class PropertySetRegistry { +public: + using MapTy = std::unordered_map; + + // SYCLBIN specific property sets. + static constexpr char SYCLBIN_GLOBAL_METADATA[] = "SYCLBIN/global metadata"; + static constexpr char SYCLBIN_IR_MODULE_METADATA[] = + "SYCLBIN/ir module metadata"; + static constexpr char SYCLBIN_NATIVE_DEVICE_CODE_IMAGE_METADATA[] = + "SYCLBIN/native device code image metadata"; + + static std::unique_ptr read(std::string_view Src) { + auto Res = std::make_unique(); + PropertySet *CurPropSet = nullptr; + + // special case when there is no property data, i.e. the resulting property + // set registry should be empty + if (Src.size() == 0) + return std::move(Res); + + size_t CurrentStart = 0; + while (CurrentStart < Src.size()) { + size_t CurrentEnd = CurrentStart; + size_t SkipChars = 0; + for (CurrentEnd = CurrentStart; CurrentEnd < Src.size(); ++CurrentEnd) { + if (Src[CurrentEnd] == '\n') { + SkipChars = 1; + break; + } + if (Src[CurrentEnd] == '\r' && CurrentEnd + 1 != Src.size() && + Src[CurrentEnd + 1] == '\n') { + SkipChars = 2; + break; + } + } + + std::string_view Line = + Src.substr(CurrentStart, CurrentEnd - CurrentStart); + CurrentStart = CurrentEnd + SkipChars; + + // see if this line starts a new property set + if (Line.front() == '[') { + // yes - parse the category (property name) + auto EndPos = Line.rfind(']'); + if (EndPos == std::string_view::npos) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid line: " + std::string{Line}); + std::string_view Category = Line.substr(1, EndPos - 1); + CurPropSet = &(*Res)[Category]; + continue; + } + if (!CurPropSet) + throw sycl::exception(make_error_code(errc::invalid), + "Property category missing."); + + auto SplitSW = [](const std::string_view &View, char C) { + std::string_view Left = View.substr(0, View.find(C)); + if (Left.size() >= View.size() - 1) + return std::make_pair(Left, std::string_view{}); + std::string_view Right = View.substr(Left.size() + 1); + return std::make_pair(Left, Right); + }; + + // parse name and type+value + auto Parts = SplitSW(Line, '='); + + if (Parts.first.empty() || Parts.second.empty()) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid property line: " + std::string{Line}); + auto TypeVal = SplitSW(Parts.second, '|'); + + if (TypeVal.first.empty() || TypeVal.second.empty()) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid property value: " + + std::string{Parts.second}); + + // parse type + int Tint; + auto TintConvRes = + std::from_chars(TypeVal.first.data(), + TypeVal.first.data() + TypeVal.first.size(), Tint); + if (TintConvRes.ec == std::errc::invalid_argument || + TintConvRes.ec == std::errc::result_out_of_range) + throw sycl::exception(make_error_code(errc::invalid), + "Could not convert type to integer: " + + std::string{TypeVal.first}); + PropertyValue::Type Ttag = PropertyValue::getTypeTag(Tint); + std::string_view Val = TypeVal.second; + + PropertyValue Prop(Ttag); + + // parse value depending on its type + switch (Ttag) { + case PropertyValue::Type::UINT32: { + uint32_t ValV; + auto ValVConvRes = + std::from_chars(Val.data(), Val.data() + Val.size(), ValV); + if (ValVConvRes.ec == std::errc::invalid_argument || + ValVConvRes.ec == std::errc::result_out_of_range) + throw sycl::exception(make_error_code(errc::invalid), + "Could not convert value to integer: " + + std::string{Val}); + Prop.set(ValV); + break; + } + case PropertyValue::Type::BYTE_ARRAY: { + std::unique_ptr DecArr = Base64::decode(Val.data(), Val.size()); + Prop.set(DecArr.release()); + break; + } + default: + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported property type: " + + std::string{Ttag}); + } + (*CurPropSet)[std::string{Parts.first}] = std::move(Prop); + } + + return std::move(Res); + } + + MapTy::const_iterator begin() const { return PropSetMap.begin(); } + MapTy::const_iterator end() const { return PropSetMap.end(); } + + /// Retrieves a property set with given \p Name . + PropertySet &operator[](std::string_view Name) { + return PropSetMap[std::string{Name}]; + } + /// Constant access to the underlying map. + const MapTy &getPropSets() const { return PropSetMap; } + +private: + MapTy PropSetMap; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/syclbin.cpp b/sycl/source/detail/syclbin.cpp new file mode 100644 index 0000000000000..bcac8c885e363 --- /dev/null +++ b/sycl/source/detail/syclbin.cpp @@ -0,0 +1,392 @@ +//==--------------------- syclbin.cpp - SYCLBIN parser ---------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Adjusted copy of llvm/lib/Object/SYCLBIN.cpp. +// TODO: Remove once we can consistently link the SYCL runtime library with +// LLVMObject. + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +namespace { + +std::unique_ptr ContentCopy(const char *Data, size_t Size) { + std::unique_ptr Result{new char[Size]}; + std::memcpy(Result.get(), Data, Size); + return std::move(Result); +} + +// Offload binary header and entry. +constexpr uint8_t OffloadBinaryMagic[4] = {0x10, 0xFF, 0x10, 0xAD}; +struct OffloadBinaryHeaderType { + uint8_t Magic[4]; + uint32_t Version; + uint64_t Size; + uint64_t EntryOffset; + uint64_t EntrySize; +}; +struct OffloadBinaryEntryType { + uint16_t ImageKind; + uint16_t OffloadKind; + uint32_t Flags; + uint64_t StringOffset; + uint64_t NumStrings; + uint64_t ImageOffset; + uint64_t ImageSize; +}; + +class BlockReader { +protected: + BlockReader(const char *Data, size_t Size) : Data{Data}, Size{Size} {} + + void ReadSizeCheck(size_t ByteOffset, size_t ReadSize) { + if (ByteOffset + ReadSize > Size) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected file contents size."); + } + + const char *Data = nullptr; + size_t Size = 0; +}; + +class HeaderBlockReader : public BlockReader { +public: + HeaderBlockReader(const char *Data, size_t Size) : BlockReader(Data, Size) {} + + template const HeaderT *GetHeaderPtr(size_t ByteOffset) { + ReadSizeCheck(ByteOffset, sizeof(HeaderT)); + return reinterpret_cast(Data + ByteOffset); + } +}; + +class SYCLBINByteTableBlockReader : public BlockReader { +public: + SYCLBINByteTableBlockReader(const char *Data, size_t Size) + : BlockReader(Data, Size) {} + + std::string_view GetBinaryBlob(size_t ByteOffset, uint64_t BlobSize) { + ReadSizeCheck(ByteOffset, BlobSize); + return {Data + ByteOffset, BlobSize}; + } + + std::unique_ptr GetMetadata(size_t ByteOffset, + uint64_t MetadataSize) { + return PropertySetRegistry::read(GetBinaryBlob(ByteOffset, MetadataSize)); + } +}; + +std::pair getImageInOffloadBinary(const char *Data, + size_t Size) { + if (sizeof(OffloadBinaryHeaderType) > Size) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid Offload Binary size."); + + // Read the header. + const OffloadBinaryHeaderType *Header = + reinterpret_cast(Data); + if (memcmp(Header->Magic, OffloadBinaryMagic, 4) != 0) + throw sycl::exception(make_error_code(errc::invalid), + "Incorrect Offload Binary magic number."); + + if (Header->Version != 1) + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported Offload Binary version number."); + + if (Header->Version != 1) + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported Offload Binary version number."); + + if (Header->EntrySize != sizeof(OffloadBinaryEntryType)) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected number of offload entries."); + + if (Header->EntryOffset + sizeof(OffloadBinaryEntryType) > Size) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid entry offset."); + + // Read the table entry. + const OffloadBinaryEntryType *Entry = + reinterpret_cast(Data + + Header->EntryOffset); + + if (Entry->ImageKind != /*IMG_SYCLBIN*/ 6) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected image type."); + + if (Entry->ImageOffset + Entry->ImageSize > Size) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid image offset and size."); + + return std::make_pair(Data + Entry->ImageOffset, Entry->ImageSize); +} + +} // namespace + +SYCLBIN::SYCLBIN(const char *Data, size_t Size) { + auto [SYCLBINData, SYCLBINSize] = getImageInOffloadBinary(Data, Size); + + if (SYCLBINSize < sizeof(FileHeaderType)) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected file contents size."); + + // Read the file header. + const FileHeaderType *FileHeader = + reinterpret_cast(SYCLBINData); + if (FileHeader->Magic != MagicNumber) + throw sycl::exception(make_error_code(errc::invalid), + "Incorrect SYCLBIN magic number."); + + if (FileHeader->Version > CurrentVersion) + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported SYCLBIN version " + + std::to_string(FileHeader->Version) + "."); + Version = FileHeader->Version; + + const uint64_t AMHeaderBlockSize = + sizeof(AbstractModuleHeaderType) * FileHeader->AbstractModuleCount; + const uint64_t IRMHeaderBlockSize = + sizeof(IRModuleHeaderType) * FileHeader->IRModuleCount; + const uint64_t NDCIHeaderBlockSize = sizeof(NativeDeviceCodeImageHeaderType) * + FileHeader->NativeDeviceCodeImageCount; + const uint64_t HeaderBlockSize = sizeof(FileHeaderType) + AMHeaderBlockSize + + IRMHeaderBlockSize + NDCIHeaderBlockSize; + // Align metadata table size to 8. + const uint64_t AlignedMetadataByteTableSize = + FileHeader->MetadataByteTableSize + + (-FileHeader->MetadataByteTableSize & 7); + if (SYCLBINSize < HeaderBlockSize + AlignedMetadataByteTableSize + + FileHeader->BinaryByteTableSize) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected file contents size."); + + // Create reader objects. These help with checking out-of-bounds access. + HeaderBlockReader HeaderBlockReader{SYCLBINData, HeaderBlockSize}; + SYCLBINByteTableBlockReader MetadataByteTableBlockReader{ + SYCLBINData + HeaderBlockSize, FileHeader->MetadataByteTableSize}; + SYCLBINByteTableBlockReader BinaryByteTableBlockReader{ + SYCLBINData + HeaderBlockSize + AlignedMetadataByteTableSize, + FileHeader->BinaryByteTableSize}; + + // Read global metadata. + GlobalMetadata = MetadataByteTableBlockReader.GetMetadata( + FileHeader->GlobalMetadataOffset, FileHeader->GlobalMetadataSize); + + // Read the abstract modules. + AbstractModules.resize(FileHeader->AbstractModuleCount); + for (uint32_t I = 0; I < FileHeader->AbstractModuleCount; ++I) { + AbstractModule &AM = AbstractModules[I]; + + // Read the header for the current abstract module. + const uint64_t AMHeaderByteOffset = + sizeof(FileHeaderType) + sizeof(AbstractModuleHeaderType) * I; + const AbstractModuleHeaderType *AMHeader = + HeaderBlockReader.GetHeaderPtr( + AMHeaderByteOffset); + + // Read the metadata for the current abstract module. + AM.Metadata = MetadataByteTableBlockReader.GetMetadata( + AMHeader->MetadataOffset, AMHeader->MetadataSize); + + // Read the IR modules of the current abstract module. + AM.IRModules.resize(AMHeader->IRModuleCount); + for (uint32_t J = 0; J < AMHeader->IRModuleCount; ++J) { + IRModule &IRM = AM.IRModules[J]; + + // Read the header for the current IR module. + const uint64_t IRMHeaderByteOffset = + sizeof(FileHeaderType) + AMHeaderBlockSize + + sizeof(IRModuleHeaderType) * (AMHeader->IRModuleOffset + J); + const IRModuleHeaderType *IRMHeader = + HeaderBlockReader.GetHeaderPtr( + IRMHeaderByteOffset); + + // Read the metadata for the current IR module. + IRM.Metadata = MetadataByteTableBlockReader.GetMetadata( + IRMHeader->MetadataOffset, IRMHeader->MetadataSize); + + // Read the binary blob for the current IR module. + IRM.RawIRBytes = BinaryByteTableBlockReader.GetBinaryBlob( + IRMHeader->RawIRBytesOffset, IRMHeader->RawIRBytesSize); + } + + // Read the native device code images of the current abstract module. + AM.NativeDeviceCodeImages.resize(AMHeader->NativeDeviceCodeImageCount); + for (uint32_t J = 0; J < AMHeader->NativeDeviceCodeImageCount; ++J) { + NativeDeviceCodeImage &NDCI = AM.NativeDeviceCodeImages[J]; + + // Read the header for the current native device code image. + const uint64_t NDCIHeaderByteOffset = + sizeof(FileHeaderType) + AMHeaderBlockSize + IRMHeaderBlockSize + + sizeof(NativeDeviceCodeImageHeaderType) * + (AMHeader->NativeDeviceCodeImageOffset + J); + const NativeDeviceCodeImageHeaderType *NDCIHeader = + HeaderBlockReader.GetHeaderPtr( + NDCIHeaderByteOffset); + + // Read the metadata for the current native device code image. + NDCI.Metadata = MetadataByteTableBlockReader.GetMetadata( + NDCIHeader->MetadataOffset, NDCIHeader->MetadataSize); + + // Read the binary blob for the current native device code image. + NDCI.RawDeviceCodeImageBytes = BinaryByteTableBlockReader.GetBinaryBlob( + NDCIHeader->BinaryBytesOffset, NDCIHeader->BinaryBytesSize); + } + } +} + +SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize) + : SYCLBINContentCopy{ContentCopy(SYCLBINContent, SYCLBINSize)}, + SYCLBINContentCopySize{SYCLBINSize}, + ParsedSYCLBIN(SYCLBIN{SYCLBINContentCopy.get(), SYCLBINSize}) { + size_t NumJITBinaries = 0, NumNativeBinaries = 0; + for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) { + NumJITBinaries += AM.IRModules.size(); + NumNativeBinaries += AM.NativeDeviceCodeImages.size(); + } + DeviceBinaries.reserve(NumJITBinaries + NumNativeBinaries); + JITDeviceBinaryImages.reserve(NumJITBinaries); + NativeDeviceBinaryImages.reserve(NumNativeBinaries); + + for (SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) { + // Construct properties from SYCLBIN metadata. + std::vector<_sycl_device_binary_property_set_struct> &BinPropertySets = + convertAbstractModuleProperties(AM); + + for (SYCLBIN::IRModule &IRM : AM.IRModules) { + sycl_device_binary_struct &DeviceBinary = DeviceBinaries.emplace_back(); + DeviceBinary.Version = SYCL_DEVICE_BINARY_VERSION; + DeviceBinary.Kind = 4; + DeviceBinary.Format = SYCL_DEVICE_BINARY_TYPE_SPIRV; // TODO: Determine. + DeviceBinary.DeviceTargetSpec = + __SYCL_DEVICE_BINARY_TARGET_SPIRV64; // TODO: Determine. + DeviceBinary.CompileOptions = nullptr; + DeviceBinary.LinkOptions = nullptr; + DeviceBinary.ManifestStart = nullptr; + DeviceBinary.ManifestEnd = nullptr; + DeviceBinary.BinaryStart = + reinterpret_cast(IRM.RawIRBytes.data()); + DeviceBinary.BinaryEnd = reinterpret_cast( + IRM.RawIRBytes.data() + IRM.RawIRBytes.size()); + DeviceBinary.EntriesBegin = nullptr; + DeviceBinary.EntriesEnd = nullptr; + DeviceBinary.PropertySetsBegin = BinPropertySets.data(); + DeviceBinary.PropertySetsEnd = + BinPropertySets.data() + BinPropertySets.size(); + // Create an image from it. + JITDeviceBinaryImages.emplace_back(&DeviceBinary); + } + + for (const SYCLBIN::NativeDeviceCodeImage &NDCI : + AM.NativeDeviceCodeImages) { + sycl_device_binary_struct &DeviceBinary = DeviceBinaries.emplace_back(); + DeviceBinary.Version = SYCL_DEVICE_BINARY_VERSION; + DeviceBinary.Kind = 4; + DeviceBinary.Format = SYCL_DEVICE_BINARY_TYPE_NATIVE; + DeviceBinary.DeviceTargetSpec = + __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; // TODO: Determine. + DeviceBinary.CompileOptions = nullptr; + DeviceBinary.LinkOptions = nullptr; + DeviceBinary.ManifestStart = nullptr; + DeviceBinary.ManifestEnd = nullptr; + DeviceBinary.BinaryStart = reinterpret_cast( + NDCI.RawDeviceCodeImageBytes.data()); + DeviceBinary.BinaryEnd = reinterpret_cast( + NDCI.RawDeviceCodeImageBytes.data() + + NDCI.RawDeviceCodeImageBytes.size()); + DeviceBinary.EntriesBegin = nullptr; + DeviceBinary.EntriesEnd = nullptr; + DeviceBinary.PropertySetsBegin = BinPropertySets.data(); + DeviceBinary.PropertySetsEnd = + BinPropertySets.data() + BinPropertySets.size(); + // Create an image from it. + NativeDeviceBinaryImages.emplace_back(&DeviceBinary); + } + } +} + +std::vector<_sycl_device_binary_property_set_struct> & +SYCLBINBinaries::convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM) { + std::vector<_sycl_device_binary_property_set_struct> &BinPropertySets = + BinaryPropertySets.emplace_back(); + BinPropertySets.reserve(AM.Metadata->getPropSets().size()); + for (auto &PropSetIt : *AM.Metadata) { + auto &PropSetName = PropSetIt.first; + auto &PropSetVal = PropSetIt.second; + + // Add a new vector to BinaryProperties and reserve room for all the + // properties we are converting. + std::vector<_sycl_device_binary_property_struct> &PropsList = + BinaryProperties.emplace_back(); + PropsList.reserve(PropSetVal.size()); + + // Then convert all properties in the property set. + for (auto &PropIt : PropSetVal) { + auto &PropName = PropIt.first; + auto &PropVal = PropIt.second; + _sycl_device_binary_property_struct &BinProp = PropsList.emplace_back(); + BinProp.Name = const_cast(PropName.data()); + BinProp.Type = PropVal.getType(); + if (BinProp.Type == SYCL_PROPERTY_TYPE_UINT32) { + // UINT32 properties have their value stored in the size instead. + BinProp.ValAddr = nullptr; + std::memcpy(&BinProp.ValSize, PropVal.data(), sizeof(uint32_t)); + } else { + BinProp.ValAddr = const_cast(PropVal.data()); + BinProp.ValSize = PropVal.size(); + } + } + + // Add a new property set to the list. + _sycl_device_binary_property_set_struct &BinPropSet = + BinPropertySets.emplace_back(); + BinPropSet.Name = const_cast(PropSetName.data()); + BinPropSet.PropertiesBegin = PropsList.data(); + BinPropSet.PropertiesEnd = PropsList.data() + PropsList.size(); + } + return BinPropertySets; +} + +std::vector +SYCLBINBinaries::getBestCompatibleImages(const device &Dev) { + auto SelectCompatibleImages = + [&](const std::vector &Imgs) { + std::vector CompatImgs; + for (const RTDeviceBinaryImage &Img : Imgs) + if (doesDevSupportDeviceRequirements(Dev, Img) && + doesImageTargetMatchDevice(Img, getSyclObjImpl(Dev).get())) + CompatImgs.push_back(&Img); + return CompatImgs; + }; + + // Try with native images first. + std::vector NativeImgs = + SelectCompatibleImages(NativeDeviceBinaryImages); + if (!NativeImgs.empty()) + return NativeImgs; + + // If there were no native images, pick JIT images. + return SelectCompatibleImages(JITDeviceBinaryImages); +} + +std::vector +SYCLBINBinaries::getBestCompatibleImages(const std::vector &Devs) { + std::set Images; + for (const device &Dev : Devs) { + std::vector BestImagesForDev = + getBestCompatibleImages(Dev); + Images.insert(BestImagesForDev.cbegin(), BestImagesForDev.cend()); + } + return {Images.cbegin(), Images.cend()}; +} + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/syclbin.hpp b/sycl/source/detail/syclbin.hpp new file mode 100644 index 0000000000000..312162372938b --- /dev/null +++ b/sycl/source/detail/syclbin.hpp @@ -0,0 +1,161 @@ +//==--------------------- syclbin.hpp - SYCLBIN parser ---------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Adjusted copy of llvm/include/llvm/Object/SYCLBIN.h. +// TODO: Remove once we can consistently link the SYCL runtime library with +// LLVMObject. + +#pragma once + +#include "detail/compiler.hpp" +#include "detail/device_binary_image.hpp" +#include "detail/property_set_io.hpp" +#include "sycl/exception.hpp" + +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { + +class device; + +namespace detail { + +// Representation of a SYCLBIN binary object. This is intended for use as an +// image inside a OffloadBinary. +// Adjusted from llvm/include/llvm/Object/SYCLBIN.h and can be removed if +// LLVMObject gets linked into the SYCL runtime library. +class SYCLBIN { +public: + SYCLBIN(const char *Data, size_t Size); + + SYCLBIN(const SYCLBIN &Other) = delete; + SYCLBIN(SYCLBIN &&Other) = default; + + SYCLBIN &operator=(const SYCLBIN &Other) = delete; + SYCLBIN &operator=(SYCLBIN &&Other) = default; + + /// The current version of the binary used for backwards compatibility. + static constexpr uint32_t CurrentVersion = 1; + + /// Magic number used to identify SYCLBIN files. + static constexpr uint32_t MagicNumber = 0x53594249; + + struct IRModule { + std::unique_ptr Metadata; + std::string_view RawIRBytes; + }; + struct NativeDeviceCodeImage { + std::unique_ptr Metadata; + std::string_view RawDeviceCodeImageBytes; + }; + + struct AbstractModule { + std::unique_ptr Metadata; + std::vector IRModules; + std::vector NativeDeviceCodeImages; + }; + + uint32_t Version; + std::unique_ptr GlobalMetadata; + std::vector AbstractModules; + +private: + struct alignas(8) FileHeaderType { + uint32_t Magic; + uint32_t Version; + uint32_t AbstractModuleCount; + uint32_t IRModuleCount; + uint32_t NativeDeviceCodeImageCount; + uint64_t MetadataByteTableSize; + uint64_t BinaryByteTableSize; + uint64_t GlobalMetadataOffset; + uint64_t GlobalMetadataSize; + }; + + struct alignas(8) AbstractModuleHeaderType { + uint64_t MetadataOffset; + uint64_t MetadataSize; + uint32_t IRModuleCount; + uint32_t IRModuleOffset; + uint32_t NativeDeviceCodeImageCount; + uint32_t NativeDeviceCodeImageOffset; + }; + + struct alignas(8) IRModuleHeaderType { + uint64_t MetadataOffset; + uint64_t MetadataSize; + uint64_t RawIRBytesOffset; + uint64_t RawIRBytesSize; + }; + + struct alignas(8) NativeDeviceCodeImageHeaderType { + uint64_t MetadataOffset; + uint64_t MetadataSize; + uint64_t BinaryBytesOffset; + uint64_t BinaryBytesSize; + }; +}; + +// Helper class for managing both a SYCLBIN and binaries created from it, +// allowing existing infrastructure to better understand the contents of the +// SYCLBINs. +struct SYCLBINBinaries { + // Delete copy-ctor to keep binaries unique and avoid costly copies of a + // heavy structure. + SYCLBINBinaries(const SYCLBINBinaries &) = delete; + SYCLBINBinaries &operator=(const SYCLBINBinaries &) = delete; + + SYCLBINBinaries(SYCLBINBinaries &&) = default; + SYCLBINBinaries &operator=(SYCLBINBinaries &&) = default; + + SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize); + + std::vector + getBestCompatibleImages(const device &Dev); + std::vector + getBestCompatibleImages(const std::vector &Dev); + + uint8_t getState() const noexcept { + PropertySet &GlobalMetadata = + (*ParsedSYCLBIN + .GlobalMetadata)[PropertySetRegistry::SYCLBIN_GLOBAL_METADATA]; + return static_cast( + GlobalMetadata[PropertySet::key_type{"state"}].asUint32()); + } + +private: + std::vector<_sycl_offload_entry_struct> & + convertAbstractModuleEntries(const SYCLBIN::AbstractModule &AM); + + std::vector<_sycl_device_binary_property_set_struct> & + convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM); + + std::unique_ptr SYCLBINContentCopy = nullptr; + size_t SYCLBINContentCopySize = 0; + + SYCLBIN ParsedSYCLBIN; + + // Buffers for holding entries in the binary structs alive. + std::vector> BinaryOffloadEntries; + std::vector> + BinaryProperties; + std::vector> + BinaryPropertySets; + + std::vector DeviceBinaries; + std::vector JITDeviceBinaryImages; + std::vector NativeDeviceBinaryImages; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index ab12ff67e8590..a0861190815ec 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -212,6 +212,12 @@ get_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, State); } +detail::KernelBundleImplPtr +get_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, + const sycl::span &Bytes, bundle_state State) { + return std::make_shared(Ctx, Devs, Bytes, State); +} + detail::KernelBundleImplPtr get_empty_interop_kernel_bundle_impl(const context &Ctx, const std::vector &Devs) { diff --git a/sycl/test-e2e/SYCLBIN/Inputs/basic.hpp b/sycl/test-e2e/SYCLBIN/Inputs/basic.hpp new file mode 100644 index 0000000000000..96d793ee3aadd --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/basic.hpp @@ -0,0 +1,47 @@ +#include "common.hpp" + +#include + +static constexpr size_t NUM = 1024; +static constexpr size_t WGSIZE = 16; +static constexpr float EPS = 0.001; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe = sycl::build(KBInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBObj = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe = sycl::link(KBObj); +#else // defined(SYCLBIN_EXECUTABLE_STATE) + auto KBExe = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); +#endif + + assert(KBExe.ext_oneapi_has_kernel("iota")); + sycl::kernel IotaKern = KBExe.ext_oneapi_get_kernel("iota"); + + float *Ptr = sycl::malloc_shared(NUM, Q); + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(3.14f, Ptr); + CGH.parallel_for(sycl::nd_range{{NUM}, {WGSIZE}}, IotaKern); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + const float Truth = 3.14f + static_cast(I); + if (std::abs(Ptr[I] - Truth) > EPS) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + sycl::free(Ptr, Q); + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/basic_kernel.cpp b/sycl/test-e2e/SYCLBIN/Inputs/basic_kernel.cpp new file mode 100644 index 0000000000000..0f9a04ae3762c --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/basic_kernel.cpp @@ -0,0 +1,10 @@ +#include + +namespace syclexp = sycl::ext::oneapi::experimental; +namespace syclext = sycl::ext::oneapi; + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclexp::nd_range_kernel<1>)) void iota(float start, float *ptr) { + size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id(); + ptr[id] = start + static_cast(id); +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/common.hpp b/sycl/test-e2e/SYCLBIN/Inputs/common.hpp new file mode 100644 index 0000000000000..f302672a1c717 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/common.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; +namespace syclext = sycl::ext::oneapi; + +#if !defined(SYCLBIN_INPUT_STATE) && !defined(SYCLBIN_OBJECT_STATE) && \ + !defined(SYCLBIN_EXECUTABLE_STATE) +#error "SYCLBIN state define missing!" +#endif + +template +constexpr std::string_view GetStateName() { + if constexpr (InvalidState == sycl::bundle_state::input) + return "input"; + else if constexpr (InvalidState == sycl::bundle_state::object) + return "object"; + else + return "executable"; +} + +template +int ExpectExceptionInvalidState(const sycl::context &Ctx, const char *File) { + try { + syclexp::get_kernel_bundle(Ctx, std::string{File}); + std::cout << "Unexpectedly created a kernel bundle for invalid state: " + << GetStateName() << std::endl; + return 1; + } catch (sycl::exception &) { + } + return 0; +} + +// SYCLBIN is only directly loadable in the state they were produced in, so +// we run checks to ensure other states will complain. +int CommonLoadCheck(const sycl::context &Ctx, const char *File) { + int Failed = 0; + +#ifndef SYCLBIN_INPUT_STATE + Failed += ExpectExceptionInvalidState(Ctx, File); +#endif +#ifndef SYCLBIN_OBJECT_STATE + Failed += ExpectExceptionInvalidState(Ctx, File); +#endif +#ifndef SYCLBIN_EXECUTABLE_STATE + Failed += + ExpectExceptionInvalidState(Ctx, File); +#endif + + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/exporting_function.cpp b/sycl/test-e2e/SYCLBIN/Inputs/exporting_function.cpp new file mode 100644 index 0000000000000..708444cac10e9 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/exporting_function.cpp @@ -0,0 +1,6 @@ +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/importing_kernel.cpp b/sycl/test-e2e/SYCLBIN/Inputs/importing_kernel.cpp new file mode 100644 index 0000000000000..bb113590b15ee --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/importing_kernel.cpp @@ -0,0 +1,11 @@ + +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental:: + single_task_kernel)) void TestKernel1(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/link.hpp b/sycl/test-e2e/SYCLBIN/Inputs/link.hpp new file mode 100644 index 0000000000000..ce14379c4e19a --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/link.hpp @@ -0,0 +1,60 @@ +#include "common.hpp" + +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +static constexpr size_t NUM = 10; + +int main(int argc, char *argv[]) { + assert(argc == 3); + + sycl::queue Q; + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]) + + CommonLoadCheck(Q.get_context(), argv[2]); + + // Load SYCLBINs. +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput1 = syclex::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBInput2 = syclex::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[2]}); + + // Compile the bundles. + auto KBObj1 = sycl::compile(KBInput1); + auto KBObj2 = sycl::compile(KBInput2); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBObj1 = syclex::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBObj2 = syclex::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[2]}); +#else // defined(SYCLBIN_EXECUTABLE_STATE) +#error "Test does not work with executable state." +#endif + + // Link the bundles. + auto KBExe = sycl::link({KBObj1, KBObj2}); + + // TestKernel1 does not have any requirements, so should be there always. + assert(KBExe.ext_oneapi_has_kernel("TestKernel1")); + sycl::kernel TestKernel1 = KBExe.ext_oneapi_get_kernel("TestKernel1"); + + int *Ptr = sycl::malloc_shared(NUM, Q); + Q.fill(Ptr, int{0}, NUM).wait_and_throw(); + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Ptr, int{NUM}); + CGH.single_task(TestKernel1); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + if (Ptr[I] != I) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + + sycl::free(Ptr, Q); + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/link_rtc.hpp b/sycl/test-e2e/SYCLBIN/Inputs/link_rtc.hpp new file mode 100644 index 0000000000000..f8f41d5ad25ed --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/link_rtc.hpp @@ -0,0 +1,79 @@ +#include "common.hpp" + +#include + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel1(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +static constexpr size_t NUM = 10; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclexp::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + + // Load SYCLBIN and compile it. +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput = syclexp::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBSYCLBINObj = sycl::compile(KBInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBSYCLBINObj = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); +#else // defined(SYCLBIN_EXECUTABLE_STATE) +#error "Test does not work with executable state." +#endif + + // Compile source kernel bundle. + auto KBSrc = syclexp::create_kernel_bundle_from_source( + Q.get_context(), syclexp::source_language::sycl, SYCLSource); + syclexp::properties BuildOpts{ + syclexp::build_options{"-fsycl-allow-device-image-dependencies"}}; + auto KBSrcObj = syclexp::compile(KBSrc, BuildOpts); + + // Link the bundles. + auto KBExe = sycl::link({KBSYCLBINObj, KBSrcObj}); + + // TestKernel1 does not have any requirements, so should be there always. + assert(KBExe.ext_oneapi_has_kernel("TestKernel1")); + sycl::kernel TestKernel1 = KBExe.ext_oneapi_get_kernel("TestKernel1"); + + int *Ptr = sycl::malloc_shared(NUM, Q); + Q.fill(Ptr, int{0}, NUM).wait_and_throw(); + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Ptr, int{NUM}); + CGH.single_task(TestKernel1); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + if (Ptr[I] != I) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + + sycl::free(Ptr, Q); + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py b/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py new file mode 100644 index 0000000000000..2dc34b8abf22b --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py @@ -0,0 +1,2 @@ +import lit +config.suffixes = [] # Skip all files in this folder. diff --git a/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.cpp b/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.cpp new file mode 100644 index 0000000000000..15b20a48a8c1b --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.cpp @@ -0,0 +1,15 @@ +#include + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental:: + single_task_kernel)) void TestKernel1(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental:: + single_task_kernel)) void TestKernel2(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = static_cast(static_cast(I) / 2.0); +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.hpp b/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.hpp new file mode 100644 index 0000000000000..e05cdd66e1a7f --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.hpp @@ -0,0 +1,68 @@ +#include "common.hpp" + +#include + +static constexpr size_t NUM = 10; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput = syclexp::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBExe = sycl::build(KBInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBObj = syclexp::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBExe = sycl::link(KBObj); +#else // defined(SYCLBIN_EXECUTABLE_STATE) + auto KBExe = syclexp::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); +#endif + + // TestKernel1 does not have any requirements, so should be there always. + assert(KBExe.ext_oneapi_has_kernel("TestKernel1")); + sycl::kernel TestKernel1 = KBExe.ext_oneapi_get_kernel("TestKernel1"); + + int *Ptr = sycl::malloc_shared(NUM, Q); + Q.fill(Ptr, int{0}, NUM).wait_and_throw(); + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Ptr, int{NUM}); + CGH.single_task(TestKernel1); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + if (Ptr[I] != I) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + + // TestKernel2 should only be there if the device supports fp64. + if (Q.get_device().has(sycl::aspect::fp64)) { + assert(KBExe.ext_oneapi_has_kernel("TestKernel2")); + sycl::kernel TestKernel2 = KBExe.ext_oneapi_get_kernel("TestKernel2"); + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Ptr, int{NUM}); + CGH.single_task(TestKernel2); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + if (Ptr[I] != static_cast(static_cast(I) / 2.0)) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + } else { + assert(!KBExe.ext_oneapi_has_kernel("TestKernel2")); + } + + sycl::free(Ptr, Q); + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/basic_executable.cpp b/sycl/test-e2e/SYCLBIN/basic_executable.cpp new file mode 100644 index 0000000000000..26cca77151250 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/basic_executable.cpp @@ -0,0 +1,20 @@ +//==--------- basic_executable.cpp --- SYCLBIN extension tests -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in executable +// -- state. + +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_EXECUTABLE_STATE + +#include "Inputs/basic.hpp" diff --git a/sycl/test-e2e/SYCLBIN/basic_input.cpp b/sycl/test-e2e/SYCLBIN/basic_input.cpp new file mode 100644 index 0000000000000..4f6a57134ffd4 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/basic_input.cpp @@ -0,0 +1,20 @@ +//==--------- basic_input.cpp --- SYCLBIN extension tests ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input +// -- state. + +// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/basic.hpp" diff --git a/sycl/test-e2e/SYCLBIN/basic_object.cpp b/sycl/test-e2e/SYCLBIN/basic_object.cpp new file mode 100644 index 0000000000000..eaf68656ff8fa --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/basic_object.cpp @@ -0,0 +1,20 @@ +//==--------- basic_object.cpp --- SYCLBIN extension tests -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object +// -- state. + +// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/basic.hpp" diff --git a/sycl/test-e2e/SYCLBIN/link_input.cpp b/sycl/test-e2e/SYCLBIN/link_input.cpp new file mode 100644 index 0000000000000..f002b9163fbec --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_input.cpp @@ -0,0 +1,24 @@ +//==-------------- link_input.cpp --- SYCLBIN extension tests --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking two SYCLBIN kernel_bundle. + +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.export.syclbin %t.import.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/link.hpp" diff --git a/sycl/test-e2e/SYCLBIN/link_object.cpp b/sycl/test-e2e/SYCLBIN/link_object.cpp new file mode 100644 index 0000000000000..70f99eafcf230 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_object.cpp @@ -0,0 +1,24 @@ +//==-------------- link_input.cpp --- SYCLBIN extension tests --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking two SYCLBIN kernel_bundle. + +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.export.syclbin %t.import.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/link.hpp" diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp new file mode 100644 index 0000000000000..dcf84def5bcf5 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp @@ -0,0 +1,24 @@ +//==------------ link_rtc_input.cpp --- SYCLBIN extension tests ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking where one kernel is runtime-compiled and one is compiled +// -- to SYCLBIN. + +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/link_rtc.hpp" diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp new file mode 100644 index 0000000000000..3b0a073f4537f --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp @@ -0,0 +1,24 @@ +//==------------ link_rtc_object.cpp --- SYCLBIN extension tests -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking where one kernel is runtime-compiled and one is compiled +// -- to SYCLBIN. + +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/link_rtc.hpp" diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp new file mode 100644 index 0000000000000..701bb8799a163 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp @@ -0,0 +1,20 @@ +//==- optional_kernel_features_executable.cpp --- SYCLBIN extension tests --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for compiling and loading a kernel bundle with a SYCLBIN containing +// the use of optional kernel features. + +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_EXECUTABLE_STATE + +#include "Inputs/optional_kernel_features.hpp" diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp new file mode 100644 index 0000000000000..1ce4a9bdfe89a --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp @@ -0,0 +1,20 @@ +//==--- optional_kernel_features_input.cpp --- SYCLBIN extension tests -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for compiling and loading a kernel bundle with a SYCLBIN containing +// the use of optional kernel features. + +// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/optional_kernel_features.hpp" diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp new file mode 100644 index 0000000000000..0adf24860a246 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -0,0 +1,20 @@ +//==--- optional_kernel_features_object.cpp --- SYCLBIN extension tests -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for compiling and loading a kernel bundle with a SYCLBIN containing +// the use of optional kernel features. + +// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/optional_kernel_features.hpp" diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 3e3461f7503a4..a5c42abaf5f82 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3322,6 +3322,7 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE +_ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKNS0_4spanIcLm18446744073709551615EEENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE _ZN4sycl3_V16detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index c913f0d6bcaa6..de2b939756ea0 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 20 +// CHECK-NUM-MATCHES: 25 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From ec21a1041d81c0445f680eabc19f456f4c5a8877 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 12 Jun 2025 02:27:19 -0700 Subject: [PATCH 02/25] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp index 0adf24860a246..cb43d0734c1e6 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -1,4 +1,5 @@ -//==--- optional_kernel_features_object.cpp --- SYCLBIN extension tests -----==// +//==--- optional_kernel_features_object.cpp --- SYCLBIN extension tests +//-----==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From 7061d2c771cec76226a0438c476afaeb50f13532 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 12 Jun 2025 04:23:18 -0700 Subject: [PATCH 03/25] Address formatting and warnings Signed-off-by: Larsen, Steffen --- sycl/source/detail/property_set_io.hpp | 6 +++--- sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/property_set_io.hpp b/sycl/source/detail/property_set_io.hpp index 3d9de09f96060..b684e33dd7df2 100644 --- a/sycl/source/detail/property_set_io.hpp +++ b/sycl/source/detail/property_set_io.hpp @@ -238,7 +238,7 @@ class PropertySetRegistry { // special case when there is no property data, i.e. the resulting property // set registry should be empty if (Src.size() == 0) - return std::move(Res); + return Res; size_t CurrentStart = 0; while (CurrentStart < Src.size()) { @@ -333,12 +333,12 @@ class PropertySetRegistry { default: throw sycl::exception(make_error_code(errc::invalid), "Unsupported property type: " + - std::string{Ttag}); + std::to_string(Tint)); } (*CurPropSet)[std::string{Parts.first}] = std::move(Prop); } - return std::move(Res); + return Res; } MapTy::const_iterator begin() const { return PropSetMap.begin(); } diff --git a/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py b/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py index 2dc34b8abf22b..bda30234024d4 100644 --- a/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py +++ b/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py @@ -1,2 +1,3 @@ import lit + config.suffixes = [] # Skip all files in this folder. From 9bb13aa5ed7f33e4d7b97decd3b2140fcb5c3c21 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 12 Jun 2025 05:21:35 -0700 Subject: [PATCH 04/25] Even more pedantic formatting and errors Signed-off-by: Larsen, Steffen --- sycl/source/detail/base64.hpp | 2 +- sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/base64.hpp b/sycl/source/detail/base64.hpp index af729b27afaf6..e56750725b8bf 100644 --- a/sycl/source/detail/base64.hpp +++ b/sycl/source/detail/base64.hpp @@ -112,7 +112,7 @@ class Base64 { size_t DstSize = getDecodedSize(SrcSize); std::unique_ptr Dst(new byte[DstSize]); decode(Src, Dst.get(), SrcSize); - return std::move(Dst); + return Dst; } }; diff --git a/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py b/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py index bda30234024d4..41588acc03e40 100644 --- a/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py +++ b/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py @@ -1,3 +1,3 @@ import lit -config.suffixes = [] # Skip all files in this folder. +config.suffixes = [] # Skip all files in this folder. From 258ecee8c6bbc24bde9ed2324e2db525c0986f18 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 12 Jun 2025 06:50:54 -0700 Subject: [PATCH 05/25] Rebase and fix warning Signed-off-by: Larsen, Steffen --- llvm/lib/Object/SYCLBIN.cpp | 2 +- sycl/source/detail/program_manager/program_manager.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Object/SYCLBIN.cpp b/llvm/lib/Object/SYCLBIN.cpp index ccbd96eaa9f8d..1b537ae84deb9 100644 --- a/llvm/lib/Object/SYCLBIN.cpp +++ b/llvm/lib/Object/SYCLBIN.cpp @@ -445,5 +445,5 @@ Expected> SYCLBIN::read(MemoryBufferRef Source) { } } - return std::move(Result); + return Result; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 51ed048e3c0c2..3bd0d50a9302a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -3251,7 +3251,7 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, if (DevImgImpl->getOriginMask() & ImageOriginSYCLBIN) { // SYCLBIN binaries should gather their kernels from the backend. std::vector GatheredKernelNames = - getKernelNamesFromURProgram(ContextImpl->getAdapter(), ResProgram); + getKernelNamesFromURProgram(ContextImpl.getAdapter(), ResProgram); MergedKernelNames.insert(GatheredKernelNames.begin(), GatheredKernelNames.end()); } From 47003081197ecfefe7490132cbff414adf77ae1f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 12 Jun 2025 06:59:52 -0700 Subject: [PATCH 06/25] Fix the right file Signed-off-by: Larsen, Steffen --- sycl/source/detail/syclbin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/syclbin.cpp b/sycl/source/detail/syclbin.cpp index bcac8c885e363..52c47ab24b4ba 100644 --- a/sycl/source/detail/syclbin.cpp +++ b/sycl/source/detail/syclbin.cpp @@ -21,7 +21,7 @@ namespace { std::unique_ptr ContentCopy(const char *Data, size_t Size) { std::unique_ptr Result{new char[Size]}; std::memcpy(Result.get(), Data, Size); - return std::move(Result); + return Result; } // Offload binary header and entry. From 2130901f16ee39bad24ef489dccb8a8ee8e11d66 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 12 Jun 2025 07:40:44 -0700 Subject: [PATCH 07/25] Fix windows build failure Signed-off-by: Larsen, Steffen --- .../sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp index eb1bbf099d96d..4d5f67f8cb755 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp @@ -56,7 +56,7 @@ get_kernel_bundle(const context &Ctxt, const std::vector &Devs, if (!FileStream.is_open()) throw sycl::exception(make_error_code(errc::invalid), "Failed to open SYCLBIN file: " + - static_cast(Filename)); + Filename.string()); RawSYCLBINData = std::vector{std::istreambuf_iterator(FileStream), std::istreambuf_iterator()}; From c49ddf4a2080d9284bf7225a57ca22993a0b6e64 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 12 Jun 2025 07:44:29 -0700 Subject: [PATCH 08/25] Revert tooling changes Signed-off-by: Larsen, Steffen --- llvm/lib/Object/SYCLBIN.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Object/SYCLBIN.cpp b/llvm/lib/Object/SYCLBIN.cpp index 1b537ae84deb9..ccbd96eaa9f8d 100644 --- a/llvm/lib/Object/SYCLBIN.cpp +++ b/llvm/lib/Object/SYCLBIN.cpp @@ -445,5 +445,5 @@ Expected> SYCLBIN::read(MemoryBufferRef Source) { } } - return Result; + return std::move(Result); } From af8e38e80d0d62be6926f9a885e7f9aa72b8562c Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 12 Jun 2025 08:29:48 -0700 Subject: [PATCH 09/25] Exclude CUDA and HIP for now Signed-off-by: Larsen, Steffen --- sycl/test-e2e/SYCLBIN/basic_executable.cpp | 5 +++++ sycl/test-e2e/SYCLBIN/basic_input.cpp | 5 +++++ sycl/test-e2e/SYCLBIN/basic_object.cpp | 5 +++++ .../test-e2e/SYCLBIN/optional_kernel_features_executable.cpp | 5 +++++ sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp | 5 +++++ sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp | 5 +++++ 6 files changed, 30 insertions(+) diff --git a/sycl/test-e2e/SYCLBIN/basic_executable.cpp b/sycl/test-e2e/SYCLBIN/basic_executable.cpp index 26cca77151250..cfdb3ebf33039 100644 --- a/sycl/test-e2e/SYCLBIN/basic_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_executable.cpp @@ -8,6 +8,11 @@ // REQUIRES: aspect-usm_device_allocations +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in executable // -- state. diff --git a/sycl/test-e2e/SYCLBIN/basic_input.cpp b/sycl/test-e2e/SYCLBIN/basic_input.cpp index 4f6a57134ffd4..892b4096b2f83 100644 --- a/sycl/test-e2e/SYCLBIN/basic_input.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_input.cpp @@ -8,6 +8,11 @@ // REQUIRES: aspect-usm_device_allocations +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input // -- state. diff --git a/sycl/test-e2e/SYCLBIN/basic_object.cpp b/sycl/test-e2e/SYCLBIN/basic_object.cpp index eaf68656ff8fa..91a04fc3eac55 100644 --- a/sycl/test-e2e/SYCLBIN/basic_object.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_object.cpp @@ -8,6 +8,11 @@ // REQUIRES: aspect-usm_device_allocations +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object // -- state. diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp index 701bb8799a163..b36d5957b36c9 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp @@ -8,6 +8,11 @@ // REQUIRES: aspect-usm_device_allocations +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp index 1ce4a9bdfe89a..d0df88a94bbe9 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp @@ -8,6 +8,11 @@ // REQUIRES: aspect-usm_device_allocations +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp index cb43d0734c1e6..43ae03b254c42 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -9,6 +9,11 @@ // REQUIRES: aspect-usm_device_allocations +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. From 9b632ba059a42f90f31d770418e66814d18bce7f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 12 Jun 2025 23:37:08 -0700 Subject: [PATCH 10/25] Avoid charconv for RHEL builds Signed-off-by: Larsen, Steffen --- sycl/source/detail/property_set_io.hpp | 45 +++++++++++++++----------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/sycl/source/detail/property_set_io.hpp b/sycl/source/detail/property_set_io.hpp index b684e33dd7df2..860c1ef27f50b 100644 --- a/sycl/source/detail/property_set_io.hpp +++ b/sycl/source/detail/property_set_io.hpp @@ -14,13 +14,36 @@ #include "detail/base64.hpp" #include "sycl/exception.hpp" -#include #include namespace sycl { inline namespace _V1 { namespace detail { +// Helper function for converting a string_view to an integer. Allows only +// integer values and the empty string (interpreted as 0). +template +static IntT stringViewToInt(const std::string_view &SV) { + static_assert(std::is_integral_v); + + IntT Result = static_cast(0); + if (SV.empty()) + return Result; + + bool Negate = std::is_signed_v && SV[0] == '-'; + + for (size_t I = static_cast(Negate); I < SV.size(); ++I) { + const char CurrentC = SV[I]; + if (CurrentC < '0' || CurrentC > '9') + throw sycl::exception(make_error_code(errc::invalid), + "Invalid integer numeral: " + + std::string{CurrentC}); + Result *= static_cast(10); + Result += static_cast(CurrentC - '0'); + } + return Negate ? -Result : Result; +} + // Represents a property value. PropertyValue name is stored in the encompassing // container. class PropertyValue { @@ -297,15 +320,7 @@ class PropertySetRegistry { std::string{Parts.second}); // parse type - int Tint; - auto TintConvRes = - std::from_chars(TypeVal.first.data(), - TypeVal.first.data() + TypeVal.first.size(), Tint); - if (TintConvRes.ec == std::errc::invalid_argument || - TintConvRes.ec == std::errc::result_out_of_range) - throw sycl::exception(make_error_code(errc::invalid), - "Could not convert type to integer: " + - std::string{TypeVal.first}); + int Tint = stringViewToInt(TypeVal.first); PropertyValue::Type Ttag = PropertyValue::getTypeTag(Tint); std::string_view Val = TypeVal.second; @@ -314,15 +329,7 @@ class PropertySetRegistry { // parse value depending on its type switch (Ttag) { case PropertyValue::Type::UINT32: { - uint32_t ValV; - auto ValVConvRes = - std::from_chars(Val.data(), Val.data() + Val.size(), ValV); - if (ValVConvRes.ec == std::errc::invalid_argument || - ValVConvRes.ec == std::errc::result_out_of_range) - throw sycl::exception(make_error_code(errc::invalid), - "Could not convert value to integer: " + - std::string{Val}); - Prop.set(ValV); + Prop.set(stringViewToInt(Val)); break; } case PropertyValue::Type::BYTE_ARRAY: { From fcebf1f8b495f5a695d03a8f1b99d4258b9437cf Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 13 Jun 2025 02:12:04 -0700 Subject: [PATCH 11/25] Add windows symbol Signed-off-by: Larsen, Steffen --- sycl/test/abi/sycl_symbols_windows.dump | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index f675fd68af594..db941c5343b6f 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4167,6 +4167,7 @@ ?get_kernel@kernel_bundle_plain@detail@_V1@sycl@@IEBA?AVkernel@34@AEBVkernel_id@34@@Z ?get_kernel_bundle@kernel@_V1@sycl@@QEBA?AV?$kernel_bundle@$01@23@XZ ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z +?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$span@D$0?0@23@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@AEBV?$function@$$A6A_NAEBV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z@5@@Z ?get_kernel_id_impl@detail@_V1@sycl@@YA?AVkernel_id@23@Vstring_view@123@@Z From af1040c122497b82827a848e21ee31bb3104d970 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 13 Jun 2025 02:30:11 -0700 Subject: [PATCH 12/25] Avoid warning on RHEL Signed-off-by: Larsen, Steffen --- sycl/source/detail/base64.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/base64.hpp b/sycl/source/detail/base64.hpp index e56750725b8bf..301cb7ee37716 100644 --- a/sycl/source/detail/base64.hpp +++ b/sycl/source/detail/base64.hpp @@ -70,7 +70,7 @@ class Base64 { // decode full quads for (size_t Qch = 0; Qch < SrcSize / 4; ++Qch, SrcOff += 4, DstOff += 3) { - byte Ch[4]; + byte Ch[4] = {0, 0, 0, 0}; decode4(Src + SrcOff, Ch); // each quad of chars produces three bytes of output From c761fe84e646b0f14f89df5ddb73e47cab4196d1 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 13 Jun 2025 03:14:56 -0700 Subject: [PATCH 13/25] Enable link tests for all targets Signed-off-by: Larsen, Steffen --- sycl/test-e2e/SYCLBIN/link_input.cpp | 7 ++++--- sycl/test-e2e/SYCLBIN/link_object.cpp | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sycl/test-e2e/SYCLBIN/link_input.cpp b/sycl/test-e2e/SYCLBIN/link_input.cpp index f002b9163fbec..928d05e08c69b 100644 --- a/sycl/test-e2e/SYCLBIN/link_input.cpp +++ b/sycl/test-e2e/SYCLBIN/link_input.cpp @@ -6,11 +6,12 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_shared_allocations -// UNSUPPORTED: accelerator -// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 // -- Test for linking two SYCLBIN kernel_bundle. diff --git a/sycl/test-e2e/SYCLBIN/link_object.cpp b/sycl/test-e2e/SYCLBIN/link_object.cpp index 70f99eafcf230..db726bf62e104 100644 --- a/sycl/test-e2e/SYCLBIN/link_object.cpp +++ b/sycl/test-e2e/SYCLBIN/link_object.cpp @@ -6,11 +6,12 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_shared_allocations -// UNSUPPORTED: accelerator -// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 // -- Test for linking two SYCLBIN kernel_bundle. From a927f73d8ffe5b7f0f2bc33cacf279d2aa8ce34c Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 13 Jun 2025 03:30:22 -0700 Subject: [PATCH 14/25] Switch kernel prefix to string_view Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index b223112cc167f..50dc29f02a4c3 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -597,11 +597,9 @@ class device_image_impl { std::string adjustKernelName(std::string_view Name) const { if (MOrigins & ImageOriginSYCLBIN) { - constexpr const char KernelPrefix[] = "__sycl_kernel_"; - constexpr size_t KernelPrefixLen = - sizeof(KernelPrefix) / sizeof(char) - 1; - if (Name.size() > KernelPrefixLen && - Name.substr(0, KernelPrefixLen) == std::string_view{KernelPrefix}) + constexpr std::string_view KernelPrefix = "__sycl_kernel_"; + if (Name.size() > KernelPrefix.size() && + Name.substr(0, KernelPrefix.size()) == KernelPrefix) return Name.data(); return std::string{KernelPrefix} + Name.data(); } From c3a7a096187a58be64d38e1a13808195b9cea02f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 13 Jun 2025 03:33:28 -0700 Subject: [PATCH 15/25] Avoid using filesystem on systems that don't support them Signed-off-by: Larsen, Steffen --- .../sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp index 4d5f67f8cb755..f258156d7bb99 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp @@ -11,10 +11,13 @@ #include #include -#include #include #include +#if __has_include() +#include +#endif + #if __has_include() #include #endif @@ -45,6 +48,7 @@ get_kernel_bundle(const context &Ctxt, const std::vector &Devs, } #endif +#if __cpp_lib_filesystem template std::enable_if_t> get_kernel_bundle(const context &Ctxt, const std::vector &Devs, @@ -72,6 +76,7 @@ get_kernel_bundle(const context &Ctxt, const std::filesystem::path &Filename, return experimental::get_kernel_bundle(Ctxt, Ctxt.get_devices(), Filename, Props); } +#endif } // namespace ext::oneapi::experimental } // namespace _V1 From f27d08b51413ed00fdb87173aba913144039ba61 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 16 Jun 2025 09:08:39 -0700 Subject: [PATCH 16/25] Gather kernel names from new property set Signed-off-by: Larsen, Steffen --- sycl/source/detail/compiler.hpp | 2 ++ sycl/source/detail/device_binary_image.cpp | 11 +++++-- sycl/source/detail/device_binary_image.hpp | 2 ++ sycl/source/detail/device_image_impl.hpp | 24 ++++++++++++++- .../program_manager/program_manager.cpp | 30 ------------------- .../program_manager/program_manager.hpp | 4 --- sycl/source/detail/syclbin.cpp | 5 ++-- sycl/test-e2e/SYCLBIN/basic_executable.cpp | 5 ---- sycl/test-e2e/SYCLBIN/basic_input.cpp | 5 ---- sycl/test-e2e/SYCLBIN/basic_object.cpp | 5 ---- sycl/test-e2e/SYCLBIN/link_input.cpp | 5 ---- sycl/test-e2e/SYCLBIN/link_object.cpp | 5 ---- .../optional_kernel_features_executable.cpp | 5 ---- .../optional_kernel_features_input.cpp | 5 ---- .../optional_kernel_features_object.cpp | 5 ---- 15 files changed, 39 insertions(+), 79 deletions(-) diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 056895258e40b..90841680ac000 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -57,6 +57,8 @@ #define __SYCL_PROPERTY_SET_SYCL_MISC_PROP "SYCL/misc properties" /// PropertySetRegistry::SYCL_ASSERT_USED defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_SYCL_ASSERT_USED "SYCL/assert used" +/// PropertySetRegistry::SYCL_KERNEL_NAMES defined in PropertySetIO.h +#define __SYCL_PROPERTY_SET_SYCL_KERNEL_NAMES "SYCL/kernel names" /// PropertySetRegistry::SYCL_EXPORTED_SYMBOLS defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS "SYCL/exported symbols" /// PropertySetRegistry::SYCL_IMPORTED_SYMBOLS defined in PropertySetIO.h diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 192d6054d53be..1fea167645372 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -199,6 +199,7 @@ RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) { ProgramMetadataUR.push_back( ur::mapDeviceBinaryPropertyToProgramMetadata(Prop)); } + KernelNames.init(Bin, __SYCL_PROPERTY_SET_SYCL_KERNEL_NAMES); ExportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS); ImportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS); DeviceGlobals.init(Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS); @@ -513,6 +514,10 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getImplicitLocalArg(); }); + auto MergedKernelNames = + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getKernelNames(); + }); auto MergedExportedSymbols = naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getExportedSymbols(); @@ -522,12 +527,13 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( return Img.getRegisteredKernels(); }); - std::array *, 10> MergedVecs{ + std::array *, 11> MergedVecs{ &MergedSpecConstants, &MergedSpecConstantsDefaultValues, &MergedKernelParamOptInfo, &MergedAssertUsed, &MergedDeviceGlobals, &MergedHostPipes, &MergedVirtualFunctions, &MergedImplicitLocalArg, - &MergedExportedSymbols, &MergedRegisteredKernels}; + &MergedKernelNames, &MergedExportedSymbols, + &MergedRegisteredKernels}; // Exclusive merges. auto MergedDeviceLibReqMask = @@ -651,6 +657,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( CopyPropertiesVec(MergedHostPipes, HostPipes); CopyPropertiesVec(MergedVirtualFunctions, VirtualFunctions); CopyPropertiesVec(MergedImplicitLocalArg, ImplicitLocalArg); + CopyPropertiesVec(MergedKernelNames, KernelNames); CopyPropertiesVec(MergedExportedSymbols, ExportedSymbols); CopyPropertiesVec(MergedRegisteredKernels, RegisteredKernels); diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 6a0103be7b873..e34a474ad1ab5 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -227,6 +227,7 @@ class RTDeviceBinaryImage { const std::vector &getProgramMetadataUR() const { return ProgramMetadataUR; } + const PropertyRange &getKernelNames() const { return KernelNames; } const PropertyRange &getExportedSymbols() const { return ExportedSymbols; } const PropertyRange &getImportedSymbols() const { return ImportedSymbols; } const PropertyRange &getDeviceGlobals() const { return DeviceGlobals; } @@ -259,6 +260,7 @@ class RTDeviceBinaryImage { RTDeviceBinaryImage::PropertyRange KernelParamOptInfo; RTDeviceBinaryImage::PropertyRange AssertUsed; RTDeviceBinaryImage::PropertyRange ProgramMetadata; + RTDeviceBinaryImage::PropertyRange KernelNames; RTDeviceBinaryImage::PropertyRange ExportedSymbols; RTDeviceBinaryImage::PropertyRange ImportedSymbols; RTDeviceBinaryImage::PropertyRange DeviceGlobals; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index c8d25b2c6ee4e..74d15c00d9dbe 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -256,6 +256,12 @@ class device_image_impl { MKernelIDs(std::move(KernelIDs)), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(Origins) { updateSpecConstSymMap(); + // SYCLBIN files have the kernel names embedded in the binaries, so we + // collect them. + if (BinImage && (MOrigins & ImageOriginSYCLBIN)) + for (const sycl_device_binary_property &KNProp : + BinImage->getKernelNames()) + MKernelNames.insert(KNProp->Name); } device_image_impl( @@ -771,7 +777,7 @@ class device_image_impl { nullptr); std::vector KernelNames = - ProgramManager::getKernelNamesFromURProgram(Adapter, UrProgram); + getKernelNamesFromURProgram(Adapter, UrProgram); KernelNameSetT KernelNameSet{KernelNames.begin(), KernelNames.end()}; // If caching enabled and kernel not fetched from cache, cache. @@ -1252,6 +1258,22 @@ class device_image_impl { return UrProgram; } + static std::vector + getKernelNamesFromURProgram(const AdapterPtr &Adapter, + ur_program_handle_t UrProgram) { + // Get the kernel names. + size_t KernelNamesSize; + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); + + // semi-colon delimited list of kernel names. + std::string KernelNamesStr(KernelNamesSize, ' '); + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), + &KernelNamesStr[0], nullptr); + return detail::split_string(KernelNamesStr, ';'); + } + const std::variant, const RTDeviceBinaryImage *, SYCLBINBinaries> MBinImage = static_cast(nullptr); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 114276e613704..4e32e7d11eb7f 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2415,22 +2415,6 @@ ProgramManager::getBinImageState(const RTDeviceBinaryImage *BinImage) { : sycl::bundle_state::object; } -std::vector -ProgramManager::getKernelNamesFromURProgram(const AdapterPtr &Adapter, - ur_program_handle_t UrProgram) { - // Get the kernel names. - size_t KernelNamesSize; - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); - - // semi-colon delimited list of kernel names. - std::string KernelNamesStr(KernelNamesSize, ' '); - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), - &KernelNamesStr[0], nullptr); - return detail::split_string(KernelNamesStr, ';'); -} - std::optional ProgramManager::tryGetSYCLKernelID(KernelNameStrRefT KernelName) { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); @@ -3161,13 +3145,6 @@ ProgramManager::link(const std::vector &Imgs, RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); MergedKernelNames.insert(DevImgImpl->getKernelNames().begin(), DevImgImpl->getKernelNames().end()); - if (DevImgImpl->getOriginMask() & ImageOriginSYCLBIN) { - // SYCLBIN binaries should gather their kernels from the backend. - std::vector GatheredKernelNames = - getKernelNamesFromURProgram(Adapter, LinkedProg); - MergedKernelNames.insert(GatheredKernelNames.begin(), - GatheredKernelNames.end()); - } } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); @@ -3250,13 +3227,6 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); MergedKernelNames.insert(DevImgImpl->getKernelNames().begin(), DevImgImpl->getKernelNames().end()); - if (DevImgImpl->getOriginMask() & ImageOriginSYCLBIN) { - // SYCLBIN binaries should gather their kernels from the backend. - std::vector GatheredKernelNames = - getKernelNamesFromURProgram(ContextImpl.getAdapter(), ResProgram); - MergedKernelNames.insert(GatheredKernelNames.begin(), - GatheredKernelNames.end()); - } } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 8e89664a4b1a8..5bbe663d84f5f 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -389,10 +389,6 @@ class ProgramManager { static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage); - static std::vector - getKernelNamesFromURProgram(const AdapterPtr &Adapter, - ur_program_handle_t UrProgram); - private: ProgramManager(ProgramManager const &) = delete; ProgramManager &operator=(ProgramManager const &) = delete; diff --git a/sycl/source/detail/syclbin.cpp b/sycl/source/detail/syclbin.cpp index 52c47ab24b4ba..3cef5a2785a03 100644 --- a/sycl/source/detail/syclbin.cpp +++ b/sycl/source/detail/syclbin.cpp @@ -356,12 +356,13 @@ SYCLBINBinaries::convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM) { std::vector SYCLBINBinaries::getBestCompatibleImages(const device &Dev) { + detail::device_impl &DevImpl = *getSyclObjImpl(Dev); auto SelectCompatibleImages = [&](const std::vector &Imgs) { std::vector CompatImgs; for (const RTDeviceBinaryImage &Img : Imgs) - if (doesDevSupportDeviceRequirements(Dev, Img) && - doesImageTargetMatchDevice(Img, getSyclObjImpl(Dev).get())) + if (doesDevSupportDeviceRequirements(DevImpl, Img) && + doesImageTargetMatchDevice(Img, DevImpl)) CompatImgs.push_back(&Img); return CompatImgs; }; diff --git a/sycl/test-e2e/SYCLBIN/basic_executable.cpp b/sycl/test-e2e/SYCLBIN/basic_executable.cpp index cfdb3ebf33039..26cca77151250 100644 --- a/sycl/test-e2e/SYCLBIN/basic_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_executable.cpp @@ -8,11 +8,6 @@ // REQUIRES: aspect-usm_device_allocations -// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel -// names cannot be resolved for now. -// XFAIL: cuda || hip -// XFAIL-TRACKER: CMPLRLLVM-68469 - // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in executable // -- state. diff --git a/sycl/test-e2e/SYCLBIN/basic_input.cpp b/sycl/test-e2e/SYCLBIN/basic_input.cpp index 892b4096b2f83..4f6a57134ffd4 100644 --- a/sycl/test-e2e/SYCLBIN/basic_input.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_input.cpp @@ -8,11 +8,6 @@ // REQUIRES: aspect-usm_device_allocations -// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel -// names cannot be resolved for now. -// XFAIL: cuda || hip -// XFAIL-TRACKER: CMPLRLLVM-68469 - // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input // -- state. diff --git a/sycl/test-e2e/SYCLBIN/basic_object.cpp b/sycl/test-e2e/SYCLBIN/basic_object.cpp index 91a04fc3eac55..eaf68656ff8fa 100644 --- a/sycl/test-e2e/SYCLBIN/basic_object.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_object.cpp @@ -8,11 +8,6 @@ // REQUIRES: aspect-usm_device_allocations -// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel -// names cannot be resolved for now. -// XFAIL: cuda || hip -// XFAIL-TRACKER: CMPLRLLVM-68469 - // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object // -- state. diff --git a/sycl/test-e2e/SYCLBIN/link_input.cpp b/sycl/test-e2e/SYCLBIN/link_input.cpp index 928d05e08c69b..89166206cdc84 100644 --- a/sycl/test-e2e/SYCLBIN/link_input.cpp +++ b/sycl/test-e2e/SYCLBIN/link_input.cpp @@ -8,11 +8,6 @@ // REQUIRES: aspect-usm_shared_allocations -// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel -// names cannot be resolved for now. -// XFAIL: cuda || hip -// XFAIL-TRACKER: CMPLRLLVM-68469 - // -- Test for linking two SYCLBIN kernel_bundle. // RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_object.cpp b/sycl/test-e2e/SYCLBIN/link_object.cpp index db726bf62e104..9a36a0cfe0300 100644 --- a/sycl/test-e2e/SYCLBIN/link_object.cpp +++ b/sycl/test-e2e/SYCLBIN/link_object.cpp @@ -8,11 +8,6 @@ // REQUIRES: aspect-usm_shared_allocations -// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel -// names cannot be resolved for now. -// XFAIL: cuda || hip -// XFAIL-TRACKER: CMPLRLLVM-68469 - // -- Test for linking two SYCLBIN kernel_bundle. // RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp index b36d5957b36c9..701bb8799a163 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp @@ -8,11 +8,6 @@ // REQUIRES: aspect-usm_device_allocations -// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel -// names cannot be resolved for now. -// XFAIL: cuda || hip -// XFAIL-TRACKER: CMPLRLLVM-68469 - // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp index d0df88a94bbe9..1ce4a9bdfe89a 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp @@ -8,11 +8,6 @@ // REQUIRES: aspect-usm_device_allocations -// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel -// names cannot be resolved for now. -// XFAIL: cuda || hip -// XFAIL-TRACKER: CMPLRLLVM-68469 - // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp index 43ae03b254c42..cb43d0734c1e6 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -9,11 +9,6 @@ // REQUIRES: aspect-usm_device_allocations -// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel -// names cannot be resolved for now. -// XFAIL: cuda || hip -// XFAIL-TRACKER: CMPLRLLVM-68469 - // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. From 36f92042d088f9dfbf3e6f621ade5ccfd0f8d646 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 17 Jun 2025 00:18:49 -0700 Subject: [PATCH 17/25] Pass targets to tests Signed-off-by: Larsen, Steffen --- sycl/test-e2e/SYCLBIN/basic_executable.cpp | 2 +- sycl/test-e2e/SYCLBIN/basic_input.cpp | 2 +- sycl/test-e2e/SYCLBIN/basic_object.cpp | 2 +- sycl/test-e2e/SYCLBIN/link_input.cpp | 4 ++-- sycl/test-e2e/SYCLBIN/link_object.cpp | 4 ++-- sycl/test-e2e/SYCLBIN/link_rtc_input.cpp | 2 +- sycl/test-e2e/SYCLBIN/link_rtc_object.cpp | 2 +- sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp | 2 +- sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp | 2 +- sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sycl/test-e2e/SYCLBIN/basic_executable.cpp b/sycl/test-e2e/SYCLBIN/basic_executable.cpp index 26cca77151250..9b2390b00797b 100644 --- a/sycl/test-e2e/SYCLBIN/basic_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_executable.cpp @@ -11,7 +11,7 @@ // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in executable // -- state. -// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=executable %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/basic_input.cpp b/sycl/test-e2e/SYCLBIN/basic_input.cpp index 4f6a57134ffd4..015c1cccddc75 100644 --- a/sycl/test-e2e/SYCLBIN/basic_input.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_input.cpp @@ -11,7 +11,7 @@ // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input // -- state. -// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/basic_object.cpp b/sycl/test-e2e/SYCLBIN/basic_object.cpp index eaf68656ff8fa..9f68ab439628c 100644 --- a/sycl/test-e2e/SYCLBIN/basic_object.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_object.cpp @@ -11,7 +11,7 @@ // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object // -- state. -// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_input.cpp b/sycl/test-e2e/SYCLBIN/link_input.cpp index 89166206cdc84..c7d4594fc84f6 100644 --- a/sycl/test-e2e/SYCLBIN/link_input.cpp +++ b/sycl/test-e2e/SYCLBIN/link_input.cpp @@ -10,8 +10,8 @@ // -- Test for linking two SYCLBIN kernel_bundle. -// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin -// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.export.syclbin %t.import.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_object.cpp b/sycl/test-e2e/SYCLBIN/link_object.cpp index 9a36a0cfe0300..ca3c36685f656 100644 --- a/sycl/test-e2e/SYCLBIN/link_object.cpp +++ b/sycl/test-e2e/SYCLBIN/link_object.cpp @@ -10,8 +10,8 @@ // -- Test for linking two SYCLBIN kernel_bundle. -// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin -// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.export.syclbin %t.import.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp index dcf84def5bcf5..086ea2cc4528c 100644 --- a/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp +++ b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp @@ -15,7 +15,7 @@ // -- Test for linking where one kernel is runtime-compiled and one is compiled // -- to SYCLBIN. -// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp index 3b0a073f4537f..6b9dc29a77694 100644 --- a/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp +++ b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp @@ -15,7 +15,7 @@ // -- Test for linking where one kernel is runtime-compiled and one is compiled // -- to SYCLBIN. -// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp index 701bb8799a163..d8034e2384f15 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp @@ -11,7 +11,7 @@ // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. -// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=executable %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp index 1ce4a9bdfe89a..b0404adf1fdc2 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp @@ -11,7 +11,7 @@ // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. -// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp index cb43d0734c1e6..c30bc1623c31c 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -12,7 +12,7 @@ // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. -// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin From 5a2f313df5fb617ef29469756bbddef2a7e9fabe Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 17 Jun 2025 06:54:06 -0700 Subject: [PATCH 18/25] Disable on CUDA due to known regression Signed-off-by: Larsen, Steffen --- sycl/test-e2e/SYCLBIN/basic_executable.cpp | 4 ++++ sycl/test-e2e/SYCLBIN/basic_input.cpp | 4 ++++ sycl/test-e2e/SYCLBIN/basic_object.cpp | 4 ++++ sycl/test-e2e/SYCLBIN/link_input.cpp | 4 ++++ sycl/test-e2e/SYCLBIN/link_object.cpp | 4 ++++ sycl/test-e2e/SYCLBIN/link_rtc_input.cpp | 4 ++++ sycl/test-e2e/SYCLBIN/link_rtc_object.cpp | 4 ++++ sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp | 4 ++++ sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp | 4 ++++ sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp | 4 ++++ 10 files changed, 40 insertions(+) diff --git a/sycl/test-e2e/SYCLBIN/basic_executable.cpp b/sycl/test-e2e/SYCLBIN/basic_executable.cpp index 9b2390b00797b..4fd15c6c9612c 100644 --- a/sycl/test-e2e/SYCLBIN/basic_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_executable.cpp @@ -11,6 +11,10 @@ // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in executable // -- state. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=executable %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/basic_input.cpp b/sycl/test-e2e/SYCLBIN/basic_input.cpp index 015c1cccddc75..945a322250b2d 100644 --- a/sycl/test-e2e/SYCLBIN/basic_input.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_input.cpp @@ -11,6 +11,10 @@ // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input // -- state. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/basic_object.cpp b/sycl/test-e2e/SYCLBIN/basic_object.cpp index 9f68ab439628c..6449790abed74 100644 --- a/sycl/test-e2e/SYCLBIN/basic_object.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_object.cpp @@ -11,6 +11,10 @@ // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object // -- state. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_input.cpp b/sycl/test-e2e/SYCLBIN/link_input.cpp index c7d4594fc84f6..7a66b80b3b668 100644 --- a/sycl/test-e2e/SYCLBIN/link_input.cpp +++ b/sycl/test-e2e/SYCLBIN/link_input.cpp @@ -10,6 +10,10 @@ // -- Test for linking two SYCLBIN kernel_bundle. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/SYCLBIN/link_object.cpp b/sycl/test-e2e/SYCLBIN/link_object.cpp index ca3c36685f656..70923cebdd09d 100644 --- a/sycl/test-e2e/SYCLBIN/link_object.cpp +++ b/sycl/test-e2e/SYCLBIN/link_object.cpp @@ -10,6 +10,10 @@ // -- Test for linking two SYCLBIN kernel_bundle. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp index 086ea2cc4528c..87561cb1a51d3 100644 --- a/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp +++ b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp @@ -12,6 +12,10 @@ // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // -- Test for linking where one kernel is runtime-compiled and one is compiled // -- to SYCLBIN. diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp index 6b9dc29a77694..0b28f106a99f9 100644 --- a/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp +++ b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp @@ -12,6 +12,10 @@ // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // -- Test for linking where one kernel is runtime-compiled and one is compiled // -- to SYCLBIN. diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp index d8034e2384f15..1616a0526fecd 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp @@ -11,6 +11,10 @@ // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=executable %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp index b0404adf1fdc2..6249db9918f98 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp @@ -11,6 +11,10 @@ // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp index c30bc1623c31c..208ac894137ad 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -12,6 +12,10 @@ // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. +// Fails for CUDA target due to new offload driver regression. +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 + // RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin From bffcbc87328d750e1f2db00be898b0ee929bb041 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 17 Jun 2025 07:39:58 -0700 Subject: [PATCH 19/25] Change requirement to avoid building for nvptx for now Signed-off-by: Larsen, Steffen --- sycl/test-e2e/SYCLBIN/basic_executable.cpp | 9 +++++---- sycl/test-e2e/SYCLBIN/basic_input.cpp | 9 +++++---- sycl/test-e2e/SYCLBIN/basic_object.cpp | 9 +++++---- sycl/test-e2e/SYCLBIN/link_input.cpp | 11 ++++++----- sycl/test-e2e/SYCLBIN/link_object.cpp | 11 ++++++----- sycl/test-e2e/SYCLBIN/link_rtc_input.cpp | 11 ++++++----- sycl/test-e2e/SYCLBIN/link_rtc_object.cpp | 11 ++++++----- .../SYCLBIN/optional_kernel_features_executable.cpp | 9 +++++---- .../SYCLBIN/optional_kernel_features_input.cpp | 9 +++++---- .../SYCLBIN/optional_kernel_features_object.cpp | 9 +++++---- 10 files changed, 54 insertions(+), 44 deletions(-) diff --git a/sycl/test-e2e/SYCLBIN/basic_executable.cpp b/sycl/test-e2e/SYCLBIN/basic_executable.cpp index 4fd15c6c9612c..ad106d6fe9403 100644 --- a/sycl/test-e2e/SYCLBIN/basic_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_executable.cpp @@ -11,11 +11,12 @@ // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in executable // -- state. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=executable %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/basic_input.cpp b/sycl/test-e2e/SYCLBIN/basic_input.cpp index 945a322250b2d..645bf0b2dd0a6 100644 --- a/sycl/test-e2e/SYCLBIN/basic_input.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_input.cpp @@ -11,11 +11,12 @@ // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input // -- state. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/basic_object.cpp b/sycl/test-e2e/SYCLBIN/basic_object.cpp index 6449790abed74..02c1a72e192fd 100644 --- a/sycl/test-e2e/SYCLBIN/basic_object.cpp +++ b/sycl/test-e2e/SYCLBIN/basic_object.cpp @@ -11,11 +11,12 @@ // -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object // -- state. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/basic_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_input.cpp b/sycl/test-e2e/SYCLBIN/link_input.cpp index 7a66b80b3b668..2fc3893ad7c6e 100644 --- a/sycl/test-e2e/SYCLBIN/link_input.cpp +++ b/sycl/test-e2e/SYCLBIN/link_input.cpp @@ -10,12 +10,13 @@ // -- Test for linking two SYCLBIN kernel_bundle. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.export.syclbin %t.import.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_object.cpp b/sycl/test-e2e/SYCLBIN/link_object.cpp index 70923cebdd09d..d301a48d3ab59 100644 --- a/sycl/test-e2e/SYCLBIN/link_object.cpp +++ b/sycl/test-e2e/SYCLBIN/link_object.cpp @@ -10,12 +10,13 @@ // -- Test for linking two SYCLBIN kernel_bundle. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.export.syclbin %t.import.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp index 87561cb1a51d3..b10dc73d9014e 100644 --- a/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp +++ b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp @@ -12,14 +12,15 @@ // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 - // -- Test for linking where one kernel is runtime-compiled and one is compiled // -- to SYCLBIN. -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir + +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp index 0b28f106a99f9..cd86a6c0e6fde 100644 --- a/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp +++ b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp @@ -12,14 +12,15 @@ // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 - // -- Test for linking where one kernel is runtime-compiled and one is compiled // -- to SYCLBIN. -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir + +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp index 1616a0526fecd..ba4faaa2d78b0 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp @@ -11,11 +11,12 @@ // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=executable %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp index 6249db9918f98..b87ef87052afa 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp @@ -11,11 +11,12 @@ // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=input %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp index 208ac894137ad..c1938475df267 100644 --- a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -12,11 +12,12 @@ // -- Test for compiling and loading a kernel bundle with a SYCLBIN containing // the use of optional kernel features. -// Fails for CUDA target due to new offload driver regression. -// UNSUPPORTED: cuda -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/18432 +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir -// RUN: %clangxx %{sycl_target_opts} --offload-new-driver -fsyclbin=object %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/optional_kernel_features.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin From d7ad2b7b10adcd607044a620dff56e7260aa5d74 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 17 Jun 2025 23:24:41 -0700 Subject: [PATCH 20/25] Fix wrong size timing and image ID Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_binary_image.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 1fea167645372..cc36dac033079 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -212,7 +212,8 @@ RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) { std::atomic RTDeviceBinaryImage::ImageCounter = 1; -DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() { +DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() + : RTDeviceBinaryImage(nullptr) { Bin = new sycl_device_binary_struct(); Bin->Version = SYCL_DEVICE_BINARY_VERSION; Bin->Kind = SYCL_DEVICE_BINARY_OFFLOAD_KIND_SYCL; @@ -685,12 +686,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( #ifdef SYCL_RT_ZSTD_AVAILABLE CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) - : RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) { - // Get the decompressed size of the binary image. - m_ImageSize = ZSTDCompressor::GetDecompressedSize( - reinterpret_cast(Bin->BinaryStart), - static_cast(Bin->BinaryEnd - Bin->BinaryStart)); -} + : m_ImageSize(ZSTDCompressor::GetDecompressedSize( + reinterpret_cast(CompressedBin->BinaryStart), + static_cast(CompressedBin->BinaryEnd - + CompressedBin->BinaryStart))), + RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) {} void CompressedRTDeviceBinaryImage::Decompress() { From f2b7fea0be44ea1cba2b2bff76aeb6ca7a1e2b10 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 17 Jun 2025 23:49:37 -0700 Subject: [PATCH 21/25] Lazily init compressed size Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_binary_image.cpp | 17 ++++++++++++----- sycl/source/detail/device_binary_image.hpp | 7 ++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index cc36dac033079..5fec3de5fe4df 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -686,11 +686,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( #ifdef SYCL_RT_ZSTD_AVAILABLE CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) - : m_ImageSize(ZSTDCompressor::GetDecompressedSize( - reinterpret_cast(CompressedBin->BinaryStart), - static_cast(CompressedBin->BinaryEnd - - CompressedBin->BinaryStart))), - RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) {} + : RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) {} void CompressedRTDeviceBinaryImage::Decompress() { @@ -715,6 +711,17 @@ CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() { delete Bin; Bin = nullptr; } + +size_t CompressedRTDeviceBinaryImage::getSize() { + assert(Bin && "binary image data not set"); + // Lazily calculate the image size first time it is needed. + if (!m_ImageSize) + m_ImageSize = ZSTDCompressor::GetDecompressedSize( + reinterpret_cast(CompressedBin->BinaryStart), + static_cast(CompressedBin->BinaryEnd - + CompressedBin->BinaryStart)); + return *m_ImageSize; +} #endif // SYCL_RT_ZSTD_AVAILABLE } // namespace detail diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index e34a474ad1ab5..9b6e5047be936 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -316,10 +316,7 @@ class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { void Decompress(); // We return the size of decompressed data, not the size of compressed data. - size_t getSize() const override { - assert(Bin && "binary image data not set"); - return m_ImageSize; - } + size_t getSize() const override; bool IsCompressed() const { return m_DecompressedData.get() == nullptr; } void print() const override { @@ -329,7 +326,7 @@ class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { private: std::unique_ptr m_DecompressedData; - size_t m_ImageSize; + std::optional m_ImageSize = std::nullopt; }; #endif // SYCL_RT_ZSTD_AVAILABLE From ab7a19af5341fcdfad649f40d127b6910f2cbc59 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 17 Jun 2025 23:56:33 -0700 Subject: [PATCH 22/25] Fix def Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_binary_image.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 5fec3de5fe4df..f9d208c5def0b 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -712,14 +712,13 @@ CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() { Bin = nullptr; } -size_t CompressedRTDeviceBinaryImage::getSize() { +size_t CompressedRTDeviceBinaryImage::getSize() const { assert(Bin && "binary image data not set"); // Lazily calculate the image size first time it is needed. if (!m_ImageSize) m_ImageSize = ZSTDCompressor::GetDecompressedSize( - reinterpret_cast(CompressedBin->BinaryStart), - static_cast(CompressedBin->BinaryEnd - - CompressedBin->BinaryStart)); + reinterpret_cast(Bin->BinaryStart), + static_cast(Bin->BinaryEnd - Bin->BinaryStart)); return *m_ImageSize; } #endif // SYCL_RT_ZSTD_AVAILABLE From 8d96fb30f9195723c75fa74a2f640b9b3fcd9031 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 18 Jun 2025 00:10:16 -0700 Subject: [PATCH 23/25] Mutable image size Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_binary_image.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 9b6e5047be936..ab0a357c285d0 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -326,7 +326,9 @@ class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { private: std::unique_ptr m_DecompressedData; - std::optional m_ImageSize = std::nullopt; + // m_ImageSize is lazily initialized in getSize to properly answer the query + // in the base ctor. + mutable std::optional m_ImageSize = std::nullopt; }; #endif // SYCL_RT_ZSTD_AVAILABLE From abc3a2bbbefd164af2e98d4085c3f7bb2457bf46 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 18 Jun 2025 01:58:14 -0700 Subject: [PATCH 24/25] Revert lazy image size change Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_binary_image.cpp | 17 ++++++----------- sycl/source/detail/device_binary_image.hpp | 9 +++++---- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index f9d208c5def0b..778b67391e99a 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -686,7 +686,12 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( #ifdef SYCL_RT_ZSTD_AVAILABLE CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) - : RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) {} + : RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) { + // Get the decompressed size of the binary image. + m_ImageSize = ZSTDCompressor::GetDecompressedSize( + reinterpret_cast(Bin->BinaryStart), + static_cast(Bin->BinaryEnd - Bin->BinaryStart)); +} void CompressedRTDeviceBinaryImage::Decompress() { @@ -711,16 +716,6 @@ CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() { delete Bin; Bin = nullptr; } - -size_t CompressedRTDeviceBinaryImage::getSize() const { - assert(Bin && "binary image data not set"); - // Lazily calculate the image size first time it is needed. - if (!m_ImageSize) - m_ImageSize = ZSTDCompressor::GetDecompressedSize( - reinterpret_cast(Bin->BinaryStart), - static_cast(Bin->BinaryEnd - Bin->BinaryStart)); - return *m_ImageSize; -} #endif // SYCL_RT_ZSTD_AVAILABLE } // namespace detail diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index ab0a357c285d0..db433ea01a407 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -316,7 +316,10 @@ class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { void Decompress(); // We return the size of decompressed data, not the size of compressed data. - size_t getSize() const override; + size_t getSize() const override { + assert(Bin && "binary image data not set"); + return m_ImageSize; + } bool IsCompressed() const { return m_DecompressedData.get() == nullptr; } void print() const override { @@ -326,9 +329,7 @@ class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { private: std::unique_ptr m_DecompressedData; - // m_ImageSize is lazily initialized in getSize to properly answer the query - // in the base ctor. - mutable std::optional m_ImageSize = std::nullopt; + size_t m_ImageSize = 0; }; #endif // SYCL_RT_ZSTD_AVAILABLE From b248a6f7e754e08e8650c22a35b5137238e7d242 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 24 Jun 2025 22:58:20 -0700 Subject: [PATCH 25/25] Address comments Signed-off-by: Larsen, Steffen --- .../sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp | 5 ++--- sycl/source/detail/syclbin.cpp | 4 ---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp index f258156d7bb99..6e33c33c0ed75 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp @@ -58,9 +58,8 @@ get_kernel_bundle(const context &Ctxt, const std::vector &Devs, { std::ifstream FileStream{Filename, std::ios::binary}; if (!FileStream.is_open()) - throw sycl::exception(make_error_code(errc::invalid), - "Failed to open SYCLBIN file: " + - Filename.string()); + throw std::ios_base::failure("Failed to open SYCLBIN file: " + + Filename.string()); RawSYCLBINData = std::vector{std::istreambuf_iterator(FileStream), std::istreambuf_iterator()}; diff --git a/sycl/source/detail/syclbin.cpp b/sycl/source/detail/syclbin.cpp index 3cef5a2785a03..48d323626c314 100644 --- a/sycl/source/detail/syclbin.cpp +++ b/sycl/source/detail/syclbin.cpp @@ -100,10 +100,6 @@ std::pair getImageInOffloadBinary(const char *Data, throw sycl::exception(make_error_code(errc::invalid), "Unsupported Offload Binary version number."); - if (Header->Version != 1) - throw sycl::exception(make_error_code(errc::invalid), - "Unsupported Offload Binary version number."); - if (Header->EntrySize != sizeof(OffloadBinaryEntryType)) throw sycl::exception(make_error_code(errc::invalid), "Unexpected number of offload entries.");