diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index bae361ef99f35..9b2a2570934fe 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -140,6 +140,9 @@ install(DIRECTORY ${OpenCL_INCLUDE_DIR}/CL DESTINATION ${SYCL_INCLUDE_DIR}/sycl COMPONENT OpenCL-Headers) +# Option to enable online kernel fusion via a JIT compiler +option(SYCL_ENABLE_KERNEL_FUSION "Enable kernel fusion via JIT compiler" OFF) + # Needed for feature_test.hpp if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) set(SYCL_BUILD_PI_CUDA ON) diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index 834fc0cbcc246..6c111d07678b9 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,9 @@ #include #include #include +#if SYCL_EXT_CODEPLAY_KERNEL_FUSION +#include +#endif // SYCL_EXT_CODEPLAY_KERNEL_FUSION #include @@ -887,6 +891,21 @@ class __SYCL_SPECIAL_CLASS accessor : return AdjustedMode; } + static detail::PromotionTarget + getPromotionTarget(const PropertyListT &PropertyList) { +#if SYCL_EXT_CODEPLAY_KERNEL_FUSION + if (PropertyList.template has_property< + ext::codeplay::property::promote_private>()) { + return detail::PromotionTarget::Private; + } + if (PropertyList + .template has_property()) { + return detail::PromotionTarget::Local; + } +#endif // SYCL_EXT_CODEPLAY_KERNEL_FUSION + return detail::PromotionTarget::None; + } + #if __cplusplus >= 201703L template static constexpr bool IsValidTag() { @@ -1025,7 +1044,8 @@ class __SYCL_SPECIAL_CLASS accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), AdjustedDim, sizeof(DataT), - BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { + BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -1056,7 +1076,8 @@ class __SYCL_SPECIAL_CLASS accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), AdjustedDim, sizeof(DataT), - BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { + BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -1086,7 +1107,8 @@ class __SYCL_SPECIAL_CLASS accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), - BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { + BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), @@ -1117,7 +1139,8 @@ class __SYCL_SPECIAL_CLASS accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), - BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { + BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), @@ -1147,7 +1170,8 @@ class __SYCL_SPECIAL_CLASS accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), - BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { + BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -1180,7 +1204,8 @@ class __SYCL_SPECIAL_CLASS accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), - BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { + BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -1243,7 +1268,8 @@ class __SYCL_SPECIAL_CLASS accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), - BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { + BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), @@ -1275,7 +1301,8 @@ class __SYCL_SPECIAL_CLASS accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), - BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { + BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), @@ -1461,7 +1488,8 @@ class __SYCL_SPECIAL_CLASS accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, - BufferRef.IsSubBuffer) { + BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) @@ -1502,7 +1530,8 @@ class __SYCL_SPECIAL_CLASS accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, - BufferRef.IsSubBuffer) { + BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) @@ -1574,7 +1603,8 @@ class __SYCL_SPECIAL_CLASS accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, - BufferRef.IsSubBuffer) { + BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) @@ -1614,7 +1644,8 @@ class __SYCL_SPECIAL_CLASS accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, - BufferRef.IsSubBuffer) { + BufferRef.IsSubBuffer, + getPromotionTarget(PropertyList)) { preScreenAccessor(BufferRef.size(), PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) @@ -2161,7 +2192,7 @@ class __SYCL_SPECIAL_CLASS accessor 0)>> accessor(range AllocationSize, handler &, - const property_list &propList, + const property_list &propList, const detail::code_location CodeLoc = detail::code_location::current()) #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/detail/accessor_impl.hpp b/sycl/include/sycl/detail/accessor_impl.hpp index 8dec9653035e1..d5e00993591e7 100644 --- a/sycl/include/sycl/detail/accessor_impl.hpp +++ b/sycl/include/sycl/detail/accessor_impl.hpp @@ -36,6 +36,8 @@ namespace detail { class Command; +enum class PromotionTarget { None, Private, Local }; + // The class describes a requirement to access a SYCL memory object such as // sycl::buffer and sycl::image. For example, each accessor used in a kernel, // except one with access target "local", adds such requirement for the command @@ -79,11 +81,14 @@ class __SYCL_EXPORT AccessorImplHost { AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, detail::SYCLMemObjI *SYCLMemObject, int Dims, int ElemSize, int OffsetInBytes = 0, - bool IsSubBuffer = false, bool IsESIMDAcc = false) + bool IsSubBuffer = false, + PromotionTarget Promotion = PromotionTarget::None, + bool IsESIMDAcc = false) : MOffset(Offset), MAccessRange(AccessRange), MMemoryRange(MemoryRange), MAccessMode(AccessMode), MSYCLMemObj(SYCLMemObject), MDims(Dims), MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes), - MIsSubBuffer(IsSubBuffer), MIsESIMDAcc(IsESIMDAcc) {} + MIsSubBuffer(IsSubBuffer), MPromotionTarget(Promotion), + MIsESIMDAcc(IsESIMDAcc) {} ~AccessorImplHost(); @@ -92,7 +97,9 @@ class __SYCL_EXPORT AccessorImplHost { MMemoryRange(Other.MMemoryRange), MAccessMode(Other.MAccessMode), MSYCLMemObj(Other.MSYCLMemObj), MDims(Other.MDims), MElemSize(Other.MElemSize), MOffsetInBytes(Other.MOffsetInBytes), - MIsSubBuffer(Other.MIsSubBuffer), MIsESIMDAcc(Other.MIsESIMDAcc) {} + MIsSubBuffer(Other.MIsSubBuffer), + MPromotionTarget(Other.MPromotionTarget), + MIsESIMDAcc(Other.MIsESIMDAcc) {} // The resize method provides a way to change the size of the // allocated memory and corresponding properties for the accessor. @@ -125,6 +132,8 @@ class __SYCL_EXPORT AccessorImplHost { bool PerWI = false; + PromotionTarget MPromotionTarget; + // Outdated, leaving to preserve ABI. // TODO: Remove during next major release. bool MIsESIMDAcc; @@ -137,10 +146,11 @@ class AccessorBaseHost { AccessorBaseHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, detail::SYCLMemObjI *SYCLMemObject, int Dims, int ElemSize, int OffsetInBytes = 0, - bool IsSubBuffer = false) { + bool IsSubBuffer = false, + PromotionTarget Promotion = PromotionTarget::None) { impl = std::shared_ptr(new AccessorImplHost( Offset, AccessRange, MemoryRange, AccessMode, SYCLMemObject, Dims, - ElemSize, OffsetInBytes, IsSubBuffer)); + ElemSize, OffsetInBytes, IsSubBuffer, Promotion)); } protected: diff --git a/sycl/include/sycl/detail/property_helper.hpp b/sycl/include/sycl/detail/property_helper.hpp index ff0f4aa8568b0..d0ee0b61654fa 100644 --- a/sycl/include/sycl/detail/property_helper.hpp +++ b/sycl/include/sycl/detail/property_helper.hpp @@ -35,8 +35,10 @@ enum DataLessPropKind { UseDefaultStream = 8, DiscardEvents = 9, DeviceReadOnly = 10, + FusionPromotePrivate = 11, + FusionPromoteLocal = 12, // Indicates the last known dataless property. - LastKnownDataLessPropKind = 10, + LastKnownDataLessPropKind = 12, // Exceeding 32 may cause ABI breaking change on some of OSes. DataLessPropKindSize = 32 }; diff --git a/sycl/include/sycl/ext/codeplay/fusion_properties.hpp b/sycl/include/sycl/ext/codeplay/fusion_properties.hpp new file mode 100644 index 0000000000000..91e239a9793c9 --- /dev/null +++ b/sycl/include/sycl/ext/codeplay/fusion_properties.hpp @@ -0,0 +1,77 @@ +//==----------- fusion_properties.hpp --- SYCL fusion properties -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace ext { +namespace codeplay { +namespace property { + +class promote_private + : public detail::DataLessProperty {}; + +class promote_local + : public detail::DataLessProperty {}; + +} // namespace property +} // namespace codeplay +} // namespace ext + +// Forward declarations +template +class buffer; + +template +class accessor; + +// Property trait specializations. +template <> +struct is_property : std::true_type { +}; + +template <> +struct is_property : std::true_type {}; + +// Buffer property trait specializations +template +struct is_property_of> + : std::true_type {}; + +template +struct is_property_of> + : std::true_type {}; + +// Accessor property trait specializations +template +struct is_property_of> : std::true_type { +}; + +template +struct is_property_of> : std::true_type { +}; + +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl diff --git a/sycl/include/sycl/feature_test.hpp.in b/sycl/include/sycl/feature_test.hpp.in index 5347ed5f49150..f8d241d45a697 100644 --- a/sycl/include/sycl/feature_test.hpp.in +++ b/sycl/include/sycl/feature_test.hpp.in @@ -83,6 +83,10 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { #if SYCL_BUILD_PI_HIP #define SYCL_EXT_ONEAPI_BACKEND_HIP 1 #endif +#cmakedefine01 SYCL_ENABLE_KERNEL_FUSION +#if SYCL_ENABLE_KERNEL_FUSION +#define SYCL_EXT_CODEPLAY_KERNEL_FUSION 1 +#endif } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/include/sycl/properties/all_properties.hpp b/sycl/include/sycl/properties/all_properties.hpp index 91bf7f0cf584e..828855225f3e2 100644 --- a/sycl/include/sycl/properties/all_properties.hpp +++ b/sycl/include/sycl/properties/all_properties.hpp @@ -4,3 +4,6 @@ #include #include #include +#if SYCL_EXT_CODEPLAY_KERNEL_FUSION +#include +#endif // SYCL_EXT_CODEPLAY_KERNEL_FUSION diff --git a/sycl/test/abi/layout_accessors_host.cpp b/sycl/test/abi/layout_accessors_host.cpp index eff5d7fde2516..07b074c2d1627 100644 --- a/sycl/test/abi/layout_accessors_host.cpp +++ b/sycl/test/abi/layout_accessors_host.cpp @@ -27,9 +27,10 @@ using namespace sycl; // CHECK-NEXT: 104 | void * MData // CHECK-NEXT: 112 | Command * MBlockedCmd // CHECK-NEXT: 120 | _Bool PerWI -// CHECK-NEXT: 121 | _Bool MIsESIMDAcc -// CHECK-NEXT: | [sizeof=128, dsize=122, align=8, -// CHECK-NEXT: | nvsize=122, nvalign=8] +// CHECK-NEXT: 124 | PromotionTarget MPromotionTarget +// CHECK-NEXT: 128 | _Bool MIsESIMDAcc +// CHECK-NEXT: | [sizeof=136, dsize=129, align=8, +// CHECK-NEXT: | nvsize=129, nvalign=8] // CHECK: 0 | class sycl::detail::LocalAccessorImplHost // CHECK-NEXT: 0 | class sycl::range<3> MSize diff --git a/sycl/test/abi/symbol_size_alignment.cpp b/sycl/test/abi/symbol_size_alignment.cpp index 3ef86e5acdc56..7f8a6e84c9fef 100644 --- a/sycl/test/abi/symbol_size_alignment.cpp +++ b/sycl/test/abi/symbol_size_alignment.cpp @@ -46,7 +46,7 @@ int main() { check(); check, 24, 8>(); check, 24, 8>(); - check(); + check(); check(); check(); check, 40, 8>();