diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 77c7f5c610f47..5b22f0cf7f89e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1254,6 +1254,26 @@ void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image, } } +const char *getArchName(const device &Device) { + namespace syclex = sycl::ext::oneapi::experimental; + auto Arch = Device.get_info(); + switch (Arch) { +#define __SYCL_ARCHITECTURE(ARCH, VAL) \ + case syclex::architecture::ARCH: \ + return #ARCH; +#define __SYCL_ARCHITECTURE_ALIAS(ARCH, VAL) +#include +#undef __SYCL_ARCHITECTURE +#undef __SYCL_ARCHITECTURE_ALIAS + } + return "unknown"; +} + +sycl_device_binary getRawImg(RTDeviceBinaryImage *Img) { + return reinterpret_cast( + const_cast(&Img->getRawData())); +} + template RTDeviceBinaryImage *getBinImageFromMultiMap( const std::unordered_multimap &ImagesSet, @@ -1262,16 +1282,51 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( if (ItBegin == ItEnd) return nullptr; - std::vector RawImgs(std::distance(ItBegin, ItEnd)); - auto It = ItBegin; - for (unsigned I = 0; It != ItEnd; ++It, ++I) - RawImgs[I] = reinterpret_cast( - const_cast(&It->second->getRawData())); + // Here, we aim to select all the device images from the + // [ItBegin, ItEnd) range that are AOT compiled for Device + // (checked using info::device::architecture) or JIT compiled. + // This selection will then be passed to urDeviceSelectBinary + // for final selection. + std::string_view ArchName = getArchName(Device); + std::vector DeviceFilteredImgs; + DeviceFilteredImgs.reserve(std::distance(ItBegin, ItEnd)); + for (auto It = ItBegin; It != ItEnd; ++It) { + auto PropRange = It->second->getDeviceRequirements(); + auto PropIt = + std::find_if(PropRange.begin(), PropRange.end(), [&](const auto &Prop) { + return Prop->Name == std::string_view("compile_target"); + }); + auto AddImg = [&]() { DeviceFilteredImgs.push_back(It->second); }; - std::vector UrBinaries(RawImgs.size()); - for (uint32_t BinaryCount = 0; BinaryCount < RawImgs.size(); BinaryCount++) { - UrBinaries[BinaryCount].pDeviceTargetSpec = - getUrDeviceTarget(RawImgs[BinaryCount]->DeviceTargetSpec); + // Device image has no compile_target property, so it is JIT compiled. + if (PropIt == PropRange.end()) { + AddImg(); + continue; + } + + // Device image has the compile_target property, so it is AOT compiled for + // some device, check if that architecture is Device's architecture. + auto CompileTargetByteArray = DeviceBinaryProperty(*PropIt).asByteArray(); + CompileTargetByteArray.dropBytes(8); + std::string_view CompileTarget( + reinterpret_cast(&CompileTargetByteArray[0]), + CompileTargetByteArray.size()); + // Note: there are no explicit targets for CPUs, so on x86_64, + // so we use a spir64_x86_64 compile target image. + if ((ArchName == CompileTarget) || + (ArchName == "x86_64" && CompileTarget == "spir64_x86_64")) { + AddImg(); + } + } + + if (DeviceFilteredImgs.empty()) + return nullptr; + + std::vector UrBinaries(DeviceFilteredImgs.size()); + for (uint32_t BinaryCount = 0; BinaryCount < DeviceFilteredImgs.size(); + BinaryCount++) { + UrBinaries[BinaryCount].pDeviceTargetSpec = getUrDeviceTarget( + getRawImg(DeviceFilteredImgs[BinaryCount])->DeviceTargetSpec); } uint32_t ImgInd = 0; @@ -1280,8 +1335,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( getSyclObjImpl(Context)->getPlugin()->call( urDeviceSelectBinary, getSyclObjImpl(Device)->getHandleRef(), UrBinaries.data(), UrBinaries.size(), &ImgInd); - std::advance(ItBegin, ImgInd); - return ItBegin->second; + return DeviceFilteredImgs[ImgInd]; } RTDeviceBinaryImage & @@ -1310,10 +1364,8 @@ ProgramManager::getDeviceImage(const std::string &KernelName, std::lock_guard KernelIDsGuard(m_KernelIDsMutex); if (auto KernelId = m_KernelName2KernelIDs.find(KernelName); KernelId != m_KernelName2KernelIDs.end()) { - // Kernel ID presence guarantees that we have bin image in the storage. Img = getBinImageFromMultiMap(m_KernelIDs2BinImage, KernelId->second, Context, Device); - assert(Img && "No binary image found for kernel id"); } else { Img = getBinImageFromMultiMap(m_ServiceKernels, KernelName, Context, Device); diff --git a/sycl/unittests/helpers/UrImage.hpp b/sycl/unittests/helpers/UrImage.hpp index 566817d7ef375..4a5d309d6b33f 100644 --- a/sycl/unittests/helpers/UrImage.hpp +++ b/sycl/unittests/helpers/UrImage.hpp @@ -541,7 +541,14 @@ inline UrImage generateDefaultImage(std::initializer_list KernelNames) { UrPropertySet PropSet; - std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data + std::string Combined; + for (auto it = KernelNames.begin(); it != KernelNames.end(); ++it) { + if (it != KernelNames.begin()) + Combined += ", "; + Combined += *it; + } + std::vector Bin(Combined.begin(), Combined.end()); + Bin.push_back(0); UrArray Entries = makeEmptyKernels(KernelNames); diff --git a/sycl/unittests/program_manager/CMakeLists.txt b/sycl/unittests/program_manager/CMakeLists.txt index 21c001da2ce3f..3d706b959f827 100644 --- a/sycl/unittests/program_manager/CMakeLists.txt +++ b/sycl/unittests/program_manager/CMakeLists.txt @@ -1,5 +1,6 @@ set(CMAKE_CXX_EXTENSIONS OFF) add_sycl_unittest(ProgramManagerTests OBJECT + CompileTarget.cpp BuildLog.cpp DynamicLinking.cpp itt_annotations.cpp diff --git a/sycl/unittests/program_manager/CompileTarget.cpp b/sycl/unittests/program_manager/CompileTarget.cpp new file mode 100644 index 0000000000000..138ec8ed87c23 --- /dev/null +++ b/sycl/unittests/program_manager/CompileTarget.cpp @@ -0,0 +1,305 @@ +//==------------- CompileTarget.cpp --- CompileTarget unit test ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SYCL2020_DISABLE_DEPRECATION_WARNINGS + +#include +#include +#include + +#include + +using namespace sycl; + +namespace sycl::_V1::unittest { +static inline UrImage +generateImageWithCompileTarget(std::string KernelName, + std::string CompileTarget) { + std::vector Data(8 + CompileTarget.size()); + std::copy(CompileTarget.begin(), CompileTarget.end(), Data.data() + 8); + UrProperty CompileTargetProperty("compile_target", Data, + SYCL_PROPERTY_TYPE_BYTE_ARRAY); + UrPropertySet PropSet; + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS, + {CompileTargetProperty}); + + std::vector Bin(CompileTarget.begin(), CompileTarget.end()); + // Null terminate the data so it can be interpreted as c string. + Bin.push_back(0); + + UrArray Entries = makeEmptyKernels({KernelName}); + + auto DeviceTargetSpec = CompileTarget == "spir64_x86_64" + ? __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64 + : __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN; + + UrImage Img{SYCL_DEVICE_BINARY_TYPE_NATIVE, // Format + DeviceTargetSpec, // DeviceTargetSpec + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + + return Img; +} +} // namespace sycl::_V1::unittest + +class SingleTaskKernel; +class NDRangeKernel; +class RangeKernel; +class NoDeviceKernel; +class JITFallbackKernel; + +MOCK_INTEGRATION_HEADER(SingleTaskKernel) +MOCK_INTEGRATION_HEADER(NDRangeKernel) +MOCK_INTEGRATION_HEADER(RangeKernel) +MOCK_INTEGRATION_HEADER(NoDeviceKernel) +MOCK_INTEGRATION_HEADER(JITFallbackKernel) + +static sycl::unittest::UrImage Img[] = { + sycl::unittest::generateDefaultImage({"SingleTaskKernel"}), + sycl::unittest::generateImageWithCompileTarget("SingleTaskKernel", + "spir64_x86_64"), + sycl::unittest::generateImageWithCompileTarget("SingleTaskKernel", + "intel_gpu_pvc"), + sycl::unittest::generateImageWithCompileTarget("SingleTaskKernel", + "intel_gpu_skl"), + sycl::unittest::generateDefaultImage({"NDRangeKernel"}), + sycl::unittest::generateImageWithCompileTarget("NDRangeKernel", + "spir64_x86_64"), + sycl::unittest::generateImageWithCompileTarget("NDRangeKernel", + "intel_gpu_pvc"), + sycl::unittest::generateImageWithCompileTarget("NDRangeKernel", + "intel_gpu_skl"), + sycl::unittest::generateDefaultImage({"RangeKernel"}), + sycl::unittest::generateImageWithCompileTarget("RangeKernel", + "spir64_x86_64"), + sycl::unittest::generateImageWithCompileTarget("RangeKernel", + "intel_gpu_pvc"), + sycl::unittest::generateImageWithCompileTarget("RangeKernel", + "intel_gpu_skl"), + sycl::unittest::generateImageWithCompileTarget("NoDeviceKernel", + "intel_gpu_bdw"), + sycl::unittest::generateDefaultImage({"JITFallbackKernel"}), + sycl::unittest::generateImageWithCompileTarget("JITFallbackKernel", + "intel_gpu_bdw"), +}; + +static sycl::unittest::UrImageArray ImgArray{Img}; + +ur_device_handle_t MockSklDeviceHandle = + reinterpret_cast(1); +ur_device_handle_t MockPvcDeviceHandle = + reinterpret_cast(2); +ur_device_handle_t MockX86DeviceHandle = + reinterpret_cast(3); +constexpr int SklIp = 0x02400009; +constexpr int PvcIp = 0x030f0000; +constexpr int X86Ip = 0; + +ur_device_handle_t MockDevices[] = { + MockSklDeviceHandle, + MockPvcDeviceHandle, + MockX86DeviceHandle, +}; + +static ur_result_t redefinedDeviceGet(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) { + **params.ppNumDevices = static_cast(std::size(MockDevices)); + return UR_RESULT_SUCCESS; + } + + if (*params.pphDevices) { + assert(*params.pNumEntries <= std::size(MockDevices)); + for (uint32_t i = 0; i < *params.pNumEntries; ++i) { + (*params.pphDevices)[i] = MockDevices[i]; + } + } + + return UR_RESULT_SUCCESS; +} + +std::vector createWithBinaryLog; +static ur_result_t redefinedProgramCreateWithBinary(void *pParams) { + auto params = *static_cast(pParams); + createWithBinaryLog.push_back( + reinterpret_cast(*params.ppBinary)); + return UR_RESULT_SUCCESS; +} + +std::vector createWithILLog; +static ur_result_t redefinedProgramCreateWithIL(void *pParams) { + auto params = *static_cast(pParams); + createWithILLog.push_back(reinterpret_cast(*params.ppIL)); + return UR_RESULT_SUCCESS; +} + +static ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_IP_VERSION && *params.ppPropValue) { + int &ret = *static_cast(*params.ppPropValue); + if (*params.phDevice == MockSklDeviceHandle) + ret = SklIp; + if (*params.phDevice == MockPvcDeviceHandle) + ret = PvcIp; + if (*params.phDevice == MockX86DeviceHandle) + ret = X86Ip; + } + if (*params.ppropName == UR_DEVICE_INFO_TYPE && + *params.phDevice == MockX86DeviceHandle) { + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = + UR_DEVICE_TYPE_CPU; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(UR_DEVICE_TYPE_CPU); + } + return UR_RESULT_SUCCESS; +} + +static ur_result_t redefinedDeviceSelectBinary(void *pParams) { + auto params = *static_cast(pParams); + auto target = *params.phDevice == MockX86DeviceHandle + ? UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64 + : UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + uint32_t fallback = *params.pNumBinaries; + for (uint32_t i = 0; i < *params.pNumBinaries; ++i) { + if (strcmp((*params.ppBinaries)[i].pDeviceTargetSpec, target) == 0) { + **params.ppSelectedBinary = i; + return UR_RESULT_SUCCESS; + } + if (strcmp((*params.ppBinaries)[i].pDeviceTargetSpec, + UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) { + fallback = i; + } + } + if (fallback != *params.pNumBinaries) { + **params.ppSelectedBinary = fallback; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_BINARY; +} + +namespace syclex = sycl::ext::oneapi::experimental; +auto archSelector(syclex::architecture arch) { + return [=](const device &dev) { + if (dev.get_info() == arch) { + return 1; + } + return -1; + }; +} + +class CompileTargetTest : public testing::Test { +protected: + sycl::unittest::UrMock<> Mock; + CompileTargetTest() { + mock::getCallbacks().set_before_callback("urProgramCreateWithBinary", + &redefinedProgramCreateWithBinary); + mock::getCallbacks().set_before_callback("urProgramCreateWithIL", + &redefinedProgramCreateWithIL); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGet", &redefinedDeviceGet); + mock::getCallbacks().set_after_callback("urDeviceSelectBinary", + &redefinedDeviceSelectBinary); + } +}; + +template +void checkUsedImageWithCompileTarget(const char *compile_target, F &&f) { + createWithBinaryLog.clear(); + createWithILLog.clear(); + ASSERT_EQ(createWithBinaryLog.size(), 0U) << compile_target; + ASSERT_EQ(createWithILLog.size(), 0U) << compile_target; + f(); + EXPECT_EQ(createWithILLog.size(), 0U) << compile_target; + ASSERT_EQ(createWithBinaryLog.size(), 1U) << compile_target; + EXPECT_EQ(createWithBinaryLog.back(), compile_target) << compile_target; +} + +void launchSingleTaskKernel(queue q) { + q.single_task([]() {}); +} + +TEST_F(CompileTargetTest, SingleTask) { + checkUsedImageWithCompileTarget("intel_gpu_skl", [&]() { + launchSingleTaskKernel( + queue{archSelector(syclex::architecture::intel_gpu_skl)}); + }); + + checkUsedImageWithCompileTarget("intel_gpu_pvc", [&]() { + launchSingleTaskKernel( + queue{archSelector(syclex::architecture::intel_gpu_pvc)}); + }); + + checkUsedImageWithCompileTarget("spir64_x86_64", [&]() { + launchSingleTaskKernel(queue{archSelector(syclex::architecture::x86_64)}); + }); +} + +void launchNDRangeKernel(queue q) { + q.submit([&](handler &cgh) { + cgh.parallel_for(nd_range<1>(1, 1), [](auto) {}); + }); +} + +TEST_F(CompileTargetTest, NDRangeKernel) { + checkUsedImageWithCompileTarget("intel_gpu_skl", [&]() { + launchNDRangeKernel( + queue{archSelector(syclex::architecture::intel_gpu_skl)}); + }); + + checkUsedImageWithCompileTarget("intel_gpu_pvc", [&]() { + launchNDRangeKernel( + queue{archSelector(syclex::architecture::intel_gpu_pvc)}); + }); + + checkUsedImageWithCompileTarget("spir64_x86_64", [&]() { + launchNDRangeKernel(queue{archSelector(syclex::architecture::x86_64)}); + }); +} + +void launchRangeKernel(queue q) { + q.submit([&](handler &cgh) { + cgh.parallel_for(range<1>(1), [](auto) {}); + }); +} + +TEST_F(CompileTargetTest, RangeKernel) { + checkUsedImageWithCompileTarget("intel_gpu_skl", [&]() { + launchRangeKernel(queue{archSelector(syclex::architecture::intel_gpu_skl)}); + }); + + checkUsedImageWithCompileTarget("intel_gpu_pvc", [&]() { + launchRangeKernel(queue{archSelector(syclex::architecture::intel_gpu_pvc)}); + }); + + checkUsedImageWithCompileTarget("spir64_x86_64", [&]() { + launchRangeKernel(queue{archSelector(syclex::architecture::x86_64)}); + }); +} + +TEST_F(CompileTargetTest, NoDeviceKernel) { + try { + queue{}.single_task([]() {}); + } catch (sycl::exception &e) { + ASSERT_EQ(e.what(), + std::string("No kernel named NoDeviceKernel was found")); + } +} + +TEST_F(CompileTargetTest, JITFallbackKernel) { + createWithBinaryLog.clear(); + createWithILLog.clear(); + queue{}.single_task([]() {}); + EXPECT_EQ(createWithBinaryLog.size(), 0U); + ASSERT_EQ(createWithILLog.size(), 1U); + EXPECT_EQ(createWithILLog.back(), "JITFallbackKernel"); +} \ No newline at end of file