diff --git a/include/ur_api.h b/include/ur_api.h index 5c9c7af5da..25295aef92 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -1530,6 +1530,10 @@ typedef enum ur_device_info_t { ///< version than older devices. UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT = 114, ///< [::ur_bool_t] return true if the device supports virtual memory. UR_DEVICE_INFO_ESIMD_SUPPORT = 115, ///< [::ur_bool_t] return true if the device supports ESIMD. + UR_DEVICE_INFO_COMPONENT_DEVICES = 116, ///< [::ur_device_handle_t[]] The set of component devices contained by + ///< this composite device. + UR_DEVICE_INFO_COMPOSITE_DEVICE = 117, ///< [::ur_device_handle_t] The composite device containing this component + ///< device. UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of ///< bindless images UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001, ///< [::ur_bool_t] returns true if the device supports the creation of diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 70e5b9886d..791ed02a82 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -2401,6 +2401,12 @@ inline std::ostream &operator<<(std::ostream &os, ur_device_info_t value) { case UR_DEVICE_INFO_ESIMD_SUPPORT: os << "UR_DEVICE_INFO_ESIMD_SUPPORT"; break; + case UR_DEVICE_INFO_COMPONENT_DEVICES: + os << "UR_DEVICE_INFO_COMPONENT_DEVICES"; + break; + case UR_DEVICE_INFO_COMPOSITE_DEVICE: + os << "UR_DEVICE_INFO_COMPOSITE_DEVICE"; + break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP"; break; @@ -3809,6 +3815,34 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_COMPONENT_DEVICES: { + + const ur_device_handle_t *tptr = (const ur_device_handle_t *)ptr; + os << "{"; + size_t nelems = size / sizeof(ur_device_handle_t); + for (size_t i = 0; i < nelems; ++i) { + if (i != 0) { + os << ", "; + } + + 
ur::details::printPtr(os, + tptr[i]); + } + os << "}"; + } break; + case UR_DEVICE_INFO_COMPOSITE_DEVICE: { + const ur_device_handle_t *tptr = (const ur_device_handle_t *)ptr; + if (sizeof(ur_device_handle_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + ur::details::printPtr(os, + *tptr); + + os << ")"; + } break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { diff --git a/scripts/core/device.yml b/scripts/core/device.yml index 3999fa70f2..61b004a4d0 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -396,6 +396,10 @@ etors: desc: "[$x_bool_t] return true if the device supports virtual memory." - name: ESIMD_SUPPORT desc: "[$x_bool_t] return true if the device supports ESIMD." + - name: COMPONENT_DEVICES + desc: "[$x_device_handle_t[]] The set of component devices contained by this composite device." + - name: COMPOSITE_DEVICE + desc: "[$x_device_handle_t] The composite device containing this component device." --- #-------------------------------------------------------------------------- type: function desc: "Retrieves various information about device" diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index acea59e1a1..49feced282 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1022,6 +1022,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(true); case UR_DEVICE_INFO_ESIMD_SUPPORT: return ReturnValue(false); + case UR_DEVICE_INFO_COMPONENT_DEVICES: + case UR_DEVICE_INFO_COMPOSITE_DEVICE: + // These two are exclusive to L0.
+ return ReturnValue(0); case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: case UR_DEVICE_INFO_GPU_EU_COUNT: case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index e40470f9aa..08d4e87ae4 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -823,6 +823,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(false); case UR_DEVICE_INFO_ESIMD_SUPPORT: return ReturnValue(false); + case UR_DEVICE_INFO_COMPONENT_DEVICES: + case UR_DEVICE_INFO_COMPOSITE_DEVICE: + // These two are exclusive to L0. + return ReturnValue(0); // TODO: Investigate if this information is available on HIP. case UR_DEVICE_INFO_GPU_EU_COUNT: diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 0b8e12c67a..fcff4b662c 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -41,6 +41,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( // Filter available devices based on input DeviceType. std::vector MatchedDevices; std::shared_lock Lock(Platform->URDevicesCacheMutex); + // We need to filter out composite devices when + // ZE_FLAT_DEVICE_HIERARCHY=COMBINED. We can know if we are in combined + // mode depending on the return value of zeDeviceGetRootDevice: + // - If COMPOSITE, L0 returns cards as devices. Since we filter out + // subdevices early, zeDeviceGetRootDevice must return nullptr, because we + // only query for root-devices and they don't have any device higher up in + // the hierarchy. + // - If FLAT, according to L0 spec, zeDeviceGetRootDevice always returns + // nullptr in this mode. + // - If COMBINED, L0 returns tiles as devices, and zeDeviceGetRootDevice + // returns the card containing a given tile.
+ bool isCombinedMode = + std::any_of(Platform->URDevicesCache.begin(), + Platform->URDevicesCache.end(), [](const auto &D) { + if (D->isSubDevice()) + return false; + ze_device_handle_t RootDev = nullptr; + // Query Root Device for root-devices. + // We cannot use ZE2UR_CALL because under some circumstances + // this call may return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, + // and ZE2UR_CALL will abort because it's not + // UR_RESULT_SUCCESS. Instead, we use ZE_CALL_NOCHECK and we + // check manually that the result is either + // ZE_RESULT_SUCCESS or ZE_RESULT_ERROR_UNSUPPORTED_FEATURE. + auto errc = ZE_CALL_NOCHECK(zeDeviceGetRootDevice, + (D->ZeDevice, &RootDev)); + return (errc == ZE_RESULT_SUCCESS && RootDev != nullptr); + }); for (auto &D : Platform->URDevicesCache) { // Only ever return root-devices from urDeviceGet, but the // devices cache also keeps sub-devices. @@ -70,8 +98,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( urPrint("Unknown device type"); break; } - if (Matched) - MatchedDevices.push_back(D.get()); + + if (Matched) { + bool isComposite = + isCombinedMode && (D->ZeDeviceProperties->flags & + ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) == 0; + if (!isComposite) + MatchedDevices.push_back(D.get()); + } } uint32_t ZeDeviceCount = MatchedDevices.size(); @@ -825,6 +859,62 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(result); } + case UR_DEVICE_INFO_COMPONENT_DEVICES: { + ze_device_handle_t DevHandle = Device->ZeDevice; + uint32_t SubDeviceCount = 0; + // First call to get SubDeviceCount. + ZE2UR_CALL(zeDeviceGetSubDevices, (DevHandle, &SubDeviceCount, nullptr)); + if (SubDeviceCount == 0) + return ReturnValue(0); + + std::vector SubDevs(SubDeviceCount); + // Second call to get the actual list of devices. 
+ ZE2UR_CALL(zeDeviceGetSubDevices, + (DevHandle, &SubDeviceCount, SubDevs.data())); + + size_t SubDeviceCount_s{SubDeviceCount}; + auto ResSize = + std::min(SubDeviceCount_s, propSize / sizeof(ur_device_handle_t)); + std::vector Res; + for (const auto &d : SubDevs) { + // We can only reach this code if ZE_FLAT_DEVICE_HIERARCHY != FLAT, + // because in flat mode we directly get tiles, and those don't have any + // further divisions, so zeDeviceGetSubDevices will always return an empty + // list. Thus, only two options remain: (a) composite mode, and (b) + // combined mode. In (b), zeDeviceGet returns tiles as devices, and those + // are presented as root devices (i.e. isSubDevice() returns false). In + // contrast, in (a), zeDeviceGet returns cards as devices, so tiles are + // not root devices (i.e. isSubDevice() returns true). Since we only reach + // this code if there are tiles returned by zeDeviceGetSubDevices, we + // can know if we are in (a) or (b) by checking if a tile is root device + // or not. + ur_device_handle_t URDev = Device->Platform->getDeviceFromNativeHandle(d); + if (URDev->isSubDevice()) + // We are in COMPOSITE mode, return an empty list. + return ReturnValue(0); + + Res.push_back(URDev); + } + if (pSize) + *pSize = SubDeviceCount * sizeof(ur_device_handle_t); + if (ParamValue) { + return ReturnValue(Res.data(), ResSize); + } + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_COMPOSITE_DEVICE: { + ur_device_handle_t UrRootDev = nullptr; + ze_device_handle_t DevHandle = Device->ZeDevice; + ze_device_handle_t RootDev; + // Query Root Device.
+ auto errc = ZE_CALL_NOCHECK(zeDeviceGetRootDevice, (DevHandle, &RootDev)); + UrRootDev = Device->Platform->getDeviceFromNativeHandle(RootDev); + if (errc != ZE_RESULT_SUCCESS && + errc != ZE_RESULT_ERROR_UNSUPPORTED_FEATURE) + return ze2urResult(errc); + return ReturnValue(UrRootDev); + } + default: urPrint("Unsupported ParamName in urGetDeviceInfo\n"); urPrint("ParamName=%d(0x%x)\n", ParamName, ParamName); diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 335a920294..6dbfaf574c 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -418,6 +418,35 @@ ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() { } delete[] ZeSubdevices; + // When using ZE_FLAT_DEVICE_HIERARCHY=COMBINED, zeDeviceGet will + // return tiles as devices, but we can get the card device handle + // through zeDeviceGetRootDevice. We need to cache the card device + // handle too, such that it is readily visible to the + // urDeviceCreateWithNativeHandle. + ze_device_handle_t RootDevice = nullptr; + // We cannot use ZE2UR_CALL because under some circumstances this call may + // return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, and ZE2UR_CALL will abort + // because it's not UR_RESULT_SUCCESS. Instead, we use ZE_CALL_NOCHECK and + // we check manually that the result is either ZE_RESULT_SUCCESS or + // ZE_RESULT_ERROR_UNSUPPORTED_FEATURE. 
+ auto errc = ZE_CALL_NOCHECK(zeDeviceGetRootDevice, + (Device->ZeDevice, &RootDevice)); + if (errc != ZE_RESULT_SUCCESS && + errc != ZE_RESULT_ERROR_UNSUPPORTED_FEATURE) + return ze2urResult(errc); + + if (RootDevice) { + if (std::find_if(URDevicesCache.begin(), URDevicesCache.end(), + [&](auto &Dev) { + return Dev->ZeDevice == RootDevice; + }) == URDevicesCache.end()) { + std::unique_ptr UrRootDevice( + new ur_device_handle_t_(RootDevice, (ur_platform_handle_t)this)); + UR_CALL(UrRootDevice->initialize()); + URDevicesCache.push_back(std::move(UrRootDevice)); + } + } + // Save the root device in the cache for future uses. URDevicesCache.push_back(std::move(Device)); } diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 3432ce780e..68dafdfc1c 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -300,6 +300,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_ESIMD_SUPPORT: return ReturnValue(false); + case UR_DEVICE_INFO_COMPONENT_DEVICES: + case UR_DEVICE_INFO_COMPOSITE_DEVICE: + // These two are exclusive to L0. + return ReturnValue(0); CASE_UR_UNSUPPORTED(UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH); case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 27577eab39..5b0d5332db 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -923,6 +923,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } return ReturnValue(SupportedExtensions.c_str()); } + case UR_DEVICE_INFO_COMPONENT_DEVICES: + case UR_DEVICE_INFO_COMPOSITE_DEVICE: + // These two are exclusive to L0. + return ReturnValue(0); /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU * EU device-specific information extensions.
Some of the queries are * enabled by cl_intel_device_attribute_query extension, but it's not yet in diff --git a/test/conformance/device/urDeviceGetInfo.cpp b/test/conformance/device/urDeviceGetInfo.cpp index 757e09b6fa..d52cff2683 100644 --- a/test/conformance/device/urDeviceGetInfo.cpp +++ b/test/conformance/device/urDeviceGetInfo.cpp @@ -112,6 +112,8 @@ static std::unordered_map device_info_size_map = { {UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT, sizeof(ur_bool_t)}, {UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED, sizeof(ur_bool_t)}, {UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP, sizeof(uint32_t)}, + {UR_DEVICE_INFO_COMPONENT_DEVICES, sizeof(uint32_t)}, + {UR_DEVICE_INFO_COMPOSITE_DEVICE, sizeof(ur_device_handle_t)}, }; struct urDeviceGetInfoTest : uur::urAllDevicesTest, diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index d9677c3eab..ff024978ca 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -323,6 +323,12 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_ESIMD_SUPPORT); std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_COMPONENT_DEVICES); + std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_COMPOSITE_DEVICE); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP); std::cout << prefix;