diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index ae21006aa6093..62209d5efb2e2 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -55,25 +55,25 @@ subject to change. Do not rely on these variables in production code. This environment variable limits the SYCL RT to use only a subset of the system's devices. Setting this environment variable affects all of the device query functions (`platform::get_devices()` and `platform::get_platforms()`) and all of the device selectors. -The value of this environment variable is a comma separated list of filters, where each filter is a triple of the form "`backend:device_type:device_num`" (without the quotes). Each element of the triple is optional, but each filter must have at least one value. Possible values of "backend" are: -- host +The value of this environment variable is a comma separated list of filters, where each filter is a triple of the form "`backend`:`device_type`:`device_num`" (without the quotes). Each element of the triple is optional, but each filter must have at least one value. Possible values of `backend` are: +- `host` - `level_zero` -- opencl -- cuda -- \* +- `opencl` +- `cuda` +- `*` -Possible values of "`device_type`" are: -- host -- cpu -- gpu -- acc -- \* +Possible values of `device_type` are: +- `host` +- `cpu` +- `gpu` +- `acc` +- `*` -`Device_num` is an integer that indexes the enumeration of devices from the sycl-ls utility tool, where the first device in that enumeration has index zero in each backend. For example, `SYCL_DEVICE_FILTER`=2 will return all devices with index '2' from all different backends. If multiple devices satisfy this device number (e.g., GPU and CPU devices can be assigned device number '2'), then default_selector will choose the device with the highest heuristic point. +`device_num` is an integer that indexes the enumeration of devices from the sycl-ls utility tool, where the first device in that enumeration has index zero in each backend. For example, `SYCL_DEVICE_FILTER=2` will return all devices with index '2' from all different backends. If multiple devices satisfy this device number (e.g., GPU and CPU devices can be assigned device number '2'), then default_selector will choose the device with the highest heuristic point. When `SYCL_DEVICE_ALLOWLIST` is set, it is applied before enumerating devices and affects `device_num` values. -Assuming a filter has all three elements of the triple, it selects only those devices that come from the given backend, have the specified device type, AND have the given device index. If more than one filter is specified, the RT is restricted to the union of devices selected by all filters. The RT does not include the "host" backend and the host device automatically unless one of the filters explicitly specifies the "host" device type. Therefore, `SYCL_DEVICE_FILTER`=host should be set to enforce SYCL to use the host device only. +Assuming a filter has all three elements of the triple, it selects only those devices that come from the given backend, have the specified device type, AND have the given device index. If more than one filter is specified, the RT is restricted to the union of devices selected by all filters. The RT does not include the `host` backend and the `host` device automatically unless one of the filters explicitly specifies the `host` device type. Therefore, `SYCL_DEVICE_FILTER=host` should be set to enforce SYCL to use the `host` device only. -Note that all device selectors will throw an exception if the filtered list of devices does not include a device that satisfies the selector. For instance, `SYCL_DEVICE_FILTER`=cpu,level_zero will cause host_selector() to throw an exception. `SYCL_DEVICE_FILTER` also limits loading only specified plugins into the SYCL RT. In particular, `SYCL_DEVICE_FILTER`=level_zero will cause the cpu_selector to throw an exception since SYCL RT will only load the level_zero backend which does not support any CPU devices at this time. When multiple devices satisfy the filter (e..g, `SYCL_DEVICE_FILTER`=gpu), only one of them will be selected. +Note that all device selectors will throw an exception if the filtered list of devices does not include a device that satisfies the selector. For instance, `SYCL_DEVICE_FILTER=cpu,level_zero` will cause `host_selector()` to throw an exception. `SYCL_DEVICE_FILTER` also limits loading only specified plugins into the SYCL RT. In particular, `SYCL_DEVICE_FILTER=level_zero` will cause the `cpu_selector` to throw an exception since SYCL RT will only load the `level_zero` backend which does not support any CPU devices at this time. When multiple devices satisfy the filter (e..g, `SYCL_DEVICE_FILTER=gpu`), only one of them will be selected. ### `SYCL_PRINT_EXECUTION_GRAPH` Options diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index ef47805d396c1..e1f75f7bf2d6e 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -154,7 +154,7 @@ template To cast(From value); extern std::shared_ptr GlobalPlugin; // Performs PI one-time initialization. -const std::vector &initialize(); +std::vector &initialize(); // Get the plugin serving given backend. template __SYCL_EXPORT const plugin &getPlugin(); diff --git a/sycl/source/detail/device_filter.cpp b/sycl/source/detail/device_filter.cpp index ba6b15ba8cb1b..6017996e8d40b 100644 --- a/sycl/source/detail/device_filter.cpp +++ b/sycl/source/detail/device_filter.cpp @@ -12,65 +12,80 @@ #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { +std::vector tokenize(const std::string &Filter, + const std::string &Delim) { + std::vector Tokens; + size_t Pos = 0; + size_t LastPos = 0; + + while ((Pos = Filter.find(Delim, LastPos)) != std::string::npos) { + std::string_view Tok(Filter.data() + LastPos, (Pos - LastPos)); + + if (!Tok.empty()) { + Tokens.push_back(Tok); + } + // move the search starting index + LastPos = Pos + 1; + } + + // Add remainder if any + if (LastPos < Filter.size()) { + std::string_view Tok(Filter.data() + LastPos, Filter.size() - LastPos); + Tokens.push_back(Tok); + } + return Tokens; +} + device_filter::device_filter(const std::string &FilterString) { - size_t Cursor = 0; - size_t ColonPos = 0; - auto findElement = [&](auto Element) { - size_t Found = FilterString.find(Element.first, Cursor); - if (Found == std::string::npos) - return false; - Cursor = Found; - return true; + std::vector Tokens = tokenize(FilterString, ":"); + size_t TripleValueID = 0; + + auto FindElement = [&](auto Element) { + return std::string::npos != Tokens[TripleValueID].find(Element.first); }; // Handle the optional 1st field of the filter, backend // Check if the first entry matches with a known backend type auto It = std::find_if(std::begin(getSyclBeMap()), std::end(getSyclBeMap()), - findElement); + FindElement); // If no match is found, set the backend type backend::all // which actually means 'any backend' will be a match. if (It == getSyclBeMap().end()) Backend = backend::all; else { Backend = It->second; - ColonPos = FilterString.find(":", Cursor); - if (ColonPos != std::string::npos) - Cursor = ColonPos + 1; - else - Cursor = Cursor + It->first.size(); + TripleValueID++; } + // Handle the optional 2nd field of the filter - device type. // Check if the 2nd entry matches with any known device type. - if (Cursor >= FilterString.size()) { + if (TripleValueID >= Tokens.size()) { DeviceType = info::device_type::all; } else { auto Iter = std::find_if(std::begin(getSyclDeviceTypeMap()), - std::end(getSyclDeviceTypeMap()), findElement); + std::end(getSyclDeviceTypeMap()), FindElement); // If no match is found, set device_type 'all', // which actually means 'any device_type' will be a match. if (Iter == getSyclDeviceTypeMap().end()) DeviceType = info::device_type::all; else { DeviceType = Iter->second; - ColonPos = FilterString.find(":", Cursor); - if (ColonPos != std::string::npos) - Cursor = ColonPos + 1; - else - Cursor = Cursor + Iter->first.size(); + TripleValueID++; } } // Handle the optional 3rd field of the filter, device number // Try to convert the remaining string to an integer. // If succeessful, the converted integer is the desired device num. - if (Cursor < FilterString.size()) { + if (TripleValueID < Tokens.size()) { try { - DeviceNum = stoi(FilterString.substr(Cursor)); + DeviceNum = std::stoi(Tokens[TripleValueID].data()); HasDeviceNum = true; } catch (...) { std::string Message = diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index d7745f148c89c..855bbac4cfb02 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -78,7 +78,7 @@ getPluginOpaqueData(void *); namespace pi { -static void initializePlugins(std::vector *Plugins); +static void initializePlugins(std::vector &Plugins); bool XPTIInitDone = false; @@ -369,17 +369,17 @@ bool trace(TraceLevel Level) { } // Initializes all available Plugins. -const std::vector &initialize() { +std::vector &initialize() { static std::once_flag PluginsInitDone; - - std::call_once(PluginsInitDone, []() { - initializePlugins(&GlobalHandler::instance().getPlugins()); + // std::call_once is blocking all other threads if a thread is already + // creating a vector of plugins. So, no additional lock is needed. + std::call_once(PluginsInitDone, [&]() { + initializePlugins(GlobalHandler::instance().getPlugins()); }); - return GlobalHandler::instance().getPlugins(); } -static void initializePlugins(std::vector *Plugins) { +static void initializePlugins(std::vector &Plugins) { std::vector> PluginNames = findPlugins(); if (PluginNames.empty() && trace(PI_TRACE_ALL)) @@ -438,7 +438,7 @@ static void initializePlugins(std::vector *Plugins) { GlobalPlugin = std::make_shared(PluginInformation, backend::level_zero, Library); } - Plugins->emplace_back( + Plugins.emplace_back( plugin(PluginInformation, PluginNames[I].second, Library)); if (trace(TraceLevel::PI_TRACE_BASIC)) std::cerr << "SYCL_PI_TRACE[basic]: " diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index de3f489217657..3823d2322cac9 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -95,27 +95,30 @@ static bool IsBannedPlatform(platform Platform) { std::vector platform_impl::get_platforms() { std::vector Platforms; - const std::vector &Plugins = RT::initialize(); - + std::vector &Plugins = RT::initialize(); info::device_type ForcedType = detail::get_forced_type(); - for (unsigned int i = 0; i < Plugins.size(); i++) { - + for (plugin &Plugin : Plugins) { pi_uint32 NumPlatforms = 0; // Move to the next plugin if the plugin fails to initialize. // This way platforms from other plugins get a chance to be discovered. - if (Plugins[i].call_nocheck( + if (Plugin.call_nocheck( 0, nullptr, &NumPlatforms) != PI_SUCCESS) continue; if (NumPlatforms) { std::vector PiPlatforms(NumPlatforms); - if (Plugins[i].call_nocheck( + if (Plugin.call_nocheck( NumPlatforms, PiPlatforms.data(), nullptr) != PI_SUCCESS) return Platforms; for (const auto &PiPlatform : PiPlatforms) { platform Platform = detail::createSyclObjFromImpl( - getOrMakePlatformImpl(PiPlatform, Plugins[i])); + getOrMakePlatformImpl(PiPlatform, Plugin)); + { + std::lock_guard Guard(*Plugin.getPluginMutex()); + // insert PiPlatform into the Plugin + Plugin.getPlatformId(PiPlatform); + } // Skip platforms which do not contain requested device types if (!Platform.get_devices(ForcedType).empty() && !IsBannedPlatform(Platform)) @@ -141,14 +144,26 @@ std::vector platform_impl::get_platforms() { // This function matches devices in the order of backend, device_type, and // device_num. static void filterDeviceFilter(std::vector &PiDevices, - const plugin &Plugin) { + RT::PiPlatform Platform) { device_filter_list *FilterList = SYCLConfig::get(); if (!FilterList) return; + std::vector &Plugins = RT::initialize(); + auto It = + std::find_if(Plugins.begin(), Plugins.end(), [Platform](plugin &Plugin) { + return Plugin.containsPiPlatform(Platform); + }); + if (It == Plugins.end()) + return; + + plugin &Plugin = *It; backend Backend = Plugin.getBackend(); int InsertIDx = 0; - int DeviceNum = 0; + // DeviceIds should be given consecutive numbers across platforms in the same + // backend + std::lock_guard Guard(*Plugin.getPluginMutex()); + int DeviceNum = Plugin.getStartingDeviceId(Platform); for (RT::PiDevice Device : PiDevices) { RT::PiDeviceType PiDevType; Plugin.call(Device, PI_DEVICE_INFO_TYPE, @@ -181,6 +196,10 @@ static void filterDeviceFilter(std::vector &PiDevices, DeviceNum++; } PiDevices.resize(InsertIDx); + // remember the last backend that has gone through this filter function + // to assign a unique device id number across platforms that belong to + // the same backend. For example, opencl:cpu:0, opencl:acc:1, opencl:gpu:2 + Plugin.setLastDeviceId(Platform, DeviceNum); } std::shared_ptr platform_impl::getOrMakeDeviceImpl( @@ -237,12 +256,12 @@ platform_impl::get_devices(info::device_type DeviceType) const { // Filter out devices that are not present in the SYCL_DEVICE_ALLOWLIST if (SYCLConfig::get()) - applyAllowList(PiDevices, MPlatform, this->getPlugin()); + applyAllowList(PiDevices, MPlatform, Plugin); // Filter out devices that are not compatible with SYCL_DEVICE_FILTER - filterDeviceFilter(PiDevices, Plugin); + filterDeviceFilter(PiDevices, MPlatform); - PlatformImplPtr PlatformImpl = getOrMakePlatformImpl(MPlatform, *MPlugin); + PlatformImplPtr PlatformImpl = getOrMakePlatformImpl(MPlatform, Plugin); std::transform( PiDevices.begin(), PiDevices.end(), std::back_inserter(Res), [PlatformImpl](const RT::PiDevice &PiDevice) -> device { diff --git a/sycl/source/detail/plugin.hpp b/sycl/source/detail/plugin.hpp index bfd614893f221..d660b6dd057bb 100644 --- a/sycl/source/detail/plugin.hpp +++ b/sycl/source/detail/plugin.hpp @@ -89,10 +89,10 @@ auto packCallArguments(ArgsT &&... Args) { class plugin { public: plugin() = delete; - plugin(RT::PiPlugin Plugin, backend UseBackend, void *LibraryHandle) : MPlugin(Plugin), MBackend(UseBackend), MLibraryHandle(LibraryHandle), - TracingMutex(std::make_shared()) {} + TracingMutex(std::make_shared()), + MPluginMutex(std::make_shared()) {} plugin &operator=(const plugin &) = default; plugin(const plugin &) = default; @@ -184,11 +184,59 @@ class plugin { void *getLibraryHandle() { return MLibraryHandle; } int unload() { return RT::unloadPlugin(MLibraryHandle); } + // return the index of PiPlatforms. + // If not found, add it and return its index. + // The function is expected to be called in a thread safe manner. + int getPlatformId(RT::PiPlatform Platform) { + auto It = std::find(PiPlatforms.begin(), PiPlatforms.end(), Platform); + if (It != PiPlatforms.end()) + return It - PiPlatforms.begin(); + + PiPlatforms.push_back(Platform); + LastDeviceIds.push_back(0); + return PiPlatforms.size() - 1; + } + + // Device ids are consecutive across platforms within a plugin. + // We need to return the same starting index for the given platform. + // So, instead of returing the last device id of the given platform, + // return the last device id of the predecessor platform. + // The function is expected to be called in a thread safe manner. + int getStartingDeviceId(RT::PiPlatform Platform) { + int PlatformId = getPlatformId(Platform); + if (PlatformId == 0) + return 0; + return LastDeviceIds[PlatformId - 1]; + } + + // set the id of the last device for the given platform + // The function is expected to be called in a thread safe manner. + void setLastDeviceId(RT::PiPlatform Platform, int Id) { + int PlatformId = getPlatformId(Platform); + LastDeviceIds[PlatformId] = Id; + } + + bool containsPiPlatform(RT::PiPlatform Platform) { + auto It = std::find(PiPlatforms.begin(), PiPlatforms.end(), Platform); + return It != PiPlatforms.end(); + } + + std::shared_ptr getPluginMutex() { return MPluginMutex; } + private: RT::PiPlugin MPlugin; backend MBackend; void *MLibraryHandle; // the handle returned from dlopen std::shared_ptr TracingMutex; + // Mutex to guard PiPlatforms and LastDeviceIds. + // Note that this is a temporary solution until we implement the global + // Device/Platform cache later. + std::shared_ptr MPluginMutex; + // vector of PiPlatforms that belong to this plugin + std::vector PiPlatforms; + // represents the unique ids of the last device of each platform + // index of this vector corresponds to the index in PiPlatforms vector. + std::vector LastDeviceIds; }; // class plugin } // namespace detail } // namespace sycl diff --git a/sycl/tools/sycl-ls/sycl-ls.cpp b/sycl/tools/sycl-ls/sycl-ls.cpp index a50371ed8c934..364382b579211 100644 --- a/sycl/tools/sycl-ls/sycl-ls.cpp +++ b/sycl/tools/sycl-ls/sycl-ls.cpp @@ -38,53 +38,51 @@ class custom_selector : public device_selector { } }; -static void printDeviceInfo(const device &Device, const std::string &Prepend) { +std::string getDeviceTypeName(const device &Device) { auto DeviceType = Device.get_info(); - std::string DeviceTypeName; switch (DeviceType) { case info::device_type::cpu: - DeviceTypeName = "CPU "; - break; + return "cpu"; case info::device_type::gpu: - DeviceTypeName = "GPU "; - break; + return "gpu"; case info::device_type::host: - DeviceTypeName = "HOST"; - break; + return "host"; case info::device_type::accelerator: - DeviceTypeName = "ACC "; - break; + return "acc"; default: - DeviceTypeName = "UNKNOWN"; - break; + return "unknown"; } +} +static void printDeviceInfo(const device &Device, bool Verbose, + const std::string &Prepend) { auto DeviceVersion = Device.get_info(); auto DeviceName = Device.get_info(); auto DeviceVendor = Device.get_info(); auto DeviceDriverVersion = Device.get_info(); - if (verbose) { - std::cout << Prepend << "Type : " << DeviceTypeName << std::endl; + if (Verbose) { + std::cout << Prepend << "Type : " << getDeviceTypeName(Device) + << std::endl; std::cout << Prepend << "Version : " << DeviceVersion << std::endl; std::cout << Prepend << "Name : " << DeviceName << std::endl; std::cout << Prepend << "Vendor : " << DeviceVendor << std::endl; std::cout << Prepend << "Driver : " << DeviceDriverVersion << std::endl; } else { - auto DevicePlatform = Device.get_info(); - auto DevicePlatformName = DevicePlatform.get_info(); - std::cout << Prepend << DeviceTypeName << ": " << DevicePlatformName << " " - << DeviceVersion << " [" << DeviceDriverVersion << "]" - << std::endl; + std::cout << Prepend << ", " << DeviceName << " " << DeviceVersion << " [" + << DeviceDriverVersion << "]" << std::endl; } } static void printSelectorChoice(const device_selector &Selector, const std::string &Prepend) { try { - const auto &Dev = device(Selector); - printDeviceInfo(Dev, Prepend); - + const auto &Device = device(Selector); + std::string DeviceTypeName = getDeviceTypeName(Device); + auto Platform = Device.get_info(); + auto PlatformName = Platform.get_info(); + printDeviceInfo(Device, verbose, + Prepend + DeviceTypeName + ", " + PlatformName); } catch (const cl::sycl::runtime_error &Exception) { // Truncate long string so it can fit in one-line std::string What = Exception.what(); @@ -106,16 +104,41 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } + const char *filter = std::getenv("SYCL_DEVICE_FILTER"); + if (filter) { + std::cout << "Warning: SYCL_DEVICE_FILTER environment variable is set to " + << filter << "." << std::endl; + std::cout + << "To see the correct device id, please unset SYCL_DEVICE_FILTER." + << std::endl + << std::endl; + } + const auto &Platforms = platform::get_platforms(); - if (verbose) - std::cout << "Platforms: " << Platforms.size() << std::endl; - uint32_t PlatformNum = 0; + // For each backend, device num starts at zero. + std::vector DeviceNums(static_cast(backend::all), 0); for (const auto &Platform : Platforms) { - uint32_t DeviceNum = 0; - ++PlatformNum; - if (verbose) { + backend Backend = Platform.get_backend(); + auto PlatformName = Platform.get_info(); + const auto &Devices = Platform.get_devices(); + for (const auto &Device : Devices) { + uint32_t DeviceNum = DeviceNums[(int)Backend]++; + std::cout << "[" << Backend << ":" << getDeviceTypeName(Device) << ":" + << DeviceNum << "] "; + ++DeviceNum; + // Verbose parameter is set to false to print regular devices output first + printDeviceInfo(Device, false, PlatformName); + } + } + + if (verbose) { + std::cout << "\nPlatforms: " << Platforms.size() << std::endl; + uint32_t PlatformNum = 0; + for (const auto &Platform : Platforms) { + backend Backend = Platform.get_backend(); + ++PlatformNum; auto PlatformVersion = Platform.get_info(); auto PlatformName = Platform.get_info(); auto PlatformVendor = Platform.get_info(); @@ -123,23 +146,17 @@ int main(int argc, char **argv) { std::cout << " Version : " << PlatformVersion << std::endl; std::cout << " Name : " << PlatformName << std::endl; std::cout << " Vendor : " << PlatformVendor << std::endl; - } - const auto &Devices = Platform.get_devices(); - if (verbose) + + const auto &Devices = Platform.get_devices(); std::cout << " Devices : " << Devices.size() << std::endl; - for (const auto &Device : Devices) { - if (verbose) + for (const auto &Device : Devices) { + uint32_t DeviceNum = DeviceNums[(int)Backend]++; std::cout << " Device [#" << DeviceNum << "]:" << std::endl; - else { - backend Backend = Platform.get_backend(); - std::cout << "[" << Backend << ":" << DeviceNum << "] "; + ++DeviceNum; + printDeviceInfo(Device, true, " "); } - ++DeviceNum; - printDeviceInfo(Device, verbose ? " " : ""); } - } - - if (!verbose) { + } else { return EXIT_SUCCESS; }