diff --git a/sycl/gdb/libsycl.so-gdb.py b/sycl/gdb/libsycl.so-gdb.py index 5b1eb18292e14..2ed3c5042f052 100644 --- a/sycl/gdb/libsycl.so-gdb.py +++ b/sycl/gdb/libsycl.so-gdb.py @@ -374,8 +374,8 @@ def range_common_array(self): class SYCLDevice(SYCLValue): """Provides information about a sycl::device from a gdb.Value.""" - IMPL_OFFSET_TO_DEVICE_TYPE = 0x18 - IMPL_OFFSET_TO_PLATFORM = 0x28 + IMPL_OFFSET_TO_PLATFORM = 0x18 + IMPL_OFFSET_TO_DEVICE_TYPE = 0x38 PLATFORM_OFFSET_TO_BACKEND = 0x20 def __init__(self, gdb_value): diff --git a/sycl/source/backend/opencl.cpp b/sycl/source/backend/opencl.cpp index 196f02f03866a..12593593f7217 100644 --- a/sycl/source/backend/opencl.cpp +++ b/sycl/source/backend/opencl.cpp @@ -47,11 +47,8 @@ __SYCL_EXPORT bool has_extension(const sycl::device &SyclDevice, "has_extension can only be used with an OpenCL backend"); } - std::string ExtensionsString = urGetInfoString( - *getSyclObjImpl(SyclDevice), UR_DEVICE_INFO_EXTENSIONS); - - return ExtensionsString.find(std::string_view{Extension.data()}) != - std::string::npos; + return getSyclObjImpl(SyclDevice) + ->has_extension(std::string_view{Extension.data()}); } } // namespace detail diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index b052af06cb141..9870f515abf53 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -19,30 +19,197 @@ namespace sycl { inline namespace _V1 { namespace detail { +ext::oneapi::experimental::architecture device_impl::get_architecture() const { + using oneapi_exp_arch = sycl::ext::oneapi::experimental::architecture; + + // Only for NVIDIA and AMD GPU architectures + constexpr std::pair + NvidiaAmdGPUArchitectures[] = { + {"5.0", oneapi_exp_arch::nvidia_gpu_sm_50}, + {"5.2", oneapi_exp_arch::nvidia_gpu_sm_52}, + {"5.3", oneapi_exp_arch::nvidia_gpu_sm_53}, + {"6.0", oneapi_exp_arch::nvidia_gpu_sm_60}, + {"6.1", oneapi_exp_arch::nvidia_gpu_sm_61}, + {"6.2", oneapi_exp_arch::nvidia_gpu_sm_62}, + {"7.0", oneapi_exp_arch::nvidia_gpu_sm_70}, + {"7.2", oneapi_exp_arch::nvidia_gpu_sm_72}, + {"7.5", oneapi_exp_arch::nvidia_gpu_sm_75}, + {"8.0", oneapi_exp_arch::nvidia_gpu_sm_80}, + {"8.6", oneapi_exp_arch::nvidia_gpu_sm_86}, + {"8.7", oneapi_exp_arch::nvidia_gpu_sm_87}, + {"8.9", oneapi_exp_arch::nvidia_gpu_sm_89}, + {"9.0", oneapi_exp_arch::nvidia_gpu_sm_90}, + {"gfx701", oneapi_exp_arch::amd_gpu_gfx701}, + {"gfx702", oneapi_exp_arch::amd_gpu_gfx702}, + {"gfx703", oneapi_exp_arch::amd_gpu_gfx703}, + {"gfx704", oneapi_exp_arch::amd_gpu_gfx704}, + {"gfx705", oneapi_exp_arch::amd_gpu_gfx705}, + {"gfx801", oneapi_exp_arch::amd_gpu_gfx801}, + {"gfx802", oneapi_exp_arch::amd_gpu_gfx802}, + {"gfx803", oneapi_exp_arch::amd_gpu_gfx803}, + {"gfx805", oneapi_exp_arch::amd_gpu_gfx805}, + {"gfx810", oneapi_exp_arch::amd_gpu_gfx810}, + {"gfx900", oneapi_exp_arch::amd_gpu_gfx900}, + {"gfx902", oneapi_exp_arch::amd_gpu_gfx902}, + {"gfx904", oneapi_exp_arch::amd_gpu_gfx904}, + {"gfx906", oneapi_exp_arch::amd_gpu_gfx906}, + {"gfx908", oneapi_exp_arch::amd_gpu_gfx908}, + {"gfx909", oneapi_exp_arch::amd_gpu_gfx909}, + {"gfx90a", oneapi_exp_arch::amd_gpu_gfx90a}, + {"gfx90c", oneapi_exp_arch::amd_gpu_gfx90c}, + {"gfx940", oneapi_exp_arch::amd_gpu_gfx940}, + {"gfx941", oneapi_exp_arch::amd_gpu_gfx941}, + {"gfx942", oneapi_exp_arch::amd_gpu_gfx942}, + {"gfx1010", oneapi_exp_arch::amd_gpu_gfx1010}, + {"gfx1011", oneapi_exp_arch::amd_gpu_gfx1011}, + {"gfx1012", oneapi_exp_arch::amd_gpu_gfx1012}, + {"gfx1013", oneapi_exp_arch::amd_gpu_gfx1013}, + {"gfx1030", oneapi_exp_arch::amd_gpu_gfx1030}, + {"gfx1031", oneapi_exp_arch::amd_gpu_gfx1031}, + {"gfx1032", oneapi_exp_arch::amd_gpu_gfx1032}, + {"gfx1033", oneapi_exp_arch::amd_gpu_gfx1033}, + {"gfx1034", oneapi_exp_arch::amd_gpu_gfx1034}, + {"gfx1035", oneapi_exp_arch::amd_gpu_gfx1035}, + {"gfx1036", oneapi_exp_arch::amd_gpu_gfx1036}, + {"gfx1100", oneapi_exp_arch::amd_gpu_gfx1100}, + {"gfx1101", oneapi_exp_arch::amd_gpu_gfx1101}, + {"gfx1102", oneapi_exp_arch::amd_gpu_gfx1102}, + {"gfx1103", oneapi_exp_arch::amd_gpu_gfx1103}, + {"gfx1150", oneapi_exp_arch::amd_gpu_gfx1150}, + {"gfx1151", oneapi_exp_arch::amd_gpu_gfx1151}, + {"gfx1200", oneapi_exp_arch::amd_gpu_gfx1200}, + {"gfx1201", oneapi_exp_arch::amd_gpu_gfx1201}, + }; + + // Only for Intel GPU architectures + constexpr std::pair IntelGPUArchitectures[] = { + {0x02000000, oneapi_exp_arch::intel_gpu_bdw}, + {0x02400009, oneapi_exp_arch::intel_gpu_skl}, + {0x02404009, oneapi_exp_arch::intel_gpu_kbl}, + {0x02408009, oneapi_exp_arch::intel_gpu_cfl}, + {0x0240c000, oneapi_exp_arch::intel_gpu_apl}, + {0x02410000, oneapi_exp_arch::intel_gpu_glk}, + {0x02414000, oneapi_exp_arch::intel_gpu_whl}, + {0x02418000, oneapi_exp_arch::intel_gpu_aml}, + {0x0241c000, oneapi_exp_arch::intel_gpu_cml}, + {0x02c00000, oneapi_exp_arch::intel_gpu_icllp}, + {0x02c08000, oneapi_exp_arch::intel_gpu_ehl}, + {0x03000000, oneapi_exp_arch::intel_gpu_tgllp}, + {0x03004000, oneapi_exp_arch::intel_gpu_rkl}, + {0x03008000, oneapi_exp_arch::intel_gpu_adl_s}, + {0x0300c000, oneapi_exp_arch::intel_gpu_adl_p}, + {0x03010000, oneapi_exp_arch::intel_gpu_adl_n}, + {0x03028000, oneapi_exp_arch::intel_gpu_dg1}, + {0x030dc000, oneapi_exp_arch::intel_gpu_acm_g10}, // A0 + {0x030dc001, oneapi_exp_arch::intel_gpu_acm_g10}, // A1 + {0x030dc004, oneapi_exp_arch::intel_gpu_acm_g10}, // B0 + {0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10}, // C0 + {0x030e0000, oneapi_exp_arch::intel_gpu_acm_g11}, // A0 + {0x030e0004, oneapi_exp_arch::intel_gpu_acm_g11}, // B0 + {0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11}, // B1 + {0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12}, // A0 + {0x030f0000, oneapi_exp_arch::intel_gpu_pvc}, // XL-A0 + {0x030f0001, oneapi_exp_arch::intel_gpu_pvc}, // XL-AOP + {0x030f0003, oneapi_exp_arch::intel_gpu_pvc}, // XT-A0 + {0x030f0005, oneapi_exp_arch::intel_gpu_pvc}, // XT-B0 + {0x030f0006, oneapi_exp_arch::intel_gpu_pvc}, // XT-B1 + {0x030f0007, oneapi_exp_arch::intel_gpu_pvc}, // XT-C0 + {0x030f4007, oneapi_exp_arch::intel_gpu_pvc_vg}, // C0 + {0x03118000, oneapi_exp_arch::intel_gpu_mtl_u}, // A0 + {0x03118004, oneapi_exp_arch::intel_gpu_mtl_u}, // B0 + {0x0311c000, oneapi_exp_arch::intel_gpu_mtl_h}, // A0 + {0x0311c004, oneapi_exp_arch::intel_gpu_mtl_h}, // B0 + {0x03128000, oneapi_exp_arch::intel_gpu_arl_h}, // A0 + {0x03128004, oneapi_exp_arch::intel_gpu_arl_h}, // B0 + {0x05004000, oneapi_exp_arch::intel_gpu_bmg_g21}, // A0 + {0x05004001, oneapi_exp_arch::intel_gpu_bmg_g21}, // A1 + {0x05004004, oneapi_exp_arch::intel_gpu_bmg_g21}, // B0 + {0x05010000, oneapi_exp_arch::intel_gpu_lnl_m}, // A0 + {0x05010001, oneapi_exp_arch::intel_gpu_lnl_m}, // A1 + {0x05010004, oneapi_exp_arch::intel_gpu_lnl_m}, // B0 + {0x07800000, oneapi_exp_arch::intel_gpu_ptl_h}, // A0 + {0x07800004, oneapi_exp_arch::intel_gpu_ptl_h}, // B0 + {0x07804000, oneapi_exp_arch::intel_gpu_ptl_u}, // A0 + {0x07804001, oneapi_exp_arch::intel_gpu_ptl_u}, // A1 + }; + + // Only for Intel CPU architectures + constexpr std::pair IntelCPUArchitectures[] = { + {8, oneapi_exp_arch::intel_cpu_spr}, + {9, oneapi_exp_arch::intel_cpu_gnr}, + {10, oneapi_exp_arch::intel_cpu_dmr}, + }; + backend CurrentBackend = getBackend(); + auto LookupIPVersion = [&, this](auto &ArchList) + -> std::optional { + auto DeviceIp = get_info_impl_nocheck(); + if (!DeviceIp.has_val()) { + ur_result_t Err = DeviceIp.error(); + if (Err == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION) { + // Not all devices support this device info query + return std::nullopt; + } + getAdapter()->checkUrResult(Err); + } + + auto Val = static_cast(DeviceIp.value()); + for (const auto &Item : ArchList) { + if (Item.first == Val) + return Item.second; + } + return std::nullopt; + }; + + if (is_gpu() && (backend::ext_oneapi_level_zero == CurrentBackend || + backend::opencl == CurrentBackend)) { + return LookupIPVersion(IntelGPUArchitectures) + .value_or(ext::oneapi::experimental::architecture::unknown); + } else if (is_gpu() && (backend::ext_oneapi_cuda == CurrentBackend || + backend::ext_oneapi_hip == CurrentBackend)) { + auto MapArchIDToArchName = [&](const char *arch) { + for (const auto &Item : NvidiaAmdGPUArchitectures) { + if (std::string_view(Item.first) == arch) + return Item.second; + } + return ext::oneapi::experimental::architecture::unknown; + }; + std::string DeviceArch = + get_info_impl::value>(); + std::string_view DeviceArchSubstr = + std::string_view{DeviceArch}.substr(0, DeviceArch.find(":")); + return MapArchIDToArchName(DeviceArchSubstr.data()); + } else if (is_cpu() && backend::opencl == CurrentBackend) { + return LookupIPVersion(IntelCPUArchitectures) + .value_or(ext::oneapi::experimental::architecture::x86_64); + } // else is not needed + // TODO: add support of other architectures by extending with else if + return ext::oneapi::experimental::architecture::unknown; +} /// Constructs a SYCL device instance using the provided /// UR device instance. device_impl::device_impl(ur_device_handle_t Device, platform_impl &Platform, device_impl::private_tag) : MDevice(Device), MPlatform(Platform.shared_from_this()), - MDeviceHostBaseTime(std::make_pair(0, 0)) { - const AdapterPtr &Adapter = Platform.getAdapter(); - - // TODO catch an exception and put it to list of asynchronous exceptions - MType = get_info_impl(); - - // No need to set MRootDevice when MAlwaysRootDevice is true - // TODO: Is get_info aligned with this? - if (!Platform.MAlwaysRootDevice) { - // TODO catch an exception and put it to list of asynchronous exceptions - MRootDevice = get_info_impl(); - } - + // TODO catch an exception and put it to list of asynchronous exceptions + // for the field initializers below: + MType(get_info_impl()), + // No need to set MRootDevice when MAlwaysRootDevice is true + MRootDevice(Platform.MAlwaysRootDevice + ? nullptr + : get_info_impl()), + MUseNativeAssert(get_info_impl()), + MExtensions([this]() { + auto Extensions = + split_string(get_info_impl(), ' '); + std::sort(Extensions.begin(), Extensions.end()); + return Extensions; + }()), + MDeviceArch(get_architecture()), + MDeviceName(get_info_impl()) { // TODO catch an exception and put it to list of asynchronous exceptions // Interoperability Constructor already calls DeviceRetain in // urDeviceCreateWithNativeHandle. - Adapter->call(MDevice); - - MUseNativeAssert = get_info_impl(); + getAdapter()->call(MDevice); } device_impl::~device_impl() { @@ -116,10 +283,9 @@ device_impl::get_backend_info() const { } #endif -bool device_impl::has_extension(const std::string &ExtensionName) const { - std::string AllExtensionNames = get_info_impl(); - - return (AllExtensionNames.find(ExtensionName) != std::string::npos); +bool device_impl::has_extension(std::string_view ExtensionName) const { + return std::find(MExtensions.begin(), MExtensions.end(), ExtensionName) != + MExtensions.end(); } bool device_impl::is_partition_supported(info::partition_property Prop) const { @@ -415,7 +581,7 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_oneapi_srgb: return get_info(); case aspect::ext_oneapi_native_assert: - return useNativeAssert(); + return MUseNativeAssert; case aspect::ext_oneapi_cuda_async_barrier: { return get_info_impl_nocheck().value_or(0); } @@ -649,24 +815,6 @@ bool device_impl::has(aspect Aspect) const { return false; // This device aspect has not been implemented yet. } -bool device_impl::useNativeAssert() const { return MUseNativeAssert; } - -std::string device_impl::getDeviceName() const { - std::call_once(MDeviceNameFlag, - [this]() { MDeviceName = get_info(); }); - - return MDeviceName; -} - -ext::oneapi::experimental::architecture device_impl::getDeviceArch() const { - std::call_once(MDeviceArchFlag, [this]() { - MDeviceArch = - get_info(); - }); - - return MDeviceArch; -} - // On the first call this function queries for device timestamp // along with host synchronized timestamp and stores it in member variable // MDeviceHostBaseTime. Subsequent calls to this function would just retrieve diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 8e22fec339447..3ee2aff788b7c 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -237,7 +237,7 @@ class device_impl : public std::enable_shared_from_this { /// /// \param ExtensionName is a name of queried extension. /// \return true if SYCL device supports the extension. - bool has_extension(const std::string &ExtensionName) const; + bool has_extension(std::string_view ExtensionName) const; std::vector create_sub_devices(const ur_device_partition_properties_t *Properties, @@ -332,7 +332,7 @@ class device_impl : public std::enable_shared_from_this { CASE(info::device::device_type) { using device_type = info::device_type; - switch (get_info_impl()) { + switch (MType) { case UR_DEVICE_TYPE_DEFAULT: return device_type::automatic; case UR_DEVICE_TYPE_ALL: @@ -470,6 +470,7 @@ class device_impl : public std::enable_shared_from_this { platform_impl::getOrMakePlatformImpl( get_info_impl(), getAdapter())); } + CASE(info::device::name) { return MDeviceName; } CASE(info::device::profile) { if (getBackend() != backend::opencl) @@ -480,9 +481,7 @@ class device_impl : public std::enable_shared_from_this { return get_info_impl(); } - CASE(info::device::extensions) { - return split_string(get_info_impl(), ' '); - } + CASE(info::device::extensions) { return MExtensions; } CASE(info::device::preferred_interop_user_sync) { if (getBackend() != backend::opencl) @@ -730,7 +729,7 @@ class device_impl : public std::enable_shared_from_this { } CASE(ext::oneapi::experimental::info::device::architecture) { - return get_architecture(); + return MDeviceArch; } CASE(ext::oneapi::experimental::info::device::matrix_combinations) { @@ -968,20 +967,11 @@ class device_impl : public std::enable_shared_from_this { /// \return true if the SYCL device has the given feature. bool has(aspect Aspect) const; - /// Indicates the SYCL device prefers to use its native assert - /// implementation. - /// - /// If this is false we will use the fallback assert implementation, - /// as detailed in doc/design/Assert.md - bool useNativeAssert() const; - bool isRootDevice() const { return MRootDevice == nullptr; } - std::string getDeviceName() const; - bool extOneapiArchitectureIs(ext::oneapi::experimental::architecture Arch) const { - return Arch == getDeviceArch(); + return Arch == MDeviceArch; } bool extOneapiArchitectureIs( @@ -991,8 +981,7 @@ class device_impl : public std::enable_shared_from_this { std::optional CategoryMaxArch = get_category_max_architecture(Category); if (CategoryMinArch.has_value() && CategoryMaxArch.has_value()) - return CategoryMinArch <= getDeviceArch() && - getDeviceArch() <= CategoryMaxArch; + return CategoryMinArch <= MDeviceArch && MDeviceArch <= CategoryMaxArch; return false; } @@ -1047,9 +1036,6 @@ class device_impl : public std::enable_shared_from_this { /// @brief Get the platform impl serving this device platform_impl &getPlatformImpl() const { return *MPlatform; } - /// Get device architecture - ext::oneapi::experimental::architecture getDeviceArch() const; - template std::vector get_fp_config() const { if (Desc == UR_DEVICE_INFO_HALF_FP_CONFIG && @@ -1110,172 +1096,8 @@ class device_impl : public std::enable_shared_from_this { return result; } - ext::oneapi::experimental::architecture get_architecture() const { - using oneapi_exp_arch = sycl::ext::oneapi::experimental::architecture; - - // Only for NVIDIA and AMD GPU architectures - constexpr std::pair - NvidiaAmdGPUArchitectures[] = { - {"5.0", oneapi_exp_arch::nvidia_gpu_sm_50}, - {"5.2", oneapi_exp_arch::nvidia_gpu_sm_52}, - {"5.3", oneapi_exp_arch::nvidia_gpu_sm_53}, - {"6.0", oneapi_exp_arch::nvidia_gpu_sm_60}, - {"6.1", oneapi_exp_arch::nvidia_gpu_sm_61}, - {"6.2", oneapi_exp_arch::nvidia_gpu_sm_62}, - {"7.0", oneapi_exp_arch::nvidia_gpu_sm_70}, - {"7.2", oneapi_exp_arch::nvidia_gpu_sm_72}, - {"7.5", oneapi_exp_arch::nvidia_gpu_sm_75}, - {"8.0", oneapi_exp_arch::nvidia_gpu_sm_80}, - {"8.6", oneapi_exp_arch::nvidia_gpu_sm_86}, - {"8.7", oneapi_exp_arch::nvidia_gpu_sm_87}, - {"8.9", oneapi_exp_arch::nvidia_gpu_sm_89}, - {"9.0", oneapi_exp_arch::nvidia_gpu_sm_90}, - {"gfx701", oneapi_exp_arch::amd_gpu_gfx701}, - {"gfx702", oneapi_exp_arch::amd_gpu_gfx702}, - {"gfx703", oneapi_exp_arch::amd_gpu_gfx703}, - {"gfx704", oneapi_exp_arch::amd_gpu_gfx704}, - {"gfx705", oneapi_exp_arch::amd_gpu_gfx705}, - {"gfx801", oneapi_exp_arch::amd_gpu_gfx801}, - {"gfx802", oneapi_exp_arch::amd_gpu_gfx802}, - {"gfx803", oneapi_exp_arch::amd_gpu_gfx803}, - {"gfx805", oneapi_exp_arch::amd_gpu_gfx805}, - {"gfx810", oneapi_exp_arch::amd_gpu_gfx810}, - {"gfx900", oneapi_exp_arch::amd_gpu_gfx900}, - {"gfx902", oneapi_exp_arch::amd_gpu_gfx902}, - {"gfx904", oneapi_exp_arch::amd_gpu_gfx904}, - {"gfx906", oneapi_exp_arch::amd_gpu_gfx906}, - {"gfx908", oneapi_exp_arch::amd_gpu_gfx908}, - {"gfx909", oneapi_exp_arch::amd_gpu_gfx909}, - {"gfx90a", oneapi_exp_arch::amd_gpu_gfx90a}, - {"gfx90c", oneapi_exp_arch::amd_gpu_gfx90c}, - {"gfx940", oneapi_exp_arch::amd_gpu_gfx940}, - {"gfx941", oneapi_exp_arch::amd_gpu_gfx941}, - {"gfx942", oneapi_exp_arch::amd_gpu_gfx942}, - {"gfx1010", oneapi_exp_arch::amd_gpu_gfx1010}, - {"gfx1011", oneapi_exp_arch::amd_gpu_gfx1011}, - {"gfx1012", oneapi_exp_arch::amd_gpu_gfx1012}, - {"gfx1013", oneapi_exp_arch::amd_gpu_gfx1013}, - {"gfx1030", oneapi_exp_arch::amd_gpu_gfx1030}, - {"gfx1031", oneapi_exp_arch::amd_gpu_gfx1031}, - {"gfx1032", oneapi_exp_arch::amd_gpu_gfx1032}, - {"gfx1033", oneapi_exp_arch::amd_gpu_gfx1033}, - {"gfx1034", oneapi_exp_arch::amd_gpu_gfx1034}, - {"gfx1035", oneapi_exp_arch::amd_gpu_gfx1035}, - {"gfx1036", oneapi_exp_arch::amd_gpu_gfx1036}, - {"gfx1100", oneapi_exp_arch::amd_gpu_gfx1100}, - {"gfx1101", oneapi_exp_arch::amd_gpu_gfx1101}, - {"gfx1102", oneapi_exp_arch::amd_gpu_gfx1102}, - {"gfx1103", oneapi_exp_arch::amd_gpu_gfx1103}, - {"gfx1150", oneapi_exp_arch::amd_gpu_gfx1150}, - {"gfx1151", oneapi_exp_arch::amd_gpu_gfx1151}, - {"gfx1200", oneapi_exp_arch::amd_gpu_gfx1200}, - {"gfx1201", oneapi_exp_arch::amd_gpu_gfx1201}, - }; - - // Only for Intel GPU architectures - constexpr std::pair IntelGPUArchitectures[] = { - {0x02000000, oneapi_exp_arch::intel_gpu_bdw}, - {0x02400009, oneapi_exp_arch::intel_gpu_skl}, - {0x02404009, oneapi_exp_arch::intel_gpu_kbl}, - {0x02408009, oneapi_exp_arch::intel_gpu_cfl}, - {0x0240c000, oneapi_exp_arch::intel_gpu_apl}, - {0x02410000, oneapi_exp_arch::intel_gpu_glk}, - {0x02414000, oneapi_exp_arch::intel_gpu_whl}, - {0x02418000, oneapi_exp_arch::intel_gpu_aml}, - {0x0241c000, oneapi_exp_arch::intel_gpu_cml}, - {0x02c00000, oneapi_exp_arch::intel_gpu_icllp}, - {0x02c08000, oneapi_exp_arch::intel_gpu_ehl}, - {0x03000000, oneapi_exp_arch::intel_gpu_tgllp}, - {0x03004000, oneapi_exp_arch::intel_gpu_rkl}, - {0x03008000, oneapi_exp_arch::intel_gpu_adl_s}, - {0x0300c000, oneapi_exp_arch::intel_gpu_adl_p}, - {0x03010000, oneapi_exp_arch::intel_gpu_adl_n}, - {0x03028000, oneapi_exp_arch::intel_gpu_dg1}, - {0x030dc000, oneapi_exp_arch::intel_gpu_acm_g10}, // A0 - {0x030dc001, oneapi_exp_arch::intel_gpu_acm_g10}, // A1 - {0x030dc004, oneapi_exp_arch::intel_gpu_acm_g10}, // B0 - {0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10}, // C0 - {0x030e0000, oneapi_exp_arch::intel_gpu_acm_g11}, // A0 - {0x030e0004, oneapi_exp_arch::intel_gpu_acm_g11}, // B0 - {0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11}, // B1 - {0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12}, // A0 - {0x030f0000, oneapi_exp_arch::intel_gpu_pvc}, // XL-A0 - {0x030f0001, oneapi_exp_arch::intel_gpu_pvc}, // XL-AOP - {0x030f0003, oneapi_exp_arch::intel_gpu_pvc}, // XT-A0 - {0x030f0005, oneapi_exp_arch::intel_gpu_pvc}, // XT-B0 - {0x030f0006, oneapi_exp_arch::intel_gpu_pvc}, // XT-B1 - {0x030f0007, oneapi_exp_arch::intel_gpu_pvc}, // XT-C0 - {0x030f4007, oneapi_exp_arch::intel_gpu_pvc_vg}, // C0 - {0x03118000, oneapi_exp_arch::intel_gpu_mtl_u}, // A0 - {0x03118004, oneapi_exp_arch::intel_gpu_mtl_u}, // B0 - {0x0311c000, oneapi_exp_arch::intel_gpu_mtl_h}, // A0 - {0x0311c004, oneapi_exp_arch::intel_gpu_mtl_h}, // B0 - {0x03128000, oneapi_exp_arch::intel_gpu_arl_h}, // A0 - {0x03128004, oneapi_exp_arch::intel_gpu_arl_h}, // B0 - {0x05004000, oneapi_exp_arch::intel_gpu_bmg_g21}, // A0 - {0x05004001, oneapi_exp_arch::intel_gpu_bmg_g21}, // A1 - {0x05004004, oneapi_exp_arch::intel_gpu_bmg_g21}, // B0 - {0x05010000, oneapi_exp_arch::intel_gpu_lnl_m}, // A0 - {0x05010001, oneapi_exp_arch::intel_gpu_lnl_m}, // A1 - {0x05010004, oneapi_exp_arch::intel_gpu_lnl_m}, // B0 - {0x07800000, oneapi_exp_arch::intel_gpu_ptl_h}, // A0 - {0x07800004, oneapi_exp_arch::intel_gpu_ptl_h}, // B0 - {0x07804000, oneapi_exp_arch::intel_gpu_ptl_u}, // A0 - {0x07804001, oneapi_exp_arch::intel_gpu_ptl_u}, // A1 - }; - - // Only for Intel CPU architectures - constexpr std::pair IntelCPUArchitectures[] = { - {8, oneapi_exp_arch::intel_cpu_spr}, - {9, oneapi_exp_arch::intel_cpu_gnr}, - {10, oneapi_exp_arch::intel_cpu_dmr}, - }; - backend CurrentBackend = getBackend(); - auto LookupIPVersion = [&, this](auto &ArchList) - -> std::optional { - auto DeviceIp = get_info_impl_nocheck(); - if (!DeviceIp.has_val()) { - ur_result_t Err = DeviceIp.error(); - if (Err == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION) { - // Not all devices support this device info query - return std::nullopt; - } - getAdapter()->checkUrResult(Err); - } - - auto Val = static_cast(DeviceIp.value()); - for (const auto &Item : ArchList) { - if (Item.first == Val) - return Item.second; - } - return std::nullopt; - }; - - if (is_gpu() && (backend::ext_oneapi_level_zero == CurrentBackend || - backend::opencl == CurrentBackend)) { - return LookupIPVersion(IntelGPUArchitectures) - .value_or(ext::oneapi::experimental::architecture::unknown); - } else if (is_gpu() && (backend::ext_oneapi_cuda == CurrentBackend || - backend::ext_oneapi_hip == CurrentBackend)) { - auto MapArchIDToArchName = [&](const char *arch) { - for (const auto &Item : NvidiaAmdGPUArchitectures) { - if (std::string_view(Item.first) == arch) - return Item.second; - } - return ext::oneapi::experimental::architecture::unknown; - }; - std::string DeviceArch = - get_info_impl::value>(); - std::string_view DeviceArchSubstr = - std::string_view{DeviceArch}.substr(0, DeviceArch.find(":")); - return MapArchIDToArchName(DeviceArchSubstr.data()); - } else if (is_cpu() && backend::opencl == CurrentBackend) { - return LookupIPVersion(IntelCPUArchitectures) - .value_or(ext::oneapi::experimental::architecture::x86_64); - } // else is not needed - // TODO: add support of other architectures by extending with else if - return ext::oneapi::experimental::architecture::unknown; - } + // Only called in ctor, so can be defined in device_impl.cpp. + ext::oneapi::experimental::architecture get_architecture() const; std::vector get_matrix_combinations() const { @@ -1594,15 +1416,25 @@ class device_impl : public std::enable_shared_from_this { private: ur_device_handle_t MDevice = 0; - ur_device_type_t MType; - ur_device_handle_t MRootDevice = nullptr; + // This is used for getAdapter so should be above other properties. std::shared_ptr MPlatform; - bool MUseNativeAssert = false; - mutable std::string MDeviceName; - mutable std::once_flag MDeviceNameFlag; - mutable ext::oneapi::experimental::architecture MDeviceArch{}; - mutable std::once_flag MDeviceArchFlag; + + // TODO: Does this have a race? std::pair MDeviceHostBaseTime{0, 0}; + + const ur_device_type_t MType; + const ur_device_handle_t MRootDevice; + + // Pre-compute some often used properties. + + // Is used during submission. + const bool MUseNativeAssert; + // Multiple heap allocations, also used in fp16/fp64 aspect queries. + const std::vector MExtensions; + // Seems to be used for device image compatibility checks. + const ext::oneapi::experimental::architecture MDeviceArch; + // Used in XPTI tracing, avoid extra heap allocations. + const std::string MDeviceName; }; // class device_impl #ifndef __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 96d62df4c046d..5a372f7700d8f 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1475,7 +1475,7 @@ ProgramManager::ProgramManager() const char *getArchName(const device_impl *DeviceImpl) { namespace syclex = sycl::ext::oneapi::experimental; - auto Arch = DeviceImpl->getDeviceArch(); + auto Arch = DeviceImpl->get_info(); switch (Arch) { #define __SYCL_ARCHITECTURE(ARCH, VAL) \ case syclex::architecture::ARCH: \ @@ -1661,11 +1661,7 @@ getDeviceLibPrograms(const ContextImplPtr Context, // Load a fallback library for an extension if the any device does not // support it. for (auto Device : Devices) { - // TODO: device_impl::has_extension should cache extension string, then we'd - // be able to use that in the loop below directly. - std::string DevExtList = urGetInfoString( - *Context->getPlatformImpl().getDeviceImpl(Device), - UR_DEVICE_INFO_EXTENSIONS); + device_impl &DeviceImpl = *Context->getPlatformImpl().getDeviceImpl(Device); for (auto &Pair : RequiredDeviceLibExt) { DeviceLibExt Ext = Pair.first; @@ -1695,7 +1691,7 @@ getDeviceLibPrograms(const ContextImplPtr Context, InhibitNativeImpl = strstr(Env, ExtName) != nullptr; } - bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName); + bool DeviceSupports = DeviceImpl.has_extension(ExtName); if (!DeviceSupports || InhibitNativeImpl) { Programs.push_back(loadDeviceLibFallback(Context, Ext, Devices, /*UseNativeLib=*/false)); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index b9af432a7f573..8e02da4b0719c 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -706,7 +706,8 @@ void queue_impl::constructorNotification() { xpti::addMetadata(TEvent, "sycl_context", reinterpret_cast(MContext->getHandleRef())); - xpti::addMetadata(TEvent, "sycl_device_name", MDevice.getDeviceName()); + xpti::addMetadata(TEvent, "sycl_device_name", + MDevice.get_info()); xpti::addMetadata(TEvent, "sycl_device", reinterpret_cast(MDevice.getHandleRef())); xpti::addMetadata(TEvent, "is_inorder", MIsInorder); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 783ce3b1412bb..d63c64390a6c8 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -110,8 +110,9 @@ static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { if (Queue) { xpti::addMetadata(TraceEvent, "sycl_device", deviceToID(Queue->get_device())); - xpti::addMetadata(TraceEvent, "sycl_device_name", - getSyclObjImpl(Queue->get_device())->getDeviceName()); + xpti::addMetadata( + TraceEvent, "sycl_device_name", + getSyclObjImpl(Queue->get_device())->get_info()); } } diff --git a/sycl/test/gdb/printers.cpp b/sycl/test/gdb/printers.cpp index 7fcacb89aff1f..4af0ea01c7b56 100644 --- a/sycl/test/gdb/printers.cpp +++ b/sycl/test/gdb/printers.cpp @@ -63,9 +63,9 @@ sycl::range<1> r(3); // CHECK: 32 | backend MBackend // CHECK: 0 | class sycl::detail::device_impl -// CHECK: 24 | ur_device_type_t MType -// CHECK: 40 | class std::shared_ptr MPlatform -// CHECK: 40 | element_type * _M_ptr +// CHECK: 24 | class std::shared_ptr MPlatform +// CHECK: 24 | element_type * _M_ptr +// CHECK: 56 | const ur_device_type_t MType // DEVICE: 0 | class sycl::detail::AccessorImplDevice<1> // DEVICE: 0 | class sycl::id<1> Offset diff --git a/sycl/unittests/buffer/BufferLocation.cpp b/sycl/unittests/buffer/BufferLocation.cpp index dc0276b859c19..e0edc08add908 100644 --- a/sycl/unittests/buffer/BufferLocation.cpp +++ b/sycl/unittests/buffer/BufferLocation.cpp @@ -97,16 +97,17 @@ static ur_result_t redefinedDeviceGetInfoAfter(void *pParams) { class BufferTest : public ::testing::Test { public: - BufferTest() : Mock{}, Plt{sycl::platform()} {} + BufferTest() + : Mock{}, Plt([]() { + // Make sure these are re-defined before we create device hierarchy. + mock::getCallbacks().set_before_callback( + "urMemBufferCreate", &redefinedMemBufferCreateBefore); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); + return sycl::platform{}; + }()) {} protected: - void SetUp() override { - mock::getCallbacks().set_before_callback("urMemBufferCreate", - &redefinedMemBufferCreateBefore); - mock::getCallbacks().set_after_callback("urDeviceGetInfo", - &redefinedDeviceGetInfoAfter); - } - sycl::unittest::UrMock<> Mock; sycl::platform Plt; }; diff --git a/sycl/unittests/pipes/host_pipe_registration.cpp b/sycl/unittests/pipes/host_pipe_registration.cpp index c821ddf4e09e9..6ba962c61bd08 100644 --- a/sycl/unittests/pipes/host_pipe_registration.cpp +++ b/sycl/unittests/pipes/host_pipe_registration.cpp @@ -96,7 +96,15 @@ void prepareUrMock(unittest::UrMock<> &Mock) { class PipeTest : public ::testing::Test { public: - PipeTest() : Mock{}, Plt{sycl::platform()} {} + PipeTest() + : Mock{}, Plt{[]() { + // Fake extension. Make sure it's redefined before we create device + // hierarchy. + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + + return sycl::platform{}; + }()} {} protected: void SetUp() override { @@ -119,9 +127,6 @@ static sycl::unittest::MockDeviceImage Img = generateDefaultImage(); static sycl::unittest::MockDeviceImageArray<1> ImgArray{&Img}; TEST_F(PipeTest, Basic) { - // Fake extension - mock::getCallbacks().set_after_callback("urDeviceGetInfo", - &after_urDeviceGetInfo); // Device registration @@ -154,8 +159,6 @@ ur_result_t after_urEventGetInfo(void *pParams) { } TEST_F(PipeTest, NonBlockingOperationFail) { - mock::getCallbacks().set_after_callback("urDeviceGetInfo", - &after_urDeviceGetInfo); mock::getCallbacks().set_replace_callback("urEventWait", &redefinedEventWait); bool Success = false;