Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NPU] Use Level Zero init structure / Follow same code design #27453

Merged
merged 3 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/plugins/intel_npu/src/backend/include/zero_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ class ZeroDevice : public IDevice {
private:
const std::shared_ptr<ZeroInitStructsHolder> _initStructs;

ze_graph_dditable_ext_curr_t& _graph_ddi_table_ext;

ze_device_properties_t device_properties = {};

ze_pci_ext_properties_t pci_properties = {};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ namespace intel_npu {

class ZeroHostTensor : public ov::ITensor {
public:
ZeroHostTensor(std::shared_ptr<ov::IRemoteContext> context,
std::shared_ptr<ZeroInitStructsHolder> init_structs,
ZeroHostTensor(const std::shared_ptr<ov::IRemoteContext>& context,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const ov::element::Type element_type,
const ov::Shape& shape,
const Config& config);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,17 @@ class ZeroInferRequest final : public SyncInferRequest {
* @param index The index corresponding to the position of the tensor inside the I/O structures.
* @param isInput Used for identifying the structures to which the tensor belongs.
*/
void set_tensor_data(const std::shared_ptr<ov::ITensor> tensor, const size_t index, const bool isInput);
void set_tensor_data(const std::shared_ptr<ov::ITensor>& tensor, const size_t index, const bool isInput);

/**
* @brief Check the received remote tensor and copy it to the Level Zero tensor
* @param tensor Reference to a tensor.
* @param index The index corresponding to the position of the tensor inside the I/O structures.
* @param isInput Used for identifying the structures to which the tensor belongs.
*/
void set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTensor> tensor, const size_t index, const bool isInput);
void set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTensor>& tensor,
const size_t index,
const bool isInput);

void check_network_precision(const ov::element::Type_t precision) const override;
void create_pipeline();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ struct Pipeline {
const std::shared_ptr<IGraph>& graph,
zeroProfiling::ProfilingPool& profiling_pool,
zeroProfiling::ProfilingQuery& profiling_query,
std::shared_ptr<zeroProfiling::NpuInferProfiling> npu_profiling,
const std::shared_ptr<zeroProfiling::NpuInferProfiling>& npu_profiling,
const std::vector<std::vector<std::optional<TensorData>>>& inputTensorsData,
const std::vector<std::optional<TensorData>>& outputTensorsData,
size_t numberOfCommandLists,
Expand Down
37 changes: 18 additions & 19 deletions src/plugins/intel_npu/src/backend/include/zero_profiling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
#include <climits>
#include <map>

#include "intel_npu/common/igraph.hpp"
#include "intel_npu/config/compiler.hpp"
#include "intel_npu/utils/logger/logger.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"
#include "intel_npu/utils/zero/zero_types.hpp"
#include "openvino/runtime/profiling_info.hpp"

Expand All @@ -23,31 +25,29 @@ using LayerStatistics = std::vector<ov::ProfilingInfo>;
constexpr uint32_t POOL_SIZE = 1;

struct ProfilingPool {
ProfilingPool(ze_graph_handle_t graph_handle,
uint32_t profiling_count,
ze_graph_profiling_dditable_ext_curr_t& graph_profiling_ddi_table_ext)
: _graph_handle(graph_handle),
_profiling_count(profiling_count),
_graph_profiling_ddi_table_ext(graph_profiling_ddi_table_ext) {}
ProfilingPool(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const std::shared_ptr<IGraph>& graph,
uint32_t profiling_count)
: _init_structs(init_structs),
_graph(graph),
_profiling_count(profiling_count) {}
ProfilingPool(const ProfilingPool&) = delete;
ProfilingPool& operator=(const ProfilingPool&) = delete;
bool create();

~ProfilingPool();

ze_graph_handle_t _graph_handle;
std::shared_ptr<ZeroInitStructsHolder> _init_structs;
std::shared_ptr<IGraph> _graph;
const uint32_t _profiling_count;

ze_graph_profiling_pool_handle_t _handle = nullptr;
ze_graph_profiling_dditable_ext_curr_t& _graph_profiling_ddi_table_ext;
};

struct ProfilingQuery {
ProfilingQuery(uint32_t index,
ze_device_handle_t device_handle,
ze_graph_profiling_dditable_ext_curr_t& graph_profiling_ddi_table_ext)
: _index(index),
_device_handle(device_handle),
_graph_profiling_ddi_table_ext(graph_profiling_ddi_table_ext) {}
ProfilingQuery(const std::shared_ptr<ZeroInitStructsHolder>& init_structs, uint32_t index)
: _init_structs(init_structs),
_index(index) {}
ProfilingQuery(const ProfilingQuery&) = delete;
ProfilingQuery& operator=(const ProfilingQuery&) = delete;
void create(const ze_graph_profiling_pool_handle_t& profiling_pool);
Expand All @@ -64,18 +64,18 @@ struct ProfilingQuery {
void getProfilingProperties(ze_device_profiling_data_properties_t* properties) const;
void verifyProfilingProperties() const;

std::shared_ptr<ZeroInitStructsHolder> _init_structs;
const uint32_t _index;
ze_device_handle_t _device_handle;

ze_graph_profiling_query_handle_t _handle = nullptr;
ze_graph_profiling_dditable_ext_curr_t& _graph_profiling_ddi_table_ext;
};

extern template std::vector<uint8_t> ProfilingQuery::getData<uint8_t>() const;

using NpuInferStatistics = std::vector<ov::ProfilingInfo>;

struct NpuInferProfiling final {
explicit NpuInferProfiling(ze_context_handle_t context, ze_device_handle_t device_handle, ov::log::Level loglevel);
explicit NpuInferProfiling(const std::shared_ptr<ZeroInitStructsHolder>& init_structs, ov::log::Level loglevel);
NpuInferProfiling(const NpuInferProfiling&) = delete;
NpuInferProfiling& operator=(const NpuInferProfiling&) = delete;
NpuInferProfiling(NpuInferProfiling&&) = delete;
Expand All @@ -91,8 +91,7 @@ struct NpuInferProfiling final {
void* npu_ts_infer_end = 0;

private:
ze_context_handle_t _context = nullptr;
ze_device_handle_t _device_handle;
std::shared_ptr<ZeroInitStructsHolder> _init_structs;
ov::log::Level _loglevel;
Logger _logger;
ze_device_properties_t _dev_properties = {};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ namespace intel_npu {

class ZeroRemoteTensor : public RemoteTensor {
public:
ZeroRemoteTensor(std::shared_ptr<ov::IRemoteContext> context,
std::shared_ptr<ZeroInitStructsHolder> init_structs,
ZeroRemoteTensor(const std::shared_ptr<ov::IRemoteContext>& context,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const ov::element::Type& element_type,
const ov::Shape& shape,
const Config& config,
Expand Down
15 changes: 8 additions & 7 deletions src/plugins/intel_npu/src/backend/src/zero_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ using namespace intel_npu;

ZeroDevice::ZeroDevice(const std::shared_ptr<ZeroInitStructsHolder>& initStructs)
: _initStructs(initStructs),
_graph_ddi_table_ext(_initStructs->getGraphDdiTable()),
log("ZeroDevice", Logger::global().level()) {
log.debug("ZeroDevice::ZeroDevice init");
device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
Expand Down Expand Up @@ -121,9 +120,10 @@ uint32_t ZeroDevice::getMaxNumSlices() const {

uint64_t ZeroDevice::getAllocMemSize() const {
ze_graph_memory_query_t query{};
ze_result_t result =
_graph_ddi_table_ext.pfnQueryContextMemory(_initStructs->getContext(), ZE_GRAPH_QUERY_MEMORY_DDR, &query);
THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnQueryContextMemory", result, _graph_ddi_table_ext);
ze_result_t result = _initStructs->getGraphDdiTable().pfnQueryContextMemory(_initStructs->getContext(),
ZE_GRAPH_QUERY_MEMORY_DDR,
&query);
THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnQueryContextMemory", result, _initStructs->getGraphDdiTable());

return query.allocated;
}
Expand All @@ -132,9 +132,10 @@ uint64_t ZeroDevice::getTotalMemSize() const {
#define LEGACY_MAX_MEM_ALLOC_SIZE_BYTES (2147483648) // 2GB in base-2

ze_graph_memory_query_t query{};
ze_result_t result =
_graph_ddi_table_ext.pfnQueryContextMemory(_initStructs->getContext(), ZE_GRAPH_QUERY_MEMORY_DDR, &query);
THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnQueryContextMemory", result, _graph_ddi_table_ext);
ze_result_t result = _initStructs->getGraphDdiTable().pfnQueryContextMemory(_initStructs->getContext(),
ZE_GRAPH_QUERY_MEMORY_DDR,
&query);
THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnQueryContextMemory", result, _initStructs->getGraphDdiTable());

// For drivers with graph_extension < 1.9 we report fixed 2GB max allocation size (old drivers don't support more)
// For drivers with graph_extension > 1.9 we report the value they return
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_npu/src/backend/src/zero_host_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

namespace intel_npu {

ZeroHostTensor::ZeroHostTensor(std::shared_ptr<ov::IRemoteContext> context,
std::shared_ptr<ZeroInitStructsHolder> init_structs,
ZeroHostTensor::ZeroHostTensor(const std::shared_ptr<ov::IRemoteContext>& context,
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const ov::element::Type element_type,
const ov::Shape& shape,
const Config& config)
Expand Down
14 changes: 5 additions & 9 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,20 +167,16 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
_levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
_inputTensorsData(_metadata.inputs.size(), std::vector<std::optional<TensorData>>(1, std::nullopt)),
_outputTensorsData(_metadata.outputs.size(), std::nullopt),
_profilingPool(static_cast<ze_graph_handle_t>(_graph->get_handle()),
zeroProfiling::POOL_SIZE,
_initStructs->getProfilingDdiTable()),
_profilingQuery(0, _initStructs->getDevice(), _initStructs->getProfilingDdiTable()) {
_profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
_profilingQuery(_initStructs, 0) {
_logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");
const std::vector<ArgumentDescriptor>& executorInputDescriptors = _graph->get_input_descriptors();
const std::vector<ArgumentDescriptor>& executorOutputDescriptors = _graph->get_output_descriptors();

auto proftype = config.get<PROFILING_TYPE>();
if (proftype == ov::intel_npu::ProfilingType::INFER) {
_logger.debug("ZeroInferRequest::ZeroInferRequest - profiling type == ov::intel_npu::ProfilingType::INFER");
_npuProfiling = std::make_shared<zeroProfiling::NpuInferProfiling>(_initStructs->getContext(),
_initStructs->getDevice(),
_config.get<LOG_LEVEL>());
_npuProfiling = std::make_shared<zeroProfiling::NpuInferProfiling>(_initStructs, _config.get<LOG_LEVEL>());
}

_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
Expand Down Expand Up @@ -296,7 +292,7 @@ void ZeroInferRequest::create_pipeline() {
_logger.debug("ZeroInferRequest::create_pipeline - SyncInferRequest completed");
}

void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor> tensor,
void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tensor,
const size_t index,
const bool isInput) {
OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data");
Expand Down Expand Up @@ -347,7 +343,7 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor> tensor
}
}

void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTensor> tensor,
void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTensor>& tensor,
const size_t index,
const bool isInput) {
OV_ITT_TASK_CHAIN(ZERO_SET_REMOTE_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_remote_tensor_data");
Expand Down
8 changes: 3 additions & 5 deletions src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Pipeline::Pipeline(const Config& config,
const std::shared_ptr<IGraph>& graph,
zeroProfiling::ProfilingPool& profiling_pool,
zeroProfiling::ProfilingQuery& profiling_query,
std::shared_ptr<zeroProfiling::NpuInferProfiling> npu_profiling,
const std::shared_ptr<zeroProfiling::NpuInferProfiling>& npu_profiling,
const std::vector<std::vector<std::optional<TensorData>>>& inputTensorsData,
const std::vector<std::optional<TensorData>>& outputTensorsData,
size_t numberOfCommandLists,
Expand All @@ -30,7 +30,7 @@ Pipeline::Pipeline(const Config& config,
_event_pool{initStructs->getDevice(),
initStructs->getContext(),
numberOfCommandLists ? static_cast<uint32_t>(numberOfCommandLists) : 1},
_npu_profiling(std::move(npu_profiling)),
_npu_profiling(npu_profiling),
_logger("Pipeline", _config.get<LOG_LEVEL>()) {
OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::Pipeline::Pipeline");
_logger.debug("Pipeline - initialize started");
Expand All @@ -45,9 +45,7 @@ Pipeline::Pipeline(const Config& config,
_logger.debug("Pipeline - emplace_back _event_pool and _command_queue");
for (size_t i = 0; i < numberOfCommandLists; i++) {
_command_lists.emplace_back(
std::make_unique<CommandList>(initStructs->getDevice(),
initStructs->getContext(),
initStructs->getGraphDdiTable(),
std::make_unique<CommandList>(initStructs,
group_ordinal,
initStructs->getMutableCommandListVersion() ? true : false));
_events.emplace_back(std::make_unique<Event>(_event_pool.handle(), static_cast<uint32_t>(i)));
Expand Down
31 changes: 16 additions & 15 deletions src/plugins/intel_npu/src/backend/src/zero_profiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,21 @@ struct ZeProfilingTypeId<uint8_t> {
};

bool ProfilingPool::create() {
auto ret = _graph_profiling_ddi_table_ext.pfnProfilingPoolCreate(_graph_handle, _profiling_count, &_handle);
auto ret =
_init_structs->getProfilingDdiTable().pfnProfilingPoolCreate(_graph->get_handle(), _profiling_count, &_handle);
return ((ZE_RESULT_SUCCESS == ret) && (_handle != nullptr));
}

ProfilingPool::~ProfilingPool() {
if (_handle) {
_graph_profiling_ddi_table_ext.pfnProfilingPoolDestroy(_handle);
_init_structs->getProfilingDdiTable().pfnProfilingPoolDestroy(_handle);
}
}

void ProfilingQuery::create(const ze_graph_profiling_pool_handle_t& profiling_pool) {
THROW_ON_FAIL_FOR_LEVELZERO(
"pfnProfilingQueryCreate",
_graph_profiling_ddi_table_ext.pfnProfilingQueryCreate(profiling_pool, _index, &_handle));
_init_structs->getProfilingDdiTable().pfnProfilingQueryCreate(profiling_pool, _index, &_handle));
}

LayerStatistics ProfilingQuery::getLayerStatistics() const {
Expand All @@ -59,7 +60,7 @@ LayerStatistics ProfilingQuery::getLayerStatistics() const {

ProfilingQuery::~ProfilingQuery() {
if (_handle) {
_graph_profiling_ddi_table_ext.pfnProfilingQueryDestroy(_handle);
_init_structs->getProfilingDdiTable().pfnProfilingQueryDestroy(_handle);
}
}

Expand All @@ -69,7 +70,7 @@ void ProfilingQuery::queryGetData(const ze_graph_profiling_type_t profilingType,
if (_handle && pSize) {
THROW_ON_FAIL_FOR_LEVELZERO(
"pfnProfilingQueryGetData",
_graph_profiling_ddi_table_ext.pfnProfilingQueryGetData(_handle, profilingType, pSize, pData));
_init_structs->getProfilingDdiTable().pfnProfilingQueryGetData(_handle, profilingType, pSize, pData));
}
}

Expand All @@ -95,7 +96,8 @@ void ProfilingQuery::getProfilingProperties(ze_device_profiling_data_properties_
if (_handle && properties) {
THROW_ON_FAIL_FOR_LEVELZERO(
"getProfilingProperties",
_graph_profiling_ddi_table_ext.pfnDeviceGetProfilingDataProperties(_device_handle, properties));
_init_structs->getProfilingDdiTable().pfnDeviceGetProfilingDataProperties(_init_structs->getDevice(),
properties));
}
}

Expand Down Expand Up @@ -179,30 +181,29 @@ NpuInferStatistics NpuInferProfiling::getNpuInferStatistics() const {
return npuPerfCounts;
}

NpuInferProfiling::NpuInferProfiling(ze_context_handle_t context,
ze_device_handle_t device_handle,
NpuInferProfiling::NpuInferProfiling(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
ov::log::Level loglevel)
: _context(context),
_device_handle(device_handle),
: _init_structs(init_structs),
_loglevel(loglevel),
_logger("InferProfiling", loglevel) {
/// Fetch and store the device timer resolution
_dev_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2;
THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties", zeDeviceGetProperties(_device_handle, &_dev_properties));
THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties",
zeDeviceGetProperties(_init_structs->getDevice(), &_dev_properties));
/// Request mem allocations
ze_host_mem_alloc_desc_t desc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC,
nullptr,
ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED};
THROW_ON_FAIL_FOR_LEVELZERO(
"zeMemAllocHost",
zeMemAllocHost(_context,
zeMemAllocHost(_init_structs->getContext(),
&desc,
sizeof(uint64_t),
64,
&npu_ts_infer_start)); // align to 64 bytes to match npu l2 cache line size
THROW_ON_FAIL_FOR_LEVELZERO(
"zeMemAllocHost",
zeMemAllocHost(_context,
zeMemAllocHost(_init_structs->getContext(),
&desc,
sizeof(uint64_t),
64,
Expand Down Expand Up @@ -235,13 +236,13 @@ int64_t NpuInferProfiling::convertCCtoUS(int64_t val_cc) const {
NpuInferProfiling::~NpuInferProfiling() {
/// deallocate npu_ts_infer_start and npu_ts_infer_end, allocated externally by ze driver
if (npu_ts_infer_start != nullptr) {
auto ze_ret = zeMemFree(_context, npu_ts_infer_start);
auto ze_ret = zeMemFree(_init_structs->getContext(), npu_ts_infer_start);
if (ZE_RESULT_SUCCESS != ze_ret) {
_logger.error("zeMemFree on npu_ts_infer_start failed %#X", uint64_t(ze_ret));
}
}
if (npu_ts_infer_end != nullptr) {
auto ze_ret = zeMemFree(_context, npu_ts_infer_end);
auto ze_ret = zeMemFree(_init_structs->getContext(), npu_ts_infer_end);
if (ZE_RESULT_SUCCESS != ze_ret) {
_logger.error("zeMemFree on npu_ts_infer_end failed %#X", uint64_t(ze_ret));
}
Expand Down
Loading
Loading