From 6d48a5a4cc43fb548559d4f9fa8a42756db7290c Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Wed, 26 Nov 2025 15:18:52 +0000 Subject: [PATCH 1/6] temporary code --- .../compiler_adapter/src/ze_graph_ext_wrappers.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index c1379a98c9cbba..c265a918b490ba 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -335,11 +335,18 @@ bool ZeGraphExtWrappers::canCpuVaBeImported(const void* data, size_t size) const return true; } +// ze_graph_input_hash_t graphInputHash = {}; +// graphInputHash.stype = ZE_STRUCTURE_TYPE_GRAPH_INPUT_HASH; +// graphInputHash.hash = hash; + GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(SerializedIR serializedIR, const std::string& buildFlags, const bool bypassUmdCache) const { ze_graph_handle_t graphHandle = nullptr; + const uint64_t hash = 14; + ze_graph_input_hash_t modelHash = {ZE_STRUCTURE_TYPE_GRAPH_INPUT_HASH, nullptr, hash}; + uint32_t flags = ZE_GRAPH_FLAG_NONE; if (bypassUmdCache) { _logger.debug("getGraphDescriptor - set ZE_GRAPH_FLAG_DISABLE_CACHING"); @@ -347,11 +354,11 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(SerializedIR serializedIR } ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - nullptr, + &modelHash, ZE_GRAPH_FORMAT_NGRAPH_LITE, serializedIR.first, serializedIR.second.get(), - buildFlags.c_str(), + "", flags}; _logger.debug("getGraphDescriptor - perform pfnCreate2"); @@ -360,6 +367,7 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(SerializedIR serializedIR &desc, &graphHandle); THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate2", result, _zeroInitStruct->getGraphDdiTable()); + OPENVINO_THROW(""); return GraphDescriptor{graphHandle}; } From a3fe659cc3df946e8f0d7d7f86536d1a1f72886a Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Wed, 26 Nov 2025 15:28:42 +0000 Subject: [PATCH 2/6] WeightsPointerAttribute is not deterministic --- .../src/al/include/intel_npu/weights_pointer_attribute.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp index ff61286c71badc..61a770abebcbec 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp @@ -38,6 +38,10 @@ class WeightsPointerAttribute : public ov::RuntimeAttribute { return true; } + bool is_deterministic() const override { + return false; + } + size_t memory_pointer; size_t byte_size; }; From 592714d96505abfbf51e8ae576c601584bc32df6 Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Wed, 26 Nov 2025 15:41:43 +0000 Subject: [PATCH 3/6] Deleting some code that is useless now that the model instance on which the plugin operates is a clone --- .../compiler_adapter/src/vcl_serializer.cpp | 58 ++++--------------- 1 file changed, 11 insertions(+), 47 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp index 909b69634500c3..dc02e3dd24b91c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp @@ -162,25 +162,6 @@ void storeWeightsPointerAttribute(const std::shared_ptr& model) { } } -/** - * @brief Removes the attributes stored by "storeWeightsPointerAttribute" in order to restore the model to its original - * state. - * @see storeWeightsPointerAttribute for details. - */ -void removeWeightsPointerAttribute(const std::shared_ptr& model) { - for (auto&& node : model->get_ops()) { - if (!ov::is_type(node)) { - continue; - } - - ov::RTMap& runtimeInfoMap = node->get_rt_info(); - const auto& resultIt = runtimeInfoMap.find(intel_npu::WeightsPointerAttribute::get_type_info_static()); - if (resultIt != runtimeInfoMap.end()) { - runtimeInfoMap.erase(resultIt); - } - } -} - } // namespace namespace intel_npu::driver_compiler_utils { @@ -201,12 +182,6 @@ class VCLSerializerBase { // There is no const variant of run_passes so use const_cast here // as model serialization does not mutate the model _model = std::const_pointer_cast(origModel); - - if (supportedOpset < 11) { - // Need to clone to modify the model and remain thread safe - _model = _model->clone(); - _logger.info("Clone model for offset smaller than 11"); - } } virtual SerializedIR serialize() = 0; @@ -233,29 +208,19 @@ class VCLSerializerBase { register_serialization_pass(manager); - // We modify the original model object here therefore a mutex is required - static std::mutex rtInfoMutex; - - { - std::lock_guard lock(rtInfoMutex); + // Depending on the driver version, the compiler attached to it may request this information as an indicator + // of the precision/layout preprocessing requirement. We are setting this value to "true" since the API + // version is no longer a cause for altering the metadata. This is due to the preprocessing performed in the + // OpenVINO framework's implementaion, the "ov::Model" object is preprocessed before reaching the NPU + // plugin. + _model->set_rt_info(true, "is_new_api"); + // Flag used for indicating an NPU plugin version which switched the I/O identification convention from + // names to indices. The flag is required in order to inform the driver-compiler adapter to expect indices + // when attempting to deserialize the I/O metadata. + _model->set_rt_info(true, "use_indices_for_io_metadata"); - // Depending on the driver version, the compiler attached to it may request this information as an indicator - // of the precision/layout preprocessing requirement. We are setting this value to "true" since the API - // version is no longer a cause for altering the metadata. This is due to the preprocessing performed in the - // OpenVINO framework's implementaion, the "ov::Model" object is preprocessed before reaching the NPU - // plugin. - _model->set_rt_info(true, "is_new_api"); - // Flag used for indicating an NPU plugin version which switched the I/O identification convention from - // names to indices. The flag is required in order to inform the driver-compiler adapter to expect indices - // when attempting to deserialize the I/O metadata. - _model->set_rt_info(true, "use_indices_for_io_metadata"); + manager.run_passes(_model); - manager.run_passes(_model); - - auto& rtInfo = _model->get_rt_info(); - rtInfo.erase("is_new_api"); - rtInfo.erase("use_indices_for_io_metadata"); - } _logger.debug("serialize_model_to_stream end"); } @@ -464,7 +429,6 @@ SerializedIR serializeIR(const std::shared_ptr& model, SerializedIR serializedIR = VCLSerializerWithoutWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize(); - removeWeightsPointerAttribute(nonConstantModel); return serializedIR; } return VCLSerializerWithWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize(); From d7f9b57366ca04982c0784544717cc03eee6f7f0 Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Wed, 26 Nov 2025 16:06:39 +0000 Subject: [PATCH 4/6] tech debt --- .../intel_npu/src/al/include/intel_npu/config/options.hpp | 2 ++ .../src/al/include/intel_npu/npu_private_properties.hpp | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp index 51403d304718bd..9136857f8fda25 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp @@ -1359,6 +1359,8 @@ struct SEPARATE_WEIGHTS_VERSION final : OptionBase batch_mode{"NPU_BATCH_MODE"}; /** * @brief [Experimental, only for NPU Plugin] - * Type: enum. Default is "ITERATIVE". + * Type: enum. Default is "ITERATIVE". If the compiler-in-plugin is used (intel_npu::compiler_type = + * intel_npu::CompilerType::PLUGIN), then the default becomes "ONE_SHOT". * * The value stored in this entry indicates which implementation of the "weights separation" feature will be used. * Note: NPU_COMPILER_TYPE = DRIVER & NPU_SEPARATE_WEIGHTS_VERSION = ONE_SHOT are not compatible. From b6a2d1e6bf611bab7cc1bc79350669bf558406b8 Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Wed, 26 Nov 2025 16:29:42 +0000 Subject: [PATCH 5/6] Making SerializedIR a struct containing a hash too --- .../src/compiler_adapter/include/vcl_serializer.hpp | 6 +++++- .../src/compiler_adapter/src/vcl_serializer.cpp | 10 +++++++--- .../src/compiler_adapter/src/ze_graph_ext_wrappers.cpp | 8 ++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp index 74bb2c1c0a3fda..7e86d335b48628 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp @@ -16,7 +16,11 @@ namespace intel_npu { -using SerializedIR = std::pair>; +struct SerializedIR { + std::shared_ptr buffer; + size_t size; + std::optional hash = std::nullopt; +}; /** * @brief Contain all required transformation on OpenVINO model in case for external compiler usage and diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp index dc02e3dd24b91c..540a6009f47d54 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp @@ -198,6 +198,8 @@ class VCLSerializerBase { void serialize_model_to_stream(const std::function& register_serialization_pass) { _logger.debug("serialize_model_to_stream"); const auto passConfig = std::make_shared(); + + // Step 1: run compatibility passes ov::pass::Manager manager(std::move(passConfig), "NPU:serialize_model_to_stream"); if (_supportedOpset < 11) { @@ -205,7 +207,9 @@ class VCLSerializerBase { manager.register_pass(); _logger.info("Downgrade op for opset smaller than 11"); } - + // Step 2: store the WeightlessCacheAttribute if requested + // Step 3: serialize + // Step 4: compute the hash if requested register_serialization_pass(manager); // Depending on the driver version, the compiler attached to it may request this information as an indicator @@ -296,7 +300,7 @@ class VCLSerializerWithWeightsCopy : public VCLSerializerBase { OPENVINO_ASSERT(offset == sizeOfSerializedIR); - return std::make_pair(sizeOfSerializedIR, buffer); + return {buffer, sizeOfSerializedIR}; } private: @@ -376,7 +380,7 @@ class VCLSerializerWithoutWeightsCopy : public VCLSerializerBase { std::shared_ptr buffer(new uint8_t[_serializedModelSize], std::default_delete()); serialize_model_to_buffer(buffer.get()); - return SerializedIR(_serializedModelSize, buffer); + return {buffer, _serializedModelSize}; } private: diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index c265a918b490ba..6737758f40b63c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -277,8 +277,8 @@ std::unordered_set ZeGraphExtWrappers::queryGraph(SerializedIR seri ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, nullptr, ZE_GRAPH_FORMAT_NGRAPH_LITE, - serializedIR.first, - serializedIR.second.get(), + serializedIR.size, + serializedIR.buffer.get(), buildFlags.c_str(), ZE_GRAPH_FLAG_NONE}; @@ -356,8 +356,8 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(SerializedIR serializedIR ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, &modelHash, ZE_GRAPH_FORMAT_NGRAPH_LITE, - serializedIR.first, - serializedIR.second.get(), + serializedIR.size, + serializedIR.buffer.get(), "", flags}; From 32f50db55f33f102da1ac871e103bb242279b3a4 Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Wed, 26 Nov 2025 16:43:07 +0000 Subject: [PATCH 6/6] Working on improving the serialization classes --- .../include/vcl_serializer.hpp | 4 +- .../compiler_adapter/src/vcl_serializer.cpp | 51 ++++++++++++++----- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp index 7e86d335b48628..304fc8847937d4 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp @@ -39,7 +39,9 @@ namespace driver_compiler_utils { SerializedIR serializeIR(const std::shared_ptr& model, ze_graph_compiler_version_info_t compilerVersion, const uint32_t supportedOpsetVersion, - const bool useBaseModelSerializer = true); + const bool useBaseModelSerializer = true, + const bool computeModelHash = false, + const bool storeWeightlessCacheAttribute = false); /** * @brief Serialize input / output information to string format. diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp index 540a6009f47d54..3e4096a9f01234 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp @@ -175,12 +175,14 @@ class VCLSerializerBase { public: VCLSerializerBase(const std::shared_ptr& origModel, const ze_graph_compiler_version_info_t compilerVersion, - const uint32_t supportedOpset = 11) + const uint32_t supportedOpset = 11, + const bool computeModelHash = false, + const bool storeWeightlessCacheAttribute = false) : _logger("VCLSerializerBase", Logger::global().level()), _compilerVersion(compilerVersion), - _supportedOpset(supportedOpset) { - // There is no const variant of run_passes so use const_cast here - // as model serialization does not mutate the model + _supportedOpset(supportedOpset), + _computeModelHash(computeModelHash), + _storeWeightlessCacheAttribute(storeWeightlessCacheAttribute) { _model = std::const_pointer_cast(origModel); } @@ -232,6 +234,8 @@ class VCLSerializerBase { std::shared_ptr _model = nullptr; ze_graph_compiler_version_info_t _compilerVersion; uint32_t _supportedOpset = 11; + bool _computeModelHash; + bool _storeWeightlessCacheAttribute; }; /** @@ -241,8 +245,14 @@ class VCLSerializerWithWeightsCopy : public VCLSerializerBase { public: VCLSerializerWithWeightsCopy(const std::shared_ptr& origModel, const ze_graph_compiler_version_info_t compilerVersion, - const uint32_t supportedOpset = 11) - : VCLSerializerBase(origModel, compilerVersion, supportedOpset) { + const uint32_t supportedOpset = 11, + const bool computeModelHash = false, + const bool storeWeightlessCacheAttribute = false) + : VCLSerializerBase(origModel, + compilerVersion, + supportedOpset, + computeModelHash, + storeWeightlessCacheAttribute) { _logger.setName("VCLSerializerWithWeightsCopy"); }; @@ -361,8 +371,14 @@ class VCLSerializerWithoutWeightsCopy : public VCLSerializerBase { public: VCLSerializerWithoutWeightsCopy(const std::shared_ptr& origModel, const ze_graph_compiler_version_info_t compilerVersion, - const uint32_t supportedOpset = 11) - : VCLSerializerBase(origModel, compilerVersion, supportedOpset) { + const uint32_t supportedOpset = 11, + const bool computeModelHash = false, + const bool storeWeightlessCacheAttribute = false) + : VCLSerializerBase(origModel, + compilerVersion, + supportedOpset, + computeModelHash, + storeWeightlessCacheAttribute) { _logger.setName("VCLSerializerWithoutWeightsCopy"); }; @@ -424,18 +440,29 @@ class VCLSerializerWithoutWeightsCopy : public VCLSerializerBase { SerializedIR serializeIR(const std::shared_ptr& model, const ze_graph_compiler_version_info_t compilerVersion, const uint32_t supportedOpsetVersion, - const bool useBaseModelSerializer) { + const bool useBaseModelSerializer, + const bool computeModelHash, + const bool storeWeightlessCacheAttribute) { if (!useBaseModelSerializer) { // Non-constness required for adding & removing weights pointer attributes. The current instance is already a // clone (or should be one), we are not modifying the original model. const std::shared_ptr nonConstantModel = std::const_pointer_cast(model); storeWeightsPointerAttribute(nonConstantModel); - SerializedIR serializedIR = - VCLSerializerWithoutWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize(); + SerializedIR serializedIR = VCLSerializerWithoutWeightsCopy(model, + compilerVersion, + supportedOpsetVersion, + computeModelHash, + storeWeightlessCacheAttribute) + .serialize(); return serializedIR; } - return VCLSerializerWithWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize(); + return VCLSerializerWithWeightsCopy(model, + compilerVersion, + supportedOpsetVersion, + computeModelHash, + storeWeightlessCacheAttribute) + .serialize(); } std::string serializeIOInfo(const std::shared_ptr& model, const bool useIndices) {