From 6d48a5a4cc43fb548559d4f9fa8a42756db7290c Mon Sep 17 00:00:00 2001
From: Razvan Apetroaie <razvan-mihai.apetroaie@intel.com>
Date: Wed, 26 Nov 2025 15:18:52 +0000
Subject: [PATCH 1/6] Temporary code: pass hard-coded input hash to pfnCreate2

---
 .../compiler_adapter/src/ze_graph_ext_wrappers.cpp   | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
index c1379a98c9cbba..c265a918b490ba 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
@@ -335,11 +335,18 @@ bool ZeGraphExtWrappers::canCpuVaBeImported(const void* data, size_t size) const
     return true;
 }
 
+// ze_graph_input_hash_t graphInputHash = {};
+//     graphInputHash.stype = ZE_STRUCTURE_TYPE_GRAPH_INPUT_HASH;
+//     graphInputHash.hash = hash;
+
 GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(SerializedIR serializedIR,
                                                        const std::string& buildFlags,
                                                        const bool bypassUmdCache) const {
     ze_graph_handle_t graphHandle = nullptr;
 
+    const uint64_t hash = 14;  // TODO: hard-coded placeholder (temporary code); replace with a real model hash
+    ze_graph_input_hash_t modelHash = {ZE_STRUCTURE_TYPE_GRAPH_INPUT_HASH, nullptr, hash};
+
     uint32_t flags = ZE_GRAPH_FLAG_NONE;
     if (bypassUmdCache) {
         _logger.debug("getGraphDescriptor - set ZE_GRAPH_FLAG_DISABLE_CACHING");
@@ -347,11 +354,11 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(SerializedIR serializedIR
     }
 
     ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
-                              nullptr,
+                              &modelHash,
                               ZE_GRAPH_FORMAT_NGRAPH_LITE,
                               serializedIR.first,
                               serializedIR.second.get(),
-                              buildFlags.c_str(),
+                              "",
                               flags};
 
     _logger.debug("getGraphDescriptor - perform pfnCreate2");
@@ -360,6 +367,7 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(SerializedIR serializedIR
                                                                  &desc,
                                                                  &graphHandle);
     THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate2", result, _zeroInitStruct->getGraphDdiTable());
+    OPENVINO_THROW("");  // TODO: temporary - always throws (empty message), so the return below is unreachable
 
     return GraphDescriptor{graphHandle};
 }

From a3fe659cc3df946e8f0d7d7f86536d1a1f72886a Mon Sep 17 00:00:00 2001
From: Razvan Apetroaie <razvan-mihai.apetroaie@intel.com>
Date: Wed, 26 Nov 2025 15:28:42 +0000
Subject: [PATCH 2/6] WeightsPointerAttribute is not deterministic

---
 .../src/al/include/intel_npu/weights_pointer_attribute.hpp    | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp
index ff61286c71badc..61a770abebcbec 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp
@@ -38,6 +38,10 @@ class WeightsPointerAttribute : public ov::RuntimeAttribute {
         return true;
     }
 
+    bool is_deterministic() const override {
+        return false;  // NOTE(review): presumably because memory_pointer holds a run-specific address - confirm
+    }
+
     size_t memory_pointer;
     size_t byte_size;
 };

From 592714d96505abfbf51e8ae576c601584bc32df6 Mon Sep 17 00:00:00 2001
From: Razvan Apetroaie <razvan-mihai.apetroaie@intel.com>
Date: Wed, 26 Nov 2025 15:41:43 +0000
Subject: [PATCH 3/6] Deleting some code that is useless now that the model
 instance on which the plugin operates is a clone

---
 .../compiler_adapter/src/vcl_serializer.cpp   | 58 ++++---------------
 1 file changed, 11 insertions(+), 47 deletions(-)

diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
index 909b69634500c3..dc02e3dd24b91c 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
@@ -162,25 +162,6 @@ void storeWeightsPointerAttribute(const std::shared_ptr<ov::Model>& model) {
     }
 }
 
-/**
- * @brief Removes the attributes stored by "storeWeightsPointerAttribute" in order to restore the model to its original
- * state.
- * @see storeWeightsPointerAttribute for details.
- */
-void removeWeightsPointerAttribute(const std::shared_ptr<ov::Model>& model) {
-    for (auto&& node : model->get_ops()) {
-        if (!ov::is_type<ov::op::v0::Constant>(node)) {
-            continue;
-        }
-
-        ov::RTMap& runtimeInfoMap = node->get_rt_info();
-        const auto& resultIt = runtimeInfoMap.find(intel_npu::WeightsPointerAttribute::get_type_info_static());
-        if (resultIt != runtimeInfoMap.end()) {
-            runtimeInfoMap.erase(resultIt);
-        }
-    }
-}
-
 }  // namespace
 
 namespace intel_npu::driver_compiler_utils {
@@ -201,12 +182,6 @@ class VCLSerializerBase {
         // There is no const variant of run_passes so use const_cast here
         // as model serialization does not mutate the model
         _model = std::const_pointer_cast<ov::Model>(origModel);
-
-        if (supportedOpset < 11) {
-            // Need to clone to modify the model and remain thread safe
-            _model = _model->clone();
-            _logger.info("Clone model for offset smaller than 11");
-        }
     }
 
     virtual SerializedIR serialize() = 0;
@@ -233,29 +208,19 @@ class VCLSerializerBase {
 
         register_serialization_pass(manager);
 
-        // We modify the original model object here therefore a mutex is required
-        static std::mutex rtInfoMutex;
-
-        {
-            std::lock_guard<std::mutex> lock(rtInfoMutex);
+        // Depending on the driver version, the compiler attached to it may request this information as an indicator
+        // of the precision/layout preprocessing requirement. We are setting this value to "true" since the API
+        // version is no longer a cause for altering the metadata. This is due to the preprocessing performed in the
+        // OpenVINO framework's implementation, the "ov::Model" object is preprocessed before reaching the NPU
+        // plugin.
+        _model->set_rt_info(true, "is_new_api");
+        // Flag used for indicating an NPU plugin version which switched the I/O identification convention from
+        // names to indices. The flag is required in order to inform the driver-compiler adapter to expect indices
+        // when attempting to deserialize the I/O metadata.
+        _model->set_rt_info(true, "use_indices_for_io_metadata");
 
-            // Depending on the driver version, the compiler attached to it may request this information as an indicator
-            // of the precision/layout preprocessing requirement. We are setting this value to "true" since the API
-            // version is no longer a cause for altering the metadata. This is due to the preprocessing performed in the
-            // OpenVINO framework's implementaion, the "ov::Model" object is preprocessed before reaching the NPU
-            // plugin.
-            _model->set_rt_info(true, "is_new_api");
-            // Flag used for indicating an NPU plugin version which switched the I/O identification convention from
-            // names to indices. The flag is required in order to inform the driver-compiler adapter to expect indices
-            // when attempting to deserialize the I/O metadata.
-            _model->set_rt_info(true, "use_indices_for_io_metadata");
+        manager.run_passes(_model);
 
-            manager.run_passes(_model);
-
-            auto& rtInfo = _model->get_rt_info();
-            rtInfo.erase("is_new_api");
-            rtInfo.erase("use_indices_for_io_metadata");
-        }
         _logger.debug("serialize_model_to_stream end");
     }
 
@@ -464,7 +429,6 @@ SerializedIR serializeIR(const std::shared_ptr<const ov::Model>& model,
 
         SerializedIR serializedIR =
             VCLSerializerWithoutWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize();
-        removeWeightsPointerAttribute(nonConstantModel);
         return serializedIR;
     }
     return VCLSerializerWithWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize();

From d7f9b57366ca04982c0784544717cc03eee6f7f0 Mon Sep 17 00:00:00 2001
From: Razvan Apetroaie <razvan-mihai.apetroaie@intel.com>
Date: Wed, 26 Nov 2025 16:06:39 +0000
Subject: [PATCH 4/6] Tech debt: document ONE_SHOT default for compiler-in-plugin

---
 .../intel_npu/src/al/include/intel_npu/config/options.hpp      | 2 ++
 .../src/al/include/intel_npu/npu_private_properties.hpp        | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
index 51403d304718bd..9136857f8fda25 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
@@ -1359,6 +1359,8 @@ struct SEPARATE_WEIGHTS_VERSION final : OptionBase<SEPARATE_WEIGHTS_VERSION, ov:
     }
 
     static ov::intel_npu::WSVersion defaultValue() {
+        // Note: if the compiler-in-plugin is used (intel_npu::compiler_type = intel_npu::CompilerType::PLUGIN), then
+        // the default is actually WSVersion::ONE_SHOT
         return ov::intel_npu::WSVersion::ITERATIVE;
     }
 
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp
index 6f88d1e549eb92..2c7b7c84b706a8 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp
@@ -375,7 +375,8 @@ static constexpr ov::Property<BatchMode> batch_mode{"NPU_BATCH_MODE"};
 
 /**
  * @brief [Experimental, only for NPU Plugin]
- * Type: enum. Default is "ITERATIVE".
+ * Type: enum. Default is "ITERATIVE". If the compiler-in-plugin is used (intel_npu::compiler_type =
+ * intel_npu::CompilerType::PLUGIN), then the default becomes "ONE_SHOT".
  *
  * The value stored in this entry indicates which implementation of the "weights separation" feature will be used.
  * Note: NPU_COMPILER_TYPE = DRIVER & NPU_SEPARATE_WEIGHTS_VERSION = ONE_SHOT are not compatible.

From b6a2d1e6bf611bab7cc1bc79350669bf558406b8 Mon Sep 17 00:00:00 2001
From: Razvan Apetroaie <razvan-mihai.apetroaie@intel.com>
Date: Wed, 26 Nov 2025 16:29:42 +0000
Subject: [PATCH 5/6] Making SerializedIR a struct containing a hash too

---
 .../src/compiler_adapter/include/vcl_serializer.hpp    |  6 +++++-
 .../src/compiler_adapter/src/vcl_serializer.cpp        | 10 +++++++---
 .../src/compiler_adapter/src/ze_graph_ext_wrappers.cpp |  8 ++++----
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp
index 74bb2c1c0a3fda..7e86d335b48628 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp
@@ -16,7 +16,11 @@
 
 namespace intel_npu {
 
-using SerializedIR = std::pair<size_t, std::shared_ptr<uint8_t>>;
+struct SerializedIR {
+    std::shared_ptr<uint8_t> buffer;  // Bytes of the serialized IR
+    size_t size;  // Size of "buffer", in bytes
+    std::optional<uint64_t> hash = std::nullopt;  // Optional hash attached to the IR; std::nullopt when none was computed
+};
 
 /**
  * @brief Contain all required transformation on OpenVINO model in case for external compiler usage and
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
index dc02e3dd24b91c..540a6009f47d54 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
@@ -198,6 +198,8 @@ class VCLSerializerBase {
     void serialize_model_to_stream(const std::function<void(ov::pass::Manager&)>& register_serialization_pass) {
         _logger.debug("serialize_model_to_stream");
         const auto passConfig = std::make_shared<ov::pass::PassConfig>();
+
+        // Step 1: run compatibility passes
         ov::pass::Manager manager(std::move(passConfig), "NPU:serialize_model_to_stream");
 
         if (_supportedOpset < 11) {
@@ -205,7 +207,9 @@ class VCLSerializerBase {
             manager.register_pass<ov::pass::ConvertInterpolate11ToInterpolate4>();
             _logger.info("Downgrade op for opset smaller than 11");
         }
-
+        // Step 2: store the WeightlessCacheAttribute if requested
+        // Step 3: serialize
+        // Step 4: compute the hash if requested
         register_serialization_pass(manager);
 
         // Depending on the driver version, the compiler attached to it may request this information as an indicator
@@ -296,7 +300,7 @@ class VCLSerializerWithWeightsCopy : public VCLSerializerBase {
 
         OPENVINO_ASSERT(offset == sizeOfSerializedIR);
 
-        return std::make_pair(sizeOfSerializedIR, buffer);
+        return {buffer, sizeOfSerializedIR};
     }
 
 private:
@@ -376,7 +380,7 @@ class VCLSerializerWithoutWeightsCopy : public VCLSerializerBase {
         std::shared_ptr<uint8_t> buffer(new uint8_t[_serializedModelSize], std::default_delete<uint8_t[]>());
         serialize_model_to_buffer(buffer.get());
 
-        return SerializedIR(_serializedModelSize, buffer);
+        return {buffer, _serializedModelSize};
     }
 
 private:
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
index c265a918b490ba..6737758f40b63c 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
@@ -277,8 +277,8 @@ std::unordered_set<std::string> ZeGraphExtWrappers::queryGraph(SerializedIR seri
     ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
                               nullptr,
                               ZE_GRAPH_FORMAT_NGRAPH_LITE,
-                              serializedIR.first,
-                              serializedIR.second.get(),
+                              serializedIR.size,
+                              serializedIR.buffer.get(),
                               buildFlags.c_str(),
                               ZE_GRAPH_FLAG_NONE};
 
@@ -356,8 +356,8 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(SerializedIR serializedIR
     ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
                               &modelHash,
                               ZE_GRAPH_FORMAT_NGRAPH_LITE,
-                              serializedIR.first,
-                              serializedIR.second.get(),
+                              serializedIR.size,
+                              serializedIR.buffer.get(),
                               "",
                               flags};
 

From 32f50db55f33f102da1ac871e103bb242279b3a4 Mon Sep 17 00:00:00 2001
From: Razvan Apetroaie <razvan-mihai.apetroaie@intel.com>
Date: Wed, 26 Nov 2025 16:43:07 +0000
Subject: [PATCH 6/6] Plumb hash and weightless-cache flags through serializers

---
 .../include/vcl_serializer.hpp                |  4 +-
 .../compiler_adapter/src/vcl_serializer.cpp   | 51 ++++++++++++++-----
 2 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp
index 7e86d335b48628..304fc8847937d4 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp
@@ -39,7 +39,9 @@ namespace driver_compiler_utils {
 SerializedIR serializeIR(const std::shared_ptr<const ov::Model>& model,
                          ze_graph_compiler_version_info_t compilerVersion,
                          const uint32_t supportedOpsetVersion,
-                         const bool useBaseModelSerializer = true);
+                         const bool useBaseModelSerializer = true,
+                         const bool computeModelHash = false,
+                         const bool storeWeightlessCacheAttribute = false);
 
 /**
  * @brief Serialize input / output information to string format.
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
index 540a6009f47d54..3e4096a9f01234 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
@@ -175,12 +175,14 @@ class VCLSerializerBase {
 public:
     VCLSerializerBase(const std::shared_ptr<const ov::Model>& origModel,
                       const ze_graph_compiler_version_info_t compilerVersion,
-                      const uint32_t supportedOpset = 11)
+                      const uint32_t supportedOpset = 11,
+                      const bool computeModelHash = false,
+                      const bool storeWeightlessCacheAttribute = false)
         : _logger("VCLSerializerBase", Logger::global().level()),
           _compilerVersion(compilerVersion),
-          _supportedOpset(supportedOpset) {
-        // There is no const variant of run_passes so use const_cast here
-        // as model serialization does not mutate the model
+          _supportedOpset(supportedOpset),
+          _computeModelHash(computeModelHash),
+          _storeWeightlessCacheAttribute(storeWeightlessCacheAttribute) {
         _model = std::const_pointer_cast<ov::Model>(origModel);
     }
 
@@ -232,6 +234,8 @@ class VCLSerializerBase {
     std::shared_ptr<ov::Model> _model = nullptr;
     ze_graph_compiler_version_info_t _compilerVersion;
     uint32_t _supportedOpset = 11;
+    bool _computeModelHash;
+    bool _storeWeightlessCacheAttribute;
 };
 
 /**
@@ -241,8 +245,14 @@ class VCLSerializerWithWeightsCopy : public VCLSerializerBase {
 public:
     VCLSerializerWithWeightsCopy(const std::shared_ptr<const ov::Model>& origModel,
                                  const ze_graph_compiler_version_info_t compilerVersion,
-                                 const uint32_t supportedOpset = 11)
-        : VCLSerializerBase(origModel, compilerVersion, supportedOpset) {
+                                 const uint32_t supportedOpset = 11,
+                                 const bool computeModelHash = false,
+                                 const bool storeWeightlessCacheAttribute = false)
+        : VCLSerializerBase(origModel,
+                            compilerVersion,
+                            supportedOpset,
+                            computeModelHash,
+                            storeWeightlessCacheAttribute) {
         _logger.setName("VCLSerializerWithWeightsCopy");
     };
 
@@ -361,8 +371,14 @@ class VCLSerializerWithoutWeightsCopy : public VCLSerializerBase {
 public:
     VCLSerializerWithoutWeightsCopy(const std::shared_ptr<const ov::Model>& origModel,
                                     const ze_graph_compiler_version_info_t compilerVersion,
-                                    const uint32_t supportedOpset = 11)
-        : VCLSerializerBase(origModel, compilerVersion, supportedOpset) {
+                                    const uint32_t supportedOpset = 11,
+                                    const bool computeModelHash = false,
+                                    const bool storeWeightlessCacheAttribute = false)
+        : VCLSerializerBase(origModel,
+                            compilerVersion,
+                            supportedOpset,
+                            computeModelHash,
+                            storeWeightlessCacheAttribute) {
         _logger.setName("VCLSerializerWithoutWeightsCopy");
     };
 
@@ -424,18 +440,29 @@ class VCLSerializerWithoutWeightsCopy : public VCLSerializerBase {
 SerializedIR serializeIR(const std::shared_ptr<const ov::Model>& model,
                          const ze_graph_compiler_version_info_t compilerVersion,
                          const uint32_t supportedOpsetVersion,
-                         const bool useBaseModelSerializer) {
+                         const bool useBaseModelSerializer,
+                         const bool computeModelHash,
+                         const bool storeWeightlessCacheAttribute) {
     if (!useBaseModelSerializer) {
         // Non-constness required for adding & removing weights pointer attributes. The current instance is already a
         // clone (or should be one), we are not modifying the original model.
         const std::shared_ptr<ov::Model> nonConstantModel = std::const_pointer_cast<ov::Model>(model);
         storeWeightsPointerAttribute(nonConstantModel);
 
-        SerializedIR serializedIR =
-            VCLSerializerWithoutWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize();
+        SerializedIR serializedIR = VCLSerializerWithoutWeightsCopy(model,
+                                                                    compilerVersion,
+                                                                    supportedOpsetVersion,
+                                                                    computeModelHash,
+                                                                    storeWeightlessCacheAttribute)
+                                        .serialize();
         return serializedIR;
     }
-    return VCLSerializerWithWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize();
+    return VCLSerializerWithWeightsCopy(model,
+                                        compilerVersion,
+                                        supportedOpsetVersion,
+                                        computeModelHash,
+                                        storeWeightlessCacheAttribute)
+        .serialize();
 }
 
 std::string serializeIOInfo(const std::shared_ptr<const ov::Model>& model, const bool useIndices) {