From 13319df7236cc820c6cdc0b390978acfc0476771 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Mon, 26 Feb 2024 15:12:37 +0800
Subject: [PATCH 01/32] initial implementation

---
 src/bindings/c/docs/api_overview.md           |  2 +
 .../c/include/openvino/c/ov_property.h        |  7 +++
 src/bindings/c/src/ov_property.cpp            |  1 +
 src/bindings/c/tests/ov_core_test.cpp         |  8 +++
 .../runtime/properties/hint/__init__.py       |  1 +
 .../pyopenvino/core/properties/properties.cpp |  1 +
 .../tests/test_runtime/test_properties.py     |  6 ++-
 .../include/openvino/runtime/properties.hpp   | 51 +++++++++++++++++++
 src/plugins/intel_cpu/src/compiled_model.cpp  |  6 ++-
 src/plugins/intel_cpu/src/config.cpp          | 15 ++++++
 src/plugins/intel_cpu/src/config.h            |  1 +
 src/plugins/intel_cpu/src/plugin.cpp          |  4 ++
 .../custom/behavior/export_import.cpp         | 15 ++++++
 .../ov_executable_network/properties.cpp      |  1 +
 .../custom/behavior/ov_plugin/properties.cpp  |  1 +
 .../behavior/ov_plugin/properties_tests.cpp   |  9 ++++
 16 files changed, 127 insertions(+), 2 deletions(-)

diff --git a/src/bindings/c/docs/api_overview.md b/src/bindings/c/docs/api_overview.md
index 6ca2ad403c1a7e..8c7debee7e3bd1 100644
--- a/src/bindings/c/docs/api_overview.md
+++ b/src/bindings/c/docs/api_overview.md
@@ -309,6 +309,8 @@ OPENVINO_C_VAR(const char*) ov_property_key_affinity;
 
 OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads;
 
+OPENVINO_C_VAR(const char*) ov_property_key_hint_max_threads_per_stream;
+
 OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_cpu_pinning;
 
 OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_hyper_threading;
diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h
index dbefcbb366a0e5..f3165d242f22fc 100644
--- a/src/bindings/c/include/openvino/c/ov_property.h
+++ b/src/bindings/c/include/openvino/c/ov_property.h
@@ -128,6 +128,13 @@ ov_property_key_affinity;
 OPENVINO_C_VAR(const char*)
 ov_property_key_inference_num_threads;
 
+/**
+ * @brief Read-write property<string> selecting max threads per stream for CPU inference: AUTO/PER_PLATFORM/PER_SOCKET.
+ * @ingroup ov_property_c_api
+ */
+OPENVINO_C_VAR(const char*)
+ov_property_key_hint_max_threads_per_stream;
+
 /**
  * @brief Read-write property, it is high-level OpenVINO hint for using CPU pinning to bind CPU threads to processors
  * during inference
diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp
index 8a083e2afd8c41..ffb6d9f90e105d 100644
--- a/src/bindings/c/src/ov_property.cpp
+++ b/src/bindings/c/src/ov_property.cpp
@@ -23,6 +23,7 @@ const char* ov_property_key_cache_mode = "CACHE_MODE";
 const char* ov_property_key_num_streams = "NUM_STREAMS";
 const char* ov_property_key_affinity = "AFFINITY";
 const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS";
+const char* ov_property_key_hint_max_threads_per_stream = "MAX_THREADS_PER_STREAM";
 const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT";
 const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING";
 const char* ov_property_key_hint_scheduling_core_type = "SCHEDULING_CORE_TYPE";
diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp
index 69762a901d8f69..8b5d5845475512 100644
--- a/src/bindings/c/tests/ov_core_test.cpp
+++ b/src/bindings/c/tests/ov_core_test.cpp
@@ -298,6 +298,14 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
     EXPECT_STREQ(val_type, ret);
     ov_free(ret);
 
+    const char* key_type = ov_property_key_hint_max_threads_per_stream;
+    const char* val_type = "PER_PLATFORM";
+    OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_type, val_type));
+    ret = nullptr;
+    OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret));
+    EXPECT_STREQ(val_type, ret);
+    ov_free(ret);
+
     OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val));
     ret = nullptr;
     OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret));
diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
index cce898891e4af3..b32b51ce6482b0 100644
--- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
@@ -14,6 +14,7 @@
 from openvino._pyopenvino.properties.hint import performance_mode
 from openvino._pyopenvino.properties.hint import enable_cpu_pinning
 from openvino._pyopenvino.properties.hint import scheduling_core_type
+from openvino._pyopenvino.properties.hint import max_threads_per_stream
 from openvino._pyopenvino.properties.hint import enable_hyper_threading
 from openvino._pyopenvino.properties.hint import execution_mode
 from openvino._pyopenvino.properties.hint import num_requests
diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
index 6ed59721c59d88..abae0d3dac248f 100644
--- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -81,6 +81,7 @@ void regmodule_properties(py::module m) {
     wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode");
     wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning");
     wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type");
+    wrap_property_RW(m_hint, ov::hint::max_threads_per_stream, "max_threads_per_stream");
     wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading");
     wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode");
     wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests");
diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index 64a47a1ceed8fb..ebfd9f6c8dedc7 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -279,6 +279,11 @@ def test_properties_ro(ov_property_ro, expected_value):
             "SCHEDULING_CORE_TYPE",
             ((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),),
         ),
+        (
+            hints.max_threads_per_stream,
+            "MAX_THREADS_PER_STREAM",
+            ((hints.MaxThreadsPerStream.PER_PLATFORM, hints.MaxThreadsPerStream.PER_PLATFORM),),
+        ),
         (
             hints.enable_hyper_threading,
             "ENABLE_HYPER_THREADING",
@@ -541,7 +546,6 @@ def test_single_property_setting(device):
             props.affinity: "NONE",
             "INFERENCE_PRECISION_HINT": Type.f32,
             hints.performance_mode: hints.PerformanceMode.LATENCY,
-            hints.scheduling_core_type: hints.SchedulingCoreType.PCORE_ONLY,
             hints.num_requests: 12,
             "NUM_STREAMS": streams.Num(5),
             "ENABLE_MMAP": "NO",
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 9dc28cab1b01cc..b8496a0cfae093 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -383,6 +383,42 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
 }
 /** @endcond */
 
+enum class MaxThreadsPerStream {
+    AUTO,          //!<  Using all threads per platform for one stream. Will create sub stream on dual socket platform.
+    PER_PLATFORM,  //!<  Using all threads per platform for one stream even on dual socket platform.
+    PER_SOCKET,    //!<  Using all threads per socket for one stream on dual socket platform.
+};
+
+/** @cond INTERNAL */
+inline std::ostream& operator<<(std::ostream& os, const MaxThreadsPerStream& stream_mode) {
+    switch (stream_mode) {
+    case MaxThreadsPerStream::AUTO:
+        return os << "AUTO";
+    case MaxThreadsPerStream::PER_PLATFORM:
+        return os << "PER_PLATFORM";
+    case MaxThreadsPerStream::PER_SOCKET:
+        return os << "PER_SOCKET";
+    default:
+        OPENVINO_THROW("Unsupported mode!");
+    }
+}
+
+inline std::istream& operator>>(std::istream& is, MaxThreadsPerStream& stream_mode) {
+    std::string str;
+    is >> str;
+    if (str == "AUTO") {
+        stream_mode = MaxThreadsPerStream::AUTO;
+    } else if (str == "PER_PLATFORM") {
+        stream_mode = MaxThreadsPerStream::PER_PLATFORM;
+    } else if (str == "PER_SOCKET") {
+        stream_mode = MaxThreadsPerStream::PER_SOCKET;
+    } else {
+        OPENVINO_THROW("Unsupported mode: ", str);
+    }
+    return is;
+}
+/** @endcond */
+
 /**
  * @brief This property defines CPU core type which can be used during inference.
  * @ingroup ov_runtime_cpp_prop_api
@@ -399,6 +435,21 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
  */
 static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};
 
+/**
+ * @brief This property defines max threads per stream used for CPU inference.
+ * @ingroup ov_runtime_cpp_prop_api
+ *
+ * Developer can use this property to select max threads per stream for CPU inference. Please refer MaxThreadsPerStream
+ * for all definition of types.
+ *
+ * The following code is an example to only use all threads per socket for one stream on dual sockets platform.
+ *
+ * @code
+ * ie.set_property(ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET));
+ * @endcode
+ */
+static constexpr Property<SchedulingCoreType> max_threads_per_stream{"MAX_THREADS_PER_STREAM"};
+
 /**
  * @brief This property allows CPU pinning during inference.
  * @ingroup ov_runtime_cpp_prop_api
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index 85e84c22afaf2b..377294a7ae3577 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -193,6 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
             RO_property(ov::hint::num_requests.name()),
             RO_property(ov::hint::enable_cpu_pinning.name()),
             RO_property(ov::hint::scheduling_core_type.name()),
+            RO_property(ov::hint::max_threads_per_stream.name()),
             RO_property(ov::hint::enable_hyper_threading.name()),
             RO_property(ov::execution_devices.name()),
             RO_property(ov::intel_cpu::denormals_optimization.name()),
@@ -246,7 +247,10 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
         const bool use_pin = config.enableCpuPinning;
         return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin);
     } else if (name == ov::hint::scheduling_core_type) {
-        const auto core_type = config.schedulingCoreType;
+        const auto stream_mode = config.schedulingCoreType;
+        return stream_mode;
+    } else if (name == ov::hint::max_threads_per_stream) {
+        const auto core_type = config.maxThreadsPerStream;
         return core_type;
     } else if (name == ov::hint::enable_hyper_threading.name()) {
         const bool use_ht = config.enableHyperThreading;
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 8a8c5cca6a771a..94e98e32f642bb 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -191,6 +191,21 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                '/',
                                ov::hint::SchedulingCoreType::ECORE_ONLY);
             }
+        } else if (key == ov::hint::max_threads_per_stream.name()) {
+            try {
+                maxThreadsPerStream = val.as<ov::hint::MaxThreadsPerStream>();
+            } catch (ov::Exception&) {
+                OPENVINO_THROW("Wrong value ",
+                               val.as<std::string>(),
+                               "for property key ",
+                               ov::hint::max_threads_per_stream.name(),
+                               ". Expected only ",
+                               ov::hint::MaxThreadsPerStream::AUTO,
+                               '/',
+                               ov::hint::MaxThreadsPerStream::PER_PLATFORM,
+                               '/',
+                               ov::hint::MaxThreadsPerStream::PER_SOCKET);
+            }
         } else if (key == ov::hint::enable_hyper_threading.name()) {
             try {
                 enableHyperThreading = val.as<bool>();
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index 32faf152e017be..8a97682507785f 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -76,6 +76,7 @@ struct Config {
     bool enableCpuPinning = true;
     bool changedCpuPinning = false;
     ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
+    ov::hint::MaxThreadsPerStream maxThreadsPerStream = ov::hint::MaxThreadsPerStream::AUTO;
     bool enableHyperThreading = true;
     bool changedHyperThreading = false;
     Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index ad48ac4b9f4e98..61ef796ab6fed8 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -410,6 +410,9 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options)
     } else if (name == ov::hint::scheduling_core_type) {
         const auto core_type = engConfig.schedulingCoreType;
         return core_type;
+    } else if (name == ov::hint::max_threads_per_stream) {
+        const auto stream_mode = engConfig.maxThreadsPerStream;
+        return stream_mode;
     } else if (name == ov::hint::enable_hyper_threading) {
         const bool ht_value = engConfig.enableHyperThreading;
         return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value);
@@ -479,6 +482,7 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio
                                                     RW_property(ov::hint::num_requests.name()),
                                                     RW_property(ov::hint::enable_cpu_pinning.name()),
                                                     RW_property(ov::hint::scheduling_core_type.name()),
+                                                    RW_property(ov::hint::max_threads_per_stream.name()),
                                                     RW_property(ov::hint::enable_hyper_threading.name()),
                                                     RW_property(ov::device::id.name()),
                                                     RW_property(ov::intel_cpu::denormals_optimization.name()),
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
index 17ef4ac956d94d..29a5194cf22a8f 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
@@ -116,6 +116,18 @@ const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_3 = {
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
 
+const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream_1 = {
+    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)},
+    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)}};
+
+const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream_2 = {
+    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)},
+    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}};
+
+const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream = {
+    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)},
+    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}};
+
 const std::vector<ov::AnyMap> testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)},
                                                                              {ov::hint::enable_hyper_threading(false)}};
 
@@ -131,6 +143,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest,
                                                              testing_property_for_scheduling_core_type_1,
                                                              testing_property_for_scheduling_core_type_2,
                                                              testing_property_for_scheduling_core_type_3,
+                                                             testing_property_for_max_threads_per_stream_1,
+                                                             testing_property_for_max_threads_per_stream_2,
+                                                             testing_property_for_max_threads_per_stream_3,
                                                              testing_property_for_enable_hyper_threading,
                                                              testing_property_for_enable_cpu_pinning)));
 
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
index d0ee8a889414cd..af054b1468bcda 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
@@ -33,6 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable
         RO_property(ov::hint::num_requests.name()),
         RO_property(ov::hint::enable_cpu_pinning.name()),
         RO_property(ov::hint::scheduling_core_type.name()),
+        RO_property(ov::hint::max_threads_per_stream.name()),
         RO_property(ov::hint::enable_hyper_threading.name()),
         RO_property(ov::execution_devices.name()),
         RO_property(ov::intel_cpu::denormals_optimization.name()),
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
index 0d373252eddafd..4088efc4c7110d 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
@@ -47,6 +47,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) {
         RW_property(ov::hint::num_requests.name()),
         RW_property(ov::hint::enable_cpu_pinning.name()),
         RW_property(ov::hint::scheduling_core_type.name()),
+        RW_property(ov::hint::max_threads_per_stream.name()),
         RW_property(ov::hint::enable_hyper_threading.name()),
         RW_property(ov::device::id.name()),
         RW_property(ov::intel_cpu::denormals_optimization.name()),
diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
index 0329245b55caba..5b59900d37c705 100644
--- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
+++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
@@ -332,6 +332,15 @@ std::vector<ov::AnyMap> OVPropertiesTestsWithCompileModelProps::getRWOptionalPro
         }
     }
 
+    if (props.empty() || std::find(props.begin(), props.end(), ov::hint::max_threads_per_stream.name()) != props.end()) {
+        ov::hint::SchedulingCoreType maxThreadsPerStreams[] = {ov::hint::MaxThreadsPerStream::AUTO,
+                                                               ov::hint::MaxThreadsPerStream::PER_PLATFORM,
+                                                               ov::hint::MaxThreadsPerStream::PER_SOCKET};
+        for (auto& maxThreadsPerStream : maxThreadsPerStreams) {
+            res.push_back({ov::hint::max_threads_per_stream(maxThreadsPerStream)});
+        }
+    }
+
     if (props.empty() || std::find(props.begin(), props.end(), ov::enable_mmap.name()) != props.end()) {
         res.push_back({ov::enable_mmap(true)});
         res.push_back({ov::enable_mmap(false)});

From 68bb894aad6572e2745eb40b9c412406658098c6 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Mon, 26 Feb 2024 16:45:14 +0800
Subject: [PATCH 02/32] update for test case

---
 src/bindings/c/tests/ov_core_test.cpp                  | 10 +++++-----
 src/inference/include/openvino/runtime/properties.hpp  |  8 ++++----
 .../tests/functional/custom/behavior/export_import.cpp |  2 +-
 .../shared/src/behavior/ov_plugin/properties_tests.cpp |  6 +++---
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp
index 8b5d5845475512..3b55d8f7cfa2fb 100644
--- a/src/bindings/c/tests/ov_core_test.cpp
+++ b/src/bindings/c/tests/ov_core_test.cpp
@@ -298,12 +298,12 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
     EXPECT_STREQ(val_type, ret);
     ov_free(ret);
 
-    const char* key_type = ov_property_key_hint_max_threads_per_stream;
-    const char* val_type = "PER_PLATFORM";
-    OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_type, val_type));
+    const char* key_mode = ov_property_key_hint_max_threads_per_stream;
+    const char* val_mode = "PER_PLATFORM";
+    OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode));
     ret = nullptr;
-    OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret));
-    EXPECT_STREQ(val_type, ret);
+    OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret));
+    EXPECT_STREQ(val_mode, ret);
     ov_free(ret);
 
     OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val));
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index b8496a0cfae093..93dede583be51f 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -384,9 +384,9 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
 /** @endcond */
 
 enum class MaxThreadsPerStream {
-    AUTO,          //!<  Using all threads per platform for one stream. Will create sub stream on dual socket platform.
-    PER_PLATFORM,  //!<  Using all threads per platform for one stream even on dual socket platform.
-    PER_SOCKET,    //!<  Using all threads per socket for one stream on dual socket platform.
+    AUTO = 0,          //!<  Using all threads per platform for one stream. Will create sub stream on dual socket platform.
+    PER_PLATFORM = 1,  //!<  Using all threads per platform for one stream even on dual socket platform.
+    PER_SOCKET = 2,    //!<  Using all threads per socket for one stream on dual socket platform.
 };
 
 /** @cond INTERNAL */
@@ -448,7 +448,7 @@ static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_C
  * ie.set_property(ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET));
  * @endcode
  */
-static constexpr Property<SchedulingCoreType> max_threads_per_stream{"MAX_THREADS_PER_STREAM"};
+static constexpr Property<MaxThreadsPerStream> max_threads_per_stream{"MAX_THREADS_PER_STREAM"};
 
 /**
  * @brief This property allows CPU pinning during inference.
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
index 29a5194cf22a8f..2289a16b6d4d59 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
@@ -124,7 +124,7 @@ const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream_2 = {
     {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)},
     {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}};
 
-const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream = {
+const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream_3 = {
     {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)},
     {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}};
 
diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
index 5b59900d37c705..f6dbf8497b90ed 100644
--- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
+++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
@@ -333,9 +333,9 @@ std::vector<ov::AnyMap> OVPropertiesTestsWithCompileModelProps::getRWOptionalPro
     }
 
     if (props.empty() || std::find(props.begin(), props.end(), ov::hint::max_threads_per_stream.name()) != props.end()) {
-        ov::hint::SchedulingCoreType maxThreadsPerStreams[] = {ov::hint::MaxThreadsPerStream::AUTO,
-                                                               ov::hint::MaxThreadsPerStream::PER_PLATFORM,
-                                                               ov::hint::MaxThreadsPerStream::PER_SOCKET};
+        ov::hint::MaxThreadsPerStream maxThreadsPerStreams[] = {ov::hint::MaxThreadsPerStream::AUTO,
+                                                                ov::hint::MaxThreadsPerStream::PER_PLATFORM,
+                                                                ov::hint::MaxThreadsPerStream::PER_SOCKET};
         for (auto& maxThreadsPerStream : maxThreadsPerStreams) {
             res.push_back({ov::hint::max_threads_per_stream(maxThreadsPerStream)});
         }

From 14c3f27b25b3d055201c49a24de699b4c22d41cf Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Mon, 26 Feb 2024 17:10:45 +0800
Subject: [PATCH 03/32] update for comments

---
 .../include/openvino/runtime/properties.hpp          | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 93dede583be51f..5881fd6ca227ab 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -384,7 +384,7 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
 /** @endcond */
 
 enum class MaxThreadsPerStream {
-    AUTO = 0,          //!<  Using all threads per platform for one stream. Will create sub stream on dual socket platform.
+    AUTO = 0,  //!<  Using all threads per platform for one stream. Will create sub stream on dual socket platform.
     PER_PLATFORM = 1,  //!<  Using all threads per platform for one stream even on dual socket platform.
     PER_SOCKET = 2,    //!<  Using all threads per socket for one stream on dual socket platform.
 };
@@ -439,10 +439,14 @@ static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_C
  * @brief This property defines max threads per stream used for CPU inference.
  * @ingroup ov_runtime_cpp_prop_api
  *
- * Developer can use this property to select max threads per stream for CPU inference. Please refer MaxThreadsPerStream
- * for all definition of types.
+ * Developers can use this property to select the max threads of a stream in latency mode for CPU inference on a
+ * two-socket platform.
+ * -- AUTO mode         : Will create a main stream on one socket and a sub stream on the other socket. Some nodes will
+ * only use the main stream and some nodes will use both the main stream and the sub stream.
+ * -- PER_PLATFORM mode : Will create one stream on both sockets.
+ * -- PER_SOCKET mode   : Will create one stream on a single socket.
  *
- * The following code is an example to only use all threads per socket for one stream on dual sockets platform.
+ * The following code is an example to use all threads of only one socket for one stream on a dual-socket platform.
  *
  * @code
  * ie.set_property(ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET));

From 5724b7704cfcbf4eeb5fd3e3efcc91204a50f603 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Mon, 26 Feb 2024 22:20:00 +0800
Subject: [PATCH 04/32] update for python

---
 src/bindings/python/src/openvino/properties/hint/__init__.py     | 1 +
 .../python/src/openvino/runtime/properties/hint/__init__.py      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/bindings/python/src/openvino/properties/hint/__init__.py b/src/bindings/python/src/openvino/properties/hint/__init__.py
index 5ff211301f9c74..4a9e320b18ac61 100644
--- a/src/bindings/python/src/openvino/properties/hint/__init__.py
+++ b/src/bindings/python/src/openvino/properties/hint/__init__.py
@@ -5,6 +5,7 @@
 # Enums
 from openvino._pyopenvino.properties.hint import Priority
 from openvino._pyopenvino.properties.hint import SchedulingCoreType
+from openvino._pyopenvino.properties.hint import MaxThreadsPerStream
 from openvino._pyopenvino.properties.hint import ExecutionMode
 from openvino._pyopenvino.properties.hint import PerformanceMode
 
diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
index b32b51ce6482b0..db6d304a383ec9 100644
--- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
@@ -5,6 +5,7 @@
 # Enums
 from openvino._pyopenvino.properties.hint import Priority
 from openvino._pyopenvino.properties.hint import SchedulingCoreType
+from openvino._pyopenvino.properties.hint import MaxThreadsPerStream
 from openvino._pyopenvino.properties.hint import ExecutionMode
 from openvino._pyopenvino.properties.hint import PerformanceMode
 

From 65c312b7eb9205c11c916234d5913876bd77280d Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Mon, 26 Feb 2024 22:41:00 +0800
Subject: [PATCH 05/32] update for python

---
 .../python/src/pyopenvino/core/properties/properties.cpp     | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
index abae0d3dac248f..2c53ba1b78d43f 100644
--- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -71,6 +71,11 @@ void regmodule_properties(py::module m) {
         .value("PCORE_ONLY", ov::hint::SchedulingCoreType::PCORE_ONLY)
         .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY);
 
+    py::enum_<ov::hint::MaxThreadsPerStream>(m_hint, "MaxThreadsPerStream", py::arithmetic())
+        .value("AUTO", ov::hint::MaxThreadsPerStream::AUTO)
+        .value("PER_PLATFORM", ov::hint::MaxThreadsPerStream::PER_PLATFORM)
+        .value("PER_SOCKET", ov::hint::MaxThreadsPerStream::PER_SOCKET);
+
     py::enum_<ov::hint::ExecutionMode>(m_hint, "ExecutionMode", py::arithmetic())
         .value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE)
         .value("ACCURACY", ov::hint::ExecutionMode::ACCURACY);

From d5f43a0b78ec924fac71d75181e1e24f02031531 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Mon, 26 Feb 2024 23:08:54 +0800
Subject: [PATCH 06/32] update for python

---
 src/bindings/python/tests/test_runtime/test_properties.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index ebfd9f6c8dedc7..b21586c13922e7 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -86,6 +86,14 @@ def test_properties_rw_base():
                 (hints.SchedulingCoreType.ECORE_ONLY, "SchedulingCoreType.ECORE_ONLY", 2),
             ),
         ),
+        (
+            hints.MaxThreadsPerStream,
+            (
+                (hints.MaxThreadsPerStream.AUTO, "MaxThreadsPerStream.AUTO", 0),
+                (hints.MaxThreadsPerStream.PER_PLATFORM, "MaxThreadsPerStream.PER_PLATFORM", 1),
+                (hints.MaxThreadsPerStream.PER_SOCKET, "MaxThreadsPerStream.PER_SOCKET", 2),
+            ),
+        ),
         (
             hints.ExecutionMode,
             (

From 0b09543d198bfc972f500cf37d0f90047624fa8b Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Mon, 26 Feb 2024 23:36:52 +0800
Subject: [PATCH 07/32] update for python

---
 src/bindings/python/src/pyopenvino/utils/utils.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp
index 87f6c36576a1ca..3a8f4228a562e3 100644
--- a/src/bindings/python/src/pyopenvino/utils/utils.cpp
+++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp
@@ -176,6 +176,8 @@ py::object from_ov_any(const ov::Any& any) {
         return py::cast(any.as<ov::intel_auto::SchedulePolicy>());
     } else if (any.is<ov::hint::SchedulingCoreType>()) {
         return py::cast(any.as<ov::hint::SchedulingCoreType>());
+    } else if (any.is<ov::hint::MaxThreadsPerStream>()) {
+        return py::cast(any.as<ov::hint::MaxThreadsPerStream>());
     } else if (any.is<ov::hint::ExecutionMode>()) {
         return py::cast(any.as<ov::hint::ExecutionMode>());
     } else if (any.is<ov::log::Level>()) {
@@ -373,6 +375,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
         return py::cast<ov::intel_auto::SchedulePolicy>(py_obj);
     } else if (py::isinstance<ov::hint::SchedulingCoreType>(py_obj)) {
         return py::cast<ov::hint::SchedulingCoreType>(py_obj);
+    } else if (py::isinstance<ov::hint::MaxThreadsPerStream>(py_obj)) {
+        return py::cast<ov::hint::MaxThreadsPerStream>(py_obj);
     } else if (py::isinstance<ov::log::Level>(py_obj)) {
         return py::cast<ov::log::Level>(py_obj);
     } else if (py::isinstance<ov::device::Type>(py_obj)) {

From fe5173d83a8cabfb78907c73d505e6e823e96efb Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 13 Mar 2024 15:48:29 +0800
Subject: [PATCH 08/32] change default value to PER_SOCKET

---
 src/plugins/intel_cpu/src/config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index 8a97682507785f..6b27a207c4c9b6 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -76,7 +76,7 @@ struct Config {
     bool enableCpuPinning = true;
     bool changedCpuPinning = false;
     ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
-    ov::hint::MaxThreadsPerStream maxThreadsPerStream = ov::hint::MaxThreadsPerStream::AUTO;
+    ov::hint::MaxThreadsPerStream maxThreadsPerStream = ov::hint::MaxThreadsPerStream::PER_SOCKET;
     bool enableHyperThreading = true;
     bool changedHyperThreading = false;
     Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;

From 19c1ed2a8b1b1cfb252b105d466d63894647ccaa Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Sun, 17 Mar 2024 17:21:12 +0800
Subject: [PATCH 09/32] update property name and value

---
 src/bindings/c/docs/api_overview.md           |  2 +-
 .../c/include/openvino/c/ov_property.h        |  2 +-
 src/bindings/c/src/ov_property.cpp            |  2 +-
 src/bindings/c/tests/ov_core_test.cpp         |  4 +-
 .../src/openvino/properties/hint/__init__.py  |  2 +-
 .../runtime/properties/hint/__init__.py       |  4 +-
 .../pyopenvino/core/properties/properties.cpp | 12 ++--
 .../python/src/pyopenvino/utils/utils.cpp     |  8 +--
 .../tests/test_runtime/test_properties.py     | 16 +++--
 .../include/openvino/runtime/properties.hpp   | 69 +++++++++++--------
 src/plugins/intel_cpu/src/compiled_model.cpp  |  6 +-
 src/plugins/intel_cpu/src/config.cpp          | 33 ++++++---
 src/plugins/intel_cpu/src/config.h            |  2 +-
 src/plugins/intel_cpu/src/plugin.cpp          |  6 +-
 .../custom/behavior/export_import.cpp         | 24 +++----
 .../ov_executable_network/properties.cpp      |  2 +-
 .../custom/behavior/ov_plugin/properties.cpp  |  2 +-
 .../behavior/ov_plugin/properties_tests.cpp   | 13 ++--
 18 files changed, 120 insertions(+), 89 deletions(-)

diff --git a/src/bindings/c/docs/api_overview.md b/src/bindings/c/docs/api_overview.md
index 8c7debee7e3bd1..447b6b069b21a6 100644
--- a/src/bindings/c/docs/api_overview.md
+++ b/src/bindings/c/docs/api_overview.md
@@ -309,7 +309,7 @@ OPENVINO_C_VAR(const char*) ov_property_key_affinity;
 
 OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads;
 
-OPENVINO_C_VAR(const char*) ov_property_key_hint_max_threads_per_stream;
+OPENVINO_C_VAR(const char*) ov_property_key_hint_llm_distribution_policy;
 
 OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_cpu_pinning;
 
diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h
index f3165d242f22fc..b23ca9b68f4c33 100644
--- a/src/bindings/c/include/openvino/c/ov_property.h
+++ b/src/bindings/c/include/openvino/c/ov_property.h
@@ -133,7 +133,7 @@ ov_property_key_inference_num_threads;
  * @ingroup ov_property_c_api
  */
 OPENVINO_C_VAR(const char*)
-ov_property_key_hint_max_threads_per_stream;
+ov_property_key_hint_llm_distribution_policy;
 
 /**
  * @brief Read-write property, it is high-level OpenVINO hint for using CPU pinning to bind CPU threads to processors
diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp
index ffb6d9f90e105d..5ad1bd0c0b6999 100644
--- a/src/bindings/c/src/ov_property.cpp
+++ b/src/bindings/c/src/ov_property.cpp
@@ -23,7 +23,7 @@ const char* ov_property_key_cache_mode = "CACHE_MODE";
 const char* ov_property_key_num_streams = "NUM_STREAMS";
 const char* ov_property_key_affinity = "AFFINITY";
 const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS";
-const char* ov_property_key_hint_max_threads_per_stream = "MAX_THREADS_PER_STREAM";
+const char* ov_property_key_hint_llm_distribution_policy = "LLM_DISTRIBUTION_POLICY";
 const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT";
 const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING";
 const char* ov_property_key_hint_scheduling_core_type = "SCHEDULING_CORE_TYPE";
diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp
index 3b55d8f7cfa2fb..9069dea86b5fdf 100644
--- a/src/bindings/c/tests/ov_core_test.cpp
+++ b/src/bindings/c/tests/ov_core_test.cpp
@@ -298,8 +298,8 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
     EXPECT_STREQ(val_type, ret);
     ov_free(ret);
 
-    const char* key_mode = ov_property_key_hint_max_threads_per_stream;
-    const char* val_mode = "PER_PLATFORM";
+    const char* key_mode = ov_property_key_hint_llm_distribution_policy;
+    const char* val_mode = "ENTIRE_PLATFORM";
     OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode));
     ret = nullptr;
     OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret));
diff --git a/src/bindings/python/src/openvino/properties/hint/__init__.py b/src/bindings/python/src/openvino/properties/hint/__init__.py
index 4a9e320b18ac61..d014ac0dfec37d 100644
--- a/src/bindings/python/src/openvino/properties/hint/__init__.py
+++ b/src/bindings/python/src/openvino/properties/hint/__init__.py
@@ -5,7 +5,7 @@
 # Enums
 from openvino._pyopenvino.properties.hint import Priority
 from openvino._pyopenvino.properties.hint import SchedulingCoreType
-from openvino._pyopenvino.properties.hint import MaxThreadsPerStream
+from openvino._pyopenvino.properties.hint import LlmDistributionPolicy
 from openvino._pyopenvino.properties.hint import ExecutionMode
 from openvino._pyopenvino.properties.hint import PerformanceMode
 
diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
index db6d304a383ec9..471ec63a8e675b 100644
--- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
@@ -5,7 +5,7 @@
 # Enums
 from openvino._pyopenvino.properties.hint import Priority
 from openvino._pyopenvino.properties.hint import SchedulingCoreType
-from openvino._pyopenvino.properties.hint import MaxThreadsPerStream
+from openvino._pyopenvino.properties.hint import LlmDistributionPolicy
 from openvino._pyopenvino.properties.hint import ExecutionMode
 from openvino._pyopenvino.properties.hint import PerformanceMode
 
@@ -15,7 +15,7 @@
 from openvino._pyopenvino.properties.hint import performance_mode
 from openvino._pyopenvino.properties.hint import enable_cpu_pinning
 from openvino._pyopenvino.properties.hint import scheduling_core_type
-from openvino._pyopenvino.properties.hint import max_threads_per_stream
+from openvino._pyopenvino.properties.hint import llm_distribution_policy
 from openvino._pyopenvino.properties.hint import enable_hyper_threading
 from openvino._pyopenvino.properties.hint import execution_mode
 from openvino._pyopenvino.properties.hint import num_requests
diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
index 2c53ba1b78d43f..20a35829afd399 100644
--- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -71,10 +71,12 @@ void regmodule_properties(py::module m) {
         .value("PCORE_ONLY", ov::hint::SchedulingCoreType::PCORE_ONLY)
         .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY);
 
-    py::enum_<ov::hint::MaxThreadsPerStream>(m_hint, "MaxThreadsPerStream", py::arithmetic())
-        .value("AUTO", ov::hint::MaxThreadsPerStream::AUTO)
-        .value("PER_PLATFORM", ov::hint::MaxThreadsPerStream::PER_PLATFORM)
-        .value("PER_SOCKET", ov::hint::MaxThreadsPerStream::PER_SOCKET);
+    py::enum_<ov::hint::LlmDistributionPolicy>(m_hint, "LlmDistributionPolicy", py::arithmetic())
+        .value("TENSOR_PARTITION", ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)
+        .value("DATA_PARTITION", ov::hint::LlmDistributionPolicy::DATA_PARTITION)
+        .value("PIPELINE_PARTITION", ov::hint::LlmDistributionPolicy::PIPELINE_PARTITION)
+        .value("ENTIRE_PLATFORM", ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)
+        .value("SINGLE_DEVICE", ov::hint::LlmDistributionPolicy::SINGLE_DEVICE);
 
     py::enum_<ov::hint::ExecutionMode>(m_hint, "ExecutionMode", py::arithmetic())
         .value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE)
@@ -86,7 +88,7 @@ void regmodule_properties(py::module m) {
     wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode");
     wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning");
     wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type");
-    wrap_property_RW(m_hint, ov::hint::max_threads_per_stream, "max_threads_per_stream");
+    wrap_property_RW(m_hint, ov::hint::llm_distribution_policy, "llm_distribution_policy");
     wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading");
     wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode");
     wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests");
diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp
index e9a1396e8ece66..62c7b5cf744c50 100644
--- a/src/bindings/python/src/pyopenvino/utils/utils.cpp
+++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp
@@ -176,8 +176,8 @@ py::object from_ov_any(const ov::Any& any) {
         return py::cast(any.as<ov::intel_auto::SchedulePolicy>());
     } else if (any.is<ov::hint::SchedulingCoreType>()) {
         return py::cast(any.as<ov::hint::SchedulingCoreType>());
-    } else if (any.is<ov::hint::MaxThreadsPerStream>()) {
-        return py::cast(any.as<ov::hint::MaxThreadsPerStream>());
+    } else if (any.is<ov::hint::LlmDistributionPolicy>()) {
+        return py::cast(any.as<ov::hint::LlmDistributionPolicy>());
     } else if (any.is<ov::hint::ExecutionMode>()) {
         return py::cast(any.as<ov::hint::ExecutionMode>());
     } else if (any.is<ov::log::Level>()) {
@@ -375,8 +375,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
         return py::cast<ov::intel_auto::SchedulePolicy>(py_obj);
     } else if (py::isinstance<ov::hint::SchedulingCoreType>(py_obj)) {
         return py::cast<ov::hint::SchedulingCoreType>(py_obj);
-    } else if (py::isinstance<ov::hint::MaxThreadsPerStream>(py_obj)) {
-        return py::cast<ov::hint::MaxThreadsPerStream>(py_obj);
+    } else if (py::isinstance<ov::hint::LlmDistributionPolicy>(py_obj)) {
+        return py::cast<ov::hint::LlmDistributionPolicy>(py_obj);
     } else if (py::isinstance<ov::hint::ExecutionMode>(py_obj)) {
         return py::cast<ov::hint::ExecutionMode>(py_obj);
     } else if (py::isinstance<ov::log::Level>(py_obj)) {
diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index 8fe93eae0077af..364eef51ada79e 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -87,11 +87,13 @@ def test_properties_rw_base():
             ),
         ),
         (
-            hints.MaxThreadsPerStream,
+            hints.LlmDistributionPolicy,
             (
-                (hints.MaxThreadsPerStream.AUTO, "MaxThreadsPerStream.AUTO", 0),
-                (hints.MaxThreadsPerStream.PER_PLATFORM, "MaxThreadsPerStream.PER_PLATFORM", 1),
-                (hints.MaxThreadsPerStream.PER_SOCKET, "MaxThreadsPerStream.PER_SOCKET", 2),
+                (hints.LlmDistributionPolicy.TENSOR_PARTITION, "LlmDistributionPolicy.TENSOR_PARTITION", 0),
+                (hints.LlmDistributionPolicy.DATA_PARTITION, "LlmDistributionPolicy.DATA_PARTITION", 1),
+                (hints.LlmDistributionPolicy.PIPELINE_PARTITION, "LlmDistributionPolicy.PIPELINE_PARTITION", 2),
+                (hints.LlmDistributionPolicy.ENTIRE_PLATFORM, "LlmDistributionPolicy.ENTIRE_PLATFORM", 3),
+                (hints.LlmDistributionPolicy.SINGLE_DEVICE, "LlmDistributionPolicy.SINGLE_DEVICE", 4),
             ),
         ),
         (
@@ -288,9 +290,9 @@ def test_properties_ro(ov_property_ro, expected_value):
             ((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),),
         ),
         (
-            hints.max_threads_per_stream,
-            "MAX_THREADS_PER_STREAM",
-            ((hints.MaxThreadsPerStream.PER_PLATFORM, hints.MaxThreadsPerStream.PER_PLATFORM),),
+            hints.llm_distribution_policy,
+            "LLM_DISTRIBUTION_POLICY",
+            ((hints.LlmDistributionPolicy.ENTIRE_PLATFORM, hints.LlmDistributionPolicy.ENTIRE_PLATFORM),),
         ),
         (
             hints.enable_hyper_threading,
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 5881fd6ca227ab..3303be4f1f3a51 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -383,37 +383,47 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
 }
 /** @endcond */
 
-enum class MaxThreadsPerStream {
-    AUTO = 0,  //!<  Using all threads per platform for one stream. Will create sub stream on dual socket platform.
-    PER_PLATFORM = 1,  //!<  Using all threads per platform for one stream even on dual socket platform.
-    PER_SOCKET = 2,    //!<  Using all threads per socket for one stream on dual socket platform.
+enum class LlmDistributionPolicy {
+    TENSOR_PARTITION = 0,    //!< Split one node or subgraph into parts and run one part per socket/device in parallel.
+    DATA_PARTITION = 1,      //!< Split one batch input into parts and run one part per socket/device in parallel.
+    PIPELINE_PARTITION = 2,  //!< Split one model into parts and run each socket/device in parallel as a pipeline.
+    ENTIRE_PLATFORM = 3,     //!< Run one model on the entire platform with all sockets/devices.
+    SINGLE_DEVICE = 4,       //!< Run one model on single socket/device.
+};
 
 /** @cond INTERNAL */
-inline std::ostream& operator<<(std::ostream& os, const MaxThreadsPerStream& stream_mode) {
+inline std::ostream& operator<<(std::ostream& os, const LlmDistributionPolicy& stream_mode) {
     switch (stream_mode) {
-    case MaxThreadsPerStream::AUTO:
-        return os << "AUTO";
-    case MaxThreadsPerStream::PER_PLATFORM:
-        return os << "PER_PLATFORM";
-    case MaxThreadsPerStream::PER_SOCKET:
-        return os << "PER_SOCKET";
+    case LlmDistributionPolicy::TENSOR_PARTITION:
+        return os << "TENSOR_PARTITION";
+    case LlmDistributionPolicy::DATA_PARTITION:
+        return os << "DATA_PARTITION";
+    case LlmDistributionPolicy::PIPELINE_PARTITION:
+        return os << "PIPELINE_PARTITION";
+    case LlmDistributionPolicy::ENTIRE_PLATFORM:
+        return os << "ENTIRE_PLATFORM";
+    case LlmDistributionPolicy::SINGLE_DEVICE:
+        return os << "SINGLE_DEVICE";
     default:
-        OPENVINO_THROW("Unsupported mode!");
+        OPENVINO_THROW("Unsupported LLM distribution policy!");
     }
 }
 
-inline std::istream& operator>>(std::istream& is, MaxThreadsPerStream& stream_mode) {
+inline std::istream& operator>>(std::istream& is, LlmDistributionPolicy& stream_mode) {
     std::string str;
     is >> str;
-    if (str == "AUTO") {
-        stream_mode = MaxThreadsPerStream::AUTO;
-    } else if (str == "PER_PLATFORM") {
-        stream_mode = MaxThreadsPerStream::PER_PLATFORM;
-    } else if (str == "PER_SOCKET") {
-        stream_mode = MaxThreadsPerStream::PER_SOCKET;
+    if (str == "TENSOR_PARTITION") {
+        stream_mode = LlmDistributionPolicy::TENSOR_PARTITION;
+    } else if (str == "DATA_PARTITION") {
+        stream_mode = LlmDistributionPolicy::DATA_PARTITION;
+    } else if (str == "PIPELINE_PARTITION") {
+        stream_mode = LlmDistributionPolicy::PIPELINE_PARTITION;
+    } else if (str == "ENTIRE_PLATFORM") {
+        stream_mode = LlmDistributionPolicy::ENTIRE_PLATFORM;
+    } else if (str == "SINGLE_DEVICE") {
+        stream_mode = LlmDistributionPolicy::SINGLE_DEVICE;
     } else {
-        OPENVINO_THROW("Unsupported mode: ", str);
+        OPENVINO_THROW("Unsupported LLM distribution policy: ", str);
     }
     return is;
 }
@@ -436,23 +446,24 @@ inline std::istream& operator>>(std::istream& is, MaxThreadsPerStream& stream_mo
 static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};
 
 /**
- * @brief This property defines max threads per stream used for CPU inference.
+ * @brief This property defines the distribution policy for large language models (LLMs).
  * @ingroup ov_runtime_cpp_prop_api
  *
- * Developer can use this property to select max threads of stream in latency mode for CPU inference on two socket
- * platform.
- * -- AUTO mode         : Will create main stream on one socket and sub stream on the other socket. Some node will only
- * main stream and some node will use both main stream and sub stream.
- * -- PER_PLATFORM mode : Will create one stream on both sockets
- * -- PER_SOCKET mode   : Will create one stream on single socket
+ * Developers can use this property to select the LLM distribution policy for CPU inference on multi-socket platforms
+ * or GPU inference with multiple GPU devices.
+ * -- TENSOR_PARTITION   : Split one node or subgraph into parts and run one part per socket/device in parallel.
+ * -- DATA_PARTITION     : Split one batch input into parts and run one part per socket/device in parallel.
+ * -- PIPELINE_PARTITION : Split one model into parts and run each socket/device in parallel as a pipeline.
+ * -- ENTIRE_PLATFORM    : Run one model on the entire platform with all sockets/devices.
+ * -- SINGLE_DEVICE      : Run one model on single socket/device.
  *
  * The following code is an example to only use all threads of one socket for one stream on dual sockets platform.
  *
  * @code
- * ie.set_property(ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET));
+ * ie.set_property(ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE));
  * @endcode
  */
-static constexpr Property<MaxThreadsPerStream> max_threads_per_stream{"MAX_THREADS_PER_STREAM"};
+static constexpr Property<LlmDistributionPolicy> llm_distribution_policy{"LLM_DISTRIBUTION_POLICY"};
 
 /**
  * @brief This property allows CPU pinning during inference.
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index 377294a7ae3577..ff01280d101f9b 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -193,7 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
             RO_property(ov::hint::num_requests.name()),
             RO_property(ov::hint::enable_cpu_pinning.name()),
             RO_property(ov::hint::scheduling_core_type.name()),
-            RO_property(ov::hint::max_threads_per_stream.name()),
+            RO_property(ov::hint::llm_distribution_policy.name()),
             RO_property(ov::hint::enable_hyper_threading.name()),
             RO_property(ov::execution_devices.name()),
             RO_property(ov::intel_cpu::denormals_optimization.name()),
@@ -249,8 +249,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
     } else if (name == ov::hint::scheduling_core_type) {
         const auto stream_mode = config.schedulingCoreType;
         return stream_mode;
-    } else if (name == ov::hint::max_threads_per_stream) {
-        const auto core_type = config.maxThreadsPerStream;
+    } else if (name == ov::hint::llm_distribution_policy) {
+        const auto core_type = config.llmDistributionPolicy;
         return core_type;
     } else if (name == ov::hint::enable_hyper_threading.name()) {
         const bool use_ht = config.enableHyperThreading;
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 94e98e32f642bb..eeca1ce38fe390 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -191,20 +191,35 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                '/',
                                ov::hint::SchedulingCoreType::ECORE_ONLY);
             }
-        } else if (key == ov::hint::max_threads_per_stream.name()) {
-            try {
-                maxThreadsPerStream = val.as<ov::hint::MaxThreadsPerStream>();
-            } catch (ov::Exception&) {
+        } else if (key == ov::hint::llm_distribution_policy.name()) {
+            auto error_info = [&]() {
                 OPENVINO_THROW("Wrong value ",
                                val.as<std::string>(),
                                "for property key ",
-                               ov::hint::max_threads_per_stream.name(),
-                               ". Expected only ",
-                               ov::hint::MaxThreadsPerStream::AUTO,
+                               ov::hint::llm_distribution_policy.name(),
+                               ". CPU plugin only support ",
+                               ov::hint::LlmDistributionPolicy::TENSOR_PARTITION,
                                '/',
-                               ov::hint::MaxThreadsPerStream::PER_PLATFORM,
+                               ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM,
                                '/',
-                               ov::hint::MaxThreadsPerStream::PER_SOCKET);
+                               ov::hint::LlmDistributionPolicy::SINGLE_DEVICE);
+            };
+
+            ov::hint::LlmDistributionPolicy llm_policy = ov::hint::LlmDistributionPolicy::PIPELINE_PARTITION;
+            try {
+                llm_policy = val.as<ov::hint::LlmDistributionPolicy>();
+            } catch (ov::Exception&) {
+                error_info();
+            }
+
+            switch (llm_policy) {
+            case ov::hint::LlmDistributionPolicy::TENSOR_PARTITION:
+            case ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM:
+            case ov::hint::LlmDistributionPolicy::SINGLE_DEVICE:
+                llmDistributionPolicy = llm_policy;
+                break;
+            default:
+                error_info();
             }
         } else if (key == ov::hint::enable_hyper_threading.name()) {
             try {
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index 6b27a207c4c9b6..cfda310db02dfa 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -76,7 +76,7 @@ struct Config {
     bool enableCpuPinning = true;
     bool changedCpuPinning = false;
     ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
-    ov::hint::MaxThreadsPerStream maxThreadsPerStream = ov::hint::MaxThreadsPerStream::PER_SOCKET;
+    ov::hint::LlmDistributionPolicy llmDistributionPolicy = ov::hint::LlmDistributionPolicy::SINGLE_DEVICE;
     bool enableHyperThreading = true;
     bool changedHyperThreading = false;
     Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index e1e9c2f509389e..3a0ce346fdd06b 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -409,8 +409,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
     } else if (name == ov::hint::scheduling_core_type) {
         const auto core_type = engConfig.schedulingCoreType;
         return core_type;
-    } else if (name == ov::hint::max_threads_per_stream) {
-        const auto stream_mode = engConfig.maxThreadsPerStream;
+    } else if (name == ov::hint::llm_distribution_policy) {
+        const auto stream_mode = engConfig.llmDistributionPolicy;
         return stream_mode;
     } else if (name == ov::hint::enable_hyper_threading) {
         const bool ht_value = engConfig.enableHyperThreading;
@@ -484,7 +484,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio
             RW_property(ov::hint::num_requests.name()),
             RW_property(ov::hint::enable_cpu_pinning.name()),
             RW_property(ov::hint::scheduling_core_type.name()),
-            RW_property(ov::hint::max_threads_per_stream.name()),
+            RW_property(ov::hint::llm_distribution_policy.name()),
             RW_property(ov::hint::enable_hyper_threading.name()),
             RW_property(ov::device::id.name()),
             RW_property(ov::intel_cpu::denormals_optimization.name()),
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
index 2289a16b6d4d59..32398acb7e9cae 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
@@ -116,17 +116,17 @@ const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_3 = {
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
 
-const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream_1 = {
-    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)},
-    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)}};
+const std::vector<ov::AnyMap> testing_property_for_llm_distribution_policy_1 = {
+    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)},
+    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)}};
 
-const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream_2 = {
-    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)},
-    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}};
+const std::vector<ov::AnyMap> testing_property_for_llm_distribution_policy_2 = {
+    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)},
+    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)}};
 
-const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream_3 = {
-    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)},
-    {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}};
+const std::vector<ov::AnyMap> testing_property_for_llm_distribution_policy_3 = {
+    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)},
+    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)}};
 
 const std::vector<ov::AnyMap> testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)},
                                                                              {ov::hint::enable_hyper_threading(false)}};
@@ -143,9 +143,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest,
                                                              testing_property_for_scheduling_core_type_1,
                                                              testing_property_for_scheduling_core_type_2,
                                                              testing_property_for_scheduling_core_type_3,
-                                                             testing_property_for_max_threads_per_stream_1,
-                                                             testing_property_for_max_threads_per_stream_2,
-                                                             testing_property_for_max_threads_per_stream_3,
+                                                             testing_property_for_llm_distribution_policy_1,
+                                                             testing_property_for_llm_distribution_policy_2,
+                                                             testing_property_for_llm_distribution_policy_3,
                                                              testing_property_for_enable_hyper_threading,
                                                              testing_property_for_enable_cpu_pinning)));
 
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
index af054b1468bcda..aeedd2fbe25b9f 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
@@ -33,7 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable
         RO_property(ov::hint::num_requests.name()),
         RO_property(ov::hint::enable_cpu_pinning.name()),
         RO_property(ov::hint::scheduling_core_type.name()),
-        RO_property(ov::hint::max_threads_per_stream.name()),
+        RO_property(ov::hint::llm_distribution_policy.name()),
         RO_property(ov::hint::enable_hyper_threading.name()),
         RO_property(ov::execution_devices.name()),
         RO_property(ov::intel_cpu::denormals_optimization.name()),
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
index 4088efc4c7110d..40fe41ca82d90e 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
@@ -47,7 +47,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) {
         RW_property(ov::hint::num_requests.name()),
         RW_property(ov::hint::enable_cpu_pinning.name()),
         RW_property(ov::hint::scheduling_core_type.name()),
-        RW_property(ov::hint::max_threads_per_stream.name()),
+        RW_property(ov::hint::llm_distribution_policy.name()),
         RW_property(ov::hint::enable_hyper_threading.name()),
         RW_property(ov::device::id.name()),
         RW_property(ov::intel_cpu::denormals_optimization.name()),
diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
index d10f66d3bc15a1..caa75c97530eaf 100644
--- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
+++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
@@ -335,12 +335,13 @@ std::vector<ov::AnyMap> OVPropertiesTestsWithCompileModelProps::getRWOptionalPro
         }
     }
 
-    if (props.empty() || std::find(props.begin(), props.end(), ov::hint::max_threads_per_stream.name()) != props.end()) {
-        ov::hint::MaxThreadsPerStream maxThreadsPerStreams[] = {ov::hint::MaxThreadsPerStream::AUTO,
-                                                                ov::hint::MaxThreadsPerStream::PER_PLATFORM,
-                                                                ov::hint::MaxThreadsPerStream::PER_SOCKET};
-        for (auto& maxThreadsPerStream : maxThreadsPerStreams) {
-            res.push_back({ov::hint::max_threads_per_stream(maxThreadsPerStream)});
+    if (props.empty() ||
+        std::find(props.begin(), props.end(), ov::hint::llm_distribution_policy.name()) != props.end()) {
+        ov::hint::LlmDistributionPolicy llmDistributionPolicys[] = {ov::hint::LlmDistributionPolicy::TENSOR_PARTITION,
+                                                                    ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM,
+                                                                    ov::hint::LlmDistributionPolicy::SINGLE_DEVICE};
+        for (auto& llmDistributionPolicy : llmDistributionPolicys) {
+            res.push_back({ov::hint::llm_distribution_policy(llmDistributionPolicy)});
         }
     }
 

From 562b01aa79bf015a60874d06aecf6bcdd0828938 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Sun, 17 Mar 2024 21:02:49 +0800
Subject: [PATCH 10/32] update code style

---
 .../include/openvino/runtime/properties.hpp   | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index bf8225bb810d49..428276f77b7a05 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -383,6 +383,22 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
 }
 /** @endcond */
 
+/**
+ * @brief This property defines CPU core type which can be used during inference.
+ * @ingroup ov_runtime_cpp_prop_api
+ *
+ * Developer can use this property to select specific CPU cores for inference. Please refer SchedulingCoreType for
+ * all definition of core type.
+ *
+ * The following code is an example to only use efficient-cores for inference on hybrid CPU. If user sets this
+ * configuration on a platform with only performance-cores, CPU inference will still run on the performance-cores.
+ *
+ * @code
+ * ie.set_property(ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY));
+ * @endcode
+ */
+static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};
+
 enum class LlmDistributionPolicy {
     TENSOR_PARTITION = 0,    // Split one node or subgraph into parts and run one part per socket/device in parallel.
     DATA_PARTITION = 1,      // Split one batch input into parts and run one part per socket/device in parallel.
@@ -429,22 +445,6 @@ inline std::istream& operator>>(std::istream& is, LlmDistributionPolicy& stream_
 }
 /** @endcond */
 
-/**
- * @brief This property defines CPU core type which can be used during inference.
- * @ingroup ov_runtime_cpp_prop_api
- *
- * Developer can use this property to select specific CPU cores for inference. Please refer SchedulingCoreType for
- * all definition of core type.
- *
- * The following code is an example to only use efficient-cores for inference on hybrid CPU. If user sets this
- * configuration on a platform with only performance-cores, CPU inference will still run on the performance-cores.
- *
- * @code
- * ie.set_property(ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY));
- * @endcode
- */
-static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};
-
 /**
  * @brief This property defines distribution policy for Large language models (LLM).
  * @ingroup ov_runtime_cpp_prop_api

From 9c4a9515475a2bfb001b0f7663c77cf9966d823a Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Mon, 18 Mar 2024 22:22:13 +0800
Subject: [PATCH 11/32] update property name and value

---
 src/bindings/c/docs/api_overview.md           |  2 +-
 .../c/include/openvino/c/ov_property.h        |  2 +-
 src/bindings/c/src/ov_property.cpp            |  2 +-
 src/bindings/c/tests/ov_core_test.cpp         |  4 +-
 .../src/openvino/properties/hint/__init__.py  |  2 +-
 .../runtime/properties/hint/__init__.py       |  4 +-
 .../pyopenvino/core/properties/properties.cpp | 11 ++--
 .../python/src/pyopenvino/utils/utils.cpp     |  8 +--
 .../tests/test_runtime/test_properties.py     | 15 ++---
 .../include/openvino/runtime/properties.hpp   | 64 +++++++------------
 src/plugins/intel_cpu/src/compiled_model.cpp  |  8 +--
 src/plugins/intel_cpu/src/config.cpp          | 23 +++----
 src/plugins/intel_cpu/src/config.h            |  2 +-
 src/plugins/intel_cpu/src/plugin.cpp          |  6 +-
 .../custom/behavior/export_import.cpp         | 21 +++---
 .../ov_executable_network/properties.cpp      |  2 +-
 .../custom/behavior/ov_plugin/properties.cpp  |  2 +-
 .../behavior/ov_plugin/properties_tests.cpp   | 12 ++--
 18 files changed, 79 insertions(+), 111 deletions(-)

diff --git a/src/bindings/c/docs/api_overview.md b/src/bindings/c/docs/api_overview.md
index 447b6b069b21a6..506786189abce3 100644
--- a/src/bindings/c/docs/api_overview.md
+++ b/src/bindings/c/docs/api_overview.md
@@ -309,7 +309,7 @@ OPENVINO_C_VAR(const char*) ov_property_key_affinity;
 
 OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads;
 
-OPENVINO_C_VAR(const char*) ov_property_key_hint_llm_distribution_policy;
+OPENVINO_C_VAR(const char*) ov_property_key_hint_model_distribution_policy;
 
 OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_cpu_pinning;
 
diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h
index c8b344e625bc69..1f9bcea14dd9a3 100644
--- a/src/bindings/c/include/openvino/c/ov_property.h
+++ b/src/bindings/c/include/openvino/c/ov_property.h
@@ -133,7 +133,7 @@ ov_property_key_inference_num_threads;
  * @ingroup ov_property_c_api
  */
 OPENVINO_C_VAR(const char*)
-ov_property_key_hint_llm_distribution_policy;
+ov_property_key_hint_model_distribution_policy;
 
 /**
  * @brief Read-write property, it is high-level OpenVINO hint for using CPU pinning to bind CPU threads to processors
diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp
index 84a4a94ec3106d..611b36c90c83f8 100644
--- a/src/bindings/c/src/ov_property.cpp
+++ b/src/bindings/c/src/ov_property.cpp
@@ -23,7 +23,7 @@ const char* ov_property_key_cache_mode = "CACHE_MODE";
 const char* ov_property_key_num_streams = "NUM_STREAMS";
 const char* ov_property_key_affinity = "AFFINITY";
 const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS";
-const char* ov_property_key_hint_llm_distribution_policy = "LLM_DISTRIBUTION_POLICY";
+const char* ov_property_key_hint_model_distribution_policy = "MODEL_DISTRIBUTION_POLICY";
 const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT";
 const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING";
 const char* ov_property_key_hint_scheduling_core_type = "SCHEDULING_CORE_TYPE";
diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp
index 0fb826c85ebf6a..e56ec78f4ff6e3 100644
--- a/src/bindings/c/tests/ov_core_test.cpp
+++ b/src/bindings/c/tests/ov_core_test.cpp
@@ -298,8 +298,8 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
     EXPECT_STREQ(val_type, ret);
     ov_free(ret);
 
-    const char* key_mode = ov_property_key_hint_llm_distribution_policy;
-    const char* val_mode = "ENTIRE_PLATFORM";
+    const char* key_mode = ov_property_key_hint_model_distribution_policy;
+    const char* val_mode = "TENSOR_PARALLEL";
     OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode));
     ret = nullptr;
     OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret));
diff --git a/src/bindings/python/src/openvino/properties/hint/__init__.py b/src/bindings/python/src/openvino/properties/hint/__init__.py
index d014ac0dfec37d..1624325ea5e9e2 100644
--- a/src/bindings/python/src/openvino/properties/hint/__init__.py
+++ b/src/bindings/python/src/openvino/properties/hint/__init__.py
@@ -5,7 +5,7 @@
 # Enums
 from openvino._pyopenvino.properties.hint import Priority
 from openvino._pyopenvino.properties.hint import SchedulingCoreType
-from openvino._pyopenvino.properties.hint import LlmDistributionPolicy
+from openvino._pyopenvino.properties.hint import ModelDistributionPolicy
 from openvino._pyopenvino.properties.hint import ExecutionMode
 from openvino._pyopenvino.properties.hint import PerformanceMode
 
diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
index 471ec63a8e675b..dd90ded374ca11 100644
--- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
@@ -5,7 +5,7 @@
 # Enums
 from openvino._pyopenvino.properties.hint import Priority
 from openvino._pyopenvino.properties.hint import SchedulingCoreType
-from openvino._pyopenvino.properties.hint import LlmDistributionPolicy
+from openvino._pyopenvino.properties.hint import ModelDistributionPolicy
 from openvino._pyopenvino.properties.hint import ExecutionMode
 from openvino._pyopenvino.properties.hint import PerformanceMode
 
@@ -15,7 +15,7 @@
 from openvino._pyopenvino.properties.hint import performance_mode
 from openvino._pyopenvino.properties.hint import enable_cpu_pinning
 from openvino._pyopenvino.properties.hint import scheduling_core_type
-from openvino._pyopenvino.properties.hint import llm_distribution_policy
+from openvino._pyopenvino.properties.hint import model_distribution_policy
 from openvino._pyopenvino.properties.hint import enable_hyper_threading
 from openvino._pyopenvino.properties.hint import execution_mode
 from openvino._pyopenvino.properties.hint import num_requests
diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
index 20a35829afd399..6310aac026e8c0 100644
--- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -71,12 +71,9 @@ void regmodule_properties(py::module m) {
         .value("PCORE_ONLY", ov::hint::SchedulingCoreType::PCORE_ONLY)
         .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY);
 
-    py::enum_<ov::hint::LlmDistributionPolicy>(m_hint, "LlmDistributionPolicy", py::arithmetic())
-        .value("TENSOR_PARTITION", ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)
-        .value("DATA_PARTITION", ov::hint::LlmDistributionPolicy::DATA_PARTITION)
-        .value("PIPELINE_PARTITION", ov::hint::LlmDistributionPolicy::PIPELINE_PARTITION)
-        .value("ENTIRE_PLATFORM", ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)
-        .value("SINGLE_DEVICE", ov::hint::LlmDistributionPolicy::SINGLE_DEVICE);
+    py::enum_<ov::hint::ModelDistributionPolicy>(m_hint, "ModelDistributionPolicy", py::arithmetic())
+        .value("NONE", ov::hint::ModelDistributionPolicy::NONE)
+        .value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL);
 
     py::enum_<ov::hint::ExecutionMode>(m_hint, "ExecutionMode", py::arithmetic())
         .value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE)
@@ -88,7 +85,7 @@ void regmodule_properties(py::module m) {
     wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode");
     wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning");
     wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type");
-    wrap_property_RW(m_hint, ov::hint::llm_distribution_policy, "llm_distribution_policy");
+    wrap_property_RW(m_hint, ov::hint::model_distribution_policy, "model_distribution_policy");
     wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading");
     wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode");
     wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests");
diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp
index e409e0356fd26a..d21e2a1fef0a23 100644
--- a/src/bindings/python/src/pyopenvino/utils/utils.cpp
+++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp
@@ -176,8 +176,8 @@ py::object from_ov_any(const ov::Any& any) {
         return py::cast(any.as<ov::intel_auto::SchedulePolicy>());
     } else if (any.is<ov::hint::SchedulingCoreType>()) {
         return py::cast(any.as<ov::hint::SchedulingCoreType>());
-    } else if (any.is<ov::hint::LlmDistributionPolicy>()) {
-        return py::cast(any.as<ov::hint::LlmDistributionPolicy>());
+    } else if (any.is<ov::hint::ModelDistributionPolicy>()) {
+        return py::cast(any.as<ov::hint::ModelDistributionPolicy>());
     } else if (any.is<ov::hint::ExecutionMode>()) {
         return py::cast(any.as<ov::hint::ExecutionMode>());
     } else if (any.is<ov::log::Level>()) {
@@ -377,8 +377,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
         return py::cast<ov::intel_auto::SchedulePolicy>(py_obj);
     } else if (py::isinstance<ov::hint::SchedulingCoreType>(py_obj)) {
         return py::cast<ov::hint::SchedulingCoreType>(py_obj);
-    } else if (py::isinstance<ov::hint::LlmDistributionPolicy>(py_obj)) {
-        return py::cast<ov::hint::LlmDistributionPolicy>(py_obj);
+    } else if (py::isinstance<ov::hint::ModelDistributionPolicy>(py_obj)) {
+        return py::cast<ov::hint::ModelDistributionPolicy>(py_obj);
     } else if (py::isinstance<ov::hint::ExecutionMode>(py_obj)) {
         return py::cast<ov::hint::ExecutionMode>(py_obj);
     } else if (py::isinstance<ov::log::Level>(py_obj)) {
diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index 364eef51ada79e..80ecf47ba0ed89 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -87,13 +87,10 @@ def test_properties_rw_base():
             ),
         ),
         (
-            hints.LlmDistributionPolicy,
+            hints.ModelDistributionPolicy,
             (
-                (hints.LlmDistributionPolicy.TENSOR_PARTITION, "LlmDistributionPolicy.TENSOR_PARTITION", 0),
-                (hints.LlmDistributionPolicy.DATA_PARTITION, "LlmDistributionPolicy.DATA_PARTITION", 1),
-                (hints.LlmDistributionPolicy.PIPELINE_PARTITION, "LlmDistributionPolicy.PIPELINE_PARTITION", 2),
-                (hints.LlmDistributionPolicy.ENTIRE_PLATFORM, "LlmDistributionPolicy.ENTIRE_PLATFORM", 3),
-                (hints.LlmDistributionPolicy.SINGLE_DEVICE, "LlmDistributionPolicy.SINGLE_DEVICE", 4),
+                (hints.ModelDistributionPolicy.NONE, "ModelDistributionPolicy.NONE", 0),
+                (hints.ModelDistributionPolicy.TENSOR_PARALLEL, "ModelDistributionPolicy.TENSOR_PARALLEL", 1),
             ),
         ),
         (
@@ -290,9 +287,9 @@ def test_properties_ro(ov_property_ro, expected_value):
             ((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),),
         ),
         (
-            hints.llm_distribution_policy,
-            "LLM_DISTRIBUTION_POLICY",
-            ((hints.LlmDistributionPolicy.ENTIRE_PLATFORM, hints.LlmDistributionPolicy.ENTIRE_PLATFORM),),
+            hints.model_distribution_policy,
+            "MODEL_DISTRIBUTION_POLICY",
+            ((hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.TENSOR_PARALLEL),),
         ),
         (
             hints.enable_hyper_threading,
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 428276f77b7a05..61cf50b0363553 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -399,71 +399,53 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
  */
 static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};
 
-enum class LlmDistributionPolicy {
-    TENSOR_PARTITION = 0,    // Split one node or subgraph into parts and run one part per socket/device in parallel.
-    DATA_PARTITION = 1,      // Split one batch input into parts and run one part per socket/device in parallel.
-    PIPELINE_PARTITION = 2,  // Split one model into parts and run each socket/device in parallel as a pipeline.
-    ENTIRE_PLATFORM = 3,     // Run one model on the entire platform with all sockets/devices.
-    SINGLE_DEVICE = 4,       // Run one model on single socket/device.
+enum class ModelDistributionPolicy {
+    NONE = 0,             // Run one model on single socket/device without parallelism.
+    TENSOR_PARALLEL = 1,  // Split one node or subgraph into parts and run one part per socket/device in parallel.
 };
 
 /** @cond INTERNAL */
-inline std::ostream& operator<<(std::ostream& os, const LlmDistributionPolicy& stream_mode) {
+inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& stream_mode) {
     switch (stream_mode) {
-    case LlmDistributionPolicy::TENSOR_PARTITION:
-        return os << "TENSOR_PARTITION";
-    case LlmDistributionPolicy::DATA_PARTITION:
-        return os << "DATA_PARTITION";
-    case LlmDistributionPolicy::PIPELINE_PARTITION:
-        return os << "PIPELINE_PARTITION";
-    case LlmDistributionPolicy::ENTIRE_PLATFORM:
-        return os << "ENTIRE_PLATFORM";
-    case LlmDistributionPolicy::SINGLE_DEVICE:
-        return os << "SINGLE_DEVICE";
+    case ModelDistributionPolicy::NONE:
+        return os << "NONE";
+    case ModelDistributionPolicy::TENSOR_PARALLEL:
+        return os << "TENSOR_PARALLEL";
     default:
-        OPENVINO_THROW("Unsupported LLM distribution policy!");
+        OPENVINO_THROW("Unsupported model distribution policy!");
     }
 }
 
-inline std::istream& operator>>(std::istream& is, LlmDistributionPolicy& stream_mode) {
+inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) {
     std::string str;
     is >> str;
-    if (str == "TENSOR_PARTITION") {
-        stream_mode = LlmDistributionPolicy::TENSOR_PARTITION;
-    } else if (str == "DATA_PARTITION") {
-        stream_mode = LlmDistributionPolicy::DATA_PARTITION;
-    } else if (str == "PIPELINE_PARTITION") {
-        stream_mode = LlmDistributionPolicy::PIPELINE_PARTITION;
-    } else if (str == "ENTIRE_PLATFORM") {
-        stream_mode = LlmDistributionPolicy::ENTIRE_PLATFORM;
-    } else if (str == "SINGLE_DEVICE") {
-        stream_mode = LlmDistributionPolicy::SINGLE_DEVICE;
+    if (str == "NONE") {
+        stream_mode = ModelDistributionPolicy::NONE;
+    } else if (str == "TENSOR_PARALLEL") {
+        stream_mode = ModelDistributionPolicy::TENSOR_PARALLEL;
     } else {
-        OPENVINO_THROW("Unsupported LLM distribution policy: ", str);
+        OPENVINO_THROW("Unsupported model distribution policy: ", str);
     }
     return is;
 }
 /** @endcond */
 
 /**
- * @brief This property defines distribution policy for Large language models (LLM).
+ * @brief This property defines model distribution policy for inference with multiple sockets/devices.
  * @ingroup ov_runtime_cpp_prop_api
  *
- * Developer can use this property to select LLM distribution policy for CPU inference with multiple sockets platform or
- * GPU inference with multiple GPU devices.
- * -- TENSOR_PARTITION   : Split one node or subgraph into parts and run one part per socket/device in parallel.
- * -- DATA_PARTITION     : Split one batch input into parts and run one part per socket/device in parallel.
- * -- PIPELINE_PARTITION : Split one model into parts and run each socket/device in parallel as a pipeline.
- * -- ENTIRE_PLATFORM    : Run one model on the entire platform with all sockets/devices.
- * -- SINGLE_DEVICE      : Run one model on single socket/device.
+ * Developer can use this property to select model distribution policy for CPU inference with multiple sockets
+ * platform or GPU inference with multiple GPU devices.
+ * -- TENSOR_PARALLEL : Split one node or subgraph into parts and run one part per socket/device in parallel.
+ * -- NONE            : Run one model on single socket/device without parallelism.
  *
- * The following code is an example to only use all threads of one socket for one stream on dual sockets platform.
+ * The following example splits a node into two parts and runs one part per socket on a dual-socket platform.
  *
  * @code
- * ie.set_property(ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE));
+ * ie.set_property(ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL));
  * @endcode
  */
-static constexpr Property<LlmDistributionPolicy> llm_distribution_policy{"LLM_DISTRIBUTION_POLICY"};
+static constexpr Property<ModelDistributionPolicy> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"};
 
 /**
  * @brief This property allows CPU pinning during inference.
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index 21de81e00291eb..05948e861ded62 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -193,7 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
             RO_property(ov::hint::num_requests.name()),
             RO_property(ov::hint::enable_cpu_pinning.name()),
             RO_property(ov::hint::scheduling_core_type.name()),
-            RO_property(ov::hint::llm_distribution_policy.name()),
+            RO_property(ov::hint::model_distribution_policy.name()),
             RO_property(ov::hint::enable_hyper_threading.name()),
             RO_property(ov::execution_devices.name()),
             RO_property(ov::intel_cpu::denormals_optimization.name()),
@@ -249,9 +249,9 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
     } else if (name == ov::hint::scheduling_core_type) {
         const auto stream_mode = config.schedulingCoreType;
         return stream_mode;
-    } else if (name == ov::hint::llm_distribution_policy) {
-        const auto core_type = config.llmDistributionPolicy;
-        return core_type;
+    } else if (name == ov::hint::model_distribution_policy) {
+        const auto model_policy = config.modelDistributionPolicy;
+        return model_policy;
     } else if (name == ov::hint::enable_hyper_threading.name()) {
         const bool use_ht = config.enableHyperThreading;
         return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht);
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 55cb1843c9e8d8..ad1db3e6b305b5 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -191,32 +191,29 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                '/',
                                ov::hint::SchedulingCoreType::ECORE_ONLY);
             }
-        } else if (key == ov::hint::llm_distribution_policy.name()) {
+        } else if (key == ov::hint::model_distribution_policy.name()) {
             auto error_info = [&]() {
                 OPENVINO_THROW("Wrong value ",
                                val.as<std::string>(),
                                "for property key ",
-                               ov::hint::llm_distribution_policy.name(),
+                               ov::hint::model_distribution_policy.name(),
                                ". CPU plugin only support ",
-                               ov::hint::LlmDistributionPolicy::TENSOR_PARTITION,
+                               ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL,
                                '/',
-                               ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM,
-                               '/',
-                               ov::hint::LlmDistributionPolicy::SINGLE_DEVICE);
+                               ov::hint::ModelDistributionPolicy::NONE);
             };
 
-            ov::hint::LlmDistributionPolicy llm_policy = ov::hint::LlmDistributionPolicy::PIPELINE_PARTITION;
+            ov::hint::ModelDistributionPolicy model_policy = ov::hint::ModelDistributionPolicy::NONE;
             try {
-                llm_policy = val.as<ov::hint::LlmDistributionPolicy>();
+                model_policy = val.as<ov::hint::ModelDistributionPolicy>();
             } catch (ov::Exception&) {
                 error_info();
             }
 
-            switch (llm_policy) {
-            case ov::hint::LlmDistributionPolicy::TENSOR_PARTITION:
-            case ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM:
-            case ov::hint::LlmDistributionPolicy::SINGLE_DEVICE:
-                llmDistributionPolicy = llm_policy;
+            switch (model_policy) {
+            case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL:
+            case ov::hint::ModelDistributionPolicy::NONE:
+                modelDistributionPolicy = model_policy;
                 break;
             default:
                 error_info();
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index 2f7445bc7180cd..6460cc07aa5b4b 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -76,7 +76,7 @@ struct Config {
     bool enableCpuPinning = true;
     bool changedCpuPinning = false;
     ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
-    ov::hint::LlmDistributionPolicy llmDistributionPolicy = ov::hint::LlmDistributionPolicy::SINGLE_DEVICE;
+    ov::hint::ModelDistributionPolicy modelDistributionPolicy = ov::hint::ModelDistributionPolicy::NONE;
     bool enableHyperThreading = true;
     bool changedHyperThreading = false;
     Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 38f7a6aa54af86..c847fe36d2387f 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -409,8 +409,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
     } else if (name == ov::hint::scheduling_core_type) {
         const auto core_type = engConfig.schedulingCoreType;
         return core_type;
-    } else if (name == ov::hint::llm_distribution_policy) {
-        const auto stream_mode = engConfig.llmDistributionPolicy;
+    } else if (name == ov::hint::model_distribution_policy) {
+        const auto stream_mode = engConfig.modelDistributionPolicy;
         return stream_mode;
     } else if (name == ov::hint::enable_hyper_threading) {
         const bool ht_value = engConfig.enableHyperThreading;
@@ -484,7 +484,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio
             RW_property(ov::hint::num_requests.name()),
             RW_property(ov::hint::enable_cpu_pinning.name()),
             RW_property(ov::hint::scheduling_core_type.name()),
-            RW_property(ov::hint::llm_distribution_policy.name()),
+            RW_property(ov::hint::model_distribution_policy.name()),
             RW_property(ov::hint::enable_hyper_threading.name()),
             RW_property(ov::device::id.name()),
             RW_property(ov::intel_cpu::denormals_optimization.name()),
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
index e0a8ca4346f5eb..ecf3ce8df8af35 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
@@ -116,17 +116,13 @@ const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_3 = {
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
 
-const std::vector<ov::AnyMap> testing_property_for_llm_distribution_policy_1 = {
-    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)},
-    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)}};
+const std::vector<ov::AnyMap> testing_property_for_model_distribution_policy_1 = {
+    {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)},
+    {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)}};
 
-const std::vector<ov::AnyMap> testing_property_for_llm_distribution_policy_2 = {
-    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)},
-    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)}};
-
-const std::vector<ov::AnyMap> testing_property_for_llm_distribution_policy_3 = {
-    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)},
-    {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)}};
+const std::vector<ov::AnyMap> testing_property_for_model_distribution_policy_2 = {
+    {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)},
+    {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)}};
 
 const std::vector<ov::AnyMap> testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)},
                                                                              {ov::hint::enable_hyper_threading(false)}};
@@ -143,9 +139,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest,
                                                              testing_property_for_scheduling_core_type_1,
                                                              testing_property_for_scheduling_core_type_2,
                                                              testing_property_for_scheduling_core_type_3,
-                                                             testing_property_for_llm_distribution_policy_1,
-                                                             testing_property_for_llm_distribution_policy_2,
-                                                             testing_property_for_llm_distribution_policy_3,
+                                                             testing_property_for_model_distribution_policy_1,
+                                                             testing_property_for_model_distribution_policy_2,
                                                              testing_property_for_enable_hyper_threading,
                                                              testing_property_for_enable_cpu_pinning)));
 
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
index aeedd2fbe25b9f..cef9e809bf2a62 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
@@ -33,7 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable
         RO_property(ov::hint::num_requests.name()),
         RO_property(ov::hint::enable_cpu_pinning.name()),
         RO_property(ov::hint::scheduling_core_type.name()),
-        RO_property(ov::hint::llm_distribution_policy.name()),
+        RO_property(ov::hint::model_distribution_policy.name()),
         RO_property(ov::hint::enable_hyper_threading.name()),
         RO_property(ov::execution_devices.name()),
         RO_property(ov::intel_cpu::denormals_optimization.name()),
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
index 40fe41ca82d90e..11a95f5663c749 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
@@ -47,7 +47,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) {
         RW_property(ov::hint::num_requests.name()),
         RW_property(ov::hint::enable_cpu_pinning.name()),
         RW_property(ov::hint::scheduling_core_type.name()),
-        RW_property(ov::hint::llm_distribution_policy.name()),
+        RW_property(ov::hint::model_distribution_policy.name()),
         RW_property(ov::hint::enable_hyper_threading.name()),
         RW_property(ov::device::id.name()),
         RW_property(ov::intel_cpu::denormals_optimization.name()),
diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
index 022cf3f6c26b39..d10914e299e0be 100644
--- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
+++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
@@ -336,12 +336,12 @@ std::vector<ov::AnyMap> OVPropertiesTestsWithCompileModelProps::getRWOptionalPro
     }
 
     if (props.empty() ||
-        std::find(props.begin(), props.end(), ov::hint::llm_distribution_policy.name()) != props.end()) {
-        ov::hint::LlmDistributionPolicy llmDistributionPolicys[] = {ov::hint::LlmDistributionPolicy::TENSOR_PARTITION,
-                                                                    ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM,
-                                                                    ov::hint::LlmDistributionPolicy::SINGLE_DEVICE};
-        for (auto& llmDistributionPolicy : llmDistributionPolicys) {
-            res.push_back({ov::hint::llm_distribution_policy(llmDistributionPolicy)});
+        std::find(props.begin(), props.end(), ov::hint::model_distribution_policy.name()) != props.end()) {
+        ov::hint::ModelDistributionPolicy modelDistributionPolicys[] = {
+            ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL,
+            ov::hint::ModelDistributionPolicy::NONE};
+        for (auto& modelDistributionPolicy : modelDistributionPolicys) {
+            res.push_back({ov::hint::model_distribution_policy(modelDistributionPolicy)});
         }
     }
 

From ad744b8b02e11081018ebd2ac7fce5c0f4fcec4b Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Tue, 19 Mar 2024 00:50:57 +0800
Subject: [PATCH 12/32] support combined properties

---
 src/plugins/intel_cpu/src/config.cpp          | 48 ++++++++++++++-----
 src/plugins/intel_cpu/src/config.h            |  4 +-
 src/plugins/intel_cpu/src/plugin.cpp          | 13 ++++-
 .../custom/behavior/export_import.cpp         | 22 ++-------
 4 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index ad1db3e6b305b5..b10e4d8143f53b 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -66,6 +66,22 @@ void Config::applyDebugCapsProperties() {
 }
 #endif
 
+std::vector<std::string> parse_multiple_parameters(const std::string& inputs, const char separator = ',') {
+    std::vector<std::string> parameters;
+    std::string::size_type pos = 0;
+    std::string::size_type endpos = 0;
+    while ((endpos = inputs.find(separator, pos)) != std::string::npos) {
+        auto substr = inputs.substr(pos, endpos - pos);
+        if (!substr.empty())
+            parameters.push_back(substr);
+        pos = endpos + 1;
+    }
+    auto substr = inputs.substr(pos, inputs.length() - pos);
+    if (!substr.empty())
+        parameters.push_back(substr);
+    return parameters;
+}
+
 void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
     const auto streamExecutorConfigKeys =
         streamExecutorConfig.get_property(ov::supported_properties.name()).as<std::vector<std::string>>();
@@ -203,20 +219,30 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                ov::hint::ModelDistributionPolicy::NONE);
             };
 
-            ov::hint::ModelDistributionPolicy model_policy = ov::hint::ModelDistributionPolicy::NONE;
-            try {
-                model_policy = val.as<ov::hint::ModelDistributionPolicy>();
-            } catch (ov::Exception&) {
+            std::vector<std::string> para_vect = parse_multiple_parameters(val.as<std::string>());
+            if (para_vect.size() == 0) {
                 error_info();
             }
 
-            switch (model_policy) {
-            case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL:
-            case ov::hint::ModelDistributionPolicy::NONE:
-                modelDistributionPolicy = model_policy;
-                break;
-            default:
-                error_info();
+            ov::hint::ModelDistributionPolicy model_policy;
+            modelDistributionPolicy.clear();
+
+            for (auto& row : para_vect) {
+                std::stringstream str_stream;
+                try {
+                    str_stream.str(row);
+                    str_stream >> model_policy;
+                    switch (model_policy) {
+                    case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL:
+                    case ov::hint::ModelDistributionPolicy::NONE:
+                        modelDistributionPolicy.emplace(model_policy);
+                        break;
+                    default:
+                        error_info();
+                    }
+                } catch (ov::Exception&) {
+                    error_info();
+                }
             }
         } else if (key == ov::hint::enable_hyper_threading.name()) {
             try {
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index 6460cc07aa5b4b..e91abf14cc300a 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -15,6 +15,7 @@
 #include <bitset>
 #include <map>
 #include <mutex>
+#include <unordered_set>
 
 namespace ov {
 namespace intel_cpu {
@@ -76,7 +77,8 @@ struct Config {
     bool enableCpuPinning = true;
     bool changedCpuPinning = false;
     ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
-    ov::hint::ModelDistributionPolicy modelDistributionPolicy = ov::hint::ModelDistributionPolicy::NONE;
+    std::unordered_set<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {
+        ov::hint::ModelDistributionPolicy::NONE};
     bool enableHyperThreading = true;
     bool changedHyperThreading = false;
     Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index c847fe36d2387f..a2d5a5e83c3b79 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -410,8 +410,17 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
         const auto core_type = engConfig.schedulingCoreType;
         return core_type;
     } else if (name == ov::hint::model_distribution_policy) {
-        const auto stream_mode = engConfig.modelDistributionPolicy;
-        return stream_mode;
+        std::string policy_str = "";
+        if (engConfig.modelDistributionPolicy.size() > 1) {
+            std::stringstream str_stream;
+            for (auto& row : engConfig.modelDistributionPolicy) {
+                str_stream << row;
+                policy_str += str_stream.str() + ", ";
+                str_stream.str("");
+            }
+            policy_str.erase(policy_str.length() - 2);
+        }
+        return policy_str;
     } else if (name == ov::hint::enable_hyper_threading) {
         const bool ht_value = engConfig.enableHyperThreading;
         return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value);
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
index ecf3ce8df8af35..3983fa33b37491 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
@@ -104,23 +104,12 @@ const std::vector<ov::AnyMap> testing_property_for_performance_mode = {
     {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)},
     {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}};
 
-const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_1 = {
+const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type = {
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
-    {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)}};
-
-const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_2 = {
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)},
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
 
-const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_3 = {
-    {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
-    {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
-
-const std::vector<ov::AnyMap> testing_property_for_model_distribution_policy_1 = {
-    {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)},
-    {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)}};
-
-const std::vector<ov::AnyMap> testing_property_for_model_distribution_policy_2 = {
+const std::vector<ov::AnyMap> testing_property_for_model_distribution_policy = {
     {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)},
     {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)}};
 
@@ -136,11 +125,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest,
                                            ::testing::Values(testing_property_for_streams,
                                                              testing_property_for_threads,
                                                              testing_property_for_performance_mode,
-                                                             testing_property_for_scheduling_core_type_1,
-                                                             testing_property_for_scheduling_core_type_2,
-                                                             testing_property_for_scheduling_core_type_3,
-                                                             testing_property_for_model_distribution_policy_1,
-                                                             testing_property_for_model_distribution_policy_2,
+                                                             testing_property_for_scheduling_core_type,
+                                                             testing_property_for_model_distribution_policy,
                                                              testing_property_for_enable_hyper_threading,
                                                              testing_property_for_enable_cpu_pinning)));
 

From d456451679289644804359e05e96d7e51cc7533c Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Tue, 19 Mar 2024 10:22:57 +0800
Subject: [PATCH 13/32] update code style

---
 src/plugins/intel_cpu/src/config.cpp | 32 ++++++++++++++--------------
 src/plugins/intel_cpu/src/config.h   |  2 ++
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index b10e4d8143f53b..671d1b5af45a42 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -24,6 +24,22 @@ namespace intel_cpu {
 using namespace ov::threading;
 using namespace dnnl::impl::cpu::x64;
 
+std::vector<std::string> parse_multiple_parameters(const std::string& inputs, const char separator) {
+    std::vector<std::string> parameters;
+    std::string::size_type pos = 0;
+    std::string::size_type endpos = 0;
+    while ((endpos = inputs.find(separator, pos)) != std::string::npos) {
+        auto substr = inputs.substr(pos, endpos - pos);
+        if (!substr.empty())
+            parameters.push_back(substr);
+        pos = endpos + 1;
+    }
+    auto substr = inputs.substr(pos, inputs.length() - pos);
+    if (!substr.empty())
+        parameters.push_back(substr);
+    return parameters;
+}
+
 Config::Config() {
     // this is default mode
 #if defined(__APPLE__) || defined(_WIN32)
@@ -66,22 +82,6 @@ void Config::applyDebugCapsProperties() {
 }
 #endif
 
-std::vector<std::string> parse_multiple_parameters(const std::string& inputs, const char separator = ',') {
-    std::vector<std::string> parameters;
-    std::string::size_type pos = 0;
-    std::string::size_type endpos = 0;
-    while ((endpos = inputs.find(separator, pos)) != std::string::npos) {
-        auto substr = inputs.substr(pos, endpos - pos);
-        if (!substr.empty())
-            parameters.push_back(substr);
-        pos = endpos + 1;
-    }
-    auto substr = inputs.substr(pos, inputs.length() - pos);
-    if (!substr.empty())
-        parameters.push_back(substr);
-    return parameters;
-}
-
 void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
     const auto streamExecutorConfigKeys =
         streamExecutorConfig.get_property(ov::supported_properties.name()).as<std::vector<std::string>>();
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index e91abf14cc300a..e81fb3b662ca3d 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -115,5 +115,7 @@ struct Config {
 #endif
 };
 
+std::vector<std::string> parse_multiple_parameters(const std::string& inputs, const char separator = ',');
+
 }  // namespace intel_cpu
 }   // namespace ov

From 911a79e01686b36c3c2e17d9a474e1948a7c504b Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Tue, 19 Mar 2024 13:46:03 +0800
Subject: [PATCH 14/32] update test case for combined properties

---
 src/bindings/c/tests/ov_core_test.cpp        |  7 +++++++
 src/plugins/intel_cpu/src/compiled_model.cpp | 13 +++++++++++--
 src/plugins/intel_cpu/src/plugin.cpp         |  4 ++--
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp
index e56ec78f4ff6e3..54ea701495ebbf 100644
--- a/src/bindings/c/tests/ov_core_test.cpp
+++ b/src/bindings/c/tests/ov_core_test.cpp
@@ -306,6 +306,13 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
     EXPECT_STREQ(val_mode, ret);
     ov_free(ret);
 
+    val_mode = "TENSOR_PARALLEL, NONE";
+    OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode));
+    ret = nullptr;
+    OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret));
+    EXPECT_STREQ(val_mode, ret);
+    ov_free(ret);
+
     OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val));
     ret = nullptr;
     OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret));
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index 05948e861ded62..7dbc2eb76f40c7 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -250,8 +250,17 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
         const auto stream_mode = config.schedulingCoreType;
         return stream_mode;
     } else if (name == ov::hint::model_distribution_policy) {
-        const auto model_policy = config.modelDistributionPolicy;
-        return model_policy;
+        std::string policy_str = "";
+        if (config.modelDistributionPolicy.size() > 0) {
+            std::stringstream str_stream;
+            for (auto& row : config.modelDistributionPolicy) {
+                str_stream << row;
+                policy_str = str_stream.str() + ", " + policy_str;
+                str_stream.str("");
+            }
+            policy_str.erase(policy_str.length() - 2);
+        }
+        return policy_str;
     } else if (name == ov::hint::enable_hyper_threading.name()) {
         const bool use_ht = config.enableHyperThreading;
         return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht);
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index a2d5a5e83c3b79..1470713b4ecff5 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -411,11 +411,11 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
         return core_type;
     } else if (name == ov::hint::model_distribution_policy) {
         std::string policy_str = "";
-        if (engConfig.modelDistributionPolicy.size() > 1) {
+        if (engConfig.modelDistributionPolicy.size() > 0) {
             std::stringstream str_stream;
             for (auto& row : engConfig.modelDistributionPolicy) {
                 str_stream << row;
-                policy_str += str_stream.str() + ", ";
+                policy_str = str_stream.str() + ", " + policy_str;
                 str_stream.str("");
             }
             policy_str.erase(policy_str.length() - 2);

From 9f1189f38f823024d7b9837bdc53d7139adccb81 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Tue, 19 Mar 2024 15:15:37 +0800
Subject: [PATCH 15/32] update test case for combined properties

---
 src/bindings/c/tests/ov_core_test.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp
index 54ea701495ebbf..61f5adda5c2886 100644
--- a/src/bindings/c/tests/ov_core_test.cpp
+++ b/src/bindings/c/tests/ov_core_test.cpp
@@ -310,7 +310,9 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
     OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode));
     ret = nullptr;
     OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret));
-    EXPECT_STREQ(val_mode, ret);
+    if ((ret != "TENSOR_PARALLEL, NONE") && (ret != "NONE, TENSOR_PARALLEL")) {
+        EXPECT_STREQ(val_mode, ret);
+    }
     ov_free(ret);
 
     OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val));

From 685827278008d5a0af40d6d76a02941daa66b599 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Tue, 19 Mar 2024 15:40:35 +0800
Subject: [PATCH 16/32] update test case for combined properties

---
 src/bindings/c/tests/ov_core_test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp
index 61f5adda5c2886..592479a6b02166 100644
--- a/src/bindings/c/tests/ov_core_test.cpp
+++ b/src/bindings/c/tests/ov_core_test.cpp
@@ -310,7 +310,7 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
     OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode));
     ret = nullptr;
     OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret));
-    if ((ret != "TENSOR_PARALLEL, NONE") && (ret != "NONE, TENSOR_PARALLEL")) {
+    if ((strcmp(ret, "TENSOR_PARALLEL, NONE") != 0) && (strcmp(ret, "NONE, TENSOR_PARALLEL") != 0)) {
         EXPECT_STREQ(val_mode, ret);
     }
     ov_free(ret);

From 7d0af101cdf16bbdbf3c1299768ce7d63d845ab4 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Tue, 19 Mar 2024 18:23:27 +0800
Subject: [PATCH 17/32] update for combined properties

---
 src/bindings/c/tests/ov_core_test.cpp        | 4 +---
 src/plugins/intel_cpu/src/compiled_model.cpp | 2 +-
 src/plugins/intel_cpu/src/config.cpp         | 2 +-
 src/plugins/intel_cpu/src/config.h           | 4 +---
 src/plugins/intel_cpu/src/plugin.cpp         | 2 +-
 5 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp
index 592479a6b02166..54ea701495ebbf 100644
--- a/src/bindings/c/tests/ov_core_test.cpp
+++ b/src/bindings/c/tests/ov_core_test.cpp
@@ -310,9 +310,7 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
     OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode));
     ret = nullptr;
     OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret));
-    if ((strcmp(ret, "TENSOR_PARALLEL, NONE") != 0) && (strcmp(ret, "NONE, TENSOR_PARALLEL") != 0)) {
-        EXPECT_STREQ(val_mode, ret);
-    }
+    EXPECT_STREQ(val_mode, ret);
     ov_free(ret);
 
     OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val));
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index 7dbc2eb76f40c7..71589a6a18b6ae 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -255,7 +255,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
             std::stringstream str_stream;
             for (auto& row : config.modelDistributionPolicy) {
                 str_stream << row;
-                policy_str = str_stream.str() + ", " + policy_str;
+                policy_str += str_stream.str() + ", ";
                 str_stream.str("");
             }
             policy_str.erase(policy_str.length() - 2);
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 671d1b5af45a42..4a77c64e2eba84 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -235,7 +235,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                     switch (model_policy) {
                     case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL:
                     case ov::hint::ModelDistributionPolicy::NONE:
-                        modelDistributionPolicy.emplace(model_policy);
+                        modelDistributionPolicy.emplace_back(model_policy);
                         break;
                     default:
                         error_info();
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index e81fb3b662ca3d..acd81c4a7efc86 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -15,7 +15,6 @@
 #include <bitset>
 #include <map>
 #include <mutex>
-#include <unordered_set>
 
 namespace ov {
 namespace intel_cpu {
@@ -77,8 +76,7 @@ struct Config {
     bool enableCpuPinning = true;
     bool changedCpuPinning = false;
     ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
-    std::unordered_set<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {
-        ov::hint::ModelDistributionPolicy::NONE};
+    std::vector<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE};
     bool enableHyperThreading = true;
     bool changedHyperThreading = false;
     Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 1470713b4ecff5..2ffb22c41dc381 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -415,7 +415,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
             std::stringstream str_stream;
             for (auto& row : engConfig.modelDistributionPolicy) {
                 str_stream << row;
-                policy_str = str_stream.str() + ", " + policy_str;
+                policy_str += str_stream.str() + ", ";
                 str_stream.str("");
             }
             policy_str.erase(policy_str.length() - 2);

From 03d09e8286559dd9a74eec96216b11ca11537df8 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 15:45:43 +0800
Subject: [PATCH 18/32] remove CAPI interface

---
 src/bindings/c/docs/api_overview.md             |  2 --
 src/bindings/c/include/openvino/c/ov_property.h |  7 -------
 src/bindings/c/src/ov_property.cpp              |  1 -
 src/bindings/c/tests/ov_core_test.cpp           | 15 ---------------
 4 files changed, 25 deletions(-)

diff --git a/src/bindings/c/docs/api_overview.md b/src/bindings/c/docs/api_overview.md
index 506786189abce3..6ca2ad403c1a7e 100644
--- a/src/bindings/c/docs/api_overview.md
+++ b/src/bindings/c/docs/api_overview.md
@@ -309,8 +309,6 @@ OPENVINO_C_VAR(const char*) ov_property_key_affinity;
 
 OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads;
 
-OPENVINO_C_VAR(const char*) ov_property_key_hint_model_distribution_policy;
-
 OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_cpu_pinning;
 
 OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_hyper_threading;
diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h
index 1f9bcea14dd9a3..5532287057a886 100644
--- a/src/bindings/c/include/openvino/c/ov_property.h
+++ b/src/bindings/c/include/openvino/c/ov_property.h
@@ -128,13 +128,6 @@ ov_property_key_affinity;
 OPENVINO_C_VAR(const char*)
 ov_property_key_inference_num_threads;
 
-/**
- * @brief Read-write property<int32_t string> to set/get the maximum number of threads per stream of CPU inference.
- * @ingroup ov_property_c_api
- */
-OPENVINO_C_VAR(const char*)
-ov_property_key_hint_model_distribution_policy;
-
 /**
  * @brief Read-write property, it is high-level OpenVINO hint for using CPU pinning to bind CPU threads to processors
  * during inference
diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp
index 611b36c90c83f8..7c33b4b8dbb9cd 100644
--- a/src/bindings/c/src/ov_property.cpp
+++ b/src/bindings/c/src/ov_property.cpp
@@ -23,7 +23,6 @@ const char* ov_property_key_cache_mode = "CACHE_MODE";
 const char* ov_property_key_num_streams = "NUM_STREAMS";
 const char* ov_property_key_affinity = "AFFINITY";
 const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS";
-const char* ov_property_key_hint_model_distribution_policy = "MODEL_DISTRIBUTION_POLICY";
 const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT";
 const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING";
 const char* ov_property_key_hint_scheduling_core_type = "SCHEDULING_CORE_TYPE";
diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp
index 54ea701495ebbf..3e8ceebcaa0e49 100644
--- a/src/bindings/c/tests/ov_core_test.cpp
+++ b/src/bindings/c/tests/ov_core_test.cpp
@@ -298,21 +298,6 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
     EXPECT_STREQ(val_type, ret);
     ov_free(ret);
 
-    const char* key_mode = ov_property_key_hint_model_distribution_policy;
-    const char* val_mode = "TENSOR_PARALLEL";
-    OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode));
-    ret = nullptr;
-    OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret));
-    EXPECT_STREQ(val_mode, ret);
-    ov_free(ret);
-
-    val_mode = "TENSOR_PARALLEL, NONE";
-    OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode));
-    ret = nullptr;
-    OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret));
-    EXPECT_STREQ(val_mode, ret);
-    ov_free(ret);
-
     OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val));
     ret = nullptr;
     OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret));

From eef60acf20a22675731e9336fd84d51715403a89 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 16:36:52 +0800
Subject: [PATCH 19/32] draft implementation for std::set value

---
 .../tests/test_runtime/test_properties.py     |  2 +-
 .../include/openvino/runtime/properties.hpp   |  4 +--
 src/plugins/intel_cpu/src/compiled_model.cpp  | 13 ++-------
 src/plugins/intel_cpu/src/config.cpp          | 28 +++++--------------
 src/plugins/intel_cpu/src/plugin.cpp          | 13 ++-------
 .../custom/behavior/export_import.cpp         |  6 ++--
 .../behavior/ov_plugin/properties_tests.cpp   |  2 +-
 7 files changed, 19 insertions(+), 49 deletions(-)

diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index 80ecf47ba0ed89..accc7dd9e13fe8 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -289,7 +289,7 @@ def test_properties_ro(ov_property_ro, expected_value):
         (
             hints.model_distribution_policy,
             "MODEL_DISTRIBUTION_POLICY",
-            ((hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.TENSOR_PARALLEL),),
+            (({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),),
         ),
         (
             hints.enable_hyper_threading,
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 61cf50b0363553..debf176e7e3c63 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -442,10 +442,10 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& strea
  * The following code is an example to split node into two parts run one part per socket on dual sockets platform.
  *
  * @code
- * ie.set_property(ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARTITION));
+ * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARTITION}));
  * @endcode
  */
-static constexpr Property<ModelDistributionPolicy> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"};
+static constexpr Property<std::set<ModelDistributionPolicy>> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"};
 
 /**
  * @brief This property allows CPU pinning during inference.
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index 71589a6a18b6ae..039b96c70f824c 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -250,17 +250,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
         const auto stream_mode = config.schedulingCoreType;
         return stream_mode;
     } else if (name == ov::hint::model_distribution_policy) {
-        std::string policy_str = "";
-        if (config.modelDistributionPolicy.size() > 0) {
-            std::stringstream str_stream;
-            for (auto& row : config.modelDistributionPolicy) {
-                str_stream << row;
-                policy_str += str_stream.str() + ", ";
-                str_stream.str("");
-            }
-            policy_str.erase(policy_str.length() - 2);
-        }
-        return policy_str;
+        const auto distribution_policy = config.modelDistributionPolicy;
+        return distribution_policy;
     } else if (name == ov::hint::enable_hyper_threading.name()) {
         const bool use_ht = config.enableHyperThreading;
         return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht);
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index f3d0df66cfb9ca..74226db8762860 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -219,30 +219,16 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                ov::hint::ModelDistributionPolicy::NONE);
             };
 
-            std::vector<std::string> para_vect = parse_multiple_parameters(val.as<std::string>());
-            if (para_vect.size() == 0) {
-                error_info();
-            }
-
-            ov::hint::ModelDistributionPolicy model_policy;
-            modelDistributionPolicy.clear();
-
-            for (auto& row : para_vect) {
-                std::stringstream str_stream;
-                try {
-                    str_stream.str(row);
-                    str_stream >> model_policy;
-                    switch (model_policy) {
-                    case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL:
-                    case ov::hint::ModelDistributionPolicy::NONE:
-                        modelDistributionPolicy.emplace_back(model_policy);
-                        break;
-                    default:
+            try {
+                for (auto& row : val.as<std::set>()) {
+                    if ((row.as<ov::hint::ModelDistributionPolicy>() != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) &&
+                        (row.as<ov::hint::ModelDistributionPolicy>() != ov::hint::ModelDistributionPolicy::NONE)) {
                         error_info();
                     }
-                } catch (ov::Exception&) {
-                    error_info();
                 }
+                modelDistributionPolicy = val.as<std::set>();
+            } catch (ov::Exception&) {
+                error_info();
             }
         } else if (key == ov::hint::enable_hyper_threading.name()) {
             try {
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 2ffb22c41dc381..e8d502fc0d3922 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -410,17 +410,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
         const auto core_type = engConfig.schedulingCoreType;
         return core_type;
     } else if (name == ov::hint::model_distribution_policy) {
-        std::string policy_str = "";
-        if (engConfig.modelDistributionPolicy.size() > 0) {
-            std::stringstream str_stream;
-            for (auto& row : engConfig.modelDistributionPolicy) {
-                str_stream << row;
-                policy_str += str_stream.str() + ", ";
-                str_stream.str("");
-            }
-            policy_str.erase(policy_str.length() - 2);
-        }
-        return policy_str;
+        const auto distribution_policy = engConfig.modelDistributionPolicy;
+        return distribution_policy;
     } else if (name == ov::hint::enable_hyper_threading) {
         const bool ht_value = engConfig.enableHyperThreading;
         return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value);
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
index 3983fa33b37491..3455df47b5b6d6 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
@@ -110,8 +110,10 @@ const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type = {
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
 
 const std::vector<ov::AnyMap> testing_property_for_model_distribution_policy = {
-    {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)},
-    {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)}};
+    {ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::NONE})},
+    {ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL})},
+    {ov::hint::model_distribution_policy(
+        {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, ov::hint::ModelDistributionPolicy::NONE})}};
 
 const std::vector<ov::AnyMap> testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)},
                                                                              {ov::hint::enable_hyper_threading(false)}};
diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
index d10914e299e0be..362d42a5c83663 100644
--- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
+++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
@@ -341,7 +341,7 @@ std::vector<ov::AnyMap> OVPropertiesTestsWithCompileModelProps::getRWOptionalPro
             ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL,
             ov::hint::ModelDistributionPolicy::NONE};
         for (auto& modelDistributionPolicy : modelDistributionPolicys) {
-            res.push_back({ov::hint::model_distribution_policy(modelDistributionPolicy)});
+            res.push_back({ov::hint::model_distribution_policy({modelDistributionPolicy})});
         }
     }
 

From 77f30a9a12747d0bf819be490f121d4fad32f558 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 18:40:02 +0800
Subject: [PATCH 20/32] update c++ implementation for std::set value

---
 src/core/include/openvino/core/any.hpp        |  3 +++
 src/plugins/intel_cpu/src/config.cpp          | 15 +++++++------
 src/plugins/intel_cpu/src/config.h            |  2 +-
 .../custom/behavior/export_import.cpp         |  7 ------
 .../custom/behavior/ov_plugin/properties.cpp  | 22 +++++++++++++++++++
 .../behavior/ov_plugin/properties_tests.cpp   | 10 ---------
 6 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp
index ca0c86aa924062..9dd33f3c5a34d7 100644
--- a/src/core/include/openvino/core/any.hpp
+++ b/src/core/include/openvino/core/any.hpp
@@ -10,6 +10,7 @@
 
 #include <map>
 #include <memory>
+#include <set>
 #include <string>
 #include <typeindex>
 #include <typeinfo>
@@ -949,6 +950,8 @@ using RTMap = AnyMap;
 
 using AnyVector = std::vector<ov::Any>;
 
+using AnySet = std::set<ov::Any>;
+
 /** @cond INTERNAL */
 inline static void PrintTo(const Any& any, std::ostream* os) {
     any.print(*os);
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 74226db8762860..8774638e038947 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -213,20 +213,21 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                val.as<std::string>(),
                                "for property key ",
                                ov::hint::model_distribution_policy.name(),
-                               ". CPU plugin only support ",
+                               ". CPU plugin only support {",
                                ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL,
-                               '/',
-                               ov::hint::ModelDistributionPolicy::NONE);
+                               '}/{',
+                               ov::hint::ModelDistributionPolicy::NONE,
+                               '}');
             };
 
             try {
-                for (auto& row : val.as<std::set>()) {
-                    if ((row.as<ov::hint::ModelDistributionPolicy>() != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) &&
-                        (row.as<ov::hint::ModelDistributionPolicy>() != ov::hint::ModelDistributionPolicy::NONE)) {
+                for (auto& row : val.as<std::set<ov::hint::ModelDistributionPolicy>>()) {
+                    if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) &&
+                        (row != ov::hint::ModelDistributionPolicy::NONE)) {
                         error_info();
                     }
                 }
-                modelDistributionPolicy = val.as<std::set>();
+                modelDistributionPolicy = val.as<std::set<ov::hint::ModelDistributionPolicy>>();
             } catch (ov::Exception&) {
                 error_info();
             }
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index acd81c4a7efc86..216bd7667fe444 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -76,7 +76,7 @@ struct Config {
     bool enableCpuPinning = true;
     bool changedCpuPinning = false;
     ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
-    std::vector<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE};
+    std::set<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE};
     bool enableHyperThreading = true;
     bool changedHyperThreading = false;
     Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
index 3455df47b5b6d6..5fc89b979c261c 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp
@@ -109,12 +109,6 @@ const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type = {
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)},
     {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
 
-const std::vector<ov::AnyMap> testing_property_for_model_distribution_policy = {
-    {ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::NONE})},
-    {ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL})},
-    {ov::hint::model_distribution_policy(
-        {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, ov::hint::ModelDistributionPolicy::NONE})}};
-
 const std::vector<ov::AnyMap> testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)},
                                                                              {ov::hint::enable_hyper_threading(false)}};
 
@@ -128,7 +122,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest,
                                                              testing_property_for_threads,
                                                              testing_property_for_performance_mode,
                                                              testing_property_for_scheduling_core_type,
-                                                             testing_property_for_model_distribution_policy,
                                                              testing_property_for_enable_hyper_threading,
                                                              testing_property_for_enable_cpu_pinning)));
 
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
index 11a95f5663c749..f9bbd19e94b676 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
@@ -108,6 +108,28 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigInferenceNumThreads) {
     ASSERT_EQ(num_threads, value);
 }
 
+TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigModelDistributionPolicy) {
+    ov::Core ie;
+    std::set<ov::hint::ModelDistributionPolicy> value = {ov::hint::ModelDistributionPolicy::NONE};
+    std::set<ov::hint::ModelDistributionPolicy> model_policy = {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL};
+
+    ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
+    ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
+    ASSERT_EQ(model_policy, value);
+
+    model_policy = {ov::hint::ModelDistributionPolicy::NONE};
+
+    ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
+    ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
+    ASSERT_EQ(model_policy, value);
+
+    model_policy = {ov::hint::ModelDistributionPolicy::NONE, ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL};
+
+    ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
+    ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
+    ASSERT_EQ(model_policy, value);
+}
+
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) {
     ov::Core ie;
     int32_t value = 0;
diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
index 362d42a5c83663..9452690a596e55 100644
--- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
+++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp
@@ -335,16 +335,6 @@ std::vector<ov::AnyMap> OVPropertiesTestsWithCompileModelProps::getRWOptionalPro
         }
     }
 
-    if (props.empty() ||
-        std::find(props.begin(), props.end(), ov::hint::model_distribution_policy.name()) != props.end()) {
-        ov::hint::ModelDistributionPolicy modelDistributionPolicys[] = {
-            ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL,
-            ov::hint::ModelDistributionPolicy::NONE};
-        for (auto& modelDistributionPolicy : modelDistributionPolicys) {
-            res.push_back({ov::hint::model_distribution_policy({modelDistributionPolicy})});
-        }
-    }
-
     if (props.empty() || std::find(props.begin(), props.end(), ov::enable_mmap.name()) != props.end()) {
         res.push_back({ov::enable_mmap(true)});
         res.push_back({ov::enable_mmap(false)});

From 5269cac0a882fe8115b85c5c14303fc7b5c21c5c Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 18:41:42 +0800
Subject: [PATCH 21/32] remove AnySet alias from any.hpp

---
 src/core/include/openvino/core/any.hpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp
index 9dd33f3c5a34d7..59cf8a4c4ced04 100644
--- a/src/core/include/openvino/core/any.hpp
+++ b/src/core/include/openvino/core/any.hpp
@@ -950,8 +950,6 @@ using RTMap = AnyMap;
 
 using AnyVector = std::vector<ov::Any>;
 
-using AnySet = std::set<ov::Any>;
-
 /** @cond INTERNAL */
 inline static void PrintTo(const Any& any, std::ostream* os) {
     any.print(*os);

From 018eabb88280c5c7217a8437d9887a754a5cc9db Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 18:42:42 +0800
Subject: [PATCH 22/32] remove <set> include from any.hpp

---
 src/core/include/openvino/core/any.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp
index 59cf8a4c4ced04..ca0c86aa924062 100644
--- a/src/core/include/openvino/core/any.hpp
+++ b/src/core/include/openvino/core/any.hpp
@@ -10,7 +10,6 @@
 
 #include <map>
 #include <memory>
-#include <set>
 #include <string>
 #include <typeindex>
 #include <typeinfo>

From 922554c12489a9bf8569865c0c87e784c9a8c5c1 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 23:14:48 +0800
Subject: [PATCH 23/32] remove unused function

---
 src/plugins/intel_cpu/src/config.cpp | 16 ----------------
 src/plugins/intel_cpu/src/config.h   |  2 --
 2 files changed, 18 deletions(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 8774638e038947..f7744738a31013 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -24,22 +24,6 @@ namespace intel_cpu {
 using namespace ov::threading;
 using namespace dnnl::impl::cpu::x64;
 
-std::vector<std::string> parse_multiple_parameters(const std::string& inputs, const char separator) {
-    std::vector<std::string> parameters;
-    std::string::size_type pos = 0;
-    std::string::size_type endpos = 0;
-    while ((endpos = inputs.find(separator, pos)) != std::string::npos) {
-        auto substr = inputs.substr(pos, endpos - pos);
-        if (!substr.empty())
-            parameters.push_back(substr);
-        pos = endpos + 1;
-    }
-    auto substr = inputs.substr(pos, inputs.length() - pos);
-    if (!substr.empty())
-        parameters.push_back(substr);
-    return parameters;
-}
-
 Config::Config() {
     // this is default mode
 #if defined(__APPLE__) || defined(_WIN32)
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index 216bd7667fe444..ef069865875fa1 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -113,7 +113,5 @@ struct Config {
 #endif
 };
 
-std::vector<std::string> parse_multiple_parameters(const std::string& inputs, const char separator = ',');
-
 }  // namespace intel_cpu
 }   // namespace ov

From 828e583ceeea9f91f45426c26cad684be87c938d Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 23:24:39 +0800
Subject: [PATCH 24/32] update python

---
 src/bindings/python/src/pyopenvino/utils/utils.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp
index d21e2a1fef0a23..ffbcf3e4ac730f 100644
--- a/src/bindings/python/src/pyopenvino/utils/utils.cpp
+++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp
@@ -7,6 +7,7 @@
 #include <pybind11/stl.h>
 
 #include <map>
+#include <set>
 #include <string>
 #include <tuple>
 #include <vector>
@@ -176,8 +177,8 @@ py::object from_ov_any(const ov::Any& any) {
         return py::cast(any.as<ov::intel_auto::SchedulePolicy>());
     } else if (any.is<ov::hint::SchedulingCoreType>()) {
         return py::cast(any.as<ov::hint::SchedulingCoreType>());
-    } else if (any.is<ov::hint::ModelDistributionPolicy>()) {
-        return py::cast(any.as<ov::hint::ModelDistributionPolicy>());
+    } else if (any.is<std::set<ov::hint::ModelDistributionPolicy>>()) {
+        return py::cast(any.as<std::set<ov::hint::ModelDistributionPolicy>>());
     } else if (any.is<ov::hint::ExecutionMode>()) {
         return py::cast(any.as<ov::hint::ExecutionMode>());
     } else if (any.is<ov::log::Level>()) {
@@ -377,8 +378,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
         return py::cast<ov::intel_auto::SchedulePolicy>(py_obj);
     } else if (py::isinstance<ov::hint::SchedulingCoreType>(py_obj)) {
         return py::cast<ov::hint::SchedulingCoreType>(py_obj);
-    } else if (py::isinstance<ov::hint::ModelDistributionPolicy>(py_obj)) {
-        return py::cast<ov::hint::ModelDistributionPolicy>(py_obj);
+    } else if (py::isinstance<std::set<ov::hint::ModelDistributionPolicy>>(py_obj)) {
+        return py::cast<std::set<ov::hint::ModelDistributionPolicy>>(py_obj);
     } else if (py::isinstance<ov::hint::ExecutionMode>(py_obj)) {
         return py::cast<ov::hint::ExecutionMode>(py_obj);
     } else if (py::isinstance<ov::log::Level>(py_obj)) {

From 73ce7576145795d9f5b846eea46368615eb0ef82 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 23:29:25 +0800
Subject: [PATCH 25/32] update python test case

---
 src/bindings/python/tests/test_runtime/test_properties.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index accc7dd9e13fe8..efef8d52d15338 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -289,7 +289,11 @@ def test_properties_ro(ov_property_ro, expected_value):
         (
             hints.model_distribution_policy,
             "MODEL_DISTRIBUTION_POLICY",
-            (({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),),
+            (
+                ({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),
+                ({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}),
+                ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}),
+            ),
         ),
         (
             hints.enable_hyper_threading,

From d18568e21b44b44f7e5423a7193122f22c365511 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 23:35:44 +0800
Subject: [PATCH 26/32] update python code style

---
 src/bindings/python/tests/test_runtime/test_properties.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index efef8d52d15338..54d459cd971c81 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -292,7 +292,8 @@ def test_properties_ro(ov_property_ro, expected_value):
             (
                 ({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),
                 ({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}),
-                ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}),
+                ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, 
+                 {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}),
             ),
         ),
         (

From 988cb56d19f2f096131ba2a7895898c9e7723356 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Wed, 20 Mar 2024 23:41:54 +0800
Subject: [PATCH 27/32] update python code style

---
 src/bindings/python/tests/test_runtime/test_properties.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index 54d459cd971c81..fcffc2fb193295 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -292,7 +292,7 @@ def test_properties_ro(ov_property_ro, expected_value):
             (
                 ({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),
                 ({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}),
-                ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, 
+                ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE},
                  {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}),
             ),
         ),

From e9d25905e6c18925dcfb84453293d5abe84ee668 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Thu, 21 Mar 2024 00:03:10 +0800
Subject: [PATCH 28/32] update code style

---
 src/plugins/intel_cpu/src/config.cpp | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index f7744738a31013..3e86f7e527868a 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -184,12 +184,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                val.as<std::string>(),
                                "for property key ",
                                ov::hint::scheduling_core_type.name(),
-                               ". Expected only ",
-                               ov::hint::SchedulingCoreType::ANY_CORE,
-                               '/',
-                               ov::hint::SchedulingCoreType::PCORE_ONLY,
-                               '/',
-                               ov::hint::SchedulingCoreType::ECORE_ONLY);
+                               ". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY");
             }
         } else if (key == ov::hint::model_distribution_policy.name()) {
             auto error_info = [&]() {
@@ -197,11 +192,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                val.as<std::string>(),
                                "for property key ",
                                ov::hint::model_distribution_policy.name(),
-                               ". CPU plugin only support {",
-                               ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL,
-                               '}/{',
-                               ov::hint::ModelDistributionPolicy::NONE,
-                               '}');
+                               ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL/NONE}");
             };
 
             try {

From 9db450065db58262b23a7a29d9ecf200b8712e63 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Thu, 21 Mar 2024 16:20:11 +0800
Subject: [PATCH 29/32] update for comments

---
 .../pyopenvino/core/properties/properties.cpp |  1 -
 .../tests/test_runtime/test_properties.py     |  6 +----
 .../include/openvino/runtime/properties.hpp   | 24 +++++++++----------
 src/plugins/intel_cpu/src/config.cpp          |  3 +--
 src/plugins/intel_cpu/src/config.h            |  2 +-
 .../custom/behavior/ov_plugin/properties.cpp  | 10 ++------
 6 files changed, 16 insertions(+), 30 deletions(-)

diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
index 6310aac026e8c0..f1edeaa18ff1ef 100644
--- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -72,7 +72,6 @@ void regmodule_properties(py::module m) {
         .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY);
 
     py::enum_<ov::hint::ModelDistributionPolicy>(m_hint, "ModelDistributionPolicy", py::arithmetic())
-        .value("NONE", ov::hint::ModelDistributionPolicy::NONE)
         .value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL);
 
     py::enum_<ov::hint::ExecutionMode>(m_hint, "ExecutionMode", py::arithmetic())
diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index fcffc2fb193295..ffa3cabcf88b29 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -89,8 +89,7 @@ def test_properties_rw_base():
         (
             hints.ModelDistributionPolicy,
             (
-                (hints.ModelDistributionPolicy.NONE, "ModelDistributionPolicy.NONE", 0),
-                (hints.ModelDistributionPolicy.TENSOR_PARALLEL, "ModelDistributionPolicy.TENSOR_PARALLEL", 1),
+                (hints.ModelDistributionPolicy.TENSOR_PARALLEL, "ModelDistributionPolicy.TENSOR_PARALLEL", 0),
             ),
         ),
         (
@@ -291,9 +290,6 @@ def test_properties_ro(ov_property_ro, expected_value):
             "MODEL_DISTRIBUTION_POLICY",
             (
                 ({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),
-                ({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}),
-                ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE},
-                 {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}),
             ),
         ),
         (
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index debf176e7e3c63..654131d8a49217 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -400,15 +400,14 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
 static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};
 
 enum class ModelDistributionPolicy {
-    NONE = 0,             // Run one model on single socket/device without parallelism.
-    TENSOR_PARALLEL = 1,  // Split one node or subgraph into parts and run one part per socket/device in parallel.
+    TENSOR_PARALLEL = 0,  // Split tensor into several parts and disribute them between sockets/devices during model
+                          // compilation. At inference time sockets/devices process tensors in parallel and do
+                          // syncronization at the end ensuring mathematical correctness.
 };
 
 /** @cond INTERNAL */
 inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& stream_mode) {
     switch (stream_mode) {
-    case ModelDistributionPolicy::NONE:
-        return os << "NONE";
     case ModelDistributionPolicy::TENSOR_PARALLEL:
         return os << "TENSOR_PARALLEL";
     default:
@@ -419,9 +418,7 @@ inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy&
 inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) {
     std::string str;
     is >> str;
-    if (str == "NONE") {
-        stream_mode = ModelDistributionPolicy::NONE;
-    } else if (str == "TENSOR_PARALLEL") {
+    if (str == "TENSOR_PARALLEL") {
         stream_mode = ModelDistributionPolicy::TENSOR_PARALLEL;
     } else {
         OPENVINO_THROW("Unsupported model distribution policy: ", str);
@@ -434,15 +431,16 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& strea
  * @brief This property defines model distribution policy for inference with multiple sockets/devices.
  * @ingroup ov_runtime_cpp_prop_api
  *
- * Developer can use this property to select model distribution policy for CPU inference with multiple sockets
- * platform or GPU inference with multiple GPU devices.
- * -- TENSOR_PARALLEL : Split one node or subgraph into parts and run one part per socket/device in parallel.
- * -- NONE            : Run one model on single socket/device without parallelism.
+ * This property can be used to select model distribution policy between execution units (e.g. between CPU sockets/NUMA
+ * nodes or between different GPUs).
+ * -- TENSOR_PARALLEL : Split tensor into several parts and disribute them between sockets/devices during model
+ *                      compilation. At inference time sockets/devices process tensors in parallel and do syncronization
+ *                      at the end ensuring mathematical correctness.
  *
- * The following code is an example to split node into two parts run one part per socket on dual sockets platform.
+ * The following code is an example of how the TENSOR_PARALLEL model distribution policy might be enabled.
  *
  * @code
- * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARTITION}));
+ * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}));
  * @endcode
  */
 static constexpr Property<std::set<ModelDistributionPolicy>> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"};
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 3e86f7e527868a..b235281c3fca3b 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -197,8 +197,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
 
             try {
                 for (auto& row : val.as<std::set<ov::hint::ModelDistributionPolicy>>()) {
-                    if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) &&
-                        (row != ov::hint::ModelDistributionPolicy::NONE)) {
+                    if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) {
                         error_info();
                     }
                 }
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index ef069865875fa1..10d7274dc66f7c 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -76,7 +76,7 @@ struct Config {
     bool enableCpuPinning = true;
     bool changedCpuPinning = false;
     ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
-    std::set<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE};
+    std::set<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {};
     bool enableHyperThreading = true;
     bool changedHyperThreading = false;
     Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
index f9bbd19e94b676..1b29347d6c0605 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
@@ -110,20 +110,14 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigInferenceNumThreads) {
 
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigModelDistributionPolicy) {
     ov::Core ie;
-    std::set<ov::hint::ModelDistributionPolicy> value = {ov::hint::ModelDistributionPolicy::NONE};
+    std::set<ov::hint::ModelDistributionPolicy> value = {};
     std::set<ov::hint::ModelDistributionPolicy> model_policy = {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL};
 
     ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
     ASSERT_EQ(model_policy, value);
 
-    model_policy = {ov::hint::ModelDistributionPolicy::NONE};
-
-    ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
-    ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
-    ASSERT_EQ(model_policy, value);
-
-    model_policy = {ov::hint::ModelDistributionPolicy::NONE, ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL};
+    model_policy = {};
 
     ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));

From c77970151c93453214cb412cc83eba61af9ae6f4 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Thu, 21 Mar 2024 16:28:19 +0800
Subject: [PATCH 30/32] update for typo

---
 src/bindings/python/tests/test_runtime/test_properties.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index ffa3cabcf88b29..d4ad725679a351 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -554,6 +554,7 @@ def test_single_property_setting(device):
             props.affinity: "NONE",
             "INFERENCE_PRECISION_HINT": Type.f32,
             hints.performance_mode: hints.PerformanceMode.LATENCY,
+            hints.scheduling_core_type: hints.SchedulingCoreType.PCORE_ONLY,
             hints.num_requests: 12,
             "NUM_STREAMS": streams.Num(5),
             "ENABLE_MMAP": False,

From 0ae8b3ec021f837644547e9116f607a36d28bb95 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Thu, 21 Mar 2024 16:59:53 +0800
Subject: [PATCH 31/32] remove value NONE for ModelDistributionPolicy

---
 src/plugins/intel_cpu/src/config.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index b235281c3fca3b..8567914415e459 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -192,7 +192,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                val.as<std::string>(),
                                "for property key ",
                                ov::hint::model_distribution_policy.name(),
-                               ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL/NONE}");
+                               ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}");
             };
 
             try {

From 88d9929bb3c39874e6cd8c0ed4924416946c75c5 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Thu, 21 Mar 2024 22:49:07 +0800
Subject: [PATCH 32/32] fix typo

---
 src/inference/include/openvino/runtime/properties.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 654131d8a49217..2ddd8702eb87fd 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -400,7 +400,7 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
 static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};
 
 enum class ModelDistributionPolicy {
-    TENSOR_PARALLEL = 0,  // Split tensor into several parts and disribute them between sockets/devices during model
+    TENSOR_PARALLEL = 0,  // Split tensor into several parts and distribute them between sockets/devices during model
                           // compilation. At inference time sockets/devices process tensors in parallel and do
-                          // syncronization at the end ensuring mathematical correctness.
+                          // synchronization at the end ensuring mathematical correctness.
 };
@@ -433,7 +433,7 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& strea
  *
  * This property can be used to select model distribution policy between execution units (e.g. between CPU sockets/NUMA
  * nodes or between different GPUs).
- * -- TENSOR_PARALLEL : Split tensor into several parts and disribute them between sockets/devices during model
+ * -- TENSOR_PARALLEL : Split tensor into several parts and distribute them between sockets/devices during model
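- *                      compilation. At inference time sockets/devices process tensors in parallel and do syncronization
- *                      at the end ensuring mathematical correctness.
+ *                      compilation. At inference time sockets/devices process tensors in parallel and do
+ *                      synchronization at the end ensuring mathematical correctness.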
  *