Enable new property model_distribution_policy for CPU inference #23077

Merged

Changes from 33 of 37 commits. All commits are by wangleis.

Commits:
13319df  initial implementation (Feb 26, 2024)
68bb894  update for test case (Feb 26, 2024)
14c3f27  update for comments (Feb 26, 2024)
1f6a0ca  Merge branch 'master' into property_max_threads_per_stream (Feb 26, 2024)
5724b77  update for python (Feb 26, 2024)
65c312b  update for python (Feb 26, 2024)
d5f43a0  update for python (Feb 26, 2024)
0b09543  update for python (Feb 26, 2024)
d4ef1e0  Merge branch 'master' into property_max_threads_per_stream (Mar 1, 2024)
fe5173d  change default value to PER_SOCKET (Mar 13, 2024)
9a1cc28  Merge branch 'master' into property_max_threads_per_stream (Mar 13, 2024)
19c1ed2  update property name and value (Mar 17, 2024)
ba6d37f  Merge branch 'master' into property_max_threads_per_stream (Mar 17, 2024)
562b01a  update code style (Mar 17, 2024)
9c4a951  update property name and value (Mar 18, 2024)
ad744b8  support combined properties (Mar 18, 2024)
d456451  update code style (Mar 19, 2024)
911a79e  update test case for combined properties (Mar 19, 2024)
9f1189f  update test case for combined properties (Mar 19, 2024)
6858272  update test case for combined properties (Mar 19, 2024)
7d0af10  update for combined properties (Mar 19, 2024)
f6d3bdd  Merge branch 'master' into property_max_threads_per_stream (Mar 19, 2024)
03d09e8  remove CAPI interface (Mar 20, 2024)
eef60ac  draft implementation for std::set value (Mar 20, 2024)
77f30a9  update c++ implementation for std::set value (Mar 20, 2024)
5269cac  update c++ implementation for std::set value (Mar 20, 2024)
018eabb  update c++ implementation for std::set value (Mar 20, 2024)
922554c  remove unused function (Mar 20, 2024)
828e583  update python (Mar 20, 2024)
73ce757  update python test case (Mar 20, 2024)
d18568e  update python code style (Mar 20, 2024)
988cb56  update python code style (Mar 20, 2024)
e9d2590  update code style (Mar 20, 2024)
9db4500  update for comments (Mar 21, 2024)
c779701  update for typo (Mar 21, 2024)
0ae8b3e  remove value NONE for ModelDistributionPolicy (Mar 21, 2024)
88d9929  fix typo (Mar 21, 2024)
@@ -5,6 +5,7 @@
# Enums
from openvino._pyopenvino.properties.hint import Priority
from openvino._pyopenvino.properties.hint import SchedulingCoreType
from openvino._pyopenvino.properties.hint import ModelDistributionPolicy
from openvino._pyopenvino.properties.hint import ExecutionMode
from openvino._pyopenvino.properties.hint import PerformanceMode

@@ -5,6 +5,7 @@
# Enums
from openvino._pyopenvino.properties.hint import Priority
from openvino._pyopenvino.properties.hint import SchedulingCoreType
from openvino._pyopenvino.properties.hint import ModelDistributionPolicy
from openvino._pyopenvino.properties.hint import ExecutionMode
from openvino._pyopenvino.properties.hint import PerformanceMode

@@ -14,6 +15,7 @@
from openvino._pyopenvino.properties.hint import performance_mode
from openvino._pyopenvino.properties.hint import enable_cpu_pinning
from openvino._pyopenvino.properties.hint import scheduling_core_type
from openvino._pyopenvino.properties.hint import model_distribution_policy
from openvino._pyopenvino.properties.hint import enable_hyper_threading
from openvino._pyopenvino.properties.hint import execution_mode
from openvino._pyopenvino.properties.hint import num_requests
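With these re-exports in place, the new enum and the set-valued property key are reachable from the public hint namespace. A minimal sketch (assuming the standard openvino.properties.hint public module path, which is not itself shown in this diff):

import openvino.properties.hint as hints

# New enum value and set-valued property key introduced by this PR:
policy = {hints.ModelDistributionPolicy.TENSOR_PARALLEL}
config = {hints.model_distribution_policy: policy}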
@@ -71,6 +71,10 @@ void regmodule_properties(py::module m) {
.value("PCORE_ONLY", ov::hint::SchedulingCoreType::PCORE_ONLY)
.value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY);

py::enum_<ov::hint::ModelDistributionPolicy>(m_hint, "ModelDistributionPolicy", py::arithmetic())
.value("NONE", ov::hint::ModelDistributionPolicy::NONE)
.value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL);

py::enum_<ov::hint::ExecutionMode>(m_hint, "ExecutionMode", py::arithmetic())
.value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE)
.value("ACCURACY", ov::hint::ExecutionMode::ACCURACY);
@@ -81,6 +85,7 @@ void regmodule_properties(py::module m) {
wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode");
wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning");
wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type");
wrap_property_RW(m_hint, ov::hint::model_distribution_policy, "model_distribution_policy");
wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading");
wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode");
wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests");
5 changes: 5 additions & 0 deletions src/bindings/python/src/pyopenvino/utils/utils.cpp
@@ -7,6 +7,7 @@
#include <pybind11/stl.h>

#include <map>
#include <set>
#include <string>
#include <tuple>
#include <vector>
@@ -176,6 +177,8 @@ py::object from_ov_any(const ov::Any& any) {
return py::cast(any.as<ov::intel_auto::SchedulePolicy>());
} else if (any.is<ov::hint::SchedulingCoreType>()) {
return py::cast(any.as<ov::hint::SchedulingCoreType>());
} else if (any.is<std::set<ov::hint::ModelDistributionPolicy>>()) {
return py::cast(any.as<std::set<ov::hint::ModelDistributionPolicy>>());
} else if (any.is<ov::hint::ExecutionMode>()) {
return py::cast(any.as<ov::hint::ExecutionMode>());
} else if (any.is<ov::log::Level>()) {
@@ -375,6 +378,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
return py::cast<ov::intel_auto::SchedulePolicy>(py_obj);
} else if (py::isinstance<ov::hint::SchedulingCoreType>(py_obj)) {
return py::cast<ov::hint::SchedulingCoreType>(py_obj);
} else if (py::isinstance<std::set<ov::hint::ModelDistributionPolicy>>(py_obj)) {
return py::cast<std::set<ov::hint::ModelDistributionPolicy>>(py_obj);
} else if (py::isinstance<ov::hint::ExecutionMode>(py_obj)) {
return py::cast<ov::hint::ExecutionMode>(py_obj);
} else if (py::isinstance<ov::log::Level>(py_obj)) {
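These two branches let a Python set of ModelDistributionPolicy values cross the binding boundary in both directions: py_object_to_any when a property is set, from_ov_any when it is read back. A minimal round-trip sketch, assuming a machine with a CPU device:

import openvino as ov
import openvino.properties.hint as hints

core = ov.Core()
policy = {hints.ModelDistributionPolicy.TENSOR_PARALLEL}
core.set_property("CPU", {hints.model_distribution_policy: policy})         # py_object_to_any path
assert core.get_property("CPU", hints.model_distribution_policy) == policy  # from_ov_any path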
18 changes: 17 additions & 1 deletion src/bindings/python/tests/test_runtime/test_properties.py
@@ -86,6 +86,13 @@ def test_properties_rw_base():
(hints.SchedulingCoreType.ECORE_ONLY, "SchedulingCoreType.ECORE_ONLY", 2),
),
),
(
hints.ModelDistributionPolicy,
(
(hints.ModelDistributionPolicy.NONE, "ModelDistributionPolicy.NONE", 0),
(hints.ModelDistributionPolicy.TENSOR_PARALLEL, "ModelDistributionPolicy.TENSOR_PARALLEL", 1),
),
),
(
hints.ExecutionMode,
(
@@ -279,6 +286,16 @@ def test_properties_ro(ov_property_ro, expected_value):
"SCHEDULING_CORE_TYPE",
((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),),
),
(
hints.model_distribution_policy,
"MODEL_DISTRIBUTION_POLICY",
(
({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),
({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}),
({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE},
{hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}),
),
),
(
hints.enable_hyper_threading,
"ENABLE_HYPER_THREADING",
@@ -541,7 +558,6 @@ def test_single_property_setting(device):
props.affinity: "NONE",
"INFERENCE_PRECISION_HINT": Type.f32,
hints.performance_mode: hints.PerformanceMode.LATENCY,
-hints.scheduling_core_type: hints.SchedulingCoreType.PCORE_ONLY,
hints.num_requests: 12,
"NUM_STREAMS": streams.Num(5),
"ENABLE_MMAP": False,
48 changes: 48 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
@@ -399,6 +399,54 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
*/
static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};

enum class ModelDistributionPolicy {
NONE = 0, // Run one model on single socket/device without parallelism.
TENSOR_PARALLEL = 1, // Split one node or subgraph into parts and run one part per socket/device in parallel.
};

/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& stream_mode) {
switch (stream_mode) {
case ModelDistributionPolicy::NONE:
return os << "NONE";
case ModelDistributionPolicy::TENSOR_PARALLEL:
return os << "TENSOR_PARALLEL";
default:
OPENVINO_THROW("Unsupported model distribution policy!");
}
}

inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) {
std::string str;
is >> str;
if (str == "NONE") {
stream_mode = ModelDistributionPolicy::NONE;
} else if (str == "TENSOR_PARALLEL") {
stream_mode = ModelDistributionPolicy::TENSOR_PARALLEL;
} else {
OPENVINO_THROW("Unsupported model distribution policy: ", str);
}
return is;
}
/** @endcond */

/**
 * @brief This property defines the model distribution policy for inference with multiple sockets/devices.
 * @ingroup ov_runtime_cpp_prop_api
 *
 * Developers can use this property to select the model distribution policy for CPU inference on a multi-socket
 * platform, or for GPU inference with multiple GPU devices.
 * -- TENSOR_PARALLEL : Split one node or subgraph into parts and run one part per socket/device in parallel.
 * -- NONE : Run one model on a single socket/device without parallelism.
 *
 * The following code is an example of splitting one node into two parts and running one part per socket on a
 * dual-socket platform.
 *
 * @code
 * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}));
 * @endcode
 */
static constexpr Property<std::set<ModelDistributionPolicy>> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"};

/**
* @brief This property allows CPU pinning during inference.
* @ingroup ov_runtime_cpp_prop_api
8 changes: 6 additions & 2 deletions src/plugins/intel_cpu/src/compiled_model.cpp
@@ -193,6 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
RO_property(ov::hint::num_requests.name()),
RO_property(ov::hint::enable_cpu_pinning.name()),
RO_property(ov::hint::scheduling_core_type.name()),
RO_property(ov::hint::model_distribution_policy.name()),
RO_property(ov::hint::enable_hyper_threading.name()),
RO_property(ov::execution_devices.name()),
RO_property(ov::intel_cpu::denormals_optimization.name()),
@@ -246,8 +247,11 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
const bool use_pin = config.enableCpuPinning;
return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin);
} else if (name == ov::hint::scheduling_core_type) {
-const auto core_type = config.schedulingCoreType;
-return core_type;
+const auto stream_mode = config.schedulingCoreType;
+return stream_mode;
+} else if (name == ov::hint::model_distribution_policy) {
+const auto distribution_policy = config.modelDistributionPolicy;
+return distribution_policy;
} else if (name == ov::hint::enable_hyper_threading.name()) {
const bool use_ht = config.enableHyperThreading;
return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht);
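Since the compiled model exposes the property read-only, the policy chosen at compile time can be queried back. A sketch, assuming a CPU device; model.xml is a placeholder path:

import openvino as ov
import openvino.properties.hint as hints

core = ov.Core()
model = core.read_model("model.xml")  # placeholder model path
compiled = core.compile_model(model, "CPU",
                              {hints.model_distribution_policy: {hints.ModelDistributionPolicy.TENSOR_PARALLEL}})
print(compiled.get_property(hints.model_distribution_policy))  # expected: {ModelDistributionPolicy.TENSOR_PARALLEL}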
27 changes: 21 additions & 6 deletions src/plugins/intel_cpu/src/config.cpp
@@ -184,12 +184,27 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
val.as<std::string>(),
"for property key ",
ov::hint::scheduling_core_type.name(),
-". Expected only ",
-ov::hint::SchedulingCoreType::ANY_CORE,
-'/',
-ov::hint::SchedulingCoreType::PCORE_ONLY,
-'/',
-ov::hint::SchedulingCoreType::ECORE_ONLY);
+". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY");
}
+} else if (key == ov::hint::model_distribution_policy.name()) {
+auto error_info = [&]() {
+OPENVINO_THROW("Wrong value ",
+val.as<std::string>(),
+" for property key ",
+ov::hint::model_distribution_policy.name(),
+". CPU plugin only supports {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL/NONE}");
+};
+
+try {
+for (auto& row : val.as<std::set<ov::hint::ModelDistributionPolicy>>()) {
+if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) &&
+(row != ov::hint::ModelDistributionPolicy::NONE)) {
+error_info();
+}
+}
+modelDistributionPolicy = val.as<std::set<ov::hint::ModelDistributionPolicy>>();
+} catch (ov::Exception&) {
+error_info();
+}
} else if (key == ov::hint::enable_hyper_threading.name()) {
try {
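The validation above only admits TENSOR_PARALLEL and NONE. From Python the typed enum makes invalid members hard to construct, but a raw string value still reaches this parsing and validation path; a sketch of the failure mode (PIPELINE_PARALLEL is a hypothetical invalid name, and ov::Exception typically surfaces as RuntimeError in Python):

import openvino as ov

core = ov.Core()
try:
    core.set_property("CPU", {"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"})  # not a supported policy
except RuntimeError as err:
    print(err)  # "Wrong value ... for property key MODEL_DISTRIBUTION_POLICY ..."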
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/config.h
@@ -76,6 +76,7 @@ struct Config {
bool enableCpuPinning = true;
bool changedCpuPinning = false;
ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
std::set<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE};
bool enableHyperThreading = true;
bool changedHyperThreading = false;
Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
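Given the {NONE} default above, querying the property on a fresh Core should return a one-element set at this revision of the PR (a later commit in the list, 0ae8b3e, removes the NONE value). A quick sketch, assuming a CPU device:

import openvino as ov
import openvino.properties.hint as hints

core = ov.Core()
assert core.get_property("CPU", hints.model_distribution_policy) == {hints.ModelDistributionPolicy.NONE}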
4 changes: 4 additions & 0 deletions src/plugins/intel_cpu/src/plugin.cpp
@@ -409,6 +409,9 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
} else if (name == ov::hint::scheduling_core_type) {
const auto core_type = engConfig.schedulingCoreType;
return core_type;
} else if (name == ov::hint::model_distribution_policy) {
const auto distribution_policy = engConfig.modelDistributionPolicy;
return distribution_policy;
} else if (name == ov::hint::enable_hyper_threading) {
const bool ht_value = engConfig.enableHyperThreading;
return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value);
@@ -481,6 +484,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& options)
RW_property(ov::hint::num_requests.name()),
RW_property(ov::hint::enable_cpu_pinning.name()),
RW_property(ov::hint::scheduling_core_type.name()),
RW_property(ov::hint::model_distribution_policy.name()),
RW_property(ov::hint::enable_hyper_threading.name()),
RW_property(ov::device::id.name()),
RW_property(ov::intel_cpu::denormals_optimization.name()),
@@ -104,18 +104,11 @@ const std::vector<ov::AnyMap> testing_property_for_performance_mode = {
{ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)},
{ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}};

-const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_1 = {
+const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type = {
{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)}};

-const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_2 = {
-{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)},
-{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
-
-const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_3 = {
-{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
-{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
-
const std::vector<ov::AnyMap> testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)},
{ov::hint::enable_hyper_threading(false)}};

@@ -128,9 +121,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest,
::testing::Values(testing_property_for_streams,
testing_property_for_threads,
testing_property_for_performance_mode,
-testing_property_for_scheduling_core_type_1,
-testing_property_for_scheduling_core_type_2,
-testing_property_for_scheduling_core_type_3,
+testing_property_for_scheduling_core_type,
testing_property_for_enable_hyper_threading,
testing_property_for_enable_cpu_pinning)));

@@ -33,6 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable) {
RO_property(ov::hint::num_requests.name()),
RO_property(ov::hint::enable_cpu_pinning.name()),
RO_property(ov::hint::scheduling_core_type.name()),
RO_property(ov::hint::model_distribution_policy.name()),
RO_property(ov::hint::enable_hyper_threading.name()),
RO_property(ov::execution_devices.name()),
RO_property(ov::intel_cpu::denormals_optimization.name()),
@@ -47,6 +47,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) {
RW_property(ov::hint::num_requests.name()),
RW_property(ov::hint::enable_cpu_pinning.name()),
RW_property(ov::hint::scheduling_core_type.name()),
RW_property(ov::hint::model_distribution_policy.name()),
RW_property(ov::hint::enable_hyper_threading.name()),
RW_property(ov::device::id.name()),
RW_property(ov::intel_cpu::denormals_optimization.name()),
@@ -107,6 +108,28 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigInferenceNumThreads) {
ASSERT_EQ(num_threads, value);
}

TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigModelDistributionPolicy) {
ov::Core ie;
std::set<ov::hint::ModelDistributionPolicy> value = {ov::hint::ModelDistributionPolicy::NONE};
std::set<ov::hint::ModelDistributionPolicy> model_policy = {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL};

ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
ASSERT_EQ(model_policy, value);

model_policy = {ov::hint::ModelDistributionPolicy::NONE};

ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
ASSERT_EQ(model_policy, value);

model_policy = {ov::hint::ModelDistributionPolicy::NONE, ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL};

ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
ASSERT_EQ(model_policy, value);
}

TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) {
ov::Core ie;
int32_t value = 0;
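For symmetry, a Python analogue of the smoke test above, sketched against the bindings added earlier in this PR (assumes a CPU device):

import openvino as ov
import openvino.properties.hint as hints

core = ov.Core()
for policy in ({hints.ModelDistributionPolicy.TENSOR_PARALLEL},
               {hints.ModelDistributionPolicy.NONE},
               {hints.ModelDistributionPolicy.NONE, hints.ModelDistributionPolicy.TENSOR_PARALLEL}):
    core.set_property("CPU", {hints.model_distribution_policy: policy})
    assert core.get_property("CPU", hints.model_distribution_policy) == policy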