Enable new property model_distribution_policy for CPU inference #23077

Merged

Changes from 33 of 37 commits. All commits are by wangleis.

Commits:
13319df  initial implementation (Feb 26, 2024)
68bb894  update for test case (Feb 26, 2024)
14c3f27  update for comments (Feb 26, 2024)
1f6a0ca  Merge branch 'master' into property_max_threads_per_stream (Feb 26, 2024)
5724b77  update for python (Feb 26, 2024)
65c312b  update for python (Feb 26, 2024)
d5f43a0  update for python (Feb 26, 2024)
0b09543  update for python (Feb 26, 2024)
d4ef1e0  Merge branch 'master' into property_max_threads_per_stream (Mar 1, 2024)
fe5173d  change default value to PER_SOCKET (Mar 13, 2024)
9a1cc28  Merge branch 'master' into property_max_threads_per_stream (Mar 13, 2024)
19c1ed2  update property name and value (Mar 17, 2024)
ba6d37f  Merge branch 'master' into property_max_threads_per_stream (Mar 17, 2024)
562b01a  update code style (Mar 17, 2024)
9c4a951  update property name and value (Mar 18, 2024)
ad744b8  support combined properties (Mar 18, 2024)
d456451  update code style (Mar 19, 2024)
911a79e  update test case for combined properties (Mar 19, 2024)
9f1189f  update test case for combined properties (Mar 19, 2024)
6858272  update test case for combined properties (Mar 19, 2024)
7d0af10  update for combined properties (Mar 19, 2024)
f6d3bdd  Merge branch 'master' into property_max_threads_per_stream (Mar 19, 2024)
03d09e8  remove CAPI interface (Mar 20, 2024)
eef60ac  draft implementation for std::set value (Mar 20, 2024)
77f30a9  update c++ implementation for std::set value (Mar 20, 2024)
5269cac  update c++ implementation for std::set value (Mar 20, 2024)
018eabb  update c++ implementation for std::set value (Mar 20, 2024)
922554c  remove unused function (Mar 20, 2024)
828e583  update python (Mar 20, 2024)
73ce757  update python test case (Mar 20, 2024)
d18568e  update python code style (Mar 20, 2024)
988cb56  update python code style (Mar 20, 2024)
e9d2590  update code style (Mar 20, 2024)
9db4500  update for comments (Mar 21, 2024)
c779701  update for typo (Mar 21, 2024)
0ae8b3e  remove value NONE for ModelDistributionPolicy (Mar 21, 2024)
88d9929  fix typo (Mar 21, 2024)
@@ -5,6 +5,7 @@
# Enums
from openvino._pyopenvino.properties.hint import Priority
from openvino._pyopenvino.properties.hint import SchedulingCoreType
from openvino._pyopenvino.properties.hint import ModelDistributionPolicy
from openvino._pyopenvino.properties.hint import ExecutionMode
from openvino._pyopenvino.properties.hint import PerformanceMode

@@ -5,6 +5,7 @@
# Enums
from openvino._pyopenvino.properties.hint import Priority
from openvino._pyopenvino.properties.hint import SchedulingCoreType
from openvino._pyopenvino.properties.hint import ModelDistributionPolicy
from openvino._pyopenvino.properties.hint import ExecutionMode
from openvino._pyopenvino.properties.hint import PerformanceMode

@@ -14,6 +15,7 @@
from openvino._pyopenvino.properties.hint import performance_mode
from openvino._pyopenvino.properties.hint import enable_cpu_pinning
from openvino._pyopenvino.properties.hint import scheduling_core_type
from openvino._pyopenvino.properties.hint import model_distribution_policy
from openvino._pyopenvino.properties.hint import enable_hyper_threading
from openvino._pyopenvino.properties.hint import execution_mode
from openvino._pyopenvino.properties.hint import num_requests
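With these re-exports in place, the new enum and the set-valued property key are reachable from the public hint namespace. A minimal sketch (assuming the standard openvino.properties.hint public module path, which is not itself shown in this diff):

import openvino.properties.hint as hints

# New enum value and set-valued property key introduced by this PR:
policy = {hints.ModelDistributionPolicy.TENSOR_PARALLEL}
config = {hints.model_distribution_policy: policy}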
@@ -71,6 +71,10 @@ void regmodule_properties(py::module m) {
.value("PCORE_ONLY", ov::hint::SchedulingCoreType::PCORE_ONLY)
.value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY);

py::enum_<ov::hint::ModelDistributionPolicy>(m_hint, "ModelDistributionPolicy", py::arithmetic())
.value("NONE", ov::hint::ModelDistributionPolicy::NONE)
.value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL);

py::enum_<ov::hint::ExecutionMode>(m_hint, "ExecutionMode", py::arithmetic())
.value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE)
.value("ACCURACY", ov::hint::ExecutionMode::ACCURACY);
@@ -81,6 +85,7 @@ void regmodule_properties(py::module m) {
wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode");
wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning");
wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type");
wrap_property_RW(m_hint, ov::hint::model_distribution_policy, "model_distribution_policy");
wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading");
wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode");
wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests");
5 changes: 5 additions & 0 deletions src/bindings/python/src/pyopenvino/utils/utils.cpp
@@ -7,6 +7,7 @@
#include <pybind11/stl.h>

#include <map>
#include <set>
#include <string>
#include <tuple>
#include <vector>
@@ -176,6 +177,8 @@ py::object from_ov_any(const ov::Any& any) {
return py::cast(any.as<ov::intel_auto::SchedulePolicy>());
} else if (any.is<ov::hint::SchedulingCoreType>()) {
return py::cast(any.as<ov::hint::SchedulingCoreType>());
} else if (any.is<std::set<ov::hint::ModelDistributionPolicy>>()) {
return py::cast(any.as<std::set<ov::hint::ModelDistributionPolicy>>());
} else if (any.is<ov::hint::ExecutionMode>()) {
return py::cast(any.as<ov::hint::ExecutionMode>());
} else if (any.is<ov::log::Level>()) {
@@ -375,6 +378,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
return py::cast<ov::intel_auto::SchedulePolicy>(py_obj);
} else if (py::isinstance<ov::hint::SchedulingCoreType>(py_obj)) {
return py::cast<ov::hint::SchedulingCoreType>(py_obj);
} else if (py::isinstance<std::set<ov::hint::ModelDistributionPolicy>>(py_obj)) {
return py::cast<std::set<ov::hint::ModelDistributionPolicy>>(py_obj);
} else if (py::isinstance<ov::hint::ExecutionMode>(py_obj)) {
return py::cast<ov::hint::ExecutionMode>(py_obj);
} else if (py::isinstance<ov::log::Level>(py_obj)) {
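These two branches let a Python set of ModelDistributionPolicy values cross the binding boundary in both directions: py_object_to_any when a property is set, from_ov_any when it is read back. A minimal round-trip sketch, assuming a machine with a CPU device:

import openvino as ov
import openvino.properties.hint as hints

core = ov.Core()
policy = {hints.ModelDistributionPolicy.TENSOR_PARALLEL}
core.set_property("CPU", {hints.model_distribution_policy: policy})         # py_object_to_any path
assert core.get_property("CPU", hints.model_distribution_policy) == policy  # from_ov_any path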
18 changes: 17 additions & 1 deletion src/bindings/python/tests/test_runtime/test_properties.py
@@ -86,6 +86,13 @@ def test_properties_rw_base():
(hints.SchedulingCoreType.ECORE_ONLY, "SchedulingCoreType.ECORE_ONLY", 2),
),
),
(
hints.ModelDistributionPolicy,
(
(hints.ModelDistributionPolicy.NONE, "ModelDistributionPolicy.NONE", 0),
(hints.ModelDistributionPolicy.TENSOR_PARALLEL, "ModelDistributionPolicy.TENSOR_PARALLEL", 1),
),
),
(
hints.ExecutionMode,
(
@@ -279,6 +286,16 @@ def test_properties_ro(ov_property_ro, expected_value):
"SCHEDULING_CORE_TYPE",
((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),),
),
(
hints.model_distribution_policy,
"MODEL_DISTRIBUTION_POLICY",
(
({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),
({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}),
({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE},
{hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}),
),
),
(
hints.enable_hyper_threading,
"ENABLE_HYPER_THREADING",
@@ -541,7 +558,6 @@ def test_single_property_setting(device):
props.affinity: "NONE",
"INFERENCE_PRECISION_HINT": Type.f32,
hints.performance_mode: hints.PerformanceMode.LATENCY,
-hints.scheduling_core_type: hints.SchedulingCoreType.PCORE_ONLY,
hints.num_requests: 12,
"NUM_STREAMS": streams.Num(5),
"ENABLE_MMAP": False,
48 changes: 48 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
@@ -399,6 +399,54 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
*/
static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};

enum class ModelDistributionPolicy {
NONE = 0, // Run one model on single socket/device without parallelism.
TENSOR_PARALLEL = 1, // Split one node or subgraph into parts and run one part per socket/device in parallel.
};

/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& stream_mode) {
switch (stream_mode) {
case ModelDistributionPolicy::NONE:
return os << "NONE";
case ModelDistributionPolicy::TENSOR_PARALLEL:
return os << "TENSOR_PARALLEL";
default:
OPENVINO_THROW("Unsupported model distribution policy!");
}
}

inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) {
std::string str;
is >> str;
if (str == "NONE") {
stream_mode = ModelDistributionPolicy::NONE;
} else if (str == "TENSOR_PARALLEL") {
stream_mode = ModelDistributionPolicy::TENSOR_PARALLEL;
} else {
OPENVINO_THROW("Unsupported model distribution policy: ", str);
}
return is;
}
/** @endcond */

/**
 * @brief This property defines the model distribution policy for inference with multiple sockets/devices.
 * @ingroup ov_runtime_cpp_prop_api
 *
 * Developers can use this property to select the model distribution policy for CPU inference on a multi-socket
 * platform, or for GPU inference with multiple GPU devices.
 * -- TENSOR_PARALLEL : Split one node or subgraph into parts and run one part per socket/device in parallel.
 * -- NONE : Run one model on a single socket/device without parallelism.
 *
 * The following code is an example of splitting one node into two parts and running one part per socket on a
 * dual-socket platform.
 *
 * @code
 * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}));
 * @endcode
 */
static constexpr Property<std::set<ModelDistributionPolicy>> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"};

/**
* @brief This property allows CPU pinning during inference.
* @ingroup ov_runtime_cpp_prop_api
8 changes: 6 additions & 2 deletions src/plugins/intel_cpu/src/compiled_model.cpp
@@ -193,6 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
RO_property(ov::hint::num_requests.name()),
RO_property(ov::hint::enable_cpu_pinning.name()),
RO_property(ov::hint::scheduling_core_type.name()),
RO_property(ov::hint::model_distribution_policy.name()),
RO_property(ov::hint::enable_hyper_threading.name()),
RO_property(ov::execution_devices.name()),
RO_property(ov::intel_cpu::denormals_optimization.name()),
@@ -246,8 +247,11 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
const bool use_pin = config.enableCpuPinning;
return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin);
} else if (name == ov::hint::scheduling_core_type) {
-const auto core_type = config.schedulingCoreType;
-return core_type;
+const auto stream_mode = config.schedulingCoreType;
+return stream_mode;
+} else if (name == ov::hint::model_distribution_policy) {
+const auto distribution_policy = config.modelDistributionPolicy;
+return distribution_policy;
} else if (name == ov::hint::enable_hyper_threading.name()) {
const bool use_ht = config.enableHyperThreading;
return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht);
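Since the compiled model exposes the property read-only, the policy chosen at compile time can be queried back. A sketch, assuming a CPU device; model.xml is a placeholder path:

import openvino as ov
import openvino.properties.hint as hints

core = ov.Core()
model = core.read_model("model.xml")  # placeholder model path
compiled = core.compile_model(model, "CPU",
                              {hints.model_distribution_policy: {hints.ModelDistributionPolicy.TENSOR_PARALLEL}})
print(compiled.get_property(hints.model_distribution_policy))  # expected: {ModelDistributionPolicy.TENSOR_PARALLEL}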
27 changes: 21 additions & 6 deletions src/plugins/intel_cpu/src/config.cpp
@@ -184,12 +184,27 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
val.as<std::string>(),
"for property key ",
ov::hint::scheduling_core_type.name(),
-". Expected only ",
-ov::hint::SchedulingCoreType::ANY_CORE,
-'/',
-ov::hint::SchedulingCoreType::PCORE_ONLY,
-'/',
-ov::hint::SchedulingCoreType::ECORE_ONLY);
+". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY");
}
+} else if (key == ov::hint::model_distribution_policy.name()) {
+auto error_info = [&]() {
+OPENVINO_THROW("Wrong value ",
+val.as<std::string>(),
+" for property key ",
+ov::hint::model_distribution_policy.name(),
+". CPU plugin only supports {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL/NONE}");
+};
+
+try {
+for (auto& row : val.as<std::set<ov::hint::ModelDistributionPolicy>>()) {
+if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) &&
+(row != ov::hint::ModelDistributionPolicy::NONE)) {
+error_info();
+}
+}
+modelDistributionPolicy = val.as<std::set<ov::hint::ModelDistributionPolicy>>();
+} catch (ov::Exception&) {
+error_info();
+}
} else if (key == ov::hint::enable_hyper_threading.name()) {
try {
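The validation above only admits TENSOR_PARALLEL and NONE. From Python the typed enum makes invalid members hard to construct, but a raw string value still reaches this parsing and validation path; a sketch of the failure mode (PIPELINE_PARALLEL is a hypothetical invalid name, and ov::Exception typically surfaces as RuntimeError in Python):

import openvino as ov

core = ov.Core()
try:
    core.set_property("CPU", {"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"})  # not a supported policy
except RuntimeError as err:
    print(err)  # "Wrong value ... for property key MODEL_DISTRIBUTION_POLICY ..."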
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/config.h
@@ -76,6 +76,7 @@ struct Config {
bool enableCpuPinning = true;
bool changedCpuPinning = false;
ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
std::set<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE};
bool enableHyperThreading = true;
bool changedHyperThreading = false;
Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
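Given the {NONE} default above, querying the property on a fresh Core should return a one-element set at this revision of the PR (a later commit in the list, 0ae8b3e, removes the NONE value). A quick sketch, assuming a CPU device:

import openvino as ov
import openvino.properties.hint as hints

core = ov.Core()
assert core.get_property("CPU", hints.model_distribution_policy) == {hints.ModelDistributionPolicy.NONE}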
4 changes: 4 additions & 0 deletions src/plugins/intel_cpu/src/plugin.cpp
@@ -409,6 +409,9 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
} else if (name == ov::hint::scheduling_core_type) {
const auto core_type = engConfig.schedulingCoreType;
return core_type;
} else if (name == ov::hint::model_distribution_policy) {
const auto distribution_policy = engConfig.modelDistributionPolicy;
return distribution_policy;
} else if (name == ov::hint::enable_hyper_threading) {
const bool ht_value = engConfig.enableHyperThreading;
return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value);
@@ -481,6 +484,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& options)
RW_property(ov::hint::num_requests.name()),
RW_property(ov::hint::enable_cpu_pinning.name()),
RW_property(ov::hint::scheduling_core_type.name()),
RW_property(ov::hint::model_distribution_policy.name()),
RW_property(ov::hint::enable_hyper_threading.name()),
RW_property(ov::device::id.name()),
RW_property(ov::intel_cpu::denormals_optimization.name()),
@@ -104,18 +104,11 @@ const std::vector<ov::AnyMap> testing_property_for_performance_mode = {
{ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)},
{ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}};

-const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_1 = {
+const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type = {
{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)}};

-const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_2 = {
-{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)},
-{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
-
-const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_3 = {
-{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
-{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};
-
const std::vector<ov::AnyMap> testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)},
{ov::hint::enable_hyper_threading(false)}};

@@ -128,9 +121,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest,
::testing::Values(testing_property_for_streams,
testing_property_for_threads,
testing_property_for_performance_mode,
-testing_property_for_scheduling_core_type_1,
-testing_property_for_scheduling_core_type_2,
-testing_property_for_scheduling_core_type_3,
+testing_property_for_scheduling_core_type,
testing_property_for_enable_hyper_threading,
testing_property_for_enable_cpu_pinning)));

@@ -33,6 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable) {
RO_property(ov::hint::num_requests.name()),
RO_property(ov::hint::enable_cpu_pinning.name()),
RO_property(ov::hint::scheduling_core_type.name()),
RO_property(ov::hint::model_distribution_policy.name()),
RO_property(ov::hint::enable_hyper_threading.name()),
RO_property(ov::execution_devices.name()),
RO_property(ov::intel_cpu::denormals_optimization.name()),
@@ -47,6 +47,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) {
RW_property(ov::hint::num_requests.name()),
RW_property(ov::hint::enable_cpu_pinning.name()),
RW_property(ov::hint::scheduling_core_type.name()),
RW_property(ov::hint::model_distribution_policy.name()),
RW_property(ov::hint::enable_hyper_threading.name()),
RW_property(ov::device::id.name()),
RW_property(ov::intel_cpu::denormals_optimization.name()),
@@ -107,6 +108,28 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigInferenceNumThreads) {
ASSERT_EQ(num_threads, value);
}

TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigModelDistributionPolicy) {
ov::Core ie;
std::set<ov::hint::ModelDistributionPolicy> value = {ov::hint::ModelDistributionPolicy::NONE};
std::set<ov::hint::ModelDistributionPolicy> model_policy = {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL};

ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
ASSERT_EQ(model_policy, value);

model_policy = {ov::hint::ModelDistributionPolicy::NONE};

ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
ASSERT_EQ(model_policy, value);

model_policy = {ov::hint::ModelDistributionPolicy::NONE, ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL};

ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy)));
ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy));
ASSERT_EQ(model_policy, value);
}

TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) {
ov::Core ie;
int32_t value = 0;
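For symmetry, a Python analogue of the smoke test above, sketched against the bindings added earlier in this PR (assumes a CPU device):

import openvino as ov
import openvino.properties.hint as hints

core = ov.Core()
for policy in ({hints.ModelDistributionPolicy.TENSOR_PARALLEL},
               {hints.ModelDistributionPolicy.NONE},
               {hints.ModelDistributionPolicy.NONE, hints.ModelDistributionPolicy.TENSOR_PARALLEL}):
    core.set_property("CPU", {hints.model_distribution_policy: policy})
    assert core.get_property("CPU", hints.model_distribution_policy) == policy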