From 13319df7236cc820c6cdc0b390978acfc0476771 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Mon, 26 Feb 2024 15:12:37 +0800 Subject: [PATCH 01/32] initial implementation --- src/bindings/c/docs/api_overview.md | 2 + .../c/include/openvino/c/ov_property.h | 7 +++ src/bindings/c/src/ov_property.cpp | 1 + src/bindings/c/tests/ov_core_test.cpp | 8 +++ .../runtime/properties/hint/__init__.py | 1 + .../pyopenvino/core/properties/properties.cpp | 1 + .../tests/test_runtime/test_properties.py | 6 ++- .../include/openvino/runtime/properties.hpp | 51 +++++++++++++++++++ src/plugins/intel_cpu/src/compiled_model.cpp | 6 ++- src/plugins/intel_cpu/src/config.cpp | 15 ++++++ src/plugins/intel_cpu/src/config.h | 1 + src/plugins/intel_cpu/src/plugin.cpp | 4 ++ .../custom/behavior/export_import.cpp | 15 ++++++ .../ov_executable_network/properties.cpp | 1 + .../custom/behavior/ov_plugin/properties.cpp | 1 + .../behavior/ov_plugin/properties_tests.cpp | 9 ++++ 16 files changed, 127 insertions(+), 2 deletions(-) diff --git a/src/bindings/c/docs/api_overview.md b/src/bindings/c/docs/api_overview.md index 6ca2ad403c1a7e..8c7debee7e3bd1 100644 --- a/src/bindings/c/docs/api_overview.md +++ b/src/bindings/c/docs/api_overview.md @@ -309,6 +309,8 @@ OPENVINO_C_VAR(const char*) ov_property_key_affinity; OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads; +OPENVINO_C_VAR(const char*) ov_property_key_hint_max_threads_per_stream; + OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_cpu_pinning; OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_hyper_threading; diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h index dbefcbb366a0e5..f3165d242f22fc 100644 --- a/src/bindings/c/include/openvino/c/ov_property.h +++ b/src/bindings/c/include/openvino/c/ov_property.h @@ -128,6 +128,13 @@ ov_property_key_affinity; OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads; +/** + * @brief Read-write 
property to set/get the maximum number of threads per stream of CPU inference. + * @ingroup ov_property_c_api + */ +OPENVINO_C_VAR(const char*) +ov_property_key_hint_max_threads_per_stream; + /** * @brief Read-write property, it is high-level OpenVINO hint for using CPU pinning to bind CPU threads to processors * during inference diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp index 8a083e2afd8c41..ffb6d9f90e105d 100644 --- a/src/bindings/c/src/ov_property.cpp +++ b/src/bindings/c/src/ov_property.cpp @@ -23,6 +23,7 @@ const char* ov_property_key_cache_mode = "CACHE_MODE"; const char* ov_property_key_num_streams = "NUM_STREAMS"; const char* ov_property_key_affinity = "AFFINITY"; const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS"; +const char* ov_property_key_hint_max_threads_per_stream = "MAX_THREADS_PER_STREAM"; const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT"; const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING"; const char* ov_property_key_hint_scheduling_core_type = "SCHEDULING_CORE_TYPE"; diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 69762a901d8f69..8b5d5845475512 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -298,6 +298,14 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) { EXPECT_STREQ(val_type, ret); ov_free(ret); + const char* key_type = ov_property_key_hint_max_threads_per_stream; + const char* val_type = "PER_PLATFORM"; + OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_type, val_type)); + ret = nullptr; + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret)); + EXPECT_STREQ(val_type, ret); + ov_free(ret); + OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val)); ret = nullptr; OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret)); diff --git 
a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py index cce898891e4af3..b32b51ce6482b0 100644 --- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py @@ -14,6 +14,7 @@ from openvino._pyopenvino.properties.hint import performance_mode from openvino._pyopenvino.properties.hint import enable_cpu_pinning from openvino._pyopenvino.properties.hint import scheduling_core_type +from openvino._pyopenvino.properties.hint import max_threads_per_stream from openvino._pyopenvino.properties.hint import enable_hyper_threading from openvino._pyopenvino.properties.hint import execution_mode from openvino._pyopenvino.properties.hint import num_requests diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 6ed59721c59d88..abae0d3dac248f 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -81,6 +81,7 @@ void regmodule_properties(py::module m) { wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode"); wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning"); wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type"); + wrap_property_RW(m_hint, ov::hint::max_threads_per_stream, "max_threads_per_stream"); wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading"); wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode"); wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests"); diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 64a47a1ceed8fb..ebfd9f6c8dedc7 100644 --- 
a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -279,6 +279,11 @@ def test_properties_ro(ov_property_ro, expected_value): "SCHEDULING_CORE_TYPE", ((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),), ), + ( + hints.max_threads_per_stream, + "MAX_THREADS_PER_STREAM", + ((hints.MaxThreadsPerStream.PER_PLATFORM, hints.MaxThreadsPerStream.PER_PLATFORM),), + ), ( hints.enable_hyper_threading, "ENABLE_HYPER_THREADING", @@ -541,7 +546,6 @@ def test_single_property_setting(device): props.affinity: "NONE", "INFERENCE_PRECISION_HINT": Type.f32, hints.performance_mode: hints.PerformanceMode.LATENCY, - hints.scheduling_core_type: hints.SchedulingCoreType.PCORE_ONLY, hints.num_requests: 12, "NUM_STREAMS": streams.Num(5), "ENABLE_MMAP": "NO", diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 9dc28cab1b01cc..b8496a0cfae093 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -383,6 +383,42 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) } /** @endcond */ +enum class MaxThreadsPerStream { + AUTO, //!< Using all threads per platform for one stream. Will create sub stream on dual socket platform. + PER_PLATFORM, //!< Using all threads per platform for one stream even on dual socket platform. + PER_SOCKET, //!< Using all threads per socket for one stream on dual socket platform. 
+}; + +/** @cond INTERNAL */ +inline std::ostream& operator<<(std::ostream& os, const MaxThreadsPerStream& stream_mode) { + switch (stream_mode) { + case MaxThreadsPerStream::AUTO: + return os << "AUTO"; + case MaxThreadsPerStream::PER_PLATFORM: + return os << "PER_PLATFORM"; + case MaxThreadsPerStream::PER_SOCKET: + return os << "PER_SOCKET"; + default: + OPENVINO_THROW("Unsupported mode!"); + } +} + +inline std::istream& operator>>(std::istream& is, MaxThreadsPerStream& stream_mode) { + std::string str; + is >> str; + if (str == "AUTO") { + stream_mode = MaxThreadsPerStream::AUTO; + } else if (str == "PER_PLATFORM") { + stream_mode = MaxThreadsPerStream::PER_PLATFORM; + } else if (str == "PER_SOCKET") { + stream_mode = MaxThreadsPerStream::PER_SOCKET; + } else { + OPENVINO_THROW("Unsupported mode: ", str); + } + return is; +} +/** @endcond */ + /** * @brief This property defines CPU core type which can be used during inference. * @ingroup ov_runtime_cpp_prop_api @@ -399,6 +435,21 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) */ static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; +/** + * @brief This property defines max threads per stream used for CPU inference. + * @ingroup ov_runtime_cpp_prop_api + * + * Developer can use this property to select max threads per stream for CPU inference. Please refer MaxThreadsPerStream + * for all definition of types. + * + * The following code is an example to only use all threads per socket for one stream on dual sockets platform. + * + * @code + * ie.set_property(ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)); + * @endcode + */ +static constexpr Property max_threads_per_stream{"MAX_THREADS_PER_STREAM"}; + /** * @brief This property allows CPU pinning during inference. 
* @ingroup ov_runtime_cpp_prop_api diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 85e84c22afaf2b..377294a7ae3577 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -193,6 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { RO_property(ov::hint::num_requests.name()), RO_property(ov::hint::enable_cpu_pinning.name()), RO_property(ov::hint::scheduling_core_type.name()), + RO_property(ov::hint::max_threads_per_stream.name()), RO_property(ov::hint::enable_hyper_threading.name()), RO_property(ov::execution_devices.name()), RO_property(ov::intel_cpu::denormals_optimization.name()), @@ -246,7 +247,10 @@ ov::Any CompiledModel::get_property(const std::string& name) const { const bool use_pin = config.enableCpuPinning; return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin); } else if (name == ov::hint::scheduling_core_type) { - const auto core_type = config.schedulingCoreType; + const auto stream_mode = config.schedulingCoreType; + return stream_mode; + } else if (name == ov::hint::max_threads_per_stream) { + const auto core_type = config.maxThreadsPerStream; return core_type; } else if (name == ov::hint::enable_hyper_threading.name()) { const bool use_ht = config.enableHyperThreading; diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 8a8c5cca6a771a..94e98e32f642bb 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -191,6 +191,21 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { '/', ov::hint::SchedulingCoreType::ECORE_ONLY); } + } else if (key == ov::hint::max_threads_per_stream.name()) { + try { + maxThreadsPerStream = val.as(); + } catch (ov::Exception&) { + OPENVINO_THROW("Wrong value ", + val.as(), + "for property key ", + ov::hint::max_threads_per_stream.name(), + ". 
Expected only ", + ov::hint::MaxThreadsPerStream::AUTO, + '/', + ov::hint::MaxThreadsPerStream::PER_PLATFORM, + '/', + ov::hint::MaxThreadsPerStream::PER_SOCKET); + } } else if (key == ov::hint::enable_hyper_threading.name()) { try { enableHyperThreading = val.as(); diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 32faf152e017be..8a97682507785f 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -76,6 +76,7 @@ struct Config { bool enableCpuPinning = true; bool changedCpuPinning = false; ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; + ov::hint::MaxThreadsPerStream maxThreadsPerStream = ov::hint::MaxThreadsPerStream::AUTO; bool enableHyperThreading = true; bool changedHyperThreading = false; Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index ad48ac4b9f4e98..61ef796ab6fed8 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -410,6 +410,9 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options) } else if (name == ov::hint::scheduling_core_type) { const auto core_type = engConfig.schedulingCoreType; return core_type; + } else if (name == ov::hint::max_threads_per_stream) { + const auto stream_mode = engConfig.maxThreadsPerStream; + return stream_mode; } else if (name == ov::hint::enable_hyper_threading) { const bool ht_value = engConfig.enableHyperThreading; return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value); @@ -479,6 +482,7 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio RW_property(ov::hint::num_requests.name()), RW_property(ov::hint::enable_cpu_pinning.name()), RW_property(ov::hint::scheduling_core_type.name()), + RW_property(ov::hint::max_threads_per_stream.name()), 
RW_property(ov::hint::enable_hyper_threading.name()), RW_property(ov::device::id.name()), RW_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp index 17ef4ac956d94d..29a5194cf22a8f 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp @@ -116,6 +116,18 @@ const std::vector testing_property_for_scheduling_core_type_3 = { {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)}, {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}}; +const std::vector testing_property_for_max_threads_per_stream_1 = { + {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)}, + {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)}}; + +const std::vector testing_property_for_max_threads_per_stream_2 = { + {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)}, + {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}}; + +const std::vector testing_property_for_max_threads_per_stream = { + {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)}, + {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}}; + const std::vector testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)}, {ov::hint::enable_hyper_threading(false)}}; @@ -131,6 +143,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest, testing_property_for_scheduling_core_type_1, testing_property_for_scheduling_core_type_2, testing_property_for_scheduling_core_type_3, + testing_property_for_max_threads_per_stream_1, + testing_property_for_max_threads_per_stream_2, + testing_property_for_max_threads_per_stream_3, testing_property_for_enable_hyper_threading, 
testing_property_for_enable_cpu_pinning))); diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp index d0ee8a889414cd..af054b1468bcda 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp @@ -33,6 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable RO_property(ov::hint::num_requests.name()), RO_property(ov::hint::enable_cpu_pinning.name()), RO_property(ov::hint::scheduling_core_type.name()), + RO_property(ov::hint::max_threads_per_stream.name()), RO_property(ov::hint::enable_hyper_threading.name()), RO_property(ov::execution_devices.name()), RO_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index 0d373252eddafd..4088efc4c7110d 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -47,6 +47,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) { RW_property(ov::hint::num_requests.name()), RW_property(ov::hint::enable_cpu_pinning.name()), RW_property(ov::hint::scheduling_core_type.name()), + RW_property(ov::hint::max_threads_per_stream.name()), RW_property(ov::hint::enable_hyper_threading.name()), RW_property(ov::device::id.name()), RW_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp index 0329245b55caba..5b59900d37c705 100644 --- 
a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp @@ -332,6 +332,15 @@ std::vector OVPropertiesTestsWithCompileModelProps::getRWOptionalPro } } + if (props.empty() || std::find(props.begin(), props.end(), ov::hint::max_threads_per_stream.name()) != props.end()) { + ov::hint::SchedulingCoreType maxThreadsPerStreams[] = {ov::hint::MaxThreadsPerStream::AUTO, + ov::hint::MaxThreadsPerStream::PER_PLATFORM, + ov::hint::MaxThreadsPerStream::PER_SOCKET}; + for (auto& maxThreadsPerStream : maxThreadsPerStreams) { + res.push_back({ov::hint::max_threads_per_stream(maxThreadsPerStream)}); + } + } + if (props.empty() || std::find(props.begin(), props.end(), ov::enable_mmap.name()) != props.end()) { res.push_back({ov::enable_mmap(true)}); res.push_back({ov::enable_mmap(false)}); From 68bb894aad6572e2745eb40b9c412406658098c6 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Mon, 26 Feb 2024 16:45:14 +0800 Subject: [PATCH 02/32] update for test case --- src/bindings/c/tests/ov_core_test.cpp | 10 +++++----- src/inference/include/openvino/runtime/properties.hpp | 8 ++++---- .../tests/functional/custom/behavior/export_import.cpp | 2 +- .../shared/src/behavior/ov_plugin/properties_tests.cpp | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 8b5d5845475512..3b55d8f7cfa2fb 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -298,12 +298,12 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) { EXPECT_STREQ(val_type, ret); ov_free(ret); - const char* key_type = ov_property_key_hint_max_threads_per_stream; - const char* val_type = "PER_PLATFORM"; - OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_type, val_type)); + const char* key_mode = ov_property_key_hint_max_threads_per_stream; + const char* 
val_mode = "PER_PLATFORM"; + OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode)); ret = nullptr; - OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret)); - EXPECT_STREQ(val_type, ret); + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret)); + EXPECT_STREQ(val_mode, ret); ov_free(ret); OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val)); diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index b8496a0cfae093..93dede583be51f 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -384,9 +384,9 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) /** @endcond */ enum class MaxThreadsPerStream { - AUTO, //!< Using all threads per platform for one stream. Will create sub stream on dual socket platform. - PER_PLATFORM, //!< Using all threads per platform for one stream even on dual socket platform. - PER_SOCKET, //!< Using all threads per socket for one stream on dual socket platform. + AUTO = 0, //!< Using all threads per platform for one stream. Will create sub stream on dual socket platform. + PER_PLATFORM = 1, //!< Using all threads per platform for one stream even on dual socket platform. + PER_SOCKET = 2, //!< Using all threads per socket for one stream on dual socket platform. }; /** @cond INTERNAL */ @@ -448,7 +448,7 @@ static constexpr Property scheduling_core_type{"SCHEDULING_C * ie.set_property(ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)); * @endcode */ -static constexpr Property max_threads_per_stream{"MAX_THREADS_PER_STREAM"}; +static constexpr Property max_threads_per_stream{"MAX_THREADS_PER_STREAM"}; /** * @brief This property allows CPU pinning during inference. 
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp index 29a5194cf22a8f..2289a16b6d4d59 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp @@ -124,7 +124,7 @@ const std::vector testing_property_for_max_threads_per_stream_2 = { {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)}, {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}}; -const std::vector testing_property_for_max_threads_per_stream = { +const std::vector testing_property_for_max_threads_per_stream_3 = { {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)}, {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}}; diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp index 5b59900d37c705..f6dbf8497b90ed 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp @@ -333,9 +333,9 @@ std::vector OVPropertiesTestsWithCompileModelProps::getRWOptionalPro } if (props.empty() || std::find(props.begin(), props.end(), ov::hint::max_threads_per_stream.name()) != props.end()) { - ov::hint::SchedulingCoreType maxThreadsPerStreams[] = {ov::hint::MaxThreadsPerStream::AUTO, - ov::hint::MaxThreadsPerStream::PER_PLATFORM, - ov::hint::MaxThreadsPerStream::PER_SOCKET}; + ov::hint::MaxThreadsPerStream maxThreadsPerStreams[] = {ov::hint::MaxThreadsPerStream::AUTO, + ov::hint::MaxThreadsPerStream::PER_PLATFORM, + ov::hint::MaxThreadsPerStream::PER_SOCKET}; for (auto& maxThreadsPerStream : maxThreadsPerStreams) { 
res.push_back({ov::hint::max_threads_per_stream(maxThreadsPerStream)}); } From 14c3f27b25b3d055201c49a24de699b4c22d41cf Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Mon, 26 Feb 2024 17:10:45 +0800 Subject: [PATCH 03/32] update for comments --- .../include/openvino/runtime/properties.hpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 93dede583be51f..5881fd6ca227ab 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -384,7 +384,7 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) /** @endcond */ enum class MaxThreadsPerStream { - AUTO = 0, //!< Using all threads per platform for one stream. Will create sub stream on dual socket platform. + AUTO = 0, //!< Using all threads per platform for one stream. Will create sub stream on dual socket platform. PER_PLATFORM = 1, //!< Using all threads per platform for one stream even on dual socket platform. PER_SOCKET = 2, //!< Using all threads per socket for one stream on dual socket platform. }; @@ -439,10 +439,14 @@ static constexpr Property scheduling_core_type{"SCHEDULING_C * @brief This property defines max threads per stream used for CPU inference. * @ingroup ov_runtime_cpp_prop_api * - * Developer can use this property to select max threads per stream for CPU inference. Please refer MaxThreadsPerStream - * for all definition of types. + * Developer can use this property to select max threads of stream in latency mode for CPU inference on two socket + * platform. + * -- AUTO mode : Will create main stream on one socket and sub stream on the other socket. Some node will only + * main stream and some node will use both main stream and sub stream. 
+ * -- PER_PLATFORM mode : Will create one stream on both sockets + * -- PER_SOCKET mode : Will create one stream on single socket * - * The following code is an example to only use all threads per socket for one stream on dual sockets platform. + * The following code is an example to only use all threads of one socket for one stream on dual sockets platform. * * @code * ie.set_property(ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)); From 5724b7704cfcbf4eeb5fd3e3efcc91204a50f603 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Mon, 26 Feb 2024 22:20:00 +0800 Subject: [PATCH 04/32] update for python --- src/bindings/python/src/openvino/properties/hint/__init__.py | 1 + .../python/src/openvino/runtime/properties/hint/__init__.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/bindings/python/src/openvino/properties/hint/__init__.py b/src/bindings/python/src/openvino/properties/hint/__init__.py index 5ff211301f9c74..4a9e320b18ac61 100644 --- a/src/bindings/python/src/openvino/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/properties/hint/__init__.py @@ -5,6 +5,7 @@ # Enums from openvino._pyopenvino.properties.hint import Priority from openvino._pyopenvino.properties.hint import SchedulingCoreType +from openvino._pyopenvino.properties.hint import MaxThreadsPerStream from openvino._pyopenvino.properties.hint import ExecutionMode from openvino._pyopenvino.properties.hint import PerformanceMode diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py index b32b51ce6482b0..db6d304a383ec9 100644 --- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py @@ -5,6 +5,7 @@ # Enums from openvino._pyopenvino.properties.hint import Priority from openvino._pyopenvino.properties.hint import SchedulingCoreType +from 
openvino._pyopenvino.properties.hint import MaxThreadsPerStream from openvino._pyopenvino.properties.hint import ExecutionMode from openvino._pyopenvino.properties.hint import PerformanceMode From 65c312b7eb9205c11c916234d5913876bd77280d Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Mon, 26 Feb 2024 22:41:00 +0800 Subject: [PATCH 05/32] update for python --- .../python/src/pyopenvino/core/properties/properties.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index abae0d3dac248f..2c53ba1b78d43f 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -71,6 +71,11 @@ void regmodule_properties(py::module m) { .value("PCORE_ONLY", ov::hint::SchedulingCoreType::PCORE_ONLY) .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY); + py::enum_(m_hint, "MaxThreadsPerStream", py::arithmetic()) + .value("AUTO", ov::hint::MaxThreadsPerStream::AUTO) + .value("PER_PLATFORM", ov::hint::MaxThreadsPerStream::PER_PLATFORM) + .value("PER_SOCKET", ov::hint::MaxThreadsPerStream::PER_SOCKET); + py::enum_(m_hint, "ExecutionMode", py::arithmetic()) .value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE) .value("ACCURACY", ov::hint::ExecutionMode::ACCURACY); From d5f43a0b78ec924fac71d75181e1e24f02031531 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Mon, 26 Feb 2024 23:08:54 +0800 Subject: [PATCH 06/32] update for python --- src/bindings/python/tests/test_runtime/test_properties.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index ebfd9f6c8dedc7..b21586c13922e7 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ 
-86,6 +86,14 @@ def test_properties_rw_base(): (hints.SchedulingCoreType.ECORE_ONLY, "SchedulingCoreType.ECORE_ONLY", 2), ), ), + ( + hints.MaxThreadsPerStream, + ( + (hints.MaxThreadsPerStream.AUTO, "MaxThreadsPerStream.AUTO", 0), + (hints.MaxThreadsPerStream.PER_PLATFORM, "MaxThreadsPerStream.PER_PLATFORM", 1), + (hints.MaxThreadsPerStream.PER_SOCKET, "MaxThreadsPerStream.PER_SOCKET", 2), + ), + ), ( hints.ExecutionMode, ( From 0b09543d198bfc972f500cf37d0f90047624fa8b Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Mon, 26 Feb 2024 23:36:52 +0800 Subject: [PATCH 07/32] update for python --- src/bindings/python/src/pyopenvino/utils/utils.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index 87f6c36576a1ca..3a8f4228a562e3 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -176,6 +176,8 @@ py::object from_ov_any(const ov::Any& any) { return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); + } else if (any.is()) { + return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); } else if (any.is()) { @@ -373,6 +375,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { From fe5173d83a8cabfb78907c73d505e6e823e96efb Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 13 Mar 2024 15:48:29 +0800 Subject: [PATCH 08/32] change default value to PER_SOCKET --- src/plugins/intel_cpu/src/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 8a97682507785f..6b27a207c4c9b6 100644 --- 
a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -76,7 +76,7 @@ struct Config { bool enableCpuPinning = true; bool changedCpuPinning = false; ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; - ov::hint::MaxThreadsPerStream maxThreadsPerStream = ov::hint::MaxThreadsPerStream::AUTO; + ov::hint::MaxThreadsPerStream maxThreadsPerStream = ov::hint::MaxThreadsPerStream::PER_SOCKET; bool enableHyperThreading = true; bool changedHyperThreading = false; Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; From 19c1ed2a8b1b1cfb252b105d466d63894647ccaa Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Sun, 17 Mar 2024 17:21:12 +0800 Subject: [PATCH 09/32] update property name and value --- src/bindings/c/docs/api_overview.md | 2 +- .../c/include/openvino/c/ov_property.h | 2 +- src/bindings/c/src/ov_property.cpp | 2 +- src/bindings/c/tests/ov_core_test.cpp | 4 +- .../src/openvino/properties/hint/__init__.py | 2 +- .../runtime/properties/hint/__init__.py | 4 +- .../pyopenvino/core/properties/properties.cpp | 12 ++-- .../python/src/pyopenvino/utils/utils.cpp | 8 +-- .../tests/test_runtime/test_properties.py | 16 +++-- .../include/openvino/runtime/properties.hpp | 69 +++++++++++-------- src/plugins/intel_cpu/src/compiled_model.cpp | 6 +- src/plugins/intel_cpu/src/config.cpp | 33 ++++++--- src/plugins/intel_cpu/src/config.h | 2 +- src/plugins/intel_cpu/src/plugin.cpp | 6 +- .../custom/behavior/export_import.cpp | 24 +++---- .../ov_executable_network/properties.cpp | 2 +- .../custom/behavior/ov_plugin/properties.cpp | 2 +- .../behavior/ov_plugin/properties_tests.cpp | 13 ++-- 18 files changed, 120 insertions(+), 89 deletions(-) diff --git a/src/bindings/c/docs/api_overview.md b/src/bindings/c/docs/api_overview.md index 8c7debee7e3bd1..447b6b069b21a6 100644 --- a/src/bindings/c/docs/api_overview.md +++ b/src/bindings/c/docs/api_overview.md @@ -309,7 +309,7 @@ 
OPENVINO_C_VAR(const char*) ov_property_key_affinity; OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads; -OPENVINO_C_VAR(const char*) ov_property_key_hint_max_threads_per_stream; +OPENVINO_C_VAR(const char*) ov_property_key_hint_llm_distribution_policy; OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_cpu_pinning; diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h index f3165d242f22fc..b23ca9b68f4c33 100644 --- a/src/bindings/c/include/openvino/c/ov_property.h +++ b/src/bindings/c/include/openvino/c/ov_property.h @@ -133,7 +133,7 @@ ov_property_key_inference_num_threads; * @ingroup ov_property_c_api */ OPENVINO_C_VAR(const char*) -ov_property_key_hint_max_threads_per_stream; +ov_property_key_hint_llm_distribution_policy; /** * @brief Read-write property, it is high-level OpenVINO hint for using CPU pinning to bind CPU threads to processors diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp index ffb6d9f90e105d..5ad1bd0c0b6999 100644 --- a/src/bindings/c/src/ov_property.cpp +++ b/src/bindings/c/src/ov_property.cpp @@ -23,7 +23,7 @@ const char* ov_property_key_cache_mode = "CACHE_MODE"; const char* ov_property_key_num_streams = "NUM_STREAMS"; const char* ov_property_key_affinity = "AFFINITY"; const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS"; -const char* ov_property_key_hint_max_threads_per_stream = "MAX_THREADS_PER_STREAM"; +const char* ov_property_key_hint_llm_distribution_policy = "LLM_DISTRIBUTION_POLICY"; const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT"; const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING"; const char* ov_property_key_hint_scheduling_core_type = "SCHEDULING_CORE_TYPE"; diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 3b55d8f7cfa2fb..9069dea86b5fdf 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ 
b/src/bindings/c/tests/ov_core_test.cpp @@ -298,8 +298,8 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) { EXPECT_STREQ(val_type, ret); ov_free(ret); - const char* key_mode = ov_property_key_hint_max_threads_per_stream; - const char* val_mode = "PER_PLATFORM"; + const char* key_mode = ov_property_key_hint_llm_distribution_policy; + const char* val_mode = "ENTIRE_PLATFORM"; OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode)); ret = nullptr; OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret)); diff --git a/src/bindings/python/src/openvino/properties/hint/__init__.py b/src/bindings/python/src/openvino/properties/hint/__init__.py index 4a9e320b18ac61..d014ac0dfec37d 100644 --- a/src/bindings/python/src/openvino/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/properties/hint/__init__.py @@ -5,7 +5,7 @@ # Enums from openvino._pyopenvino.properties.hint import Priority from openvino._pyopenvino.properties.hint import SchedulingCoreType -from openvino._pyopenvino.properties.hint import MaxThreadsPerStream +from openvino._pyopenvino.properties.hint import LlmDistributionPolicy from openvino._pyopenvino.properties.hint import ExecutionMode from openvino._pyopenvino.properties.hint import PerformanceMode diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py index db6d304a383ec9..471ec63a8e675b 100644 --- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py @@ -5,7 +5,7 @@ # Enums from openvino._pyopenvino.properties.hint import Priority from openvino._pyopenvino.properties.hint import SchedulingCoreType -from openvino._pyopenvino.properties.hint import MaxThreadsPerStream +from openvino._pyopenvino.properties.hint import LlmDistributionPolicy from openvino._pyopenvino.properties.hint import 
ExecutionMode from openvino._pyopenvino.properties.hint import PerformanceMode @@ -15,7 +15,7 @@ from openvino._pyopenvino.properties.hint import performance_mode from openvino._pyopenvino.properties.hint import enable_cpu_pinning from openvino._pyopenvino.properties.hint import scheduling_core_type -from openvino._pyopenvino.properties.hint import max_threads_per_stream +from openvino._pyopenvino.properties.hint import llm_distribution_policy from openvino._pyopenvino.properties.hint import enable_hyper_threading from openvino._pyopenvino.properties.hint import execution_mode from openvino._pyopenvino.properties.hint import num_requests diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 2c53ba1b78d43f..20a35829afd399 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -71,10 +71,12 @@ void regmodule_properties(py::module m) { .value("PCORE_ONLY", ov::hint::SchedulingCoreType::PCORE_ONLY) .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY); - py::enum_(m_hint, "MaxThreadsPerStream", py::arithmetic()) - .value("AUTO", ov::hint::MaxThreadsPerStream::AUTO) - .value("PER_PLATFORM", ov::hint::MaxThreadsPerStream::PER_PLATFORM) - .value("PER_SOCKET", ov::hint::MaxThreadsPerStream::PER_SOCKET); + py::enum_(m_hint, "LlmDistributionPolicy", py::arithmetic()) + .value("TENSOR_PARTITION", ov::hint::LlmDistributionPolicy::TENSOR_PARTITION) + .value("DATA_PARTITION", ov::hint::LlmDistributionPolicy::DATA_PARTITION) + .value("PIPELINE_PARTITION", ov::hint::LlmDistributionPolicy::PIPELINE_PARTITION) + .value("ENTIRE_PLATFORM", ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM) + .value("SINGLE_DEVICE", ov::hint::LlmDistributionPolicy::SINGLE_DEVICE); py::enum_(m_hint, "ExecutionMode", py::arithmetic()) .value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE) @@ -86,7 
+88,7 @@ void regmodule_properties(py::module m) { wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode"); wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning"); wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type"); - wrap_property_RW(m_hint, ov::hint::max_threads_per_stream, "max_threads_per_stream"); + wrap_property_RW(m_hint, ov::hint::llm_distribution_policy, "llm_distribution_policy"); wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading"); wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode"); wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests"); diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index e9a1396e8ece66..62c7b5cf744c50 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -176,8 +176,8 @@ py::object from_ov_any(const ov::Any& any) { return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); - } else if (any.is()) { - return py::cast(any.as()); + } else if (any.is()) { + return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); } else if (any.is()) { @@ -375,8 +375,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); - } else if (py::isinstance(py_obj)) { - return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 8fe93eae0077af..364eef51ada79e 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -87,11 +87,13 @@ 
def test_properties_rw_base(): ), ), ( - hints.MaxThreadsPerStream, + hints.LlmDistributionPolicy, ( - (hints.MaxThreadsPerStream.AUTO, "MaxThreadsPerStream.AUTO", 0), - (hints.MaxThreadsPerStream.PER_PLATFORM, "MaxThreadsPerStream.PER_PLATFORM", 1), - (hints.MaxThreadsPerStream.PER_SOCKET, "MaxThreadsPerStream.PER_SOCKET", 2), + (hints.LlmDistributionPolicy.TENSOR_PARTITION, "LlmDistributionPolicy.TENSOR_PARTITION", 0), + (hints.LlmDistributionPolicy.DATA_PARTITION, "LlmDistributionPolicy.DATA_PARTITION", 1), + (hints.LlmDistributionPolicy.PIPELINE_PARTITION, "LlmDistributionPolicy.PIPELINE_PARTITION", 2), + (hints.LlmDistributionPolicy.ENTIRE_PLATFORM, "LlmDistributionPolicy.ENTIRE_PLATFORM", 3), + (hints.LlmDistributionPolicy.SINGLE_DEVICE, "LlmDistributionPolicy.SINGLE_DEVICE", 4), ), ), ( @@ -288,9 +290,9 @@ def test_properties_ro(ov_property_ro, expected_value): ((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),), ), ( - hints.max_threads_per_stream, - "MAX_THREADS_PER_STREAM", - ((hints.MaxThreadsPerStream.PER_PLATFORM, hints.MaxThreadsPerStream.PER_PLATFORM),), + hints.llm_distribution_policy, + "LLM_DISTRIBUTION_POLICY", + ((hints.LlmDistributionPolicy.ENTIRE_PLATFORM, hints.LlmDistributionPolicy.ENTIRE_PLATFORM),), ), ( hints.enable_hyper_threading, diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 5881fd6ca227ab..3303be4f1f3a51 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -383,37 +383,47 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) } /** @endcond */ -enum class MaxThreadsPerStream { - AUTO = 0, //!< Using all threads per platform for one stream. Will create sub stream on dual socket platform. - PER_PLATFORM = 1, //!< Using all threads per platform for one stream even on dual socket platform. 
- PER_SOCKET = 2, //!< Using all threads per socket for one stream on dual socket platform. +enum class LlmDistributionPolicy { + TENSOR_PARTITION = 0, // Split one node or subgraph into parts and run one part per socket/device in parallel. + DATA_PARTITION = 1, // Split one batch input into parts and run one part per socket/device in parallel. + PIPELINE_PARTITION = 2, // Split one model into parts and run each socket/device in parallel as a pipeline. + ENTIRE_PLATFORM = 3, // Run one model on the entire platform with all sockets/devices. + SINGLE_DEVICE = 4, // Run one model on single socket/device. }; /** @cond INTERNAL */ -inline std::ostream& operator<<(std::ostream& os, const MaxThreadsPerStream& stream_mode) { +inline std::ostream& operator<<(std::ostream& os, const LlmDistributionPolicy& stream_mode) { switch (stream_mode) { - case MaxThreadsPerStream::AUTO: - return os << "AUTO"; - case MaxThreadsPerStream::PER_PLATFORM: - return os << "PER_PLATFORM"; - case MaxThreadsPerStream::PER_SOCKET: - return os << "PER_SOCKET"; + case LlmDistributionPolicy::TENSOR_PARTITION: + return os << "TENSOR_PARTITION"; + case LlmDistributionPolicy::DATA_PARTITION: + return os << "DATA_PARTITION"; + case LlmDistributionPolicy::PIPELINE_PARTITION: + return os << "PIPELINE_PARTITION"; + case LlmDistributionPolicy::ENTIRE_PLATFORM: + return os << "ENTIRE_PLATFORM"; + case LlmDistributionPolicy::SINGLE_DEVICE: + return os << "SINGLE_DEVICE"; default: - OPENVINO_THROW("Unsupported mode!"); + OPENVINO_THROW("Unsupported LLM distribution policy!"); } } -inline std::istream& operator>>(std::istream& is, MaxThreadsPerStream& stream_mode) { +inline std::istream& operator>>(std::istream& is, LlmDistributionPolicy& stream_mode) { std::string str; is >> str; - if (str == "AUTO") { - stream_mode = MaxThreadsPerStream::AUTO; - } else if (str == "PER_PLATFORM") { - stream_mode = MaxThreadsPerStream::PER_PLATFORM; - } else if (str == "PER_SOCKET") { - stream_mode = 
MaxThreadsPerStream::PER_SOCKET; + if (str == "TENSOR_PARTITION") { + stream_mode = LlmDistributionPolicy::TENSOR_PARTITION; + } else if (str == "DATA_PARTITION") { + stream_mode = LlmDistributionPolicy::DATA_PARTITION; + } else if (str == "PIPELINE_PARTITION") { + stream_mode = LlmDistributionPolicy::PIPELINE_PARTITION; + } else if (str == "ENTIRE_PLATFORM") { + stream_mode = LlmDistributionPolicy::ENTIRE_PLATFORM; + } else if (str == "SINGLE_DEVICE") { + stream_mode = LlmDistributionPolicy::SINGLE_DEVICE; } else { - OPENVINO_THROW("Unsupported mode: ", str); + OPENVINO_THROW("Unsupported LLM distribution policy: ", str); } return is; } @@ -436,23 +446,24 @@ inline std::istream& operator>>(std::istream& is, MaxThreadsPerStream& stream_mo static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; /** - * @brief This property defines max threads per stream used for CPU inference. + * @brief This property defines distribution policy for Large language models (LLM). * @ingroup ov_runtime_cpp_prop_api * - * Developer can use this property to select max threads of stream in latency mode for CPU inference on two socket - * platform. - * -- AUTO mode : Will create main stream on one socket and sub stream on the other socket. Some node will only - * main stream and some node will use both main stream and sub stream. - * -- PER_PLATFORM mode : Will create one stream on both sockets - * -- PER_SOCKET mode : Will create one stream on single socket + * Developer can use this property to select LLM distribution policy for CPU inference with multiple sockets platform or + * GPU inference with multiple GPU devices. + * -- TENSOR_PARTITION : Split one node or subgraph into parts and run one part per socket/device in parallel. + * -- DATA_PARTITION : Split one batch input into parts and run one part per socket/device in parallel. + * -- PIPELINE_PARTITION : Split one model into parts and run each socket/device in parallel as a pipeline. 
+ * -- ENTIRE_PLATFORM : Run one model on the entire platform with all sockets/devices. + * -- SINGLE_DEVICE : Run one model on single socket/device. * * The following code is an example to only use all threads of one socket for one stream on dual sockets platform. * * @code - * ie.set_property(ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)); + * ie.set_property(ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)); * @endcode */ -static constexpr Property max_threads_per_stream{"MAX_THREADS_PER_STREAM"}; +static constexpr Property llm_distribution_policy{"LLM_DISTRIBUTION_POLICY"}; /** * @brief This property allows CPU pinning during inference. diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 377294a7ae3577..ff01280d101f9b 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -193,7 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { RO_property(ov::hint::num_requests.name()), RO_property(ov::hint::enable_cpu_pinning.name()), RO_property(ov::hint::scheduling_core_type.name()), - RO_property(ov::hint::max_threads_per_stream.name()), + RO_property(ov::hint::llm_distribution_policy.name()), RO_property(ov::hint::enable_hyper_threading.name()), RO_property(ov::execution_devices.name()), RO_property(ov::intel_cpu::denormals_optimization.name()), @@ -249,8 +249,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::hint::scheduling_core_type) { const auto stream_mode = config.schedulingCoreType; return stream_mode; - } else if (name == ov::hint::max_threads_per_stream) { - const auto core_type = config.maxThreadsPerStream; + } else if (name == ov::hint::llm_distribution_policy) { + const auto core_type = config.llmDistributionPolicy; return core_type; } else if (name == ov::hint::enable_hyper_threading.name()) { const bool 
use_ht = config.enableHyperThreading; diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 94e98e32f642bb..eeca1ce38fe390 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -191,20 +191,35 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { '/', ov::hint::SchedulingCoreType::ECORE_ONLY); } - } else if (key == ov::hint::max_threads_per_stream.name()) { - try { - maxThreadsPerStream = val.as(); - } catch (ov::Exception&) { + } else if (key == ov::hint::llm_distribution_policy.name()) { + auto error_info = [&]() { OPENVINO_THROW("Wrong value ", val.as(), "for property key ", - ov::hint::max_threads_per_stream.name(), - ". Expected only ", - ov::hint::MaxThreadsPerStream::AUTO, + ov::hint::llm_distribution_policy.name(), + ". CPU plugin only support ", + ov::hint::LlmDistributionPolicy::TENSOR_PARTITION, '/', - ov::hint::MaxThreadsPerStream::PER_PLATFORM, + ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM, '/', - ov::hint::MaxThreadsPerStream::PER_SOCKET); + ov::hint::LlmDistributionPolicy::SINGLE_DEVICE); + }; + + ov::hint::LlmDistributionPolicy llm_policy = ov::hint::LlmDistributionPolicy::PIPELINE_PARTITION; + try { + llm_policy = val.as(); + } catch (ov::Exception&) { + error_info(); + } + + switch (llm_policy) { + case ov::hint::LlmDistributionPolicy::TENSOR_PARTITION: + case ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM: + case ov::hint::LlmDistributionPolicy::SINGLE_DEVICE: + llmDistributionPolicy = llm_policy; + break; + default: + error_info(); } } else if (key == ov::hint::enable_hyper_threading.name()) { try { diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 6b27a207c4c9b6..cfda310db02dfa 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -76,7 +76,7 @@ struct Config { bool enableCpuPinning = true; bool changedCpuPinning = false; 
ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; - ov::hint::MaxThreadsPerStream maxThreadsPerStream = ov::hint::MaxThreadsPerStream::PER_SOCKET; + ov::hint::LlmDistributionPolicy llmDistributionPolicy = ov::hint::LlmDistributionPolicy::SINGLE_DEVICE; bool enableHyperThreading = true; bool changedHyperThreading = false; Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index e1e9c2f509389e..3a0ce346fdd06b 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -409,8 +409,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) } else if (name == ov::hint::scheduling_core_type) { const auto core_type = engConfig.schedulingCoreType; return core_type; - } else if (name == ov::hint::max_threads_per_stream) { - const auto stream_mode = engConfig.maxThreadsPerStream; + } else if (name == ov::hint::llm_distribution_policy) { + const auto stream_mode = engConfig.llmDistributionPolicy; return stream_mode; } else if (name == ov::hint::enable_hyper_threading) { const bool ht_value = engConfig.enableHyperThreading; @@ -484,7 +484,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio RW_property(ov::hint::num_requests.name()), RW_property(ov::hint::enable_cpu_pinning.name()), RW_property(ov::hint::scheduling_core_type.name()), - RW_property(ov::hint::max_threads_per_stream.name()), + RW_property(ov::hint::llm_distribution_policy.name()), RW_property(ov::hint::enable_hyper_threading.name()), RW_property(ov::device::id.name()), RW_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp index 2289a16b6d4d59..32398acb7e9cae 100644 --- 
a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp @@ -116,17 +116,17 @@ const std::vector testing_property_for_scheduling_core_type_3 = { {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)}, {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}}; -const std::vector testing_property_for_max_threads_per_stream_1 = { - {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)}, - {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)}}; +const std::vector testing_property_for_llm_distribution_policy_1 = { + {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)}, + {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)}}; -const std::vector testing_property_for_max_threads_per_stream_2 = { - {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)}, - {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}}; +const std::vector testing_property_for_llm_distribution_policy_2 = { + {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)}, + {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)}}; -const std::vector testing_property_for_max_threads_per_stream_3 = { - {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)}, - {ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}}; +const std::vector testing_property_for_llm_distribution_policy_3 = { + {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)}, + {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)}}; const std::vector testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)}, {ov::hint::enable_hyper_threading(false)}}; @@ -143,9 +143,9 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest, testing_property_for_scheduling_core_type_1, testing_property_for_scheduling_core_type_2, testing_property_for_scheduling_core_type_3, - testing_property_for_max_threads_per_stream_1, - testing_property_for_max_threads_per_stream_2, - testing_property_for_max_threads_per_stream_3, + testing_property_for_llm_distribution_policy_1, + testing_property_for_llm_distribution_policy_2, + testing_property_for_llm_distribution_policy_3, testing_property_for_enable_hyper_threading, testing_property_for_enable_cpu_pinning))); diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp index af054b1468bcda..aeedd2fbe25b9f 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp @@ -33,7 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable RO_property(ov::hint::num_requests.name()), RO_property(ov::hint::enable_cpu_pinning.name()), RO_property(ov::hint::scheduling_core_type.name()), - RO_property(ov::hint::max_threads_per_stream.name()), + RO_property(ov::hint::llm_distribution_policy.name()), RO_property(ov::hint::enable_hyper_threading.name()), RO_property(ov::execution_devices.name()), RO_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index 4088efc4c7110d..40fe41ca82d90e 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -47,7 +47,7 @@ TEST_F(OVClassConfigTestCPU, 
smoke_PluginAllSupportedPropertiesAreAvailable) { RW_property(ov::hint::num_requests.name()), RW_property(ov::hint::enable_cpu_pinning.name()), RW_property(ov::hint::scheduling_core_type.name()), - RW_property(ov::hint::max_threads_per_stream.name()), + RW_property(ov::hint::llm_distribution_policy.name()), RW_property(ov::hint::enable_hyper_threading.name()), RW_property(ov::device::id.name()), RW_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp index d10f66d3bc15a1..caa75c97530eaf 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp @@ -335,12 +335,13 @@ std::vector OVPropertiesTestsWithCompileModelProps::getRWOptionalPro } } - if (props.empty() || std::find(props.begin(), props.end(), ov::hint::max_threads_per_stream.name()) != props.end()) { - ov::hint::MaxThreadsPerStream maxThreadsPerStreams[] = {ov::hint::MaxThreadsPerStream::AUTO, - ov::hint::MaxThreadsPerStream::PER_PLATFORM, - ov::hint::MaxThreadsPerStream::PER_SOCKET}; - for (auto& maxThreadsPerStream : maxThreadsPerStreams) { - res.push_back({ov::hint::max_threads_per_stream(maxThreadsPerStream)}); + if (props.empty() || + std::find(props.begin(), props.end(), ov::hint::llm_distribution_policy.name()) != props.end()) { + ov::hint::LlmDistributionPolicy llmDistributionPolicys[] = {ov::hint::LlmDistributionPolicy::TENSOR_PARTITION, + ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM, + ov::hint::LlmDistributionPolicy::SINGLE_DEVICE}; + for (auto& llmDistributionPolicy : llmDistributionPolicys) { + res.push_back({ov::hint::llm_distribution_policy(llmDistributionPolicy)}); } } From 562b01aa79bf015a60874d06aecf6bcdd0828938 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Sun, 17 Mar 2024 21:02:49 +0800 
Subject: [PATCH 10/32] update code style --- .../include/openvino/runtime/properties.hpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index bf8225bb810d49..428276f77b7a05 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -383,6 +383,22 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) } /** @endcond */ +/** + * @brief This property defines CPU core type which can be used during inference. + * @ingroup ov_runtime_cpp_prop_api + * + * Developer can use this property to select specific CPU cores for inference. Please refer SchedulingCoreType for + * all definition of core type. + * + * The following code is an example to only use efficient-cores for inference on hybrid CPU. If user sets this + * configuration on a platform with only performance-cores, CPU inference will still run on the performance-cores. + * + * @code + * ie.set_property(ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)); + * @endcode + */ +static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; + enum class LlmDistributionPolicy { TENSOR_PARTITION = 0, // Split one node or subgraph into parts and run one part per socket/device in parallel. DATA_PARTITION = 1, // Split one batch input into parts and run one part per socket/device in parallel. @@ -429,22 +445,6 @@ inline std::istream& operator>>(std::istream& is, LlmDistributionPolicy& stream_ } /** @endcond */ -/** - * @brief This property defines CPU core type which can be used during inference. - * @ingroup ov_runtime_cpp_prop_api - * - * Developer can use this property to select specific CPU cores for inference. Please refer SchedulingCoreType for - * all definition of core type. 
- * - * The following code is an example to only use efficient-cores for inference on hybrid CPU. If user sets this - * configuration on a platform with only performance-cores, CPU inference will still run on the performance-cores. - * - * @code - * ie.set_property(ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)); - * @endcode - */ -static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; - /** * @brief This property defines distribution policy for Large language models (LLM). * @ingroup ov_runtime_cpp_prop_api From 9c4a9515475a2bfb001b0f7663c77cf9966d823a Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Mon, 18 Mar 2024 22:22:13 +0800 Subject: [PATCH 11/32] update property name and value --- src/bindings/c/docs/api_overview.md | 2 +- .../c/include/openvino/c/ov_property.h | 2 +- src/bindings/c/src/ov_property.cpp | 2 +- src/bindings/c/tests/ov_core_test.cpp | 4 +- .../src/openvino/properties/hint/__init__.py | 2 +- .../runtime/properties/hint/__init__.py | 4 +- .../pyopenvino/core/properties/properties.cpp | 11 ++-- .../python/src/pyopenvino/utils/utils.cpp | 8 +-- .../tests/test_runtime/test_properties.py | 15 ++--- .../include/openvino/runtime/properties.hpp | 64 +++++++------------ src/plugins/intel_cpu/src/compiled_model.cpp | 8 +-- src/plugins/intel_cpu/src/config.cpp | 23 +++---- src/plugins/intel_cpu/src/config.h | 2 +- src/plugins/intel_cpu/src/plugin.cpp | 6 +- .../custom/behavior/export_import.cpp | 21 +++--- .../ov_executable_network/properties.cpp | 2 +- .../custom/behavior/ov_plugin/properties.cpp | 2 +- .../behavior/ov_plugin/properties_tests.cpp | 12 ++-- 18 files changed, 79 insertions(+), 111 deletions(-) diff --git a/src/bindings/c/docs/api_overview.md b/src/bindings/c/docs/api_overview.md index 447b6b069b21a6..506786189abce3 100644 --- a/src/bindings/c/docs/api_overview.md +++ b/src/bindings/c/docs/api_overview.md @@ -309,7 +309,7 @@ OPENVINO_C_VAR(const char*) ov_property_key_affinity; 
OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads; -OPENVINO_C_VAR(const char*) ov_property_key_hint_llm_distribution_policy; +OPENVINO_C_VAR(const char*) ov_property_key_hint_model_distribution_policy; OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_cpu_pinning; diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h index c8b344e625bc69..1f9bcea14dd9a3 100644 --- a/src/bindings/c/include/openvino/c/ov_property.h +++ b/src/bindings/c/include/openvino/c/ov_property.h @@ -133,7 +133,7 @@ ov_property_key_inference_num_threads; * @ingroup ov_property_c_api */ OPENVINO_C_VAR(const char*) -ov_property_key_hint_llm_distribution_policy; +ov_property_key_hint_model_distribution_policy; /** * @brief Read-write property, it is high-level OpenVINO hint for using CPU pinning to bind CPU threads to processors diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp index 84a4a94ec3106d..611b36c90c83f8 100644 --- a/src/bindings/c/src/ov_property.cpp +++ b/src/bindings/c/src/ov_property.cpp @@ -23,7 +23,7 @@ const char* ov_property_key_cache_mode = "CACHE_MODE"; const char* ov_property_key_num_streams = "NUM_STREAMS"; const char* ov_property_key_affinity = "AFFINITY"; const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS"; -const char* ov_property_key_hint_llm_distribution_policy = "LLM_DISTRIBUTION_POLICY"; +const char* ov_property_key_hint_model_distribution_policy = "MODEL_DISTRIBUTION_POLICY"; const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT"; const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING"; const char* ov_property_key_hint_scheduling_core_type = "SCHEDULING_CORE_TYPE"; diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 0fb826c85ebf6a..e56ec78f4ff6e3 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -298,8 
+298,8 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) { EXPECT_STREQ(val_type, ret); ov_free(ret); - const char* key_mode = ov_property_key_hint_llm_distribution_policy; - const char* val_mode = "ENTIRE_PLATFORM"; + const char* key_mode = ov_property_key_hint_model_distribution_policy; + const char* val_mode = "TENSOR_PARALLEL"; OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode)); ret = nullptr; OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret)); diff --git a/src/bindings/python/src/openvino/properties/hint/__init__.py b/src/bindings/python/src/openvino/properties/hint/__init__.py index d014ac0dfec37d..1624325ea5e9e2 100644 --- a/src/bindings/python/src/openvino/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/properties/hint/__init__.py @@ -5,7 +5,7 @@ # Enums from openvino._pyopenvino.properties.hint import Priority from openvino._pyopenvino.properties.hint import SchedulingCoreType -from openvino._pyopenvino.properties.hint import LlmDistributionPolicy +from openvino._pyopenvino.properties.hint import ModelDistributionPolicy from openvino._pyopenvino.properties.hint import ExecutionMode from openvino._pyopenvino.properties.hint import PerformanceMode diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py index 471ec63a8e675b..dd90ded374ca11 100644 --- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py @@ -5,7 +5,7 @@ # Enums from openvino._pyopenvino.properties.hint import Priority from openvino._pyopenvino.properties.hint import SchedulingCoreType -from openvino._pyopenvino.properties.hint import LlmDistributionPolicy +from openvino._pyopenvino.properties.hint import ModelDistributionPolicy from openvino._pyopenvino.properties.hint import ExecutionMode from 
openvino._pyopenvino.properties.hint import PerformanceMode @@ -15,7 +15,7 @@ from openvino._pyopenvino.properties.hint import performance_mode from openvino._pyopenvino.properties.hint import enable_cpu_pinning from openvino._pyopenvino.properties.hint import scheduling_core_type -from openvino._pyopenvino.properties.hint import llm_distribution_policy +from openvino._pyopenvino.properties.hint import model_distribution_policy from openvino._pyopenvino.properties.hint import enable_hyper_threading from openvino._pyopenvino.properties.hint import execution_mode from openvino._pyopenvino.properties.hint import num_requests diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 20a35829afd399..6310aac026e8c0 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -71,12 +71,9 @@ void regmodule_properties(py::module m) { .value("PCORE_ONLY", ov::hint::SchedulingCoreType::PCORE_ONLY) .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY); - py::enum_(m_hint, "LlmDistributionPolicy", py::arithmetic()) - .value("TENSOR_PARTITION", ov::hint::LlmDistributionPolicy::TENSOR_PARTITION) - .value("DATA_PARTITION", ov::hint::LlmDistributionPolicy::DATA_PARTITION) - .value("PIPELINE_PARTITION", ov::hint::LlmDistributionPolicy::PIPELINE_PARTITION) - .value("ENTIRE_PLATFORM", ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM) - .value("SINGLE_DEVICE", ov::hint::LlmDistributionPolicy::SINGLE_DEVICE); + py::enum_(m_hint, "ModelDistributionPolicy", py::arithmetic()) + .value("NONE", ov::hint::ModelDistributionPolicy::NONE) + .value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL); py::enum_(m_hint, "ExecutionMode", py::arithmetic()) .value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE) @@ -88,7 +85,7 @@ void regmodule_properties(py::module m) { 
wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode"); wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning"); wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type"); - wrap_property_RW(m_hint, ov::hint::llm_distribution_policy, "llm_distribution_policy"); + wrap_property_RW(m_hint, ov::hint::model_distribution_policy, "model_distribution_policy"); wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading"); wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode"); wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests"); diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index e409e0356fd26a..d21e2a1fef0a23 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -176,8 +176,8 @@ py::object from_ov_any(const ov::Any& any) { return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); - } else if (any.is()) { - return py::cast(any.as()); + } else if (any.is()) { + return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); } else if (any.is()) { @@ -377,8 +377,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); - } else if (py::isinstance(py_obj)) { - return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 364eef51ada79e..80ecf47ba0ed89 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -87,13 +87,10 @@ def test_properties_rw_base(): ), ), ( - 
hints.LlmDistributionPolicy, + hints.ModelDistributionPolicy, ( - (hints.LlmDistributionPolicy.TENSOR_PARTITION, "LlmDistributionPolicy.TENSOR_PARTITION", 0), - (hints.LlmDistributionPolicy.DATA_PARTITION, "LlmDistributionPolicy.DATA_PARTITION", 1), - (hints.LlmDistributionPolicy.PIPELINE_PARTITION, "LlmDistributionPolicy.PIPELINE_PARTITION", 2), - (hints.LlmDistributionPolicy.ENTIRE_PLATFORM, "LlmDistributionPolicy.ENTIRE_PLATFORM", 3), - (hints.LlmDistributionPolicy.SINGLE_DEVICE, "LlmDistributionPolicy.SINGLE_DEVICE", 4), + (hints.ModelDistributionPolicy.NONE, "ModelDistributionPolicy.NONE", 0), + (hints.ModelDistributionPolicy.TENSOR_PARALLEL, "ModelDistributionPolicy.TENSOR_PARALLEL", 1), ), ), ( @@ -290,9 +287,9 @@ def test_properties_ro(ov_property_ro, expected_value): ((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),), ), ( - hints.llm_distribution_policy, - "LLM_DISTRIBUTION_POLICY", - ((hints.LlmDistributionPolicy.ENTIRE_PLATFORM, hints.LlmDistributionPolicy.ENTIRE_PLATFORM),), + hints.model_distribution_policy, + "MODEL_DISTRIBUTION_POLICY", + ((hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.TENSOR_PARALLEL),), ), ( hints.enable_hyper_threading, diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 428276f77b7a05..61cf50b0363553 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -399,71 +399,53 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) */ static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; -enum class LlmDistributionPolicy { - TENSOR_PARTITION = 0, // Split one node or subgraph into parts and run one part per socket/device in parallel. - DATA_PARTITION = 1, // Split one batch input into parts and run one part per socket/device in parallel. 
- PIPELINE_PARTITION = 2, // Split one model into parts and run each socket/device in parallel as a pipeline. - ENTIRE_PLATFORM = 3, // Run one model on the entire platform with all sockets/devices. - SINGLE_DEVICE = 4, // Run one model on single socket/device. +enum class ModelDistributionPolicy { + NONE = 0, // Run one model on single socket/device without parallelism. + TENSOR_PARALLEL = 1, // Split one node or subgraph into parts and run one part per socket/device in parallel. }; /** @cond INTERNAL */ -inline std::ostream& operator<<(std::ostream& os, const LlmDistributionPolicy& stream_mode) { +inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& stream_mode) { switch (stream_mode) { - case LlmDistributionPolicy::TENSOR_PARTITION: - return os << "TENSOR_PARTITION"; - case LlmDistributionPolicy::DATA_PARTITION: - return os << "DATA_PARTITION"; - case LlmDistributionPolicy::PIPELINE_PARTITION: - return os << "PIPELINE_PARTITION"; - case LlmDistributionPolicy::ENTIRE_PLATFORM: - return os << "ENTIRE_PLATFORM"; - case LlmDistributionPolicy::SINGLE_DEVICE: - return os << "SINGLE_DEVICE"; + case ModelDistributionPolicy::NONE: + return os << "NONE"; + case ModelDistributionPolicy::TENSOR_PARALLEL: + return os << "TENSOR_PARALLEL"; default: - OPENVINO_THROW("Unsupported LLM distribution policy!"); + OPENVINO_THROW("Unsupported model distribution policy!"); } } -inline std::istream& operator>>(std::istream& is, LlmDistributionPolicy& stream_mode) { +inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) { std::string str; is >> str; - if (str == "TENSOR_PARTITION") { - stream_mode = LlmDistributionPolicy::TENSOR_PARTITION; - } else if (str == "DATA_PARTITION") { - stream_mode = LlmDistributionPolicy::DATA_PARTITION; - } else if (str == "PIPELINE_PARTITION") { - stream_mode = LlmDistributionPolicy::PIPELINE_PARTITION; - } else if (str == "ENTIRE_PLATFORM") { - stream_mode = 
LlmDistributionPolicy::ENTIRE_PLATFORM; - } else if (str == "SINGLE_DEVICE") { - stream_mode = LlmDistributionPolicy::SINGLE_DEVICE; + if (str == "NONE") { + stream_mode = ModelDistributionPolicy::NONE; + } else if (str == "TENSOR_PARALLEL") { + stream_mode = ModelDistributionPolicy::TENSOR_PARALLEL; } else { - OPENVINO_THROW("Unsupported LLM distribution policy: ", str); + OPENVINO_THROW("Unsupported model distribution policy: ", str); } return is; } /** @endcond */ /** - * @brief This property defines distribution policy for Large language models (LLM). + * @brief This property defines model distribution policy for inference with multiple sockets/devices. * @ingroup ov_runtime_cpp_prop_api * - * Developer can use this property to select LLM distribution policy for CPU inference with multiple sockets platform or - * GPU inference with multiple GPU devices. - * -- TENSOR_PARTITION : Split one node or subgraph into parts and run one part per socket/device in parallel. - * -- DATA_PARTITION : Split one batch input into parts and run one part per socket/device in parallel. - * -- PIPELINE_PARTITION : Split one model into parts and run each socket/device in parallel as a pipeline. - * -- ENTIRE_PLATFORM : Run one model on the entire platform with all sockets/devices. - * -- SINGLE_DEVICE : Run one model on single socket/device. + * Developer can use this property to select model distribution policy for CPU inference with multiple sockets + * platform or GPU inference with multiple GPU devices. + * -- TENSOR_PARALLEL : Split one node or subgraph into parts and run one part per socket/device in parallel. + * -- NONE : Run one model on single socket/device without parallelism. * - * The following code is an example to only use all threads of one socket for one stream on dual sockets platform. + * The following code is an example to split a node into two parts and run one part per socket on dual sockets platform.
* * @code - * ie.set_property(ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)); + * ie.set_property(ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)); * @endcode */ -static constexpr Property llm_distribution_policy{"LLM_DISTRIBUTION_POLICY"}; +static constexpr Property model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"}; /** * @brief This property allows CPU pinning during inference. diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 21de81e00291eb..05948e861ded62 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -193,7 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { RO_property(ov::hint::num_requests.name()), RO_property(ov::hint::enable_cpu_pinning.name()), RO_property(ov::hint::scheduling_core_type.name()), - RO_property(ov::hint::llm_distribution_policy.name()), + RO_property(ov::hint::model_distribution_policy.name()), RO_property(ov::hint::enable_hyper_threading.name()), RO_property(ov::execution_devices.name()), RO_property(ov::intel_cpu::denormals_optimization.name()), @@ -249,9 +249,9 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::hint::scheduling_core_type) { const auto stream_mode = config.schedulingCoreType; return stream_mode; - } else if (name == ov::hint::llm_distribution_policy) { - const auto core_type = config.llmDistributionPolicy; - return core_type; + } else if (name == ov::hint::model_distribution_policy) { + const auto model_policy = config.modelDistributionPolicy; + return model_policy; } else if (name == ov::hint::enable_hyper_threading.name()) { const bool use_ht = config.enableHyperThreading; return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht); diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index
55cb1843c9e8d8..ad1db3e6b305b5 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -191,32 +191,29 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { '/', ov::hint::SchedulingCoreType::ECORE_ONLY); } - } else if (key == ov::hint::llm_distribution_policy.name()) { + } else if (key == ov::hint::model_distribution_policy.name()) { auto error_info = [&]() { OPENVINO_THROW("Wrong value ", val.as(), "for property key ", - ov::hint::llm_distribution_policy.name(), + ov::hint::model_distribution_policy.name(), ". CPU plugin only support ", - ov::hint::LlmDistributionPolicy::TENSOR_PARTITION, + ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, '/', - ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM, - '/', - ov::hint::LlmDistributionPolicy::SINGLE_DEVICE); + ov::hint::ModelDistributionPolicy::NONE); }; - ov::hint::LlmDistributionPolicy llm_policy = ov::hint::LlmDistributionPolicy::PIPELINE_PARTITION; + ov::hint::ModelDistributionPolicy model_policy = ov::hint::ModelDistributionPolicy::NONE; try { - llm_policy = val.as(); + model_policy = val.as(); } catch (ov::Exception&) { error_info(); } - switch (llm_policy) { - case ov::hint::LlmDistributionPolicy::TENSOR_PARTITION: - case ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM: - case ov::hint::LlmDistributionPolicy::SINGLE_DEVICE: - llmDistributionPolicy = llm_policy; + switch (model_policy) { + case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL: + case ov::hint::ModelDistributionPolicy::NONE: + modelDistributionPolicy = model_policy; break; default: error_info(); diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 2f7445bc7180cd..6460cc07aa5b4b 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -76,7 +76,7 @@ struct Config { bool enableCpuPinning = true; bool changedCpuPinning = false; ov::hint::SchedulingCoreType schedulingCoreType = 
ov::hint::SchedulingCoreType::ANY_CORE; - ov::hint::LlmDistributionPolicy llmDistributionPolicy = ov::hint::LlmDistributionPolicy::SINGLE_DEVICE; + ov::hint::ModelDistributionPolicy modelDistributionPolicy = ov::hint::ModelDistributionPolicy::NONE; bool enableHyperThreading = true; bool changedHyperThreading = false; Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 38f7a6aa54af86..c847fe36d2387f 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -409,8 +409,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) } else if (name == ov::hint::scheduling_core_type) { const auto core_type = engConfig.schedulingCoreType; return core_type; - } else if (name == ov::hint::llm_distribution_policy) { - const auto stream_mode = engConfig.llmDistributionPolicy; + } else if (name == ov::hint::model_distribution_policy) { + const auto stream_mode = engConfig.modelDistributionPolicy; return stream_mode; } else if (name == ov::hint::enable_hyper_threading) { const bool ht_value = engConfig.enableHyperThreading; @@ -484,7 +484,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio RW_property(ov::hint::num_requests.name()), RW_property(ov::hint::enable_cpu_pinning.name()), RW_property(ov::hint::scheduling_core_type.name()), - RW_property(ov::hint::llm_distribution_policy.name()), + RW_property(ov::hint::model_distribution_policy.name()), RW_property(ov::hint::enable_hyper_threading.name()), RW_property(ov::device::id.name()), RW_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp index e0a8ca4346f5eb..ecf3ce8df8af35 100644 --- 
a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp @@ -116,17 +116,13 @@ const std::vector testing_property_for_scheduling_core_type_3 = { {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)}, {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}}; -const std::vector testing_property_for_llm_distribution_policy_1 = { - {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)}, - {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)}}; +const std::vector testing_property_for_model_distribution_policy_1 = { + {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)}, + {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)}}; -const std::vector testing_property_for_llm_distribution_policy_2 = { - {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM)}, - {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)}}; - -const std::vector testing_property_for_llm_distribution_policy_3 = { - {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::SINGLE_DEVICE)}, - {ov::hint::llm_distribution_policy(ov::hint::LlmDistributionPolicy::TENSOR_PARTITION)}}; +const std::vector testing_property_for_model_distribution_policy_2 = { + {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)}, + {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)}}; const std::vector testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)}, {ov::hint::enable_hyper_threading(false)}}; @@ -143,9 +139,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest, testing_property_for_scheduling_core_type_1, testing_property_for_scheduling_core_type_2, testing_property_for_scheduling_core_type_3, - 
testing_property_for_llm_distribution_policy_1, - testing_property_for_llm_distribution_policy_2, - testing_property_for_llm_distribution_policy_3, + testing_property_for_model_distribution_policy_1, + testing_property_for_model_distribution_policy_2, testing_property_for_enable_hyper_threading, testing_property_for_enable_cpu_pinning))); diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp index aeedd2fbe25b9f..cef9e809bf2a62 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp @@ -33,7 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable RO_property(ov::hint::num_requests.name()), RO_property(ov::hint::enable_cpu_pinning.name()), RO_property(ov::hint::scheduling_core_type.name()), - RO_property(ov::hint::llm_distribution_policy.name()), + RO_property(ov::hint::model_distribution_policy.name()), RO_property(ov::hint::enable_hyper_threading.name()), RO_property(ov::execution_devices.name()), RO_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index 40fe41ca82d90e..11a95f5663c749 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -47,7 +47,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) { RW_property(ov::hint::num_requests.name()), RW_property(ov::hint::enable_cpu_pinning.name()), RW_property(ov::hint::scheduling_core_type.name()), - RW_property(ov::hint::llm_distribution_policy.name()), + 
RW_property(ov::hint::model_distribution_policy.name()), RW_property(ov::hint::enable_hyper_threading.name()), RW_property(ov::device::id.name()), RW_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp index 022cf3f6c26b39..d10914e299e0be 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp @@ -336,12 +336,12 @@ std::vector OVPropertiesTestsWithCompileModelProps::getRWOptionalPro } if (props.empty() || - std::find(props.begin(), props.end(), ov::hint::llm_distribution_policy.name()) != props.end()) { - ov::hint::LlmDistributionPolicy llmDistributionPolicys[] = {ov::hint::LlmDistributionPolicy::TENSOR_PARTITION, - ov::hint::LlmDistributionPolicy::ENTIRE_PLATFORM, - ov::hint::LlmDistributionPolicy::SINGLE_DEVICE}; - for (auto& llmDistributionPolicy : llmDistributionPolicys) { - res.push_back({ov::hint::llm_distribution_policy(llmDistributionPolicy)}); + std::find(props.begin(), props.end(), ov::hint::model_distribution_policy.name()) != props.end()) { + ov::hint::ModelDistributionPolicy modelDistributionPolicys[] = { + ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, + ov::hint::ModelDistributionPolicy::NONE}; + for (auto& modelDistributionPolicy : modelDistributionPolicys) { + res.push_back({ov::hint::model_distribution_policy(modelDistributionPolicy)}); } } From ad744b8b02e11081018ebd2ac7fce5c0f4fcec4b Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Tue, 19 Mar 2024 00:50:57 +0800 Subject: [PATCH 12/32] support combined properties --- src/plugins/intel_cpu/src/config.cpp | 48 ++++++++++++++----- src/plugins/intel_cpu/src/config.h | 4 +- src/plugins/intel_cpu/src/plugin.cpp | 13 ++++- .../custom/behavior/export_import.cpp | 22 ++------- 4 files changed, 55 
insertions(+), 32 deletions(-) diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index ad1db3e6b305b5..b10e4d8143f53b 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -66,6 +66,22 @@ void Config::applyDebugCapsProperties() { } #endif +std::vector parse_multiple_parameters(const std::string& inputs, const char separator = ',') { + std::vector parameters; + std::string::size_type pos = 0; + std::string::size_type endpos = 0; + while ((endpos = inputs.find(separator, pos)) != std::string::npos) { + auto substr = inputs.substr(pos, endpos - pos); + if (!substr.empty()) + parameters.push_back(substr); + pos = endpos + 1; + } + auto substr = inputs.substr(pos, inputs.length() - pos); + if (!substr.empty()) + parameters.push_back(substr); + return parameters; +} + void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { const auto streamExecutorConfigKeys = streamExecutorConfig.get_property(ov::supported_properties.name()).as>(); @@ -203,20 +219,30 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { ov::hint::ModelDistributionPolicy::NONE); }; - ov::hint::ModelDistributionPolicy model_policy = ov::hint::ModelDistributionPolicy::NONE; - try { - model_policy = val.as(); - } catch (ov::Exception&) { + std::vector para_vect = parse_multiple_parameters(val.as()); + if (para_vect.size() == 0) { error_info(); } - switch (model_policy) { - case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL: - case ov::hint::ModelDistributionPolicy::NONE: - modelDistributionPolicy = model_policy; - break; - default: - error_info(); + ov::hint::ModelDistributionPolicy model_policy; + modelDistributionPolicy.clear(); + + for (auto& row : para_vect) { + std::stringstream str_stream; + try { + str_stream.str(row); + str_stream >> model_policy; + switch (model_policy) { + case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL: + case 
ov::hint::ModelDistributionPolicy::NONE: + modelDistributionPolicy.emplace(model_policy); + break; + default: + error_info(); + } + } catch (ov::Exception&) { + error_info(); + } } } else if (key == ov::hint::enable_hyper_threading.name()) { try { diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 6460cc07aa5b4b..e91abf14cc300a 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace ov { namespace intel_cpu { @@ -76,7 +77,8 @@ struct Config { bool enableCpuPinning = true; bool changedCpuPinning = false; ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; - ov::hint::ModelDistributionPolicy modelDistributionPolicy = ov::hint::ModelDistributionPolicy::NONE; + std::unordered_set modelDistributionPolicy = { + ov::hint::ModelDistributionPolicy::NONE}; bool enableHyperThreading = true; bool changedHyperThreading = false; Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index c847fe36d2387f..a2d5a5e83c3b79 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -410,8 +410,17 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) const auto core_type = engConfig.schedulingCoreType; return core_type; } else if (name == ov::hint::model_distribution_policy) { - const auto stream_mode = engConfig.modelDistributionPolicy; - return stream_mode; + std::string policy_str = ""; + if (engConfig.modelDistributionPolicy.size() > 1) { + std::stringstream str_stream; + for (auto& row : engConfig.modelDistributionPolicy) { + str_stream << row; + policy_str += str_stream.str() + ", "; + str_stream.str(""); + } + policy_str.erase(policy_str.length() - 2); + } + return policy_str; } else if (name == 
ov::hint::enable_hyper_threading) { const bool ht_value = engConfig.enableHyperThreading; return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value); diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp index ecf3ce8df8af35..3983fa33b37491 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp @@ -104,23 +104,12 @@ const std::vector testing_property_for_performance_mode = { {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)}, {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}}; -const std::vector testing_property_for_scheduling_core_type_1 = { +const std::vector testing_property_for_scheduling_core_type = { {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)}, - {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)}}; - -const std::vector testing_property_for_scheduling_core_type_2 = { {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)}, {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}}; -const std::vector testing_property_for_scheduling_core_type_3 = { - {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)}, - {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}}; - -const std::vector testing_property_for_model_distribution_policy_1 = { - {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)}, - {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)}}; - -const std::vector testing_property_for_model_distribution_policy_2 = { +const std::vector testing_property_for_model_distribution_policy = { {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)}, 
{ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)}}; @@ -136,11 +125,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest, ::testing::Values(testing_property_for_streams, testing_property_for_threads, testing_property_for_performance_mode, - testing_property_for_scheduling_core_type_1, - testing_property_for_scheduling_core_type_2, - testing_property_for_scheduling_core_type_3, - testing_property_for_model_distribution_policy_1, - testing_property_for_model_distribution_policy_2, + testing_property_for_scheduling_core_type, + testing_property_for_model_distribution_policy, testing_property_for_enable_hyper_threading, testing_property_for_enable_cpu_pinning))); From d456451679289644804359e05e96d7e51cc7533c Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Tue, 19 Mar 2024 10:22:57 +0800 Subject: [PATCH 13/32] update code style --- src/plugins/intel_cpu/src/config.cpp | 32 ++++++++++++++-------------- src/plugins/intel_cpu/src/config.h | 2 ++ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index b10e4d8143f53b..671d1b5af45a42 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -24,6 +24,22 @@ namespace intel_cpu { using namespace ov::threading; using namespace dnnl::impl::cpu::x64; +std::vector parse_multiple_parameters(const std::string& inputs, const char separator) { + std::vector parameters; + std::string::size_type pos = 0; + std::string::size_type endpos = 0; + while ((endpos = inputs.find(separator, pos)) != std::string::npos) { + auto substr = inputs.substr(pos, endpos - pos); + if (!substr.empty()) + parameters.push_back(substr); + pos = endpos + 1; + } + auto substr = inputs.substr(pos, inputs.length() - pos); + if (!substr.empty()) + parameters.push_back(substr); + return parameters; +} + Config::Config() { // this is default mode #if defined(__APPLE__) || defined(_WIN32) @@ -66,22 
+82,6 @@ void Config::applyDebugCapsProperties() { } #endif -std::vector parse_multiple_parameters(const std::string& inputs, const char separator = ',') { - std::vector parameters; - std::string::size_type pos = 0; - std::string::size_type endpos = 0; - while ((endpos = inputs.find(separator, pos)) != std::string::npos) { - auto substr = inputs.substr(pos, endpos - pos); - if (!substr.empty()) - parameters.push_back(substr); - pos = endpos + 1; - } - auto substr = inputs.substr(pos, inputs.length() - pos); - if (!substr.empty()) - parameters.push_back(substr); - return parameters; -} - void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { const auto streamExecutorConfigKeys = streamExecutorConfig.get_property(ov::supported_properties.name()).as>(); diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index e91abf14cc300a..e81fb3b662ca3d 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -115,5 +115,7 @@ struct Config { #endif }; +std::vector parse_multiple_parameters(const std::string& inputs, const char separator = ','); + } // namespace intel_cpu } // namespace ov From 911a79e01686b36c3c2e17d9a474e1948a7c504b Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Tue, 19 Mar 2024 13:46:03 +0800 Subject: [PATCH 14/32] update test case for combined properties --- src/bindings/c/tests/ov_core_test.cpp | 7 +++++++ src/plugins/intel_cpu/src/compiled_model.cpp | 13 +++++++++++-- src/plugins/intel_cpu/src/plugin.cpp | 4 ++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index e56ec78f4ff6e3..54ea701495ebbf 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -306,6 +306,13 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) { EXPECT_STREQ(val_mode, ret); ov_free(ret); + val_mode = "TENSOR_PARALLEL, NONE"; + 
OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode)); + ret = nullptr; + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret)); + EXPECT_STREQ(val_mode, ret); + ov_free(ret); + OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val)); ret = nullptr; OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret)); diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 05948e861ded62..7dbc2eb76f40c7 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -250,8 +250,17 @@ ov::Any CompiledModel::get_property(const std::string& name) const { const auto stream_mode = config.schedulingCoreType; return stream_mode; } else if (name == ov::hint::model_distribution_policy) { - const auto model_policy = config.modelDistributionPolicy; - return model_policy; + std::string policy_str = ""; + if (config.modelDistributionPolicy.size() > 0) { + std::stringstream str_stream; + for (auto& row : config.modelDistributionPolicy) { + str_stream << row; + policy_str = str_stream.str() + ", " + policy_str; + str_stream.str(""); + } + policy_str.erase(policy_str.length() - 2); + } + return policy_str; } else if (name == ov::hint::enable_hyper_threading.name()) { const bool use_ht = config.enableHyperThreading; return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht); diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index a2d5a5e83c3b79..1470713b4ecff5 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -411,11 +411,11 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) return core_type; } else if (name == ov::hint::model_distribution_policy) { std::string policy_str = ""; - if (engConfig.modelDistributionPolicy.size() > 1) { + if 
(engConfig.modelDistributionPolicy.size() > 0) { std::stringstream str_stream; for (auto& row : engConfig.modelDistributionPolicy) { str_stream << row; - policy_str += str_stream.str() + ", "; + policy_str = str_stream.str() + ", " + policy_str; str_stream.str(""); } policy_str.erase(policy_str.length() - 2); From 9f1189f38f823024d7b9837bdc53d7139adccb81 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Tue, 19 Mar 2024 15:15:37 +0800 Subject: [PATCH 15/32] update test case for combined properties --- src/bindings/c/tests/ov_core_test.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 54ea701495ebbf..61f5adda5c2886 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -310,7 +310,9 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) { OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode)); ret = nullptr; OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret)); - EXPECT_STREQ(val_mode, ret); + if ((ret != "TENSOR_PARALLEL, NONE") && (ret != "NONE, TENSOR_PARALLEL")) { + EXPECT_STREQ(val_mode, ret); + } ov_free(ret); OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val)); From 685827278008d5a0af40d6d76a02941daa66b599 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Tue, 19 Mar 2024 15:40:35 +0800 Subject: [PATCH 16/32] update test case for combined properties --- src/bindings/c/tests/ov_core_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 61f5adda5c2886..592479a6b02166 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -310,7 +310,7 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) { OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, 
val_mode)); ret = nullptr; OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret)); - if ((ret != "TENSOR_PARALLEL, NONE") && (ret != "NONE, TENSOR_PARALLEL")) { + if ((strcmp(ret, "TENSOR_PARALLEL, NONE") != 0) && (strcmp(ret, "NONE, TENSOR_PARALLEL") != 0)) { EXPECT_STREQ(val_mode, ret); } ov_free(ret); From 7d0af101cdf16bbdbf3c1299768ce7d63d845ab4 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Tue, 19 Mar 2024 18:23:27 +0800 Subject: [PATCH 17/32] update for combined properties --- src/bindings/c/tests/ov_core_test.cpp | 4 +--- src/plugins/intel_cpu/src/compiled_model.cpp | 2 +- src/plugins/intel_cpu/src/config.cpp | 2 +- src/plugins/intel_cpu/src/config.h | 4 +--- src/plugins/intel_cpu/src/plugin.cpp | 2 +- 5 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 592479a6b02166..54ea701495ebbf 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -310,9 +310,7 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) { OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode)); ret = nullptr; OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret)); - if ((strcmp(ret, "TENSOR_PARALLEL, NONE") != 0) && (strcmp(ret, "NONE, TENSOR_PARALLEL") != 0)) { - EXPECT_STREQ(val_mode, ret); - } + EXPECT_STREQ(val_mode, ret); ov_free(ret); OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val)); diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 7dbc2eb76f40c7..71589a6a18b6ae 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -255,7 +255,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { std::stringstream str_stream; for (auto& row : config.modelDistributionPolicy) { str_stream << row; - policy_str 
= str_stream.str() + ", " + policy_str; + policy_str += str_stream.str() + ", "; str_stream.str(""); } policy_str.erase(policy_str.length() - 2); diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 671d1b5af45a42..4a77c64e2eba84 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -235,7 +235,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { switch (model_policy) { case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL: case ov::hint::ModelDistributionPolicy::NONE: - modelDistributionPolicy.emplace(model_policy); + modelDistributionPolicy.emplace_back(model_policy); break; default: error_info(); diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index e81fb3b662ca3d..acd81c4a7efc86 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -15,7 +15,6 @@ #include #include #include -#include namespace ov { namespace intel_cpu { @@ -77,8 +76,7 @@ struct Config { bool enableCpuPinning = true; bool changedCpuPinning = false; ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; - std::unordered_set modelDistributionPolicy = { - ov::hint::ModelDistributionPolicy::NONE}; + std::vector modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE}; bool enableHyperThreading = true; bool changedHyperThreading = false; Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 1470713b4ecff5..2ffb22c41dc381 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -415,7 +415,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) std::stringstream str_stream; for (auto& row : engConfig.modelDistributionPolicy) { str_stream << row; - policy_str = 
str_stream.str() + ", " + policy_str; + policy_str += str_stream.str() + ", "; str_stream.str(""); } policy_str.erase(policy_str.length() - 2); From 03d09e8286559dd9a74eec96216b11ca11537df8 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 15:45:43 +0800 Subject: [PATCH 18/32] remove CAPI interface --- src/bindings/c/docs/api_overview.md | 2 -- src/bindings/c/include/openvino/c/ov_property.h | 7 ------- src/bindings/c/src/ov_property.cpp | 1 - src/bindings/c/tests/ov_core_test.cpp | 15 --------------- 4 files changed, 25 deletions(-) diff --git a/src/bindings/c/docs/api_overview.md b/src/bindings/c/docs/api_overview.md index 506786189abce3..6ca2ad403c1a7e 100644 --- a/src/bindings/c/docs/api_overview.md +++ b/src/bindings/c/docs/api_overview.md @@ -309,8 +309,6 @@ OPENVINO_C_VAR(const char*) ov_property_key_affinity; OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads; -OPENVINO_C_VAR(const char*) ov_property_key_hint_model_distribution_policy; - OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_cpu_pinning; OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_hyper_threading; diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h index 1f9bcea14dd9a3..5532287057a886 100644 --- a/src/bindings/c/include/openvino/c/ov_property.h +++ b/src/bindings/c/include/openvino/c/ov_property.h @@ -128,13 +128,6 @@ ov_property_key_affinity; OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads; -/** - * @brief Read-write property to set/get the maximum number of threads per stream of CPU inference. 
- * @ingroup ov_property_c_api - */ -OPENVINO_C_VAR(const char*) -ov_property_key_hint_model_distribution_policy; - /** * @brief Read-write property, it is high-level OpenVINO hint for using CPU pinning to bind CPU threads to processors * during inference diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp index 611b36c90c83f8..7c33b4b8dbb9cd 100644 --- a/src/bindings/c/src/ov_property.cpp +++ b/src/bindings/c/src/ov_property.cpp @@ -23,7 +23,6 @@ const char* ov_property_key_cache_mode = "CACHE_MODE"; const char* ov_property_key_num_streams = "NUM_STREAMS"; const char* ov_property_key_affinity = "AFFINITY"; const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS"; -const char* ov_property_key_hint_model_distribution_policy = "MODEL_DISTRIBUTION_POLICY"; const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT"; const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING"; const char* ov_property_key_hint_scheduling_core_type = "SCHEDULING_CORE_TYPE"; diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 54ea701495ebbf..3e8ceebcaa0e49 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -298,21 +298,6 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) { EXPECT_STREQ(val_type, ret); ov_free(ret); - const char* key_mode = ov_property_key_hint_model_distribution_policy; - const char* val_mode = "TENSOR_PARALLEL"; - OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode)); - ret = nullptr; - OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret)); - EXPECT_STREQ(val_mode, ret); - ov_free(ret); - - val_mode = "TENSOR_PARALLEL, NONE"; - OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_mode, val_mode)); - ret = nullptr; - OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_mode, &ret)); - EXPECT_STREQ(val_mode, ret); - 
ov_free(ret); - OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val)); ret = nullptr; OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret)); From eef60acf20a22675731e9336fd84d51715403a89 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 16:36:52 +0800 Subject: [PATCH 19/32] draft implementation for std::set value --- .../tests/test_runtime/test_properties.py | 2 +- .../include/openvino/runtime/properties.hpp | 4 +-- src/plugins/intel_cpu/src/compiled_model.cpp | 13 ++------- src/plugins/intel_cpu/src/config.cpp | 28 +++++-------------- src/plugins/intel_cpu/src/plugin.cpp | 13 ++------- .../custom/behavior/export_import.cpp | 6 ++-- .../behavior/ov_plugin/properties_tests.cpp | 2 +- 7 files changed, 19 insertions(+), 49 deletions(-) diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 80ecf47ba0ed89..accc7dd9e13fe8 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -289,7 +289,7 @@ def test_properties_ro(ov_property_ro, expected_value): ( hints.model_distribution_policy, "MODEL_DISTRIBUTION_POLICY", - ((hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.TENSOR_PARALLEL),), + (({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),), ), ( hints.enable_hyper_threading, diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 61cf50b0363553..debf176e7e3c63 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -442,10 +442,10 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& strea * The following code is an example to split node into two parts run one part per socket on dual sockets 
platform. * * @code - * ie.set_property(ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARTITION)); + * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARTITION})); * @endcode */ -static constexpr Property model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"}; +static constexpr Property> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"}; /** * @brief This property allows CPU pinning during inference. diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 71589a6a18b6ae..039b96c70f824c 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -250,17 +250,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const { const auto stream_mode = config.schedulingCoreType; return stream_mode; } else if (name == ov::hint::model_distribution_policy) { - std::string policy_str = ""; - if (config.modelDistributionPolicy.size() > 0) { - std::stringstream str_stream; - for (auto& row : config.modelDistributionPolicy) { - str_stream << row; - policy_str += str_stream.str() + ", "; - str_stream.str(""); - } - policy_str.erase(policy_str.length() - 2); - } - return policy_str; + const auto distribution_policy = config.modelDistributionPolicy; + return distribution_policy; } else if (name == ov::hint::enable_hyper_threading.name()) { const bool use_ht = config.enableHyperThreading; return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht); diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index f3d0df66cfb9ca..74226db8762860 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -219,30 +219,16 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { ov::hint::ModelDistributionPolicy::NONE); }; - std::vector para_vect = parse_multiple_parameters(val.as()); - if 
(para_vect.size() == 0) { - error_info(); - } - - ov::hint::ModelDistributionPolicy model_policy; - modelDistributionPolicy.clear(); - - for (auto& row : para_vect) { - std::stringstream str_stream; - try { - str_stream.str(row); - str_stream >> model_policy; - switch (model_policy) { - case ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL: - case ov::hint::ModelDistributionPolicy::NONE: - modelDistributionPolicy.emplace_back(model_policy); - break; - default: + try { + for (auto& row : val.as()) { + if ((row.as() != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) && + (row.as() != ov::hint::ModelDistributionPolicy::NONE)) { error_info(); } - } catch (ov::Exception&) { - error_info(); } + modelDistributionPolicy = val.as(); + } catch (ov::Exception&) { + error_info(); } } else if (key == ov::hint::enable_hyper_threading.name()) { try { diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 2ffb22c41dc381..e8d502fc0d3922 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -410,17 +410,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) const auto core_type = engConfig.schedulingCoreType; return core_type; } else if (name == ov::hint::model_distribution_policy) { - std::string policy_str = ""; - if (engConfig.modelDistributionPolicy.size() > 0) { - std::stringstream str_stream; - for (auto& row : engConfig.modelDistributionPolicy) { - str_stream << row; - policy_str += str_stream.str() + ", "; - str_stream.str(""); - } - policy_str.erase(policy_str.length() - 2); - } - return policy_str; + const auto distribution_policy = engConfig.modelDistributionPolicy; + return distribution_policy; } else if (name == ov::hint::enable_hyper_threading) { const bool ht_value = engConfig.enableHyperThreading; return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value); diff --git 
a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp index 3983fa33b37491..3455df47b5b6d6 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp @@ -110,8 +110,10 @@ const std::vector testing_property_for_scheduling_core_type = { {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}}; const std::vector testing_property_for_model_distribution_policy = { - {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::NONE)}, - {ov::hint::model_distribution_policy(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)}}; + {ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::NONE})}, + {ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL})}, + {ov::hint::model_distribution_policy( + {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, ov::hint::ModelDistributionPolicy::NONE})}}; const std::vector testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)}, {ov::hint::enable_hyper_threading(false)}}; diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp index d10914e299e0be..362d42a5c83663 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp @@ -341,7 +341,7 @@ std::vector OVPropertiesTestsWithCompileModelProps::getRWOptionalPro ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, ov::hint::ModelDistributionPolicy::NONE}; for (auto& modelDistributionPolicy : modelDistributionPolicys) { - res.push_back({ov::hint::model_distribution_policy(modelDistributionPolicy)}); + 
res.push_back({ov::hint::model_distribution_policy({modelDistributionPolicy})}); } } From 77f30a9a12747d0bf819be490f121d4fad32f558 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 18:40:02 +0800 Subject: [PATCH 20/32] update c++ implementation for std::set value --- src/core/include/openvino/core/any.hpp | 3 +++ src/plugins/intel_cpu/src/config.cpp | 15 +++++++------ src/plugins/intel_cpu/src/config.h | 2 +- .../custom/behavior/export_import.cpp | 7 ------ .../custom/behavior/ov_plugin/properties.cpp | 22 +++++++++++++++++++ .../behavior/ov_plugin/properties_tests.cpp | 10 --------- 6 files changed, 34 insertions(+), 25 deletions(-) diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp index ca0c86aa924062..9dd33f3c5a34d7 100644 --- a/src/core/include/openvino/core/any.hpp +++ b/src/core/include/openvino/core/any.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -949,6 +950,8 @@ using RTMap = AnyMap; using AnyVector = std::vector; +using AnySet = std::set; + /** @cond INTERNAL */ inline static void PrintTo(const Any& any, std::ostream* os) { any.print(*os); diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 74226db8762860..8774638e038947 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -213,20 +213,21 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { val.as(), "for property key ", ov::hint::model_distribution_policy.name(), - ". CPU plugin only support ", + ". 
CPU plugin only support {", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, - '/', - ov::hint::ModelDistributionPolicy::NONE); + '}/{', + ov::hint::ModelDistributionPolicy::NONE, + '}'); }; try { - for (auto& row : val.as()) { - if ((row.as() != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) && - (row.as() != ov::hint::ModelDistributionPolicy::NONE)) { + for (auto& row : val.as>()) { + if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) && + (row != ov::hint::ModelDistributionPolicy::NONE)) { error_info(); } } - modelDistributionPolicy = val.as(); + modelDistributionPolicy = val.as>(); } catch (ov::Exception&) { error_info(); } diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index acd81c4a7efc86..216bd7667fe444 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -76,7 +76,7 @@ struct Config { bool enableCpuPinning = true; bool changedCpuPinning = false; ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; - std::vector modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE}; + std::set modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE}; bool enableHyperThreading = true; bool changedHyperThreading = false; Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp index 3455df47b5b6d6..5fc89b979c261c 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp @@ -109,12 +109,6 @@ const std::vector testing_property_for_scheduling_core_type = { {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)}, {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}}; -const 
std::vector testing_property_for_model_distribution_policy = { - {ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::NONE})}, - {ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL})}, - {ov::hint::model_distribution_policy( - {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, ov::hint::ModelDistributionPolicy::NONE})}}; - const std::vector testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)}, {ov::hint::enable_hyper_threading(false)}}; @@ -128,7 +122,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest, testing_property_for_threads, testing_property_for_performance_mode, testing_property_for_scheduling_core_type, - testing_property_for_model_distribution_policy, testing_property_for_enable_hyper_threading, testing_property_for_enable_cpu_pinning))); diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index 11a95f5663c749..f9bbd19e94b676 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -108,6 +108,28 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigInferenceNumThreads) { ASSERT_EQ(num_threads, value); } +TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigModelDistributionPolicy) { + ov::Core ie; + std::set value = {ov::hint::ModelDistributionPolicy::NONE}; + std::set model_policy = {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}; + + ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy))); + ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy)); + ASSERT_EQ(model_policy, value); + + model_policy = {ov::hint::ModelDistributionPolicy::NONE}; + + ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy))); + 
ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy)); + ASSERT_EQ(model_policy, value); + + model_policy = {ov::hint::ModelDistributionPolicy::NONE, ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}; + + ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy))); + ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy)); + ASSERT_EQ(model_policy, value); +} + TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) { ov::Core ie; int32_t value = 0; diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp index 362d42a5c83663..9452690a596e55 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp @@ -335,16 +335,6 @@ std::vector OVPropertiesTestsWithCompileModelProps::getRWOptionalPro } } - if (props.empty() || - std::find(props.begin(), props.end(), ov::hint::model_distribution_policy.name()) != props.end()) { - ov::hint::ModelDistributionPolicy modelDistributionPolicys[] = { - ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, - ov::hint::ModelDistributionPolicy::NONE}; - for (auto& modelDistributionPolicy : modelDistributionPolicys) { - res.push_back({ov::hint::model_distribution_policy({modelDistributionPolicy})}); - } - } - if (props.empty() || std::find(props.begin(), props.end(), ov::enable_mmap.name()) != props.end()) { res.push_back({ov::enable_mmap(true)}); res.push_back({ov::enable_mmap(false)}); From 5269cac0a882fe8115b85c5c14303fc7b5c21c5c Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 18:41:42 +0800 Subject: [PATCH 21/32] update c++ implementation for std::set value --- src/core/include/openvino/core/any.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/core/include/openvino/core/any.hpp 
b/src/core/include/openvino/core/any.hpp index 9dd33f3c5a34d7..59cf8a4c4ced04 100644 --- a/src/core/include/openvino/core/any.hpp +++ b/src/core/include/openvino/core/any.hpp @@ -950,8 +950,6 @@ using RTMap = AnyMap; using AnyVector = std::vector; -using AnySet = std::set; - /** @cond INTERNAL */ inline static void PrintTo(const Any& any, std::ostream* os) { any.print(*os); From 018eabb88280c5c7217a8437d9887a754a5cc9db Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 18:42:42 +0800 Subject: [PATCH 22/32] update c++ implementation for std::set value --- src/core/include/openvino/core/any.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp index 59cf8a4c4ced04..ca0c86aa924062 100644 --- a/src/core/include/openvino/core/any.hpp +++ b/src/core/include/openvino/core/any.hpp @@ -10,7 +10,6 @@ #include #include -#include #include #include #include From 922554c12489a9bf8569865c0c87e784c9a8c5c1 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 23:14:48 +0800 Subject: [PATCH 23/32] remove unused function --- src/plugins/intel_cpu/src/config.cpp | 16 ---------------- src/plugins/intel_cpu/src/config.h | 2 -- 2 files changed, 18 deletions(-) diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 8774638e038947..f7744738a31013 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -24,22 +24,6 @@ namespace intel_cpu { using namespace ov::threading; using namespace dnnl::impl::cpu::x64; -std::vector parse_multiple_parameters(const std::string& inputs, const char separator) { - std::vector parameters; - std::string::size_type pos = 0; - std::string::size_type endpos = 0; - while ((endpos = inputs.find(separator, pos)) != std::string::npos) { - auto substr = inputs.substr(pos, endpos - pos); - if (!substr.empty()) - parameters.push_back(substr); - pos = endpos + 1; - } - auto 
substr = inputs.substr(pos, inputs.length() - pos); - if (!substr.empty()) - parameters.push_back(substr); - return parameters; -} - Config::Config() { // this is default mode #if defined(__APPLE__) || defined(_WIN32) diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 216bd7667fe444..ef069865875fa1 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -113,7 +113,5 @@ struct Config { #endif }; -std::vector parse_multiple_parameters(const std::string& inputs, const char separator = ','); - } // namespace intel_cpu } // namespace ov From 828e583ceeea9f91f45426c26cad684be87c938d Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 23:24:39 +0800 Subject: [PATCH 24/32] update python --- src/bindings/python/src/pyopenvino/utils/utils.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index d21e2a1fef0a23..ffbcf3e4ac730f 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -176,8 +177,8 @@ py::object from_ov_any(const ov::Any& any) { return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); - } else if (any.is()) { - return py::cast(any.as()); + } else if (any.is>()) { + return py::cast(any.as>()); } else if (any.is()) { return py::cast(any.as()); } else if (any.is()) { @@ -377,8 +378,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); - } else if (py::isinstance(py_obj)) { - return py::cast(py_obj); + } else if (py::isinstance>(py_obj)) { + return py::cast>(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { From 
73ce7576145795d9f5b846eea46368615eb0ef82 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 23:29:25 +0800 Subject: [PATCH 25/32] update python test case --- src/bindings/python/tests/test_runtime/test_properties.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index accc7dd9e13fe8..efef8d52d15338 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -289,7 +289,11 @@ def test_properties_ro(ov_property_ro, expected_value): ( hints.model_distribution_policy, "MODEL_DISTRIBUTION_POLICY", - (({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}),), + ( + ({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}), + ({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}), + ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}), + ), ), ( hints.enable_hyper_threading, From d18568e21b44b44f7e5423a7193122f22c365511 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 23:35:44 +0800 Subject: [PATCH 26/32] update python code style --- src/bindings/python/tests/test_runtime/test_properties.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index efef8d52d15338..54d459cd971c81 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -292,7 +292,8 @@ def test_properties_ro(ov_property_ro, expected_value): ( ({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, 
{hints.ModelDistributionPolicy.TENSOR_PARALLEL}), ({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}), - ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}), + ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, + {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}), ), ), ( From 988cb56d19f2f096131ba2a7895898c9e7723356 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 20 Mar 2024 23:41:54 +0800 Subject: [PATCH 27/32] update python code style --- src/bindings/python/tests/test_runtime/test_properties.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 54d459cd971c81..fcffc2fb193295 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -292,7 +292,7 @@ def test_properties_ro(ov_property_ro, expected_value): ( ({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}), ({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}), - ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, + ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}), ), ), From e9d25905e6c18925dcfb84453293d5abe84ee668 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Thu, 21 Mar 2024 00:03:10 +0800 Subject: [PATCH 28/32] update code style --- src/plugins/intel_cpu/src/config.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 
f7744738a31013..3e86f7e527868a 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -184,12 +184,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { val.as(), "for property key ", ov::hint::scheduling_core_type.name(), - ". Expected only ", - ov::hint::SchedulingCoreType::ANY_CORE, - '/', - ov::hint::SchedulingCoreType::PCORE_ONLY, - '/', - ov::hint::SchedulingCoreType::ECORE_ONLY); + ". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY"); } } else if (key == ov::hint::model_distribution_policy.name()) { auto error_info = [&]() { @@ -197,11 +192,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { val.as(), "for property key ", ov::hint::model_distribution_policy.name(), - ". CPU plugin only support {", - ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL, - '}/{', - ov::hint::ModelDistributionPolicy::NONE, - '}'); + ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL/NONE}"); }; try { From 9db450065db58262b23a7a29d9ecf200b8712e63 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Thu, 21 Mar 2024 16:20:11 +0800 Subject: [PATCH 29/32] update for comments --- .../pyopenvino/core/properties/properties.cpp | 1 - .../tests/test_runtime/test_properties.py | 6 +---- .../include/openvino/runtime/properties.hpp | 24 +++++++++---------- src/plugins/intel_cpu/src/config.cpp | 3 +-- src/plugins/intel_cpu/src/config.h | 2 +- .../custom/behavior/ov_plugin/properties.cpp | 10 ++------ 6 files changed, 16 insertions(+), 30 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 6310aac026e8c0..f1edeaa18ff1ef 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -72,7 +72,6 @@ void regmodule_properties(py::module 
m) { .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY); py::enum_(m_hint, "ModelDistributionPolicy", py::arithmetic()) - .value("NONE", ov::hint::ModelDistributionPolicy::NONE) .value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL); py::enum_(m_hint, "ExecutionMode", py::arithmetic()) diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index fcffc2fb193295..ffa3cabcf88b29 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -89,8 +89,7 @@ def test_properties_rw_base(): ( hints.ModelDistributionPolicy, ( - (hints.ModelDistributionPolicy.NONE, "ModelDistributionPolicy.NONE", 0), - (hints.ModelDistributionPolicy.TENSOR_PARALLEL, "ModelDistributionPolicy.TENSOR_PARALLEL", 1), + (hints.ModelDistributionPolicy.TENSOR_PARALLEL, "ModelDistributionPolicy.TENSOR_PARALLEL", 0), ), ), ( @@ -291,9 +290,6 @@ def test_properties_ro(ov_property_ro, expected_value): "MODEL_DISTRIBUTION_POLICY", ( ({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}), - ({hints.ModelDistributionPolicy.NONE}, {hints.ModelDistributionPolicy.NONE}), - ({hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}, - {hints.ModelDistributionPolicy.TENSOR_PARALLEL, hints.ModelDistributionPolicy.NONE}), ), ), ( diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index debf176e7e3c63..654131d8a49217 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -400,15 +400,14 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; enum class ModelDistributionPolicy { - NONE = 0, // Run one model on single 
socket/device without parallelism. - TENSOR_PARALLEL = 1, // Split one node or subgraph into parts and run one part per socket/device in parallel. + TENSOR_PARALLEL = 0, // Split tensor into several parts and disribute them between sockets/devices during model + // compilation. At inference time sockets/devices process tensors in parallel and do + // syncronization at the end ensuring mathematical correctness. }; /** @cond INTERNAL */ inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& stream_mode) { switch (stream_mode) { - case ModelDistributionPolicy::NONE: - return os << "NONE"; case ModelDistributionPolicy::TENSOR_PARALLEL: return os << "TENSOR_PARALLEL"; default: @@ -419,9 +418,7 @@ inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) { std::string str; is >> str; - if (str == "NONE") { - stream_mode = ModelDistributionPolicy::NONE; - } else if (str == "TENSOR_PARALLEL") { + if (str == "TENSOR_PARALLEL") { stream_mode = ModelDistributionPolicy::TENSOR_PARALLEL; } else { OPENVINO_THROW("Unsupported model distribution policy: ", str); @@ -434,15 +431,16 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& strea * @brief This property defines model distribution policy for inference with multiple sockets/devices. * @ingroup ov_runtime_cpp_prop_api * - * Developer can use this property to select model distribution policy for CPU inference with multiple sockets - * platform or GPU inference with multiple GPU devices. - * -- TENSOR_PARALLEL : Split one node or subgraph into parts and run one part per socket/device in parallel. - * -- NONE : Run one model on single socket/device without parallelism. + * This property can be used to select model distribution policy between execution units (e.g. between CPU sockets/NUMA + * nodes or between different GPUs). 
+ * -- TENSOR_PARALLEL : Split tensor into several parts and disribute them between sockets/devices during model + * compilation. At inference time sockets/devices process tensors in parallel and do syncronization + * at the end ensuring mathematical correctness. * - * The following code is an example to split node into two parts run one part per socket on dual sockets platform. + * The following code is an example how TENSOR_PARALLEL model disrtibution policy might be enabled. * * @code - * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARTITION})); + * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL})); * @endcode */ static constexpr Property> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"}; diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 3e86f7e527868a..b235281c3fca3b 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -197,8 +197,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { try { for (auto& row : val.as>()) { - if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) && - (row != ov::hint::ModelDistributionPolicy::NONE)) { + if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) { error_info(); } } diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index ef069865875fa1..10d7274dc66f7c 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -76,7 +76,7 @@ struct Config { bool enableCpuPinning = true; bool changedCpuPinning = false; ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; - std::set modelDistributionPolicy = {ov::hint::ModelDistributionPolicy::NONE}; + std::set modelDistributionPolicy = {}; bool enableHyperThreading = true; bool changedHyperThreading = false; Config::LatencyThreadingMode 
latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index f9bbd19e94b676..1b29347d6c0605 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -110,20 +110,14 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigInferenceNumThreads) { TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigModelDistributionPolicy) { ov::Core ie; - std::set value = {ov::hint::ModelDistributionPolicy::NONE}; + std::set value = {}; std::set model_policy = {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}; ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy))); ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy)); ASSERT_EQ(model_policy, value); - model_policy = {ov::hint::ModelDistributionPolicy::NONE}; - - ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy))); - ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy)); - ASSERT_EQ(model_policy, value); - - model_policy = {ov::hint::ModelDistributionPolicy::NONE, ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}; + model_policy = {}; ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy))); ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy)); From c77970151c93453214cb412cc83eba61af9ae6f4 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Thu, 21 Mar 2024 16:28:19 +0800 Subject: [PATCH 30/32] update for typo --- src/bindings/python/tests/test_runtime/test_properties.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bindings/python/tests/test_runtime/test_properties.py 
b/src/bindings/python/tests/test_runtime/test_properties.py index ffa3cabcf88b29..d4ad725679a351 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -554,6 +554,7 @@ def test_single_property_setting(device): props.affinity: "NONE", "INFERENCE_PRECISION_HINT": Type.f32, hints.performance_mode: hints.PerformanceMode.LATENCY, + hints.scheduling_core_type: hints.SchedulingCoreType.PCORE_ONLY, hints.num_requests: 12, "NUM_STREAMS": streams.Num(5), "ENABLE_MMAP": False, From 0ae8b3ec021f837644547e9116f607a36d28bb95 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Thu, 21 Mar 2024 16:59:53 +0800 Subject: [PATCH 31/32] remove value NONE for ModelDistributionPolicy --- src/plugins/intel_cpu/src/config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index b235281c3fca3b..8567914415e459 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -192,7 +192,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { val.as(), "for property key ", ov::hint::model_distribution_policy.name(), - ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL/NONE}"); + ". 
CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}"); }; try { From 88d9929bb3c39874e6cd8c0ed4924416946c75c5 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Thu, 21 Mar 2024 22:49:07 +0800 Subject: [PATCH 32/32] fix typo --- src/inference/include/openvino/runtime/properties.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 654131d8a49217..2ddd8702eb87fd 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -400,7 +400,7 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; enum class ModelDistributionPolicy { - TENSOR_PARALLEL = 0, // Split tensor into several parts and disribute them between sockets/devices during model + TENSOR_PARALLEL = 0, // Split tensor into several parts and distribute them between sockets/devices during model // compilation. At inference time sockets/devices process tensors in parallel and do // syncronization at the end ensuring mathematical correctness. }; @@ -433,7 +433,7 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& strea * * This property can be used to select model distribution policy between execution units (e.g. between CPU sockets/NUMA * nodes or between different GPUs). - * -- TENSOR_PARALLEL : Split tensor into several parts and disribute them between sockets/devices during model + * -- TENSOR_PARALLEL : Split tensor into several parts and distribute them between sockets/devices during model * compilation. At inference time sockets/devices process tensors in parallel and do syncronization * at the end ensuring mathematical correctness. *