diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
index 5d6c6da22eb994..67dce9621bfb4e 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
@@ -30,8 +30,8 @@ static constexpr ov::Property<std::string> devices{"NPUW_DEVICES"};
  * @brief
  * Type: std::string.
  * Force the specific subgraph to specific device. The device must be present in the NPUW_DEVICES list.
- * Possible values: Comma-separated "Subgraph index:OpenVINO device name" pairs,
- * e.g. "0:CPU,1:NPU".
+ * Possible values: Comma-separated "Subgraph index:OpenVINO device name" pairs; the "last" keyword
+ * can be used for the last subgraph, e.g. "0:CPU,1:NPU,last:CPU".
  * Default value: empty.
  */
 static constexpr ov::Property<std::string> submodel_device{"NPUW_SUBMODEL_DEVICE"};
@@ -323,7 +323,9 @@ static constexpr ov::Property<std::string> full{"NPUW_DUMP_FULL"};
  * Type: std::string.
  * Dump the specified subgraph(s) in OpenVINO IR form in the current directory.
  * Possible values: Comma-separated list of subgraph indices or "YES" for all
- * subgraphs, "NO" or just empty value to turn option off. E.g. "0,1" or "YES".
+ * subgraphs, "NO" or just empty value to turn the option off. The "last" keyword
+ * can be used to dump the last subgraph without specifying its index.
+ * E.g. "0,1" or "0,1,last" or "YES".
  * Default value: empty.
  */
 static constexpr ov::Property<std::string> subgraphs{"NPUW_DUMP_SUBS"};
@@ -333,7 +335,8 @@ static constexpr ov::Property<std::string> subgraphs{"NPUW_DUMP_SUBS"};
  * Type: std::string.
  * Dump subgraph on disk if a compilation failure happens.
  * Possible values: Comma-separated list of subgraph indices or "YES" for all
- * subgraphs, "NO" or just empty value to turn option off. E.g. "0,1" or "YES".
+ * subgraphs, "NO" or just empty value to turn the option off. The "last" keyword
+ * can be used to dump the last subgraph. E.g. "0,1" or "0,1,last" or "YES".
  * Default value: empty.
  */
 static constexpr ov::Property<std::string> subgraphs_on_fail{"NPUW_DUMP_SUBS_ON_FAIL"};
@@ -343,7 +346,8 @@ static constexpr ov::Property<std::string> subgraphs_on_fail{"NPUW_DUMP_SUBS_ON_FAIL"};
  * Type: std::string.
  * Dump input & output tensors for subgraph(s).
  * Possible values: Comma-separated list of subgraph indices or "YES" for all
- * subgraphs, "NO" or just empty value to turn option off. E.g. "0,1" or "YES".
+ * subgraphs, "NO" or just empty value to turn the option off. The "last" keyword
+ * can be used for the last subgraph. E.g. "0,1" or "0,1,last" or "YES".
  * Default value: empty.
  */
 static constexpr ov::Property<std::string> inputs_outputs{"NPUW_DUMP_IO"};
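For context, a minimal sketch of how a caller might exercise the new keyword through these properties. The property names and value formats come from the header above; the `NPU_USE_NPUW` switch and the model path are assumptions for illustration, not part of this patch.

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // hypothetical model path

    ov::AnyMap config{
        {"NPU_USE_NPUW", "YES"},                     // assumed switch routing compilation through NPUW
        {"NPUW_DEVICES", "NPU,CPU"},
        {"NPUW_SUBMODEL_DEVICE", "0:NPU,last:CPU"},  // pin subgraph 0 to NPU, the tail subgraph to CPU
        {"NPUW_DUMP_SUBS", "0,last"},                // dump the first and the last subgraphs as IR
    };
    auto compiled = core.compile_model(model, "NPU", config);
    return 0;
}
```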
diff --git a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp
index 02422e8c72a199..216b1a35b4315c 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp
@@ -211,7 +211,8 @@ void ov::npuw::IBaseInferRequest::infer() {
 
 void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) {
     const std::string dump_ios_opt = m_npuw_model->m_cfg.get<::intel_npu::NPUW_DUMP_IO>();
-    if (!ov::npuw::util::is_set(idx, dump_ios_opt)) {
+    const std::size_t end_idx = m_npuw_model->m_compiled_submodels.size();
+    if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx)) {
         return;
     }
 
@@ -288,7 +289,8 @@ void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) {
 
 void ov::npuw::IBaseInferRequest::dump_output_tensors(std::size_t idx) {
     const std::string dump_ios_opt = m_npuw_model->m_cfg.get<::intel_npu::NPUW_DUMP_IO>();
-    if (!ov::npuw::util::is_set(idx, dump_ios_opt)) {
+    const std::size_t end_idx = m_npuw_model->m_compiled_submodels.size();
+    if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx)) {
         return;
     }
 
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 69d68e020b887b..4bf92588e0ebbd 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -5,6 +5,7 @@
 
 #include <iostream>
 #include <memory>
+#include <string>
 
 #include "accuracy/comparator.hpp"
 #include "intel_npu/npu_private_properties.hpp"
@@ -256,6 +257,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
     // - dump the subgraphs, if necessary
     std::map<std::string, std::size_t> compiledFunctions;
     m_compiled_submodels.resize(orderedSubgraphs.size());
+    const std::size_t end_sub_idx = orderedSubgraphs.size();
 
     const std::string dump_sub_opt = m_cfg.get<::intel_npu::NPUW_DUMP_SUBS>();
 
@@ -323,7 +325,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
             fill_empty_tensor_names(m_compiled_submodels[real_id].model);
         }
 
-        if (ov::npuw::util::is_set(id, dump_sub_opt)) {
+        if (ov::npuw::util::is_set(id, dump_sub_opt, end_sub_idx)) {
             LOG_INFO("Dumping Subgraph[" << id << "]");
             LOG_BLOCK();
             if (real_id != id) {
@@ -340,7 +342,14 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
         }
     }  // for(orderedSubgraphs)
 
     std::map<std::size_t, std::string> forced_sub_devices{};
-    const std::string fsd_opt = m_cfg.get<::intel_npu::NPUW_SUBMODEL_DEVICE>();
+    std::string fsd_opt = m_cfg.get<::intel_npu::NPUW_SUBMODEL_DEVICE>();
+    // Replace the "last" keyword with the tail subgraph index
+    std::size_t last_pos = fsd_opt.find("last");
+    if (last_pos != std::string::npos) {
+        fsd_opt.erase(last_pos, 4);
+        fsd_opt.insert(last_pos, std::to_string(end_sub_idx - 1));
+    }
+
     forced_sub_devices = ::intel_npu ::OptionParser<std::map<std::size_t, std::string>>::parse(fsd_opt);
 
     // Exclude optimized out subgraphs from compilation target beforehand - otherwise we might get head and repeated
@@ -694,8 +703,9 @@ ov::SoPtr<ov::ICompiledModel> ov::npuw::CompiledModel::compile_submodel(const st
 
 void ov::npuw::CompiledModel::dump_on_fail(std::size_t id, const std::string& device_to_try, const char* extra) {
     const std::string dof_opt = m_cfg.get<::intel_npu::NPUW_DUMP_SUBS_ON_FAIL>();
+    const std::size_t end_idx = m_compiled_submodels.size();
 
-    if (ov::npuw::util::is_set(id, dof_opt)) {
+    if (ov::npuw::util::is_set(id, dof_opt, end_idx)) {
         ov::npuw::dump_failure(m_compiled_submodels[id].model, device_to_try, extra);
     }
 }
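The `NPUW_SUBMODEL_DEVICE` handling above rewrites only the first occurrence of "last" in the option string, turning it into the numeric index of the tail subgraph (`size - 1`, since subgraph indices are zero-based) before the regular parser runs. A standalone sketch of that substitution; the helper name is illustrative and not part of the patch, and `std::string::replace` collapses the `erase`+`insert` pair into one call with the same effect:

```cpp
#include <cstddef>
#include <string>

// Rewrite the first "last" occurrence into the tail subgraph index,
// leaving the rest of the comma-separated option string untouched.
std::string expand_last_keyword(std::string opt, std::size_t num_subgraphs) {
    const std::size_t pos = opt.find("last");
    if (pos != std::string::npos && num_subgraphs > 0) {
        opt.replace(pos, 4, std::to_string(num_subgraphs - 1));  // tail index == size - 1
    }
    return opt;
}

// e.g. expand_last_keyword("0:NPU,last:CPU", 5) yields "0:NPU,4:CPU"
```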
diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.cpp b/src/plugins/intel_npu/src/plugin/npuw/util.cpp
index dfef4defb48c04..e9deb34ee2ded7 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/util.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/util.cpp
@@ -17,7 +17,7 @@
 #include "openvino/runtime/make_tensor.hpp"  // get_tensor_impl
 #include "util_xarch.hpp"
 
-bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt) {
+bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt, const std::size_t end_idx) {
     if (opt.empty() || opt == "NO") {
         return false;
     }
@@ -25,12 +25,20 @@ bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt) {
     if (opt == "YES") {
         return true;
     }
 
+    std::string str(opt);
+    std::size_t last_pos = str.find("last");
+    if (last_pos != std::string::npos) {
+        str.erase(last_pos, 4);
+        if (end_idx != SIZE_MAX && sub_idx == end_idx - 1) {
+            return true;
+        }
+    }
+
     std::vector<std::size_t> sub_inds{};
-    sub_inds = ::intel_npu ::OptionParser<std::vector<std::size_t>>::parse(opt);
+    sub_inds = ::intel_npu ::OptionParser<std::vector<std::size_t>>::parse(str);
     if (std::find(sub_inds.begin(), sub_inds.end(), sub_idx) != sub_inds.end()) {
         return true;
     }
-
     return false;
 }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.hpp b/src/plugins/intel_npu/src/plugin/npuw/util.hpp
index a826d00e032977..c7b54ee89fd010 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/util.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/util.hpp
@@ -15,7 +15,7 @@ namespace ov {
 namespace npuw {
 namespace util {
 
-bool is_set(const std::size_t sub_idx, const std::string& opt);
+bool is_set(const std::size_t sub_idx, const std::string& opt, const std::size_t end_idx = SIZE_MAX);
 
 // Every great project has its own string class...
 // NB: Newer C++ standards would allow to use string views or smt
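To summarize the extended `is_set()` semantics: the "last" keyword is erased from the option string (so the remaining comma-separated indices still parse cleanly, e.g. "0,1,last" becomes "0,1,"), and the query matches if the requested index is the tail one (`end_idx - 1`), with `SIZE_MAX` as the "no tail known" default. A self-contained sketch for illustration only; the real code delegates parsing to `::intel_npu::OptionParser`, for which a plain comma-split with `std::stoul` stands in here:

```cpp
#include <algorithm>
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

bool is_set_sketch(std::size_t sub_idx, const std::string& opt, std::size_t end_idx = SIZE_MAX) {
    if (opt.empty() || opt == "NO")
        return false;
    if (opt == "YES")
        return true;

    std::string str(opt);
    const std::size_t last_pos = str.find("last");
    if (last_pos != std::string::npos) {
        str.erase(last_pos, 4);  // drop the keyword so numeric parsing below still works
        if (end_idx != SIZE_MAX && sub_idx == end_idx - 1)
            return true;
    }

    // Stand-in for ::intel_npu::OptionParser<std::vector<std::size_t>>::parse()
    std::vector<std::size_t> inds;
    std::stringstream ss(str);
    for (std::string tok; std::getline(ss, tok, ',');) {
        if (!tok.empty())
            inds.push_back(std::stoul(tok));
    }
    return std::find(inds.begin(), inds.end(), sub_idx) != inds.end();
}

// e.g. is_set_sketch(4, "0,last", 5) and is_set_sketch(0, "0,last", 5) both
// hold, while is_set_sketch(2, "0,last", 5) does not.
```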