@@ -188,15 +188,20 @@ ov::SoPtr<ov::ITensor> ov::npuw::IBaseInferRequest::get_tensor(const ov::Output<
188188
// Diff hunk for IBaseInferRequest::set_tensor(). Lines carrying a minus marker are the
// removed (pre-change) code; lines carrying a plus marker are the added (post-change) code.
//
// Post-change flow, as visible in this hunk:
//   1. If the port has no entry in m_port_to_tensor (is_stored() is false), create one
//      from the given tensor; otherwise overwrite the stored tensor and set
//      set_from_outside on the existing entry.
//   2. If is_io(port) holds, mark the entry as persistent.
//   3. If the entry is persistent, forward the port and tensor to handle_set_remote_input().
//
// @param port    Port whose tensor is being set.
// @param tensor  Tensor to associate with the port.
189189void ov::npuw::IBaseInferRequest::set_tensor (const ov::Output<const ov::Node>& port,
190190 const ov::SoPtr<ov::ITensor>& tensor) {
191- if (is_not_stored_io (port)) {
192- m_port_to_tensor[port] = TensorStorage{tensor, true , false , true };
// NOTE(review): TensorStorage is initialized positionally and its definition is not
// visible here. The second initializer changes from true to false in this hunk while
// persistent is now assigned separately below, so it presumably is the persistent
// field, and the fourth presumably is set_from_outside - confirm against the struct.
// Also note that is_stored() carries no assertion, whereas the removed
// is_not_stored_io() asserted that an unstored port is a model input or output.
191+ if (!is_stored (port)) {
192+ // TODO: might be useful to check if the tensor is allocated on the device
193+ m_port_to_tensor[port] = TensorStorage{tensor, false , false , true };
193194 } else {
// Existing entry: replace the tensor. The added line below sets set_from_outside on
// every such call; the removed line further down did so only for persistent entries.
194195 m_port_to_tensor.at (port).tensor = tensor;
196+ m_port_to_tensor.at (port).set_from_outside = true ;
197+ }
198+
// Added: persistence is now derived from is_io(port) on every call instead of being
// passed through the initializer above.
199+ if (is_io (port)) {
200+ m_port_to_tensor.at (port).persistent = true ;
195201 }
196202
// NOTE(review): is_io() accepts both model inputs and model outputs, so this condition
// is broader than the input-only wording of the comment below; whether
// handle_set_remote_input() filters out outputs cannot be seen from here - confirm.
197203 // Check if setting input tensor
198204 if (m_port_to_tensor.at (port).persistent ) {
199- m_port_to_tensor.at (port).set_from_outside = true ;
200205 handle_set_remote_input (port, tensor);
201206 }
202207}
@@ -217,25 +222,20 @@ void ov::npuw::IBaseInferRequest::check_tensors() const {
217222 return ;
218223}
219224
// Diff hunk that replaces is_not_stored_io() (lines carrying a minus marker) with two
// independent predicates, is_stored() and is_io() (lines carrying a plus marker).
//
// Removed helper: returned true only when the port had no entry in m_port_to_tensor,
// and in that case asserted through NPUW_ASSERT that the port equals one of the model
// inputs or outputs; it returned false when an entry already existed.
// The added predicates perform no assertion; each answers a single question.
220- bool ov::npuw::IBaseInferRequest::is_not_stored_io (const ov::Output<const ov::Node>& port) const {
221- // Due to lazy I/O allocation we need to create stored object here
222- if (m_port_to_tensor.find (port) == m_port_to_tensor.end ()) {
223- // Only I/O set_tensor() is allowed for this class - check it
224- bool is_io = false ;
225- for (std::size_t i = 0 ; i < m_npuw_model->inputs ().size (); ++i) {
226- if (m_npuw_model->inputs ()[i] == port) {
227- is_io = true ;
228- break ;
229- }
// is_stored(): returns true when m_port_to_tensor already holds an entry for the port.
// Note the polarity is the opposite of the removed helper, which is why the call sites
// changed in this commit swap the two branches of their conditional expressions.
225+ bool ov::npuw::IBaseInferRequest::is_stored (const ov::Output<const ov::Node>& port) const {
226+ return m_port_to_tensor.find (port) != m_port_to_tensor.end ();
227+ }
228+
// is_io(): returns true when the port compares equal to an element of
// m_npuw_model->inputs() or, failing that, of m_npuw_model->outputs(); returns false
// when neither list contains it. Inputs are scanned before outputs, and the scan stops
// at the first match.
229+ bool ov::npuw::IBaseInferRequest::is_io (const ov::Output<const ov::Node>& port) const {
230+ for (std::size_t i = 0 ; i < m_npuw_model->inputs ().size (); ++i) {
231+ if (m_npuw_model->inputs ()[i] == port) {
232+ return true ;
230233 }
231- for (std::size_t i = 0 ; i < m_npuw_model->outputs ().size (); ++i) {
232- if (m_npuw_model->outputs ()[i] == port) {
233- is_io = true ;
234- break ;
235- }
234+ }
235+ for (std::size_t i = 0 ; i < m_npuw_model->outputs ().size (); ++i) {
236+ if (m_npuw_model->outputs ()[i] == port) {
237+ return true ;
236238 }
237- NPUW_ASSERT (is_io && " Only I/O tensors might be left unset at this point. Internal error!" );
238- return true ;
239239 }
240240 return false ;
241241}
@@ -551,7 +551,7 @@ void ov::npuw::IBaseInferRequest::bind_global_params(std::size_t idx, RqPtr requ
551551 LOG_DEBUG (" Processing " << param_idx << " -> " << sub_in_idx << std::endl);
552552
553553 const auto & g_port = m_npuw_model->inputs ()[param_idx];
554- const auto & g_tnsr = is_not_stored_io (g_port) ? get_tensor (g_port) : m_port_to_tensor. at (g_port). tensor ;
554+ const auto & g_tnsr = is_stored (g_port) ? m_port_to_tensor. at (g_port). tensor : get_tensor (g_port);
555555 const auto & s_port = request->get_inputs ()[sub_in_idx];
556556 LOG_DEBUG (" Processing " << g_port << " -> " << s_port << " ..." );
557557 LOG_BLOCK ();
@@ -770,7 +770,7 @@ void ov::npuw::IBaseInferRequest::bind_global_results(std::size_t idx, RqPtr req
770770 std::tie (result_idx, sub_out_idx) = it;
771771 const auto & g_port = m_npuw_model->outputs ()[result_idx];
772772 const auto & s_port = request->get_outputs ()[sub_out_idx];
773- request->set_tensor (s_port, is_not_stored_io (g_port) ? get_tensor (g_port) : m_port_to_tensor. at (g_port). tensor );
773+ request->set_tensor (s_port, is_stored (g_port) ? m_port_to_tensor. at (g_port). tensor : get_tensor (g_port));
774774 }
775775
776776 LOG_DEBUG (" Done" );
0 commit comments