
Commit bd27bbc

Address review comments
1 parent: f6fbf64

3 files changed: +28 additions, -27 deletions

src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp

Lines changed: 22 additions & 22 deletions
```diff
@@ -188,15 +188,20 @@ ov::SoPtr<ov::ITensor> ov::npuw::IBaseInferRequest::get_tensor(const ov::Output<
 
 void ov::npuw::IBaseInferRequest::set_tensor(const ov::Output<const ov::Node>& port,
                                              const ov::SoPtr<ov::ITensor>& tensor) {
-    if (is_not_stored_io(port)) {
-        m_port_to_tensor[port] = TensorStorage{tensor, true, false, true};
+    if (!is_stored(port)) {
+        // TODO: might be useful to check if the tensor is allocated on the device
+        m_port_to_tensor[port] = TensorStorage{tensor, false, false, true};
     } else {
         m_port_to_tensor.at(port).tensor = tensor;
+        m_port_to_tensor.at(port).set_from_outside = true;
+    }
+
+    if (is_io(port)) {
+        m_port_to_tensor.at(port).persistent = true;
     }
 
     // Check if setting input tensor
     if (m_port_to_tensor.at(port).persistent) {
-        m_port_to_tensor.at(port).set_from_outside = true;
         handle_set_remote_input(port, tensor);
     }
 }
```
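
The shape of this change: the old single check is_not_stored_io() conflated "no tensor stored yet" with "port is I/O", while the new set_tensor() first stores or updates the entry, then marks I/O ports persistent in a separate step. To help decode the positional initializer TensorStorage{tensor, false, false, true}, here is a minimal sketch of a storage struct consistent with this hunk. Only tensor, persistent, and set_from_outside are named in the diff, so the third flag's name and the exact field order are assumptions:

```cpp
// Hypothetical sketch only: the field order and the name of the third flag
// are inferred from TensorStorage{tensor, false, false, true} and the
// .persistent / .set_from_outside assignments above; consult the plugin
// headers for the real definition.
struct TensorStorage {
    ov::SoPtr<ov::ITensor> tensor;  // tensor currently bound to the port
    bool persistent = false;        // kept alive across runs; set for I/O ports
    bool allocated = false;         // assumed name; stays false in this commit
    bool set_from_outside = false;  // true once set_tensor() provided the tensor
};
```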
```diff
@@ -217,25 +222,20 @@ void ov::npuw::IBaseInferRequest::check_tensors() const {
     return;
 }
 
-bool ov::npuw::IBaseInferRequest::is_not_stored_io(const ov::Output<const ov::Node>& port) const {
-    // Due to lazy I/O allocation we need to create stored object here
-    if (m_port_to_tensor.find(port) == m_port_to_tensor.end()) {
-        // Only I/O set_tensor() is allowed for this class - check it
-        bool is_io = false;
-        for (std::size_t i = 0; i < m_npuw_model->inputs().size(); ++i) {
-            if (m_npuw_model->inputs()[i] == port) {
-                is_io = true;
-                break;
-            }
+bool ov::npuw::IBaseInferRequest::is_stored(const ov::Output<const ov::Node>& port) const {
+    return m_port_to_tensor.find(port) != m_port_to_tensor.end();
+}
+
+bool ov::npuw::IBaseInferRequest::is_io(const ov::Output<const ov::Node>& port) const {
+    for (std::size_t i = 0; i < m_npuw_model->inputs().size(); ++i) {
+        if (m_npuw_model->inputs()[i] == port) {
+            return true;
         }
-        for (std::size_t i = 0; i < m_npuw_model->outputs().size(); ++i) {
-            if (m_npuw_model->outputs()[i] == port) {
-                is_io = true;
-                break;
-            }
+    }
+    for (std::size_t i = 0; i < m_npuw_model->outputs().size(); ++i) {
+        if (m_npuw_model->outputs()[i] == port) {
+            return true;
         }
-        NPUW_ASSERT(is_io && "Only I/O tensors might be left unset at this point. Internal error!");
-        return true;
     }
     return false;
 }
```
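
The refactoring splits one hard-to-name predicate into two single-purpose helpers: is_stored() is a plain map lookup, and is_io() is a linear scan over the compiled model's ports. The NPUW_ASSERT that used to live inside is_not_stored_io() moves to the call site that needs it (see just_sync_infer_request.cpp below). As a design aside, the scan could equally be written with <algorithm>; a sketch only, since the commit keeps index loops in the file's existing style:

```cpp
#include <algorithm>

// Equivalent is_io() using std::any_of (a sketch, not part of the commit).
bool ov::npuw::IBaseInferRequest::is_io(const ov::Output<const ov::Node>& port) const {
    const auto same = [&port](const auto& p) { return p == port; };
    const auto& ins = m_npuw_model->inputs();
    const auto& outs = m_npuw_model->outputs();
    return std::any_of(ins.begin(), ins.end(), same) ||
           std::any_of(outs.begin(), outs.end(), same);
}
```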
```diff
@@ -551,7 +551,7 @@ void ov::npuw::IBaseInferRequest::bind_global_params(std::size_t idx, RqPtr requ
         LOG_DEBUG("Processing " << param_idx << " -> " << sub_in_idx << std::endl);
 
         const auto& g_port = m_npuw_model->inputs()[param_idx];
-        const auto& g_tnsr = is_not_stored_io(g_port) ? get_tensor(g_port) : m_port_to_tensor.at(g_port).tensor;
+        const auto& g_tnsr = is_stored(g_port) ? m_port_to_tensor.at(g_port).tensor : get_tensor(g_port);
         const auto& s_port = request->get_inputs()[sub_in_idx];
         LOG_DEBUG("Processing " << g_port << " -> " << s_port << "...");
         LOG_BLOCK();
@@ -770,7 +770,7 @@ void ov::npuw::IBaseInferRequest::bind_global_results(std::size_t idx, RqPtr req
         std::tie(result_idx, sub_out_idx) = it;
         const auto& g_port = m_npuw_model->outputs()[result_idx];
         const auto& s_port = request->get_outputs()[sub_out_idx];
-        request->set_tensor(s_port, is_not_stored_io(g_port) ? get_tensor(g_port) : m_port_to_tensor.at(g_port).tensor);
+        request->set_tensor(s_port, is_stored(g_port) ? m_port_to_tensor.at(g_port).tensor : get_tensor(g_port));
     }
 
     LOG_DEBUG("Done");
```

src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp

Lines changed: 4 additions & 2 deletions
```diff
@@ -104,8 +104,10 @@ class IBaseInferRequest : public ov::ISyncInferRequest {
     mutable std::map<ov::Output<const ov::Node>, TensorStorage>
         m_port_to_tensor;  // mutable due to lazy I/O allocation in get_tensor()
 
-    // Check to verify that m_port_to_tensor doesn't have anything stored at the port and it's I/O
-    bool is_not_stored_io(const ov::Output<const ov::Node>& port) const;
+    // Check that m_port_to_tensor does have a tensor stored at the port
+    bool is_stored(const ov::Output<const ov::Node>& port) const;
+    // Check the port is I/O
+    bool is_io(const ov::Output<const ov::Node>& port) const;
 
     struct QuantGatherTensors {
         ov::Tensor w, z, s;
```

src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp

Lines changed: 2 additions & 3 deletions
```diff
@@ -389,9 +389,8 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptr<ov::npuw::Com
 
 void ov::npuw::JustInferRequest::set_tensor(const ov::Output<const ov::Node>& port,
                                             const ov::SoPtr<ov::ITensor>& tensor) {
-    if (is_not_stored_io(port)) {
-        m_port_to_tensor[port] = TensorStorage{tensor, true, false, true};
-    }
+    NPUW_ASSERT(is_io(port));
+    m_port_to_tensor[port] = TensorStorage{tensor, true, false, true};
 
     // Check if setting output tensor
     for (std::size_t i = 0; i < m_npuw_model->outputs().size(); ++i) {
```
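
The behavioral contrast with the base class is the point of this hunk: IBaseInferRequest::set_tensor() accepts any port and merges into existing storage, while JustInferRequest::set_tensor() now rejects non-I/O ports up front and overwrites the entry unconditionally with persistent set to true. A hypothetical caller-side view (identifiers are illustrative, not from this commit):

```cpp
// Hypothetical usage; compiled_model and user_tensor are illustrative names.
auto request = compiled_model->create_infer_request();
request->set_tensor(compiled_model->inputs()[0], user_tensor);  // OK: an I/O port
// A port that is not among the model's inputs()/outputs() now fails
// NPUW_ASSERT(is_io(port)) before anything is stored.
```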
