Skip to content

Commit 7d56aeb

Browse files
authored
[NPU] Use the friendly name of the tensor instead of its legacy name (openvinotoolkit#23945)
### Details: - *Use the friendly name of the tensor instead of its legacy name* - *...* ### Tickets: - *ticket-id*
1 parent fe84092 commit 7d56aeb

10 files changed

+82
-63
lines changed

src/plugins/intel_npu/src/al/include/sync_infer_request.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ class SyncInferRequest : public ov::IInferRequest {
201201
std::vector<std::string> _outputAndStateOutputNames;
202202

203203
std::unordered_map<std::string, std::string> _nodeNameToLegacyName;
204+
std::unordered_map<std::string, std::string> _legacyNameToNodeName;
204205
};
205206

206207
} // namespace intel_npu

src/plugins/intel_npu/src/al/src/sync_infer_request.cpp

+14-17
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,9 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr<const ICompiledModel>&
2424

2525
// Map the node names to the legacy ones used by the I/O tensors in order to allow an easier access to the tensors'
2626
// contents
27-
for (const auto& [legacyName, parameterDescriptor] : _metadata.parameters) {
28-
_nodeNameToLegacyName[parameterDescriptor.currentNodeName] = legacyName;
29-
}
30-
for (const auto& [legacyName, resultDescriptor] : _metadata.results) {
31-
_nodeNameToLegacyName[resultDescriptor.currentNodeName] = legacyName;
27+
for (const auto& [name, resultDescriptor] : _metadata.results) {
28+
_nodeNameToLegacyName[name] = resultDescriptor.legacyName;
29+
_legacyNameToNodeName[resultDescriptor.legacyName] = name;
3230
}
3331

3432
_inputAndStateInputNames = _metadata.inputNames;
@@ -48,8 +46,12 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr<const ICompiledModel>&
4846
if (contains(_inputAndStateInputNames, shapeName)) {
4947
_inputAndStateInputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName);
5048
}
51-
if (contains(_outputAndStateOutputNames, shapeName)) {
52-
_outputAndStateOutputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName);
49+
50+
const auto& shapeNameMatch = _legacyNameToNodeName.find(shapeName);
51+
if (shapeNameMatch != _legacyNameToNodeName.end()) {
52+
if (contains(_outputAndStateOutputNames, shapeNameMatch->second)) {
53+
_outputAndStateOutputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName);
54+
}
5355
}
5456
}
5557
}
@@ -83,10 +85,7 @@ std::vector<ov::SoPtr<ov::IVariableState>> SyncInferRequest::query_state() const
8385
}
8486

8587
ov::SoPtr<ov::ITensor> SyncInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
86-
const auto& nodeNameMatch = _nodeNameToLegacyName.find(port.get_node()->get_friendly_name());
87-
OPENVINO_ASSERT(nodeNameMatch != _nodeNameToLegacyName.end(), "Cannot find tensor for port ", port);
88-
89-
return _allTensors.at(nodeNameMatch->second);
88+
return _allTensors.at(port.get_node()->get_friendly_name());
9089
}
9190

9291
void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) {
@@ -97,8 +96,7 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const
9796
OPENVINO_THROW("Failed to set tensor. ", ex.what());
9897
}
9998

100-
const std::string& legacyName = _nodeNameToLegacyName.at(port.get_node()->get_friendly_name());
101-
_allTensors[legacyName] = tensor._ptr;
99+
_allTensors[port.get_node()->get_friendly_name()] = tensor._ptr;
102100
}
103101

104102
std::vector<ov::SoPtr<ov::ITensor>> SyncInferRequest::get_tensors(const ov::Output<const ov::Node>& /*port*/) const {
@@ -151,14 +149,12 @@ void SyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port,
151149
void SyncInferRequest::check_tensors() const {
152150
const auto& inputs = _compiledModel->inputs();
153151
for (size_t i = 0; i < inputs.size(); i++) {
154-
const std::string& legacyName = _nodeNameToLegacyName.at(inputs[i].get_node()->get_friendly_name());
155-
check_tensor(inputs[i], _allTensors.at(legacyName));
152+
check_tensor(inputs[i], _allTensors.at(inputs[i].get_node()->get_friendly_name()));
156153
}
157154

158155
const auto& outputs = _compiledModel->outputs();
159156
for (size_t i = 0; i < outputs.size(); i++) {
160-
const std::string& legacyName = _nodeNameToLegacyName.at(outputs[i].get_node()->get_friendly_name());
161-
check_tensor(outputs[i], _allTensors.at(legacyName));
157+
check_tensor(outputs[i], _allTensors.at(outputs[i].get_node()->get_friendly_name()));
162158
}
163159
}
164160

@@ -180,6 +176,7 @@ void SyncInferRequest::allocate_tensor(std::string tensorName,
180176
_shapesTensors[tensorName] = tensor;
181177
tensorName = SHAPE_TENSOR_PREFIX + tensorName;
182178
}
179+
183180
if (tensorType == TensorType::State) {
184181
_variableStates[tensorName] = std::make_shared<VariableState>(tensorName, tensor);
185182

src/plugins/intel_npu/src/backend/include/zero_executor.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class ZeroExecutor final : public IExecutor {
2727
~ZeroExecutor() override;
2828

2929
struct ArgumentDescriptor {
30-
ze_graph_argument_properties_t info;
30+
ze_graph_argument_properties_3_t info;
3131
uint32_t idx;
3232
};
3333

@@ -67,6 +67,7 @@ class ZeroExecutor final : public IExecutor {
6767

6868
ze_graph_handle_t _graph = nullptr;
6969
ze_graph_properties_t _props{};
70+
7071
std::unordered_map<std::string, ArgumentDescriptor> _inputs_desc_map;
7172
std::unordered_map<std::string, ArgumentDescriptor> _outputs_desc_map;
7273

src/plugins/intel_npu/src/backend/include/zero_memory.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ class HostMemAllocator final {
9696
struct MemoryManagementUnit {
9797
MemoryManagementUnit() = default;
9898

99-
void appendArgument(const std::string& name, const ze_graph_argument_properties_t& argument);
99+
void appendArgument(const std::string& name, const std::size_t argSize);
100100
/* Allocate Device memories */
101101
void allocate(const ze_device_handle_t device_handle, const ze_context_handle_t context);
102102

src/plugins/intel_npu/src/backend/include/zero_utils.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ static inline std::size_t layoutCount(const ze_graph_argument_layout_t val) {
164164
}
165165
}
166166

167-
static inline std::size_t getSizeIOBytes(const ze_graph_argument_properties_t& argument) {
167+
static inline std::size_t getSizeIOBytes(const ze_graph_argument_properties_3_t& argument) {
168168
std::size_t num_elements = 1;
169169
for (std::size_t i = 0; i < layoutCount(argument.deviceLayout); ++i) {
170170
num_elements *= argument.dims[i];

src/plugins/intel_npu/src/backend/src/zero_executor.cpp

+29-10
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "intel_npu/al/config/common.hpp"
1515
#include "intel_npu/al/itt.hpp"
16+
#include "intel_npu/al/prefix.hpp"
1617
#include "zero_device.hpp"
1718
#include "zero_utils.hpp"
1819

@@ -55,11 +56,9 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
5556
_config,
5657
_group_ordinal);
5758
Fence fence(graph_command_queue, _config);
58-
ze_device_properties_t properties = {};
59-
properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
60-
zeroUtils::throwOnFail("zeDeviceGetProperties", zeDeviceGetProperties(_initStructs->getDevice(), &properties));
6159

6260
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, itt::domains::LevelZeroBackend, "Executor::ZeroExecutor", "graphCreate");
61+
6362
ze_graph_desc_t desc{ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
6463
nullptr,
6564
ZE_GRAPH_FORMAT_NATIVE,
@@ -73,17 +72,37 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
7372
OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetProperties");
7473
zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext->pfnGetProperties(_graph, &_props));
7574

76-
OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties");
75+
auto targetDriverExtVersion = _initStructs->getDriverExtVersion();
76+
if (targetDriverExtVersion <= ZE_GRAPH_EXT_VERSION_1_1) {
77+
OPENVINO_THROW("Incompatibility between the NPU plugin and driver! The driver version is too old, please "
78+
"update the driver version");
79+
}
80+
81+
OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties3");
7782
for (uint32_t index = 0; index < _props.numGraphArgs; ++index) {
78-
ze_graph_argument_properties_t arg;
79-
zeroUtils::throwOnFail("pfnGetArgumentProperties",
80-
_graph_ddi_table_ext->pfnGetArgumentProperties(_graph, index, &arg));
81-
if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg.type) {
82-
_inputs_desc_map.emplace(std::make_pair(std::string(arg.name), ArgumentDescriptor{arg, index}));
83+
ze_graph_argument_properties_3_t arg3;
84+
zeroUtils::throwOnFail("pfnGetArgumentProperties3",
85+
_graph_ddi_table_ext->pfnGetArgumentProperties3(_graph, index, &arg3));
86+
87+
if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg3.type) {
88+
if (isStateInputName(arg3.name) || isShapeTensorName(arg3.name)) {
89+
_inputs_desc_map.emplace(std::make_pair(std::string(arg3.name), ArgumentDescriptor{arg3, index}));
90+
91+
} else {
92+
_inputs_desc_map.emplace(
93+
std::make_pair(std::string(arg3.debug_friendly_name), ArgumentDescriptor{arg3, index}));
94+
}
8395
} else {
84-
_outputs_desc_map.emplace(std::make_pair(std::string(arg.name), ArgumentDescriptor{arg, index}));
96+
if (isStateOutputName(arg3.name) || isShapeTensorName(arg3.name)) {
97+
_outputs_desc_map.emplace(std::make_pair(std::string(arg3.name), ArgumentDescriptor{arg3, index}));
98+
99+
} else {
100+
_outputs_desc_map.emplace(
101+
std::make_pair(std::string(arg3.debug_friendly_name), ArgumentDescriptor{arg3, index}));
102+
}
85103
}
86104
}
105+
87106
OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "appendGraphInitialize");
88107
graph_command_list.appendGraphInitialize(_graph);
89108
graph_command_list.close();

src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp

+29-27
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
9191
return std::find(container.begin(), container.end(), value) != container.end();
9292
};
9393

94+
auto allocator = zeroMemory::HostMemAllocator(backendPtr);
95+
9496
for (const std::string& inputName : _metadata.inputNames) {
9597
if (!executorInputDescriptors.count(inputName)) {
9698
OPENVINO_THROW("Invalid graph input descriptor key: " + inputName);
@@ -99,15 +101,15 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
99101
const IONodeDescriptor& parameterDescriptor = _metadata.parameters.at(inputName);
100102
check_level_zero_attributes_match(parameterDescriptor, executorInputDescriptors.at(inputName), inputName);
101103

102-
ov::Allocator allocator;
104+
ov::Allocator inputAllocator;
103105
if (properties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) {
104-
allocator = zeroMemory::HostMemAllocator(backendPtr, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);
106+
inputAllocator = zeroMemory::HostMemAllocator(backendPtr, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);
105107
} else {
106-
allocator = zeroMemory::HostMemAllocator(backendPtr);
107-
}
108+
inputAllocator = zeroMemory::HostMemAllocator(backendPtr);
109+
};
108110

109111
// The I/O buffers already allocated using the Level Zero API are being reused here
110-
allocate_tensor(inputName, parameterDescriptor, TensorType::InputOrOutput, allocator);
112+
allocate_tensor(inputName, parameterDescriptor, TensorType::InputOrOutput, inputAllocator);
111113

112114
if (contains(_metadata.shapeNames, inputName)) {
113115
const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + inputName;
@@ -117,8 +119,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
117119
executorInputDescriptors.at(shapeBufferName),
118120
shapeBufferName);
119121

120-
auto allocator = zeroMemory::HostMemAllocator(backendPtr);
121-
allocate_tensor(inputName, shapeDescriptor, TensorType::Shape, allocator);
122+
allocate_tensor(inputName, shapeDescriptor, TensorType::Shape, inputAllocator);
122123
}
123124
}
124125

@@ -130,20 +131,20 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
130131
const IONodeDescriptor& resultDescriptor = _metadata.results.at(outputName);
131132
check_level_zero_attributes_match(resultDescriptor, executorOutputDescriptors.at(outputName), outputName);
132133

133-
auto allocator = zeroMemory::HostMemAllocator(backendPtr);
134-
135134
allocate_tensor(outputName, resultDescriptor, TensorType::InputOrOutput, allocator);
136135

137-
if (contains(_metadata.shapeNames, outputName)) {
138-
const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + outputName;
139-
const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(outputName);
136+
const auto& shapeNameMatch = _nodeNameToLegacyName.find(outputName);
137+
if (shapeNameMatch != _nodeNameToLegacyName.end()) {
138+
if (contains(_metadata.shapeNames, shapeNameMatch->second)) {
139+
const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + shapeNameMatch->second;
140+
const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second);
140141

141-
check_level_zero_attributes_match(shapeDescriptor,
142-
executorOutputDescriptors.at(shapeBufferName),
143-
shapeBufferName);
142+
check_level_zero_attributes_match(shapeDescriptor,
143+
executorOutputDescriptors.at(shapeBufferName),
144+
shapeBufferName);
144145

145-
auto allocator = zeroMemory::HostMemAllocator(backendPtr);
146-
allocate_tensor(outputName, shapeDescriptor, TensorType::Shape, allocator);
146+
allocate_tensor(shapeNameMatch->second, shapeDescriptor, TensorType::Shape, allocator);
147+
}
147148
}
148149
}
149150

@@ -166,8 +167,6 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
166167
executorOutputDescriptors.at(stateOutputBufferName),
167168
stateOutputBufferName);
168169

169-
auto allocator = zeroMemory::HostMemAllocator(backendPtr);
170-
171170
// Only one buffer per state variable is required, we'll use the "output" one since this one captures the latest
172171
// tensor value
173172
allocate_tensor(stateName, stateDescriptor, TensorType::State, allocator);
@@ -226,15 +225,18 @@ void ZeroInferRequest::get_result() {
226225

227226
if (isShapeTensorName(name)) {
228227
const auto actualTensorName = name.substr(SHAPE_TENSOR_PREFIX.size());
229-
ov::Shape actualDims;
230-
actualDims.reserve(outputTensor->get_size());
231-
232-
for (size_t i = 0; i < outputTensor->get_size(); ++i) {
233-
const auto reverseIdx = outputTensor->get_size() - 1 - i;
234-
actualDims.push_back(outputTensor->data<uint32_t>()[reverseIdx]);
228+
const auto& shapeNameMatch = _legacyNameToNodeName.find(actualTensorName);
229+
if (shapeNameMatch != _legacyNameToNodeName.end()) {
230+
ov::Shape actualDims;
231+
actualDims.reserve(outputTensor->get_size());
232+
233+
for (size_t i = 0; i < outputTensor->get_size(); ++i) {
234+
const auto reverseIdx = outputTensor->get_size() - 1 - i;
235+
actualDims.push_back(outputTensor->data<uint32_t>()[reverseIdx]);
236+
}
237+
auto& tensorToBeReshaped = _allTensors.at(shapeNameMatch->second);
238+
tensorToBeReshaped->set_shape(actualDims);
235239
}
236-
auto& tensorToBeReshaped = _allTensors.at(actualTensorName);
237-
tensorToBeReshaped->set_shape(actualDims);
238240
}
239241

240242
uint8_t* tensorBuffer = reinterpret_cast<uint8_t*>(outputTensor->data());

src/plugins/intel_npu/src/backend/src/zero_memory.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,9 @@ bool HostMemAllocator::is_equal(const HostMemAllocator& other) const {
6969
return other._data != nullptr && _data != nullptr && other._data == _data;
7070
}
7171

72-
void MemoryManagementUnit::appendArgument(const std::string& name, const ze_graph_argument_properties_t& argument) {
72+
void MemoryManagementUnit::appendArgument(const std::string& name, const std::size_t argSize) {
7373
_offsets.emplace(std::make_pair(name, _size));
7474

75-
const std::size_t argSize = zeroUtils::getSizeIOBytes(argument);
7675
_size += argSize + alignment -
7776
(argSize % alignment); // is this really necessary? if 0==argSize%alignment -> add 1 * alignment
7877
}

src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ struct DiscretePipeline final : public Pipeline {
4141

4242
OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::DiscretePipeline::DiscretePipeline");
4343
for (const auto& desc : executor->inputs_desc_map()) {
44-
_deviceInputs.appendArgument(desc.first, desc.second.info);
44+
_deviceInputs.appendArgument(desc.first, zeroUtils::getSizeIOBytes(desc.second.info));
4545
}
4646
_deviceInputs.allocate(device_handle, context);
4747

@@ -61,7 +61,7 @@ struct DiscretePipeline final : public Pipeline {
6161
_event[stage::UPLOAD].AppendSignalEvent(_command_list[stage::UPLOAD]);
6262

6363
for (const auto& desc : executor->outputs_desc_map()) {
64-
_deviceOutputs.appendArgument(desc.first, desc.second.info);
64+
_deviceOutputs.appendArgument(desc.first, zeroUtils::getSizeIOBytes(desc.second.info));
6565
}
6666
_deviceOutputs.allocate(device_handle, context);
6767

src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -996,8 +996,8 @@ static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors,
996996
}
997997
const std::string& legacyName = arg.name;
998998

999-
names.push_back(legacyName);
1000-
nodeDescriptors[legacyName] =
999+
names.push_back(arg.debug_friendly_name);
1000+
nodeDescriptors[arg.debug_friendly_name] =
10011001
{legacyName, arg.debug_friendly_name, std::move(outputTensorNames), precision, shape, shape};
10021002
}
10031003

0 commit comments

Comments
 (0)