
Commit 81ae163

Initial commit
Signed-off-by: Bogdan Pereanu <bogdan.pereanu@intel.com>
1 parent 6c18118 commit 81ae163

18 files changed: +221 -76 lines changed


src/inference/include/openvino/runtime/intel_npu/properties.hpp

Lines changed: 12 additions & 0 deletions
@@ -110,6 +110,7 @@ static constexpr ov::Property<bool> turbo{"NPU_TURBO"};
  * @brief [Only for NPU Compiler]
  * Type: integer, default is -1
  * Sets the number of npu tiles to compile the model for.
+ * @ingroup ov_runtime_npu_prop_cpp_api
  */
 static constexpr ov::Property<int64_t> tiles{"NPU_TILES"};

@@ -118,6 +119,7 @@ static constexpr ov::Property<int64_t> tiles{"NPU_TILES"};
  * Type: integer, default is -1
  * Maximum number of tiles supported by the device we compile for. Can be set for offline compilation. If not set, it
  * will be populated by driver.
+ * @ingroup ov_runtime_npu_prop_cpp_api
  */
 static constexpr ov::Property<int64_t> max_tiles{"NPU_MAX_TILES"};

@@ -133,6 +135,7 @@ static constexpr ov::Property<bool> bypass_umd_caching{"NPU_BYPASS_UMD_CACHING"}
  * @brief [Only for NPU Plugin]
  * Type: boolean, default is false
  * This option allows to delay loading the weights until inference is created
+ * @ingroup ov_runtime_npu_prop_cpp_api
  */
 static constexpr ov::Property<bool> defer_weights_load{"NPU_DEFER_WEIGHTS_LOAD"};

@@ -145,5 +148,14 @@ static constexpr ov::Property<bool> defer_weights_load{"NPU_DEFER_WEIGHTS_LOAD"}
  */
 static constexpr ov::Property<bool> run_inferences_sequentially{"NPU_RUN_INFERENCES_SEQUENTIALLY"};

+/**
+ * @brief [Only for NPU plugin]
+ * Type: boolean, default is false
+ * Enable ROI Tensor feature. The compiler shall be aware that ROI tensors will be used; they must be enabled at
+ * compilation time if needed later at runtime.
+ * @ingroup ov_runtime_npu_prop_cpp_api
+ */
+static constexpr ov::Property<bool> enable_roi_tensor{"NPU_ENABLE_ROI_TENSOR"};
+
 } // namespace intel_npu
 } // namespace ov
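
A minimal usage sketch of the new property (the model path, device string, and shapes below are placeholders, not taken from this commit): the flag has to be passed at compile time, after which a region-of-interest view of a larger host tensor can be bound at inference time.

// Usage sketch; hypothetical model and shapes.
#include <openvino/openvino.hpp>
#include <openvino/runtime/intel_npu/properties.hpp>

int main() {
    ov::Core core;

    // The compiler must know ahead of time that ROI (strided) tensors will be bound,
    // so the property is passed when the model is compiled.
    auto compiled = core.compile_model("model.xml", "NPU", ov::intel_npu::enable_roi_tensor(true));
    auto request = compiled.create_infer_request();

    // Bind a region of interest of a larger host tensor; the ROI view is strided, not contiguous.
    ov::Tensor full{ov::element::f32, {1, 3, 1080, 1920}};
    ov::Tensor roi{full, {0, 0, 0, 0}, {1, 3, 720, 1280}};
    request.set_tensor(compiled.input(), roi);
    request.infer();
    return 0;
}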

src/plugins/intel_npu/README.md

Lines changed: 1 addition & 0 deletions
@@ -205,6 +205,7 @@ The following properties are supported (may differ based on current system confi
 | `ov::intel_npu::bypass_umd_caching`/</br>`NPU_BYPASS_UMD_CACHING` | RW | Bypass the caching of compiled models in UMD. | `YES`/ `NO`| `NO` |
 | `ov::intel_npu::defer_weights_load`/</br>`NPU_DEFER_WEIGHTS_LOAD` | RW | Delay loading the weights until inference is created. | `YES`/ `NO`| `NO` |
 | `ov::intel_npu::run_inferences_sequentially`/</br>`NPU_RUN_INFERENCES_SEQUENTIALLY` | RW | Run inferences in async mode sequentially in the order in which they are started to optimize host scheduling. | `YES`/ `NO`| `NO` |
+| `ov::intel_npu::enable_roi_tensor`/</br>`NPU_ENABLE_ROI_TENSOR` | RW | Enable ROI Tensor feature. The compiler shall be aware that ROI tensors will be used; they must be enabled at compilation time if needed later at runtime. | `YES`/ `NO`| `NO` |
 <br>

 ### Compiled_model properties VS Plugin properties

src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp

Lines changed: 18 additions & 0 deletions
@@ -1426,4 +1426,22 @@ struct USE_BASE_MODEL_SERIALIZER final : OptionBase<USE_BASE_MODEL_SERIALIZER, b
     }
 };

+struct ENABLE_ROI_TENSOR final : OptionBase<ENABLE_ROI_TENSOR, bool> {
+    static std::string_view key() {
+        return ov::intel_npu::enable_roi_tensor.name();
+    }
+
+    static bool defaultValue() {
+        return false;
+    }
+
+    static bool isPublic() {
+        return true;
+    }
+
+    static OptionMode mode() {
+        return OptionMode::CompileTime;
+    }
+};
+
 } // namespace intel_npu
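
Following the `config.get<LOG_LEVEL>()` pattern visible elsewhere in this commit, compile-time code could read the new option as sketched below; the function and its call site are hypothetical, since the consumers of the option are not part of this diff.

// Hypothetical consumer of the new option (real call sites are not shown in this commit).
void apply_roi_tensor_flag(const Config& config) {
    if (config.get<ENABLE_ROI_TENSOR>()) {
        // Forward the flag to the compiler so strided (ROI) I/O is prepared at compile time.
    }
}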

src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp

Lines changed: 4 additions & 2 deletions
@@ -29,12 +29,14 @@ struct Pipeline final {
     void pull();
     void reset() const;

-    void update_graph_arguments(uint32_t arg_index, const void* arg_data, size_t byte_size);
-    void update_graph_arguments_batching(uint32_t arg_index, const void* arg_data, size_t batch_index);
+    void update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor);
+    void update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor, size_t command_list_index);

     std::vector<ov::ProfilingInfo> get_profiling_info() const;

 protected:
+    ov::Strides get_strides(const std::shared_ptr<ZeroTensor>& tensor);
+
     std::shared_ptr<ZeroInitStructsHolder> _init_structs;
     std::shared_ptr<IGraph> _graph;
     const Config _config;
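
For orientation (the call sites appear in zero_infer_request.cpp below), the two overloads split responsibilities: the two-argument form refreshes the argument in every command list, slicing the tensor evenly across them, while the three-argument form targets a single command list, as used by the batched set_tensors() path. Hypothetical call shapes, with made-up variable names:

// Mirrors the call sites later in this commit; variable names are illustrative only.
pipeline->update_graph_arguments(desc.idx, zero_tensor);          // all command lists, sliced evenly
pipeline->update_graph_arguments(desc.idx, zero_tensor, batch_i); // only command list batch_i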

src/plugins/intel_npu/src/backend/include/zero_tensor.hpp

Lines changed: 3 additions & 4 deletions
@@ -73,24 +73,23 @@ class ZeroTensor final : public ov::ITensor {

 private:
     void update_strides() const;
-    size_t get_capacity() const;
     size_t get_bytes_capacity() const;

     std::shared_ptr<ZeroInitStructsHolder> _init_structs;
     Logger _logger;

+    ov::SoPtr<ov::ITensor> _user_tensor;
+
     ov::element::Type _element_type;
     ov::Shape _shape;
-    ov::Shape _capacity;
+    size_t _bytes_capacity;
     mutable ov::Strides _strides;
     mutable std::once_flag _strides_once;
     void* _ptr = nullptr;
     bool _reset_tensor_memory = false;
     bool _is_input = false;
     bool _can_be_reused = false;

-    ov::SoPtr<ov::ITensor> _user_tensor;
-
     std::shared_ptr<ZeroMem> _mem_ref;
 };

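
One reason for the member reorder that may not be obvious from the diff alone: C++ initializes non-static data members in declaration order, and the second ZeroTensor constructor (see zero_tensor.cpp below) now initializes _element_type, _shape, and _strides from _user_tensor, so _user_tensor must be declared before them. A generic illustration of the rule, with made-up names:

#include <memory>

struct Example {
    std::shared_ptr<int> source;  // declared first, so it is initialized first
    int derived;                  // safe: source is already constructed when this initializer runs

    explicit Example(std::shared_ptr<int> s) : source(std::move(s)), derived(*source) {}
};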

src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp

Lines changed: 9 additions & 16 deletions
@@ -424,8 +424,7 @@ void ZeroInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const
             _pipeline->update_graph_arguments(foundPort.is_input()
                                                   ? _graph->get_input_descriptors().at(foundPort.idx).idx
                                                   : _graph->get_output_descriptors().at(foundPort.idx).idx,
-                                              levelZeroTensor->data(),
-                                              levelZeroTensor->get_byte_size());
+                                              levelZeroTensor);
         }
     }
     // If command list updates are not supported, fallback to copying tensors every time.
@@ -501,9 +500,9 @@ void ZeroInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
             OPENVINO_ASSERT(get_level_zero_input(foundPort.idx, i)->data(), "Empty buffer");
             OV_ITT_TASK_NEXT(ZERO_SET_TENSORS, "updateCommandList");

-            _pipeline->update_graph_arguments_batching(_graph->get_input_descriptors().at(foundPort.idx).idx,
-                                                       get_level_zero_input(foundPort.idx, i)->data(),
-                                                       i);
+            _pipeline->update_graph_arguments(_graph->get_input_descriptors().at(foundPort.idx).idx,
+                                              get_level_zero_input(foundPort.idx, i),
+                                              i);
         }
     }
 }
@@ -625,8 +624,7 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() {
             OPENVINO_ASSERT(levelZeroTensor.at(SINGLE_TENSOR)->data(), "Empty buffer");

             _pipeline->update_graph_arguments(_graph->get_input_descriptors().at(ioIndex).idx,
-                                              levelZeroTensor.at(SINGLE_TENSOR)->data(),
-                                              levelZeroTensor.at(SINGLE_TENSOR)->get_byte_size());
+                                              levelZeroTensor.at(SINGLE_TENSOR));

             if (!inputDescriptor.isStateInput) {
                 levelZeroTensor.at(SINGLE_TENSOR)->reset_memory_flag();
@@ -656,9 +654,7 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() {
             _logger.debug("Update output graph descriptor with the new tensor");
             OPENVINO_ASSERT(levelZeroTensor->data(), "Empty buffer");

-            _pipeline->update_graph_arguments(_graph->get_output_descriptors().at(ioIndex).idx,
-                                              levelZeroTensor->data(),
-                                              levelZeroTensor->get_byte_size());
+            _pipeline->update_graph_arguments(_graph->get_output_descriptors().at(ioIndex).idx, levelZeroTensor);

             levelZeroTensor->reset_memory_flag();
         }
@@ -688,13 +684,10 @@ void ZeroInferRequest::update_states_if_memory_changed() {
                 zeroState->clear_zero_state_update_pending();

                 _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx,
-                                                  get_level_zero_input(zeroState->get_tensor_index())->data(),
-                                                  get_level_zero_input(zeroState->get_tensor_index())->get_byte_size());
+                                                  get_level_zero_input(zeroState->get_tensor_index()));

-                _pipeline->update_graph_arguments(
-                    _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx,
-                    _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->data(),
-                    _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->get_byte_size());
+                _pipeline->update_graph_arguments(_graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx,
+                                                  _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()));
             }
         }
     }

src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp

Lines changed: 28 additions & 10 deletions
@@ -96,26 +96,29 @@ Pipeline::Pipeline(const Config& config,
             if (input_tensors.at(io_index).size() > 1) {
                 _logger.debug("Pipeline - set args for input index: %zu", io_index);

-                graph->set_argument_value(desc.idx, input_tensors.at(io_index).at(i)->data());
+                const auto& tensor = input_tensors.at(io_index).at(i);
+                graph->set_argument_value(desc.idx, tensor->data(), get_strides(tensor));

                 ++io_index;
                 continue;
             }

+            const auto& tensor = input_tensors.at(io_index).at(0);
             graph->set_argument_value(
                 desc.idx,
-                static_cast<unsigned char*>(input_tensors.at(io_index).at(0)->data()) +
-                    (i * input_tensors.at(io_index).at(0)->get_byte_size()) / _number_of_command_lists);
+                static_cast<unsigned char*>(tensor->data()) + (i * tensor->get_byte_size()) / _number_of_command_lists,
+                get_strides(tensor));

             ++io_index;
         }

         io_index = 0;
         for (const auto& desc : graph->get_output_descriptors()) {
+            const auto& tensor = output_tensors.at(io_index);
             graph->set_argument_value(
                 desc.idx,
-                static_cast<unsigned char*>(output_tensors.at(io_index)->data()) +
-                    (i * output_tensors.at(io_index)->get_byte_size()) / _number_of_command_lists);
+                static_cast<unsigned char*>(tensor->data()) + (i * tensor->get_byte_size()) / _number_of_command_lists,
+                get_strides(tensor));
             ++io_index;
         }

@@ -223,20 +226,23 @@ void Pipeline::reset() const {
     _logger.debug("Pipeline - rest() completed");
 };

-void Pipeline::update_graph_arguments(uint32_t arg_index, const void* arg_data, size_t byte_size) {
+void Pipeline::update_graph_arguments(uint32_t index, const std::shared_ptr<ZeroTensor>& tensor) {
     OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList");
     _logger.debug("Pipeline - updateCommandList");

     const size_t number_of_command_lists = _command_lists.size();

     for (size_t i = 0; i < number_of_command_lists; i++) {
         _command_lists.at(i)->updateMutableCommandList(
-            arg_index,
-            static_cast<const unsigned char*>(arg_data) + (i * byte_size) / number_of_command_lists);
+            index,
+            static_cast<const unsigned char*>(tensor->data()) + (i * tensor->get_byte_size()) / number_of_command_lists,
+            get_strides(tensor));
     }
 };

-void Pipeline::update_graph_arguments_batching(uint32_t arg_index, const void* arg_data, size_t command_list_index) {
+void Pipeline::update_graph_arguments(uint32_t index,
+                                      const std::shared_ptr<ZeroTensor>& tensor,
+                                      size_t command_list_index) {
     OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandListIndex");
     _logger.debug("Pipeline - updateCommandListIndex");

@@ -246,7 +252,7 @@ void Pipeline::update_graph_arguments_batching(uint32_t arg_index, const void* a
                     "Command list index is higher than the number of Command lists ",
                     command_list_index);

-    _command_lists.at(command_list_index)->updateMutableCommandList(arg_index, arg_data);
+    _command_lists.at(command_list_index)->updateMutableCommandList(index, tensor->data(), get_strides(tensor));
 };

 std::vector<ov::ProfilingInfo> Pipeline::get_profiling_info() const {
@@ -272,4 +278,16 @@ std::vector<ov::ProfilingInfo> Pipeline::get_profiling_info() const {
     }
 }

+ov::Strides Pipeline::get_strides(const std::shared_ptr<ZeroTensor>& tensor) {
+    if (tensor->get_element_type().bitwidth() < 8 || tensor->is_continuous()) {
+        return ov::Strides{};
+    }
+
+    if (!_graph->is_strided_tensor_supported()) {
+        OPENVINO_THROW("Strides are not supported by the current driver version.");
+    }
+
+    return tensor->get_strides();
+};
+
 } // namespace intel_npu
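
A worked example of what get_strides() returns for a non-contiguous input (the shapes and f32 element type are assumed for illustration): a {1, 3, 720, 1280} ROI view into a {1, 3, 1080, 1920} parent tensor keeps the parent's byte strides, so is_continuous() is false and the strides are forwarded to the command list, provided the graph reports strided-tensor support.

// Assumed ROI view: shape {1, 3, 720, 1280} inside a parent of shape {1, 3, 1080, 1920}, f32 (4 bytes).
// Byte strides follow the parent layout, not the ROI shape:
//   strides = {3 * 1080 * 1920 * 4, 1080 * 1920 * 4, 1920 * 4, 4}
//           = {24883200, 8294400, 7680, 4}
// A contiguous tensor of the ROI shape would instead have strides {11059200, 3686400, 5120, 4},
// which is why such a view fails is_continuous() and takes the strided path.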

src/plugins/intel_npu/src/backend/src/zero_tensor.cpp

Lines changed: 32 additions & 22 deletions
@@ -36,14 +36,15 @@ ZeroTensor::ZeroTensor(const std::shared_ptr<ZeroInitStructsHolder>& init_struct
       _logger("ZeroTensor", config.get<LOG_LEVEL>()),
       _element_type{element_type},
       _shape{shape},
-      _capacity{_shape},
       _strides{},
       _strides_once{},
       _is_input(is_input) {
     OPENVINO_ASSERT(_element_type.is_static());
     const auto byte_size = ov::util::get_memory_size_safe(element_type, _shape);
     OPENVINO_ASSERT(byte_size, "Cannot allocate memory for type: ", element_type, " and shape: ", _shape);

+    _bytes_capacity = get_bytes_capacity();
+
     _mem_ref = ZeroMemPool::get_instance().allocate_zero_memory(_init_structs,
                                                                 byte_size.value(),
                                                                 utils::STANDARD_PAGE_SIZE,
@@ -59,14 +60,15 @@ ZeroTensor::ZeroTensor(const std::shared_ptr<ZeroInitStructsHolder>& init_struct
                        const ov::SoPtr<ov::ITensor>& user_tensor)
     : _init_structs(init_structs),
       _logger("ZeroTensor", config.get<LOG_LEVEL>()),
-      _element_type{user_tensor->get_element_type()},
-      _shape{user_tensor->get_shape()},
-      _capacity{_shape},
-      _strides{_element_type.bitwidth() >= 8 ? user_tensor->get_strides() : ov::Strides{}},
-      _strides_once{},
-      _user_tensor(user_tensor) {
+      _user_tensor(user_tensor),
+      _element_type{_user_tensor->get_element_type()},
+      _shape{_user_tensor->get_shape()},
+      _strides{_element_type.bitwidth() >= 8 ? _user_tensor->get_strides() : ov::Strides{}},
+      _strides_once{} {
     OPENVINO_ASSERT(_element_type.is_static());

+    _bytes_capacity = get_bytes_capacity();
+
     // Data pointer of the given user_tensor must be a valid address in the level zero context
     // Check first if the given tensor is a ZeroRemoteTensor (which has a different method to expose the internal
     // storage)
@@ -81,9 +83,7 @@ ZeroTensor::ZeroTensor(const std::shared_ptr<ZeroInitStructsHolder>& init_struct
     // _mem_ref will keep a reference to that allocation. Otherwise the function will try to import it into the level
     // zero context.
     _logger.debug("ZeroTensor::ZeroTensor - get tensor from pool or import it");
-    _mem_ref = ZeroMemPool::get_instance().import_standard_allocation_memory(_init_structs,
-                                                                             _ptr,
-                                                                             _user_tensor->get_byte_size());
+    _mem_ref = ZeroMemPool::get_instance().import_standard_allocation_memory(_init_structs, _ptr, _bytes_capacity);
 }

 // Note: Override data() members to not used OpenVINO library code to improve performance
@@ -138,20 +138,28 @@ void ZeroTensor::update_strides() const {
     }
 }

+size_t ZeroTensor::get_bytes_capacity() const {
+    size_t original_shape_size = shape_size(_shape);
+
+    if (_user_tensor == nullptr) {
+        return ov::util::get_memory_size(_element_type, original_shape_size);
+    }
+
+    if (_element_type.bitwidth() < 8 || original_shape_size == 0 || _shape.empty() || _strides.empty()) {
+        return ov::util::get_memory_size(_element_type, original_shape_size);
+    }
+
+    return _strides[0] * _shape[0];
+}
+
 const ov::Strides& ZeroTensor::get_strides() const {
     OPENVINO_ASSERT(_element_type.bitwidth() >= 8,
                     "Could not get strides for types with bitwidths less than 8 bit. Tensor type: ",
                     _element_type);
-    std::call_once(_strides_once, &ZeroTensor::update_strides, this);
-    return _strides;
-}

-size_t ZeroTensor::get_capacity() const {
-    return shape_size(_capacity);
-}
+    std::call_once(_strides_once, &ZeroTensor::update_strides, this);

-size_t ZeroTensor::get_bytes_capacity() const {
-    return ov::util::get_memory_size(get_element_type(), get_capacity());
+    return _strides;
 }

 void ZeroTensor::set_shape(ov::Shape new_shape) {
@@ -161,7 +169,7 @@ void ZeroTensor::set_shape(ov::Shape new_shape) {

     _shape = std::move(new_shape);

-    if (get_size() > get_capacity()) {
+    if (get_byte_size() > _bytes_capacity) {
         OPENVINO_ASSERT(_init_structs->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0),
                         "Re-shaping the tensor with a larger shape is not available using this driver version. "
                         "Please update the driver to the latest version.");
@@ -173,13 +181,15 @@ void ZeroTensor::set_shape(ov::Shape new_shape) {
         _ptr = nullptr;

         // allocate buffer and initialize objects from scratch
-        _capacity = _shape;
+        const auto byte_size = ov::util::get_memory_size_safe(_element_type, _shape);
+        OPENVINO_ASSERT(byte_size, "Cannot allocate memory for type: ", _element_type, " and shape: ", _shape);
         _mem_ref = ZeroMemPool::get_instance().allocate_zero_memory(_init_structs,
-                                                                    get_bytes_capacity(),
+                                                                    byte_size.value(),
                                                                     utils::STANDARD_PAGE_SIZE,
                                                                     _is_input);
         _ptr = _mem_ref->data();
-        OPENVINO_ASSERT(get_bytes_capacity() == 0 || _ptr != nullptr, "Failed to allocate zero memory");
+        OPENVINO_ASSERT(byte_size.value() == 0 || _ptr != nullptr, "Failed to allocate zero memory");
+        _bytes_capacity = get_byte_size();

         _reset_tensor_memory = true;
     }
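
A worked example of the new `_strides[0] * _shape[0]` capacity for a strided user tensor (same assumed ROI view as in the zero_pipeline.cpp example above): it measures the extent of the parent buffer spanned per step of the outermost dimension, which is what has to be imported into the Level Zero context, rather than the packed size of the ROI shape alone.

// Assumed ROI view: shape {1, 3, 720, 1280}, parent {1, 3, 1080, 1920}, f32 (4 bytes).
//   _strides[0] * _shape[0] = 24883200 * 1 = 24883200 bytes
// versus the packed ROI size ov::util::get_memory_size(f32, 1 * 3 * 720 * 1280) = 11059200 bytes.
// Importing only the packed size would leave parts of the strided view outside the mapped range.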

src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp

Lines changed: 3 additions & 1 deletion
@@ -34,7 +34,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
     virtual std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
                                                                     const Config& config) const = 0;

-    virtual void set_argument_value(uint32_t argi, const void* argv) const = 0;
+    virtual void set_argument_value(uint32_t id, const void* data, const std::vector<size_t>& strides = {}) const = 0;

     virtual void initialize(const Config& config) = 0;

@@ -67,6 +67,8 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
     virtual void set_last_submitted_id(uint32_t id_index) = 0;
     virtual uint32_t get_last_submitted_id() const = 0;

+    virtual bool is_strided_tensor_supported() const = 0;
+
 protected:
     // Used to protect zero pipeline creation in the graph. The pipeline should be created only once per graph when the
     // first inference starts running
