
Commit 1ca62ca

Merge pull request #141 from intel/sahar/2.0_api
Modification to include new API 2.0 changes in the code
2 parents: d83517f + ebdcb21

14 files changed: 804 additions & 451 deletions


onnxruntime/core/providers/openvino/backend_manager.h

Lines changed: 1 addition & 2 deletions
@@ -3,8 +3,7 @@

 #pragma once

-#include <inference_engine.hpp>
-
+#include "ov_interface.h"
 #include "contexts.h"
 #include "ibackend.h"

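Note: the replacement header ov_interface.h is not shown in the hunks on this page. As orientation only, a minimal sketch of the kind of aliases the rest of the diff appears to rely on (OVTensorPtr, OVInferRequestPtr, OVProfilingInfo) could look like the following; the names come from the diff, but every definition below is an assumption, not the actual file contents.

// Hypothetical sketch only -- not the real ov_interface.h from this commit.
// Assumes the OpenVINO 2.0 runtime headers are available (OPENVINO_2022_1 build).
#pragma once

#include <memory>
#include <openvino/openvino.hpp>  // ov::Model, ov::Tensor, ov::InferRequest, ov::ProfilingInfo

namespace onnxruntime {
namespace openvino_ep {

// Aliases matching the names used in backend_utils.cc/.h below (assumed shapes).
using OVTensorPtr = std::shared_ptr<ov::Tensor>;  // exposes data(), get_byte_size(), get_shape()
using OVProfilingInfo = ov::ProfilingInfo;        // node_name, node_type, status, real_time, cpu_time, exec_type

// Wrapper assumed to expose the legacy request via GetObj() and the
// OpenVINO 2.0 ov::InferRequest via GetNewObj(), as used in printPerformanceCounts().
class OVInferRequest;
using OVInferRequestPtr = std::shared_ptr<OVInferRequest>;

}  // namespace openvino_ep
}  // namespace onnxruntime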

onnxruntime/core/providers/openvino/backend_utils.cc

Lines changed: 145 additions & 59 deletions
@@ -7,20 +7,11 @@
 #include <sstream>
 #include <fstream>

-#include <inference_engine.hpp>
-
-#if defined (OPENVINO_2021_4) || (OPENVINO_2022_1)
-using Exception = InferenceEngine::Exception;
-#else
-using Exception = InferenceEngine::details::InferenceEngineException;
-#endif
-
+#include "ov_interface.h"
 #include <ngraph/frontend/onnx_import/onnx.hpp>
 #include <ngraph/pass/convert_fp32_to_fp16.hpp>
 #include <ngraph/pass/constant_folding.hpp>
-
 #include "core/providers/shared_library/provider_api.h"
-
 #include "backend_utils.h"

 namespace onnxruntime {
@@ -107,59 +98,48 @@ struct static_cast_int64 {

 std::shared_ptr<InferenceEngine::CNNNetwork>
 CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
-  std::shared_ptr<ngraph::Function> ng_function;
-  // NGraph Function
+
   if(IsCILogEnabled()) {
     std::cout << "CreateNgraphFunc" << std::endl;
   }

-
 #ifndef NDEBUG
   if (IsDebugEnabled()) {
     DumpOnnxModelProto(model_proto, subgraph_context.subgraph_name + "_static.onnx");
   }
 #endif

-#if (defined OPENVINO_2021_2) || (defined OPENVINO_2021_3)
-  ORT_UNUSED_PARAMETER(const_outputs_map);
-  std::istringstream model_stream{model_proto.SerializeAsString()};
-  try {
+  std::shared_ptr<ngraph::Function> ng_function;
+#if (defined OPENVINO_2021_2) || (defined OPENVINO_2021_3)
+  ORT_UNUSED_PARAMETER(const_outputs_map);
+  std::istringstream model_stream{model_proto.SerializeAsString()};
+  try {
     ng_function = ngraph::onnx_import::import_onnx_model(model_stream);
     LOGS_DEFAULT(INFO) << "ONNX Import Done";
-  } catch (const std::exception& exp) {
-    ORT_THROW(log_tag + "[OpenVINO-EP] Exception while importing model to nGraph Func: " + std::string(exp.what()));
-  } catch (...) {
-    ORT_THROW(log_tag + "[OpenVINO-EP] Unknown exception while importing model to nGraph Func");
-  }
-#else
-  //ReadNetwork() API flow will be used in OpenVINO-EP starting from OpenVINO 2021.4
-  InferenceEngine::CNNNetwork cnn_network;
-  const std::string model = model_proto.SerializeAsString();
-  InferenceEngine::Blob::Ptr blob = {nullptr};
-  try {
-    cnn_network = global_context.ie_core.ReadNetwork(model, blob);
-    LOGS_DEFAULT(INFO) << "Read network Done";
-  } catch (const Exception& e) {
-    ORT_THROW(log_tag + "[OpenVINO-EP] Exception while Reading network: " + std::string(e.what()));
-  } catch (...) {
-    ORT_THROW(log_tag + "[OpenVINO-EP] Unknown exception while Reading network");
-  }
-  ng_function = cnn_network.getFunction();
-#endif
+  } catch (const std::exception& exp) {
+    ORT_THROW(log_tag + "[OpenVINO-EP] Exception while importing model to nGraph Func: " + std::string(exp.what()));
+  } catch (...) {
+    ORT_THROW(log_tag + "[OpenVINO-EP] Unknown exception while importing model to nGraph Func");
+  }
+#elif defined (OPENVINO_2021_4)
+  const std::string model = model_proto.SerializeAsString();
+  auto cnn_network = global_context.ie_core.ReadModel(model);
+  ng_function = cnn_network.getFunction();
+#else
+  ORT_UNUSED_PARAMETER(model_proto);
+#endif

 if (global_context.device_type.find("GPU") != std::string::npos &&
     subgraph_context.precision == InferenceEngine::Precision::FP16) {
   //FP16 transformations
   ngraph::pass::ConvertFP32ToFP16().run_on_function(ng_function);
   ng_function->validate_nodes_and_infer_types();
 }
-
 if (!global_context.is_wholly_supported_graph) {
   std::map<std::string, std::string> result_to_output;
   for (auto& result : ng_function->get_results()) {
     result_to_output[result->get_friendly_name()] = result->input_value(0).get_node_shared_ptr()->get_friendly_name();
   }
-
   ngraph::pass::ConstantFolding().run_on_function(ng_function);
   auto& results = const_cast<::ngraph::ResultVector&>(ng_function->get_results());
   size_t index = results.size() - 1;
@@ -182,13 +162,47 @@ CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalCont
 #endif
   }

-  try {
-    return std::make_shared<InferenceEngine::CNNNetwork>(ng_function);
-  } catch (const Exception& e) {
-    ORT_THROW(log_tag + " Exception thrown while making IE::CNNNetwork: " + e.what());
-  } catch (...) {
-    ORT_THROW(log_tag + " Exception thrown while making IE::CNNNetwork");
+  return std::make_shared<InferenceEngine::CNNNetwork>(ng_function);
+};
+
+std::shared_ptr<ov::Model>
+CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
+
+  if(IsCILogEnabled()) {
+    std::cout << "CreateNgraphFunc" << std::endl;
   }
+
+#ifndef NDEBUG
+  if (IsDebugEnabled()) {
+    DumpOnnxModelProto(model_proto, subgraph_context.subgraph_name + "_static.onnx");
+  }
+#endif
+
+  const std::string model = model_proto.SerializeAsString();
+  auto cnn_network = global_context.ie_core.ReadModel(model);
+  if (global_context.device_type.find("GPU") != std::string::npos &&
+      subgraph_context.precision == InferenceEngine::Precision::FP16) {
+    //FP16 transformations
+    ov::pass::ConvertFP32ToFP16 pass_obj;
+    pass_obj.run_on_model(cnn_network);
+    cnn_network.get()->validate_nodes_and_infer_types();
+  }
+  //Check for Constant Folding
+  if (!global_context.is_wholly_supported_graph) {
+    ov::pass::ConstantFolding pass_const_obj;
+    pass_const_obj.run_on_model(cnn_network);
+    auto& results = const_cast<ov::ResultVector&>(cnn_network.get()->get_results());
+    size_t index = results.size() - 1;
+
+    for (auto it = results.rbegin(); it != results.rend(); ++it) {
+      if (auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
+        const_outputs_map[(*it)->get_friendly_name()] = const_node;
+        results.erase(results.begin() + index);
+      }
+      --index;
+    }
+  }
+  return cnn_network;
 }

 InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type, std::string device) {
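Note: CreateOVModel above is the 2.0-API counterpart of CreateCNNNetwork; it deserializes the ONNX proto through the OpenVINO core and then runs transformation passes directly on the ov::Model. A stand-alone sketch of that pass flow against the public OpenVINO 2.0 API (reading a model from a placeholder file path rather than from the serialized proto used in the diff, error handling omitted) might look like this.

// Minimal sketch of the OpenVINO 2.0 pass flow used by CreateOVModel; "model.onnx" is a
// placeholder path, and this is not the provider's actual code.
#include <memory>
#include <openvino/openvino.hpp>
#include <openvino/pass/constant_folding.hpp>

int main() {
  ov::Core core;
  // read_model also accepts ONNX files directly.
  std::shared_ptr<ov::Model> model = core.read_model("model.onnx");

  // Fold constant subgraphs in place, as CreateOVModel does for partially
  // supported graphs before pruning constant outputs.
  ov::pass::ConstantFolding constant_folding;
  constant_folding.run_on_model(model);

  // Constant results could now be inspected via model->get_results(),
  // mirroring the pruning loop in CreateOVModel.
  return model->get_results().empty() ? 1 : 0;
}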
@@ -255,13 +269,18 @@ void SetIODefs(const ONNX_NAMESPACE::ModelProto& model_proto,

 OrtValue*
 GetOutputTensor(Ort::CustomOpApi& ort, OrtKernelContext* context, size_t batch_size,
-                InferenceEngine::InferRequest::Ptr infer_request,
+                OVInferRequestPtr infer_request,
                 std::string output_name,
                 std::unordered_map<std::string, int> output_names) {
   OrtValue* output_tensor;
+  auto graph_output_blob = infer_request->GetTensor(output_name);
+
+#if defined (OPENVINO_2022_1)
+  auto graph_output_dims = graph_output_blob->get_shape();
+#else
+  auto graph_output_dims = graph_output_blob->TensorDesc().getDims();
+#endif

-  auto graph_output_blob = infer_request->GetBlob(output_name);
-  auto graph_output_dims = graph_output_blob->getTensorDesc().getDims();
   if (batch_size > 1) {
     // Add the batch size as dim 0.
     graph_output_dims.insert(graph_output_dims.begin(), batch_size);
@@ -276,9 +295,7 @@ GetOutputTensor(Ort::CustomOpApi& ort, OrtKernelContext* context, size_t batch_s
     ORT_THROW(log_tag + "Output names mismatch between OpenVINO and ONNX");
   }
   int index = it->second;
-
   output_tensor = ort.KernelContext_GetOutput(context, index, output_shape.get(), num_dims);
-
   return output_tensor;
 }

@@ -445,6 +462,75 @@ perfCountersSorted(std::map<std::string, InferenceEngine::InferenceEngineProfile
   return sorted;
 }

+void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
+                   std::string input_name, Ort::CustomOpApi& ort, OrtKernelContext* context,
+                   const SubGraphContext& subgraph_context) {
+
+  size_t input_data_size = inputBlob->get_byte_size();
+  auto input_data = inputBlob->data();
+  const OrtValue* tensor = ort.KernelContext_GetInput(context, subgraph_context.input_names.at(input_name));
+  auto mem_info = ort.GetTensorMemoryInfo(tensor);
+  if (strcmp(mem_info->name, OpenVINO_GPU) == 0) {
+    ORT_THROW(log_tag + "IO Buffering is not enabled, Please enable Input on CPU");
+  }
+  // Copy input data into OpenVINO's input buffer
+  const char* tensor_data = ort.GetTensorData<char>(tensor);
+  const char* batch_memory_offset = tensor_data + input_data_size * batch_slice_idx;
+  std::memcpy(input_data, batch_memory_offset, input_data_size);
+}
+
+void FillOutputBlob(OVTensorPtr outputBlob, OrtValue* output_tensor,
+                    Ort::CustomOpApi& ort, size_t batch_slice_idx) {
+  auto output_data = outputBlob->data();
+  size_t output_data_size = outputBlob->get_byte_size();
+  char* tensor_data = ort.GetTensorMutableData<char>(output_tensor);
+  char* batch_memory_offset = tensor_data + output_data_size * batch_slice_idx;
+  std::memcpy(batch_memory_offset, output_data, output_data_size);
+}
+
+
+void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
+                            std::ostream& stream, std::string deviceName) {
+  long long totalTime = 0;
+  // Print performance counts
+  stream << std::endl
+         << "performance counts:" << std::endl
+         << std::endl;
+
+  for (const auto& it : performanceMap) {
+    std::string toPrint(it.node_name);
+    const int maxLayerName = 30;
+
+    if (it.node_name.length() >= maxLayerName) {
+      toPrint = it.node_name.substr(0, maxLayerName - 4);
+      toPrint += "...";
+    }
+    stream << std::setw(maxLayerName) << std::left << toPrint;
+    switch (it.status) {
+      case OVProfilingInfo::Status::EXECUTED:
+        stream << std::setw(15) << std::left << "EXECUTED";
+        break;
+      case OVProfilingInfo::Status::NOT_RUN:
+        stream << std::setw(15) << std::left << "NOT_RUN";
+        break;
+      case OVProfilingInfo::Status::OPTIMIZED_OUT:
+        stream << std::setw(15) << std::left << "OPTIMIZED_OUT";
+        break;
+    }
+    stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
+    stream << std::setw(20) << std::left << "realTime: " + std::to_string(it.real_time.count());
+    stream << std::setw(20) << std::left << "cpu: " + std::to_string(it.cpu_time.count());
+    stream << " execType: " << it.exec_type << std::endl;
+    if (it.real_time.count() > 0) {
+      totalTime += it.real_time.count();
+    }
+  }
+  stream << std::setw(20) << std::left << "Total time: " + std::to_string(totalTime) << " microseconds" << std::endl;
+  std::cout << std::endl;
+  std::cout << "Full device name: " << deviceName << std::endl;
+  std::cout << std::endl;
+}
+
 void printPerformanceCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& performanceMap,
                             std::ostream& stream, std::string deviceName) {
   long long totalTime = 0;
@@ -454,7 +540,7 @@ void printPerformanceCounts(const std::map<std::string, InferenceEngine::Inferen
          << std::endl;

   auto performanceMapSorted = perfCountersSorted(performanceMap);
-
+
   for (const auto& it : performanceMapSorted) {
     std::string toPrint(it.first);
     const int maxLayerName = 30;
@@ -489,14 +575,14 @@ void printPerformanceCounts(const std::map<std::string, InferenceEngine::Inferen
   std::cout << std::endl;
 }

-void printPerformanceCounts(InferenceEngine::InferRequest::Ptr request, std::ostream& stream, std::string deviceName) {
-  auto performanceMap = request->GetPerformanceCounts();
-  printPerformanceCounts(performanceMap, stream, deviceName);
-}
-
-void printPerformanceCounts(InferenceEngine::InferRequest request, std::ostream& stream, std::string deviceName) {
-  auto performanceMap = request.GetPerformanceCounts();
-  printPerformanceCounts(performanceMap, stream, deviceName);
+void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName) {
+#if defined (OPENVINO_2022_1)
+  auto performanceMap = request->GetNewObj().get_profiling_info();
+  printPerformanceCounts(performanceMap, stream, deviceName);
+#else
+  auto performanceMap = request->GetObj().GetPerformanceCounts();
+  printPerformanceCounts(performanceMap, stream, deviceName);
+#endif
 }

 } // namespace backend_utils
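Note: the new FillInputBlob and FillOutputBlob above both reduce to the same batch-slice copy — slice i of the batched ORT buffer lives at byte offset slice_bytes * i, and a single memcpy moves it into or out of the per-inference OpenVINO tensor. A small self-contained illustration of that offset arithmetic (hypothetical buffers, no ORT or OpenVINO calls) follows.

// Standalone illustration of the batch-slice copy pattern used by FillInputBlob /
// FillOutputBlob; buffer names are hypothetical.
#include <cstddef>
#include <cstring>
#include <vector>

void CopyBatchSlice(const char* batched_src, char* slice_dst,
                    std::size_t slice_bytes, std::size_t batch_slice_idx) {
  // Slice i of the batched buffer starts at byte offset slice_bytes * i.
  const char* batch_memory_offset = batched_src + slice_bytes * batch_slice_idx;
  std::memcpy(slice_dst, batch_memory_offset, slice_bytes);
}

int main() {
  const std::size_t batch = 4, slice_bytes = 16;
  std::vector<char> batched(batch * slice_bytes, 0);
  std::vector<char> one_slice(slice_bytes);
  // Copy the third slice (index 2) out of the batched buffer, mirroring
  // FillInputBlob's memcpy into an OpenVINO input tensor.
  CopyBatchSlice(batched.data(), one_slice.data(), slice_bytes, 2);
  return 0;
}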

onnxruntime/core/providers/openvino/backend_utils.h

Lines changed: 20 additions & 11 deletions
@@ -3,13 +3,11 @@

 #pragma once

-#include <inference_engine.hpp>
-
 #define ORT_API_MANUAL_INIT
 #include "core/session/onnxruntime_cxx_api.h"
 #include "contexts.h"
 #include <iomanip>
-
+#include "ov_interface.h"
 #ifdef _WIN32
 #include <direct.h>
 #define GetCurrentDir _getcwd
@@ -49,6 +47,9 @@ void SetIODefs(const ONNX_NAMESPACE::ModelProto& model_proto,
 std::shared_ptr<InferenceEngine::CNNNetwork>
 CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);

+std::shared_ptr<ov::Model>
+CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
+
 int GetFirstAvailableDevice(GlobalContext& global_context);

 void FillOutputsWithConstantData(Ort::CustomOpApi& ort, std::shared_ptr<ngraph::Node> node, OrtValue* out_tensor);
@@ -67,26 +68,34 @@ ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type, std::

 OrtValue*
 GetOutputTensor(Ort::CustomOpApi& ort, OrtKernelContext* context, size_t batch_size,
-                InferenceEngine::InferRequest::Ptr infer_request,
+                OVInferRequestPtr infer_request,
                 std::string output_name,
                 std::unordered_map<std::string, int> output_names);

+void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
+                   std::string input_name, Ort::CustomOpApi& ort, OrtKernelContext* context,
+                   const SubGraphContext& subgraph_context);
+
+void FillOutputBlob(OVTensorPtr outputBlob, OrtValue* output_tensor,
+                    Ort::CustomOpApi& ort, size_t batch_slice_idx);
+
+void printPerformanceCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& performanceMap,
+                            std::ostream& stream, std::string deviceName);
+
+std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>>
+perfCountersSorted(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap);
+
 void FillInputBlob(InferenceEngine::Blob::Ptr& inputBlob, size_t batch_slice_idx,
                    std::string input_name, Ort::CustomOpApi& ort, OrtKernelContext* context,
                    InferenceEngine::Precision precision, const SubGraphContext& subgraph_context);

 void FillOutputBlob(InferenceEngine::Blob::Ptr& outputBlob, OrtValue* output_tensor,
                     Ort::CustomOpApi& ort, InferenceEngine::Precision precision, size_t batch_slice_idx);

-std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>>
-perfCountersSorted(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap);
-
-void printPerformanceCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& performanceMap,
+void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
                             std::ostream& stream, std::string deviceName);

-void printPerformanceCounts(InferenceEngine::InferRequest::Ptr request, std::ostream& stream, std::string deviceName);
-
-void printPerformanceCounts(InferenceEngine::InferRequest request, std::ostream& stream, std::string deviceName);
+void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName);

 } // namespace backend_utils
 } // namespace openvino_ep
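Note: the header keeps the legacy InferenceEngine::Blob declarations alongside the new OVTensorPtr ones, and printPerformanceCounts(OVInferRequestPtr, ...) in backend_utils.cc selects the 2.0 profiling path with the OPENVINO_2022_1 macro. A stand-alone sketch of that compile-time dual-accessor pattern, using illustrative stand-in types rather than the provider's real OVInferRequest, might be:

// Hypothetical sketch of the GetObj()/GetNewObj() dispatch pattern; all types here
// are stand-ins, not the execution provider's real symbols.
#include <iostream>
#include <string>
#include <vector>

struct LegacyInferRequest {
  std::vector<std::string> GetPerformanceCounts() const { return {"layer_a", "layer_b"}; }
};
struct Ov2InferRequest {
  std::vector<std::string> get_profiling_info() const { return {"layer_a", "layer_b"}; }
};

// Wrapper exposing both runtimes, mirroring the accessors used above.
class InferRequestWrapper {
 public:
  LegacyInferRequest& GetObj() { return legacy_; }
  Ov2InferRequest& GetNewObj() { return ov2_; }

 private:
  LegacyInferRequest legacy_;
  Ov2InferRequest ov2_;
};

int main() {
  InferRequestWrapper request;
#if defined(OPENVINO_2022_1)
  auto performance = request.GetNewObj().get_profiling_info();  // OpenVINO 2.0 profiling path
#else
  auto performance = request.GetObj().GetPerformanceCounts();   // legacy InferenceEngine path
#endif
  std::cout << "collected " << performance.size() << " performance entries" << std::endl;
  return 0;
}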
