 #include <sstream>
 #include <fstream>
 
-#include <inference_engine.hpp>
-
-#if defined(OPENVINO_2021_4) || (OPENVINO_2022_1)
-using Exception = InferenceEngine::Exception;
-#else
-using Exception = InferenceEngine::details::InferenceEngineException;
-#endif
-
+#include "ov_interface.h"
 #include <ngraph/frontend/onnx_import/onnx.hpp>
 #include <ngraph/pass/convert_fp32_to_fp16.hpp>
 #include <ngraph/pass/constant_folding.hpp>
-
 #include "core/providers/shared_library/provider_api.h"
-
 #include "backend_utils.h"
 
 namespace onnxruntime {
@@ -107,59 +98,48 @@ struct static_cast_int64 {
 
 std::shared_ptr<InferenceEngine::CNNNetwork>
 CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
-  std::shared_ptr<ngraph::Function> ng_function;
-  // NGraph Function
+
   if (IsCILogEnabled()) {
     std::cout << "CreateNgraphFunc" << std::endl;
   }
 
-
 #ifndef NDEBUG
   if (IsDebugEnabled()) {
     DumpOnnxModelProto(model_proto, subgraph_context.subgraph_name + "_static.onnx");
   }
 #endif
 
-#if (defined OPENVINO_2021_2) || (defined OPENVINO_2021_3)
-  ORT_UNUSED_PARAMETER(const_outputs_map);
-  std::istringstream model_stream{model_proto.SerializeAsString()};
-  try {
+  std::shared_ptr<ngraph::Function> ng_function;
+#if (defined OPENVINO_2021_2) || (defined OPENVINO_2021_3)
+  ORT_UNUSED_PARAMETER(const_outputs_map);
+  std::istringstream model_stream{model_proto.SerializeAsString()};
+  try {
     ng_function = ngraph::onnx_import::import_onnx_model(model_stream);
     LOGS_DEFAULT(INFO) << "ONNX Import Done";
-  } catch (const std::exception& exp) {
-    ORT_THROW(log_tag + "[OpenVINO-EP] Exception while importing model to nGraph Func: " + std::string(exp.what()));
-  } catch (...) {
-    ORT_THROW(log_tag + "[OpenVINO-EP] Unknown exception while importing model to nGraph Func");
-  }
-#else
-  // ReadNetwork() API flow will be used in OpenVINO-EP starting from OpenVINO 2021.4
-  InferenceEngine::CNNNetwork cnn_network;
-  const std::string model = model_proto.SerializeAsString();
-  InferenceEngine::Blob::Ptr blob = {nullptr};
-  try {
-    cnn_network = global_context.ie_core.ReadNetwork(model, blob);
-    LOGS_DEFAULT(INFO) << "Read network Done";
-  } catch (const Exception& e) {
-    ORT_THROW(log_tag + "[OpenVINO-EP] Exception while Reading network: " + std::string(e.what()));
-  } catch (...) {
-    ORT_THROW(log_tag + "[OpenVINO-EP] Unknown exception while Reading network");
-  }
-  ng_function = cnn_network.getFunction();
-#endif
+  } catch (const std::exception& exp) {
+    ORT_THROW(log_tag + "[OpenVINO-EP] Exception while importing model to nGraph Func: " + std::string(exp.what()));
+  } catch (...) {
+    ORT_THROW(log_tag + "[OpenVINO-EP] Unknown exception while importing model to nGraph Func");
+  }
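+  // 2021.4 hands the serialized proto to the IE core reader; later versions fall through with
+  // model_proto unused here.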
+#elif defined(OPENVINO_2021_4)
+  const std::string model = model_proto.SerializeAsString();
+  auto cnn_network = global_context.ie_core.ReadModel(model);
+  ng_function = cnn_network.getFunction();
+#else
+  ORT_UNUSED_PARAMETER(model_proto);
+#endif
 
   if (global_context.device_type.find("GPU") != std::string::npos &&
       subgraph_context.precision == InferenceEngine::Precision::FP16) {
     // FP16 transformations
     ngraph::pass::ConvertFP32ToFP16().run_on_function(ng_function);
     ng_function->validate_nodes_and_infer_types();
   }
-
   if (!global_context.is_wholly_supported_graph) {
     std::map<std::string, std::string> result_to_output;
     for (auto& result : ng_function->get_results()) {
       result_to_output[result->get_friendly_name()] = result->input_value(0).get_node_shared_ptr()->get_friendly_name();
     }
-
     ngraph::pass::ConstantFolding().run_on_function(ng_function);
     auto& results = const_cast<::ngraph::ResultVector&>(ng_function->get_results());
     size_t index = results.size() - 1;
@@ -182,13 +162,47 @@ CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalCont
 #endif
   }
 
-  try {
-    return std::make_shared<InferenceEngine::CNNNetwork>(ng_function);
-  } catch (const Exception& e) {
-    ORT_THROW(log_tag + "Exception thrown while making IE::CNNNetwork: " + e.what());
-  } catch (...) {
-    ORT_THROW(log_tag + "Exception thrown while making IE::CNNNetwork");
+  return std::make_shared<InferenceEngine::CNNNetwork>(ng_function);
+};
+
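+// Builds an ov::Model for the subgraph: serialize the ONNX proto, read it through the OV core,
+// then apply the FP16 and constant-folding passes used for GPU / partially supported graphs.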
+std::shared_ptr<ov::Model>
+CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
+
+  if (IsCILogEnabled()) {
+    std::cout << "CreateNgraphFunc" << std::endl;
   }
+
+#ifndef NDEBUG
+  if (IsDebugEnabled()) {
+    DumpOnnxModelProto(model_proto, subgraph_context.subgraph_name + "_static.onnx");
+  }
+#endif
+
+  const std::string model = model_proto.SerializeAsString();
+  auto cnn_network = global_context.ie_core.ReadModel(model);
+  if (global_context.device_type.find("GPU") != std::string::npos &&
+      subgraph_context.precision == InferenceEngine::Precision::FP16) {
+    // FP16 transformations
+    ov::pass::ConvertFP32ToFP16 pass_obj;
+    pass_obj.run_on_model(cnn_network);
+    cnn_network.get()->validate_nodes_and_infer_types();
+  }
+  // Check for Constant Folding
+  if (!global_context.is_wholly_supported_graph) {
+    ov::pass::ConstantFolding pass_const_obj;
+    pass_const_obj.run_on_model(cnn_network);
+    auto& results = const_cast<ov::ResultVector&>(cnn_network.get()->get_results());
+    size_t index = results.size() - 1;
+
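+    // Walk the results in reverse: Result nodes whose producer folded to a Constant are dropped
+    // from the model and reported through const_outputs_map instead.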
+    for (auto it = results.rbegin(); it != results.rend(); ++it) {
+      if (auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
+        const_outputs_map[(*it)->get_friendly_name()] = const_node;
+        results.erase(results.begin() + index);
+      }
+      --index;
+    }
+  }
+  return cnn_network;
 }
 
 InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type, std::string device) {
@@ -255,13 +269,18 @@ void SetIODefs(const ONNX_NAMESPACE::ModelProto& model_proto,
 
 OrtValue*
 GetOutputTensor(Ort::CustomOpApi& ort, OrtKernelContext* context, size_t batch_size,
-                InferenceEngine::InferRequest::Ptr infer_request,
+                OVInferRequestPtr infer_request,
                 std::string output_name,
                 std::unordered_map<std::string, int> output_names) {
   OrtValue* output_tensor;
+  auto graph_output_blob = infer_request->GetTensor(output_name);
+
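+  // Output shape: ov::Tensor::get_shape() on 2022.1, the tensor-descriptor dims on older releases.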
+#if defined(OPENVINO_2022_1)
+  auto graph_output_dims = graph_output_blob->get_shape();
+#else
+  auto graph_output_dims = graph_output_blob->TensorDesc().getDims();
+#endif
 
-  auto graph_output_blob = infer_request->GetBlob(output_name);
-  auto graph_output_dims = graph_output_blob->getTensorDesc().getDims();
   if (batch_size > 1) {
     // Add the batch size as dim 0.
     graph_output_dims.insert(graph_output_dims.begin(), batch_size);
@@ -276,9 +295,7 @@ GetOutputTensor(Ort::CustomOpApi& ort, OrtKernelContext* context, size_t batch_s
     ORT_THROW(log_tag + "Output names mismatch between OpenVINO and ONNX");
   }
   int index = it->second;
-
   output_tensor = ort.KernelContext_GetOutput(context, index, output_shape.get(), num_dims);
-
   return output_tensor;
 }
 
@@ -445,6 +462,75 @@ perfCountersSorted(std::map<std::string, InferenceEngine::InferenceEngineProfile
   return sorted;
 }
 
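+// Copies one batch slice of the ORT input tensor into the OpenVINO input tensor's buffer,
+// rejecting inputs that already live on the GPU (no IO buffering in this path).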
+void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
+                   std::string input_name, Ort::CustomOpApi& ort, OrtKernelContext* context,
+                   const SubGraphContext& subgraph_context) {
+
+  size_t input_data_size = inputBlob->get_byte_size();
+  auto input_data = inputBlob->data();
+  const OrtValue* tensor = ort.KernelContext_GetInput(context, subgraph_context.input_names.at(input_name));
+  auto mem_info = ort.GetTensorMemoryInfo(tensor);
+  if (strcmp(mem_info->name, OpenVINO_GPU) == 0) {
+    ORT_THROW(log_tag + "IO Buffering is not enabled, Please enable Input on CPU");
+  }
+  // Copy input data into OpenVINO's input buffer
+  const char* tensor_data = ort.GetTensorData<char>(tensor);
+  const char* batch_memory_offset = tensor_data + input_data_size * batch_slice_idx;
+  std::memcpy(input_data, batch_memory_offset, input_data_size);
+}
+
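+// Copies the OpenVINO output tensor back into the corresponding batch slice of the ORT output tensor.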
+void FillOutputBlob(OVTensorPtr outputBlob, OrtValue* output_tensor,
+                    Ort::CustomOpApi& ort, size_t batch_slice_idx) {
+  auto output_data = outputBlob->data();
+  size_t output_data_size = outputBlob->get_byte_size();
+  char* tensor_data = ort.GetTensorMutableData<char>(output_tensor);
+  char* batch_memory_offset = tensor_data + output_data_size * batch_slice_idx;
+  std::memcpy(batch_memory_offset, output_data, output_data_size);
+}
+
+
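+// Pretty-prints per-layer profiling records (OVProfilingInfo) and the accumulated execution time.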
+void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
+                            std::ostream& stream, std::string deviceName) {
+  long long totalTime = 0;
+  // Print performance counts
+  stream << std::endl
+         << "performance counts:" << std::endl
+         << std::endl;
+
+  for (const auto& it : performanceMap) {
+    std::string toPrint(it.node_name);
+    const int maxLayerName = 30;
+
+    if (it.node_name.length() >= maxLayerName) {
+      toPrint = it.node_name.substr(0, maxLayerName - 4);
+      toPrint += "...";
+    }
+    stream << std::setw(maxLayerName) << std::left << toPrint;
+    switch (it.status) {
+      case OVProfilingInfo::Status::EXECUTED:
+        stream << std::setw(15) << std::left << "EXECUTED";
+        break;
+      case OVProfilingInfo::Status::NOT_RUN:
+        stream << std::setw(15) << std::left << "NOT_RUN";
+        break;
+      case OVProfilingInfo::Status::OPTIMIZED_OUT:
+        stream << std::setw(15) << std::left << "OPTIMIZED_OUT";
+        break;
+    }
+    stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
+    stream << std::setw(20) << std::left << "realTime: " + std::to_string(it.real_time.count());
+    stream << std::setw(20) << std::left << "cpu: " + std::to_string(it.cpu_time.count());
+    stream << " execType: " << it.exec_type << std::endl;
+    if (it.real_time.count() > 0) {
+      totalTime += it.real_time.count();
+    }
+  }
+  stream << std::setw(20) << std::left << "Total time: " + std::to_string(totalTime) << " microseconds" << std::endl;
+  std::cout << std::endl;
+  std::cout << "Full device name: " << deviceName << std::endl;
+  std::cout << std::endl;
+}
+
 void printPerformanceCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& performanceMap,
                             std::ostream& stream, std::string deviceName) {
   long long totalTime = 0;
@@ -454,7 +540,7 @@ void printPerformanceCounts(const std::map<std::string, InferenceEngine::Inferen
          << std::endl;
 
   auto performanceMapSorted = perfCountersSorted(performanceMap);
-
+
   for (const auto& it : performanceMapSorted) {
     std::string toPrint(it.first);
     const int maxLayerName = 30;
@@ -489,14 +575,14 @@ void printPerformanceCounts(const std::map<std::string, InferenceEngine::Inferen
   std::cout << std::endl;
 }
 
-void printPerformanceCounts(InferenceEngine::InferRequest::Ptr request, std::ostream& stream, std::string deviceName) {
-  auto performanceMap = request->GetPerformanceCounts();
-  printPerformanceCounts(performanceMap, stream, deviceName);
-}
-
-void printPerformanceCounts(InferenceEngine::InferRequest request, std::ostream& stream, std::string deviceName) {
-  auto performanceMap = request.GetPerformanceCounts();
-  printPerformanceCounts(performanceMap, stream, deviceName);
+void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName) {
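+  // Both branches forward to the overload above that matches the profiling-info type the
+  // underlying request returns (OVProfilingInfo for 2022.1, InferenceEngineProfileInfo otherwise).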
+#if defined(OPENVINO_2022_1)
+  auto performanceMap = request->GetNewObj().get_profiling_info();
+  printPerformanceCounts(performanceMap, stream, deviceName);
+#else
+  auto performanceMap = request->GetObj().GetPerformanceCounts();
+  printPerformanceCounts(performanceMap, stream, deviceName);
+#endif
 }
 
 }  // namespace backend_utils