From 1355b8ef28d865a75c7ca1a953ecf07e84c9bb0e Mon Sep 17 00:00:00 2001 From: ivikhrev Date: Thu, 31 Mar 2022 18:36:58 +0300 Subject: [PATCH 1/6] fix code style for model api demos --- .../cpp/grid_mat.hpp | 80 +++-- .../cpp/main.cpp | 89 +++-- .../models/associative_embedding_decoder.h | 78 +++-- .../include/models/classification_model.h | 19 +- .../models/include/models/deblurring_model.h | 22 +- .../models/include/models/detection_model.h | 15 +- .../models/detection_model_centernet.h | 30 +- .../models/detection_model_faceboxes.h | 37 ++- .../models/detection_model_retinaface.h | 49 +-- .../models/detection_model_retinaface_pt.h | 55 +++- .../include/models/detection_model_ssd.h | 25 +- .../include/models/detection_model_yolo.h | 52 ++- .../models/hpe_model_associative_embedding.h | 29 +- .../include/models/hpe_model_openpose.h | 32 +- .../cpp/models/include/models/image_model.h | 17 +- .../cpp/models/include/models/input_data.h | 6 +- .../include/models/internal_model_data.h | 17 +- .../include/models/jpeg_restoration_model.h | 13 +- .../cpp/models/include/models/model_base.h | 34 +- .../models/include/models/openpose_decoder.h | 35 +- .../cpp/models/include/models/results.h | 49 +-- .../include/models/segmentation_model.h | 11 +- .../include/models/style_transfer_model.h | 17 +- .../include/models/super_resolution_model.h | 23 +- .../src/associative_embedding_decoder.cpp | 73 +++-- .../cpp/models/src/classification_model.cpp | 88 ++--- .../cpp/models/src/deblurring_model.cpp | 66 ++-- .../common/cpp/models/src/detection_model.cpp | 19 +- .../models/src/detection_model_centernet.cpp | 107 +++--- .../models/src/detection_model_faceboxes.cpp | 118 ++++--- .../models/src/detection_model_retinaface.cpp | 146 +++++---- .../src/detection_model_retinaface_pt.cpp | 128 ++++---- .../cpp/models/src/detection_model_ssd.cpp | 133 ++++---- .../cpp/models/src/detection_model_yolo.cpp | 308 +++++++++++------- .../src/hpe_model_associative_embedding.cpp | 90 ++--- .../cpp/models/src/hpe_model_openpose.cpp | 113 ++++--- demos/common/cpp/models/src/image_model.cpp | 19 +- .../cpp/models/src/jpeg_restoration.cpp | 66 ++-- demos/common/cpp/models/src/model_base.cpp | 18 +- .../cpp/models/src/openpose_decoder.cpp | 138 ++++---- .../cpp/models/src/segmentation_model.cpp | 83 ++--- .../cpp/models/src/style_transfer_model.cpp | 52 +-- .../cpp/models/src/super_resolution_model.cpp | 53 +-- .../include/pipelines/async_pipeline.h | 58 +++- .../pipelines/include/pipelines/metadata.h | 23 +- .../include/pipelines/requests_pool.h | 18 +- .../cpp/pipelines/src/async_pipeline.cpp | 56 ++-- .../cpp/pipelines/src/requests_pool.cpp | 24 +- .../cpp/utils/include/utils/config_factory.h | 18 +- demos/common/cpp/utils/src/config_factory.cpp | 33 +- demos/human_pose_estimation_demo/cpp/main.cpp | 217 +++++++----- demos/image_processing_demo/cpp/main.cpp | 131 ++++---- .../image_processing_demo/cpp/visualizer.cpp | 51 ++- .../image_processing_demo/cpp/visualizer.hpp | 15 +- demos/object_detection_demo/cpp/main.cpp | 221 ++++++++----- demos/segmentation_demo/cpp/main.cpp | 139 ++++---- 56 files changed, 2221 insertions(+), 1435 deletions(-) diff --git a/demos/classification_benchmark_demo/cpp/grid_mat.hpp b/demos/classification_benchmark_demo/cpp/grid_mat.hpp index 97f5dcde62c..03d6073591f 100644 --- a/demos/classification_benchmark_demo/cpp/grid_mat.hpp +++ b/demos/classification_benchmark_demo/cpp/grid_mat.hpp @@ -1,24 +1,22 @@ -// Copyright (C) 2018-2019 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation // 
SPDX-License-Identifier: Apache-2.0 // #pragma once #include +#include #include #include #include -#include + +#include +#include #include #include -#include -#include - -enum class PredictionResult { Correct, - Incorrect, - Unknown }; +enum class PredictionResult { Correct, Incorrect, Unknown }; class GridMat { public: @@ -27,9 +25,8 @@ class GridMat { explicit GridMat(Presenter& presenter, const cv::Size maxDisp = cv::Size{1920, 1080}, const cv::Size aspectRatio = cv::Size{16, 9}, - double targetFPS = 60 - ): - currSourceId{0} { + double targetFPS = 60) + : currSourceId{0} { cv::Size size(static_cast(std::round(sqrt(1. * targetFPS * aspectRatio.width / aspectRatio.height))), static_cast(std::round(sqrt(1. * targetFPS * aspectRatio.height / aspectRatio.width)))); if (size.width == 0 || size.height == 0) { @@ -45,12 +42,13 @@ class GridMat { } outImg.create((cellSize.height * size.height) + presenter.graphSize.height, - cellSize.width * size.width, CV_8UC3); + cellSize.width * size.width, + CV_8UC3); outImg.setTo(0); textSize = cv::getTextSize("", fontType, fontScale, thickness, &baseline); accuracyMessageSize = cv::getTextSize("Accuracy (top 0): 0.000", fontType, fontScale, thickness, &baseline); - testMessageSize = cv::getTextSize(testMessage, fontType, fontScale, thickness, &baseline); + testMessageSize = cv::getTextSize(GridMat::testMessage, fontType, fontScale, thickness, &baseline); } void textUpdate(PerformanceMetrics& metrics, @@ -60,28 +58,37 @@ class GridMat { bool isFpsTest, bool showAccuracy, Presenter& presenter) { - rectangle(outImg, - {0, 0}, {outImg.cols, presenter.graphSize.height}, - cv::Scalar(0, 0, 0), cv::FILLED); + rectangle(outImg, {0, 0}, {outImg.cols, presenter.graphSize.height}, cv::Scalar(0, 0, 0), cv::FILLED); presenter.drawGraphs(outImg); - metrics.update(lastRequestStartTime, outImg, cv::Point(textPadding, textSize.height + textPadding), - fontType, fontScale, cv::Scalar(255, 100, 100), thickness); + metrics.update(lastRequestStartTime, + outImg, + cv::Point(textPadding, textSize.height + textPadding), + fontType, + fontScale, + cv::Scalar(255, 100, 100), + thickness); if (showAccuracy) { cv::putText(outImg, cv::format("Accuracy (top %d): %.3f", nTop, accuracy), cv::Point(outImg.cols - accuracyMessageSize.width - textPadding, textSize.height + textPadding), - fontType, fontScale, cv::Scalar(255, 255, 255), thickness); + fontType, + fontScale, + cv::Scalar(255, 255, 255), + thickness); } if (isFpsTest) { - cv::putText(outImg, - testMessage, - cv::Point(outImg.cols - testMessageSize.width - textPadding, - (textSize.height + textPadding) * 2), - fontType, fontScale, cv::Scalar(50, 50, 255), thickness); + cv::putText( + outImg, + GridMat::testMessage, + cv::Point(outImg.cols - testMessageSize.width - textPadding, (textSize.height + textPadding) * 2), + fontType, + fontScale, + cv::Scalar(50, 50, 255), + thickness); } } @@ -94,11 +101,14 @@ class GridMat { cv::Scalar textColor; switch (predictionResul) { case PredictionResult::Correct: - textColor = cv::Scalar(75, 255, 75); break; // green + textColor = cv::Scalar(75, 255, 75); + break; // green case PredictionResult::Incorrect: - textColor = cv::Scalar(50, 50, 255); break; // red + textColor = cv::Scalar(50, 50, 255); + break; // red case PredictionResult::Unknown: - textColor = cv::Scalar(200, 10, 10); break; // blue + textColor = cv::Scalar(200, 10, 10); + break; // blue default: throw std::runtime_error("Undefined type of prediction result"); } @@ -106,9 +116,13 @@ class GridMat { cv::Size labelTextSize = 
cv::getTextSize(label, fontType, 1, 2, &baseline); double labelFontScale = static_cast(cellSize.width - 2 * labelThickness) / labelTextSize.width; cv::resize(mat, prevImg, cellSize); - putHighlightedText(prevImg, label, - cv::Point(labelThickness, cellSize.height - labelThickness - labelTextSize.height), - fontType, labelFontScale, textColor, 2); + putHighlightedText(prevImg, + label, + cv::Point(labelThickness, cellSize.height - labelThickness - labelTextSize.height), + fontType, + labelFontScale, + textColor, + 2); cv::Mat cell = outImg(cv::Rect(points[currSourceId], cellSize)); prevImg.copyTo(cell); cv::rectangle(cell, {0, 0}, {cell.cols, cell.rows}, {255, 50, 50}, labelThickness); // draw a border @@ -129,11 +143,13 @@ class GridMat { static constexpr double fontScale = 1.5; static const int thickness = 2; static const int textPadding = 10; - static const std::string testMessage; + static constexpr const char testMessage[] = "Testing, please wait..."; int baseline; cv::Size textSize; cv::Size accuracyMessageSize; cv::Size testMessageSize; }; -const std::string GridMat::testMessage = "Testing, please wait..."; +// defenition of the static member needed to remove clang build error +// https://stackoverflow.com/questions/8016780/undefined-reference-to-static-constexpr-char +constexpr const char GridMat::testMessage[]; diff --git a/demos/classification_benchmark_demo/cpp/main.cpp b/demos/classification_benchmark_demo/cpp/main.cpp index 3ad9a756caf..9f43a401dcc 100644 --- a/demos/classification_benchmark_demo/cpp/main.cpp +++ b/demos/classification_benchmark_demo/cpp/main.cpp @@ -2,25 +2,38 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + +#include #include +#include #include #include #include +#include +#include +#include +#include #include +#include #include #include -#include +#include +#include +#include #include #include +#include +#include #include +#include #include #include - #include #include -#include +#include #include #include @@ -88,7 +101,7 @@ static void showUsage() { std::cout << " -u " << utilization_monitors_message << std::endl; } -bool ParseAndCheckCommandLine(int argc, char *argv[]) { +bool ParseAndCheckCommandLine(int argc, char* argv[]) { // ---------------------------Parsing and validation of input args-------------------------------------- gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); if (FLAGS_h) { @@ -119,7 +132,7 @@ cv::Mat centerSquareCrop(const cv::Mat& image) { return image(cv::Rect(0, (image.rows - image.cols) / 2, image.cols, image.cols)); } -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { try { PerformanceMetrics metrics, readerMetrics, renderMetrics; @@ -132,7 +145,8 @@ int main(int argc, char *argv[]) { std::vector imageNames; std::vector inputImages; parseInputFilesArguments(imageNames); - if (imageNames.empty()) throw std::runtime_error("No images provided"); + if (imageNames.empty()) + throw std::runtime_error("No images provided"); std::sort(imageNames.begin(), imageNames.end()); for (size_t i = 0; i < imageNames.size(); i++) { const std::string& name = imageNames[i]; @@ -194,18 +208,20 @@ int main(int argc, char *argv[]) { //------------------------------ Running routines ---------------------------------------------- std::vector labels = ClassificationModel::loadLabels(FLAGS_labels); - for (const auto & classIndex : classIndices) { + for (const auto& classIndex : classIndices) { if (classIndex >= labels.size()) { - throw std::runtime_error("Class index " + std::to_string(classIndex) - + " is outside the 
range supported by the model."); - } + throw std::runtime_error("Class index " + std::to_string(classIndex) + + " is outside the range supported by the model."); + } } slog::info << ov::get_openvino_version() << slog::endl; ov::Core core; - AsyncPipeline pipeline(std::unique_ptr(new ClassificationModel(FLAGS_m, FLAGS_nt, FLAGS_auto_resize, labels, FLAGS_layout)), - ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), core); + AsyncPipeline pipeline(std::unique_ptr( + new ClassificationModel(FLAGS_m, FLAGS_nt, FLAGS_auto_resize, labels, FLAGS_layout)), + ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), + core); Presenter presenter(FLAGS_u, 0); int width; @@ -238,9 +254,11 @@ int main(int argc, char *argv[]) { if (isTestMode && elapsedSeconds >= testDuration) { isTestMode = false; typedef std::chrono::duration Sec; - gridMat = GridMat(presenter, cv::Size(width, height), cv::Size(16, 9), - (framesNum - framesNumOnCalculationStart) / std::chrono::duration_cast( - fpsCalculationDuration).count()); + gridMat = GridMat(presenter, + cv::Size(width, height), + cv::Size(16, 9), + (framesNum - framesNumOnCalculationStart) / + std::chrono::duration_cast(fpsCalculationDuration).count()); metrics = PerformanceMetrics(); startTime = std::chrono::steady_clock::now(); framesNum = 0; @@ -252,14 +270,17 @@ int main(int argc, char *argv[]) { auto imageStartTime = std::chrono::steady_clock::now(); pipeline.submitData(ImageInputData(inputImages[nextImageIndex]), - std::make_shared(inputImages[nextImageIndex], imageStartTime, classIndices[nextImageIndex])); + std::make_shared(inputImages[nextImageIndex], + imageStartTime, + classIndices[nextImageIndex])); nextImageIndex++; if (nextImageIndex == imageNames.size()) { nextImageIndex = 0; } } - //--- Waiting for free input slot or output data available. Function will return immediately if any of them are available. + //--- Waiting for free input slot or output data available. Function will return immediately if any of them + // are available. 
pipeline.waitForData(false); //--- Checking for results and rendering data if it's ready @@ -269,8 +290,8 @@ int main(int argc, char *argv[]) { if (!classificationResult.metaData) { throw std::invalid_argument("Renderer: metadata is null"); } - const ClassificationImageMetaData& classificationImageMetaData - = classificationResult.metaData->asRef(); + const ClassificationImageMetaData& classificationImageMetaData = + classificationResult.metaData->asRef(); auto outputImg = classificationImageMetaData.img; @@ -295,8 +316,13 @@ int main(int argc, char *argv[]) { framesNum++; gridMat.updateMat(outputImg, label, predictionResult); accuracy = static_cast(correctPredictionsCount) / framesNum; - gridMat.textUpdate(metrics, classificationResult.metaData->asRef().timeStamp, accuracy, FLAGS_nt, isTestMode, - !FLAGS_gt.empty(), presenter); + gridMat.textUpdate(metrics, + classificationResult.metaData->asRef().timeStamp, + accuracy, + FLAGS_nt, + isTestMode, + !FLAGS_gt.empty(), + presenter); renderMetrics.update(renderingStart); elapsedSeconds = std::chrono::steady_clock::now() - startTime; if (!FLAGS_no_show) { @@ -305,8 +331,8 @@ int main(int argc, char *argv[]) { int key = cv::waitKey(1); if (27 == key || 'q' == key || 'Q' == key) { // Esc keepRunning = false; - } - else if (32 == key || 'r' == key || 'R' == key) { // press space or r to restart testing if needed + } else if (32 == key || 'r' == key || + 'R' == key) { // press space or r to restart testing if needed isTestMode = true; framesNum = 0; framesNumOnCalculationStart = 0; @@ -314,8 +340,7 @@ int main(int argc, char *argv[]) { accuracy = 0; elapsedSeconds = std::chrono::steady_clock::duration(0); startTime = std::chrono::steady_clock::now(); - } - else { + } else { presenter.handleKey(key); } } @@ -328,16 +353,16 @@ int main(int argc, char *argv[]) { slog::info << "Metrics report:" << slog::endl; metrics.logTotal(); - logLatencyPerStage(readerMetrics.getTotal().latency, pipeline.getPreprocessMetrics().getTotal().latency, - pipeline.getInferenceMetircs().getTotal().latency, pipeline.getPostprocessMetrics().getTotal().latency, - renderMetrics.getTotal().latency); + logLatencyPerStage(readerMetrics.getTotal().latency, + pipeline.getPreprocessMetrics().getTotal().latency, + pipeline.getInferenceMetircs().getTotal().latency, + pipeline.getPostprocessMetrics().getTotal().latency, + renderMetrics.getTotal().latency); slog::info << presenter.reportMeans() << slog::endl; - } - catch (const std::exception& error) { + } catch (const std::exception& error) { slog::err << error.what() << slog::endl; return 1; - } - catch (...) { + } catch (...) { slog::err << "Unknown/internal exception happened." 
<< slog::endl; return 1; } diff --git a/demos/common/cpp/models/include/models/associative_embedding_decoder.h b/demos/common/cpp/models/include/models/associative_embedding_decoder.h index c3e29edfbb7..94afbda91a3 100644 --- a/demos/common/cpp/models/include/models/associative_embedding_decoder.h +++ b/demos/common/cpp/models/include/models/associative_embedding_decoder.h @@ -15,15 +15,18 @@ */ #pragma once +#include + +#include +#include + #include struct Peak { - explicit Peak(const cv::Point2f& keypoint = cv::Point2f(-1, -1), - const float score = 0.0f, - const float tag = 0.0f) : - keypoint(keypoint), - score(score), - tag(tag) {} + explicit Peak(const cv::Point2f& keypoint = cv::Point2f(-1, -1), const float score = 0.0f, const float tag = 0.0f) + : keypoint(keypoint), + score(score), + tag(tag) {} cv::Point2f keypoint; float score; @@ -31,48 +34,61 @@ struct Peak { }; class Pose { - public: - explicit Pose(size_t numJoints) : peaks(numJoints) {} +public: + explicit Pose(size_t numJoints) : peaks(numJoints) {} - void add(size_t index, Peak peak) { - peaks[index] = peak; - sum += peak.score; - poseTag = poseTag * static_cast(validPointsNum) + peak.tag; - poseCenter = poseCenter * static_cast(validPointsNum) + peak.keypoint; - validPointsNum += 1; - poseTag = poseTag / static_cast(validPointsNum); - poseCenter = poseCenter / static_cast(validPointsNum); - } + void add(size_t index, Peak peak) { + peaks[index] = peak; + sum += peak.score; + poseTag = poseTag * static_cast(validPointsNum) + peak.tag; + poseCenter = poseCenter * static_cast(validPointsNum) + peak.keypoint; + validPointsNum += 1; + poseTag = poseTag / static_cast(validPointsNum); + poseCenter = poseCenter / static_cast(validPointsNum); + } - float getPoseTag() const { return poseTag; } + float getPoseTag() const { + return poseTag; + } - float getMeanScore() const { return sum / static_cast(size()); } + float getMeanScore() const { + return sum / static_cast(size()); + } - Peak& getPeak(size_t index) { return peaks[index]; } + Peak& getPeak(size_t index) { + return peaks[index]; + } - cv::Point2f& getPoseCenter() { return poseCenter; } + cv::Point2f& getPoseCenter() { + return poseCenter; + } - size_t size() const { return peaks.size(); } + size_t size() const { + return peaks.size(); + } - private: - std::vector peaks; - cv::Point2f poseCenter = cv::Point2f(0.f, 0.f); - int validPointsNum = 0; - float poseTag = 0; - float sum = 0; +private: + std::vector peaks; + cv::Point2f poseCenter = cv::Point2f(0.f, 0.f); + int validPointsNum = 0; + float poseTag = 0; + float sum = 0; }; void findPeaks(const std::vector& nmsHeatMaps, const std::vector& aembdsMaps, std::vector>& allPeaks, - size_t jointId, size_t maxNumPeople, + size_t jointId, + size_t maxNumPeople, float detectionThreshold); std::vector matchByTag(std::vector>& allPeaks, - size_t maxNumPeople, size_t numJoints, + size_t maxNumPeople, + size_t numJoints, float tagThreshold); void adjustAndRefine(std::vector& allPoses, const std::vector& heatMaps, const std::vector& aembdsMaps, - int poseId, float delta); + int poseId, + float delta); diff --git a/demos/common/cpp/models/include/models/classification_model.h b/demos/common/cpp/models/include/models/classification_model.h index d6cc5cf3483..6d32e44283d 100644 --- a/demos/common/cpp/models/include/models/classification_model.h +++ b/demos/common/cpp/models/include/models/classification_model.h @@ -15,11 +15,19 @@ */ #pragma once +#include + +#include #include #include -#include + #include "models/image_model.h" -#include 
"models/results.h" + +namespace ov { +class Model; +} // namespace ov +struct InferenceResult; +struct ResultBase; class ClassificationModel : public ImageModel { public: @@ -31,8 +39,11 @@ class ClassificationModel : public ImageModel { /// Otherwise, image will be preprocessed and resized using OpenCV routines. /// @param labels - array of labels for every class. /// @param layout - model input layout - ClassificationModel(const std::string& modelFileName, size_t nTop, bool useAutoResize, - const std::vector& labels, const std::string& layout = ""); + ClassificationModel(const std::string& modelFileName, + size_t nTop, + bool useAutoResize, + const std::vector& labels, + const std::string& layout = ""); std::unique_ptr postprocess(InferenceResult& infResult) override; diff --git a/demos/common/cpp/models/include/models/deblurring_model.h b/demos/common/cpp/models/include/models/deblurring_model.h index 2ee548eba0b..860390d1504 100644 --- a/demos/common/cpp/models/include/models/deblurring_model.h +++ b/demos/common/cpp/models/include/models/deblurring_model.h @@ -15,12 +15,23 @@ */ #pragma once +#include + +#include #include -#include -#include -#include + +#include + #include "models/image_model.h" -#include "models/results.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; class DeblurringModel : public ImageModel { public: @@ -30,8 +41,7 @@ class DeblurringModel : public ImageModel { /// @param layout - model input layout DeblurringModel(const std::string& modelFileName, const cv::Size& inputImgSize, const std::string& layout = ""); - std::shared_ptr preprocess( - const InputData& inputData, ov::InferRequest& request) override; + std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; std::unique_ptr postprocess(InferenceResult& infResult) override; protected: diff --git a/demos/common/cpp/models/include/models/detection_model.h b/demos/common/cpp/models/include/models/detection_model.h index c6436c0744f..4d57a61b521 100644 --- a/demos/common/cpp/models/include/models/detection_model.h +++ b/demos/common/cpp/models/include/models/detection_model.h @@ -15,8 +15,11 @@ */ #pragma once +#include + #include #include + #include "models/image_model.h" class DetectionModel : public ImageModel { @@ -30,8 +33,11 @@ class DetectionModel : public ImageModel { /// @param labels - array of labels for every class. If this array is empty or contains less elements /// than actual classes number, default "Label #N" will be shown for missing items. /// @param layout - model input layout - DetectionModel(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize, - const std::vector& labels, const std::string& layout = ""); + DetectionModel(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + const std::vector& labels, + const std::string& layout = ""); static std::vector loadLabels(const std::string& labelFilename); @@ -39,6 +45,7 @@ class DetectionModel : public ImageModel { float confidenceThreshold; std::vector labels; - std::string getLabelName(int labelID) { return (size_t)labelID < labels.size() ? - labels[labelID] : std::string("Label #") + std::to_string(labelID); } + std::string getLabelName(int labelID) { + return (size_t)labelID < labels.size() ? 
labels[labelID] : std::string("Label #") + std::to_string(labelID); + } }; diff --git a/demos/common/cpp/models/include/models/detection_model_centernet.h b/demos/common/cpp/models/include/models/detection_model_centernet.h index 83def4083d0..db9ebdb15bb 100644 --- a/demos/common/cpp/models/include/models/detection_model_centernet.h +++ b/demos/common/cpp/models/include/models/detection_model_centernet.h @@ -15,11 +15,20 @@ */ #pragma once +#include #include #include -#include + #include "models/detection_model.h" -#include "models/results.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; class ModelCenterNet : public DetectionModel { public: @@ -29,15 +38,20 @@ class ModelCenterNet : public DetectionModel { float right; float bottom; - float getWidth() const { return (right - left) + 1.0f; } - float getHeight() const { return (bottom - top) + 1.0f; } + float getWidth() const { + return (right - left) + 1.0f; + } + float getHeight() const { + return (bottom - top) + 1.0f; + } }; static const int INIT_VECTOR_SIZE = 200; - ModelCenterNet(const std::string& modelFileName, float confidenceThreshold, - const std::vector& labels = std::vector(), const std::string& layout = ""); - std::shared_ptr preprocess( - const InputData& inputData, ov::InferRequest& request) override; + ModelCenterNet(const std::string& modelFileName, + float confidenceThreshold, + const std::vector& labels = std::vector(), + const std::string& layout = ""); + std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; std::unique_ptr postprocess(InferenceResult& infResult) override; protected: diff --git a/demos/common/cpp/models/include/models/detection_model_faceboxes.h b/demos/common/cpp/models/include/models/detection_model_faceboxes.h index 21a14af066b..0aaff2723aa 100644 --- a/demos/common/cpp/models/include/models/detection_model_faceboxes.h +++ b/demos/common/cpp/models/include/models/detection_model_faceboxes.h @@ -15,11 +15,20 @@ */ #pragma once +#include + +#include #include +#include #include -#include + #include "models/detection_model.h" -#include "models/results.h" + +namespace ov { +class Model; +} // namespace ov +struct InferenceResult; +struct ResultBase; class ModelFaceBoxes : public DetectionModel { public: @@ -29,15 +38,26 @@ class ModelFaceBoxes : public DetectionModel { float right; float bottom; - float getWidth() const { return (right - left) + 1.0f; } - float getHeight() const { return (bottom - top) + 1.0f; } - float getXCenter() const { return left + (getWidth() - 1.0f) / 2.0f; } - float getYCenter() const { return top + (getHeight() - 1.0f) / 2.0f; } + float getWidth() const { + return (right - left) + 1.0f; + } + float getHeight() const { + return (bottom - top) + 1.0f; + } + float getXCenter() const { + return left + (getWidth() - 1.0f) / 2.0f; + } + float getYCenter() const { + return top + (getHeight() - 1.0f) / 2.0f; + } }; static const int INIT_VECTOR_SIZE = 200; - ModelFaceBoxes(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize, - float boxIOUThreshold, const std::string& layout = ""); + ModelFaceBoxes(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout = ""); std::unique_ptr postprocess(InferenceResult& infResult) override; protected: @@ -49,5 +69,4 @@ class ModelFaceBoxes : public DetectionModel { std::vector anchors; void 
prepareInputsOutputs(std::shared_ptr& model) override; void priorBoxes(const std::vector>& featureMaps); - }; diff --git a/demos/common/cpp/models/include/models/detection_model_retinaface.h b/demos/common/cpp/models/include/models/detection_model_retinaface.h index 80a22ee6280..5eac3f485b8 100644 --- a/demos/common/cpp/models/include/models/detection_model_retinaface.h +++ b/demos/common/cpp/models/include/models/detection_model_retinaface.h @@ -15,14 +15,20 @@ */ #pragma once +#include +#include #include #include -#include + #include "models/detection_model.h" -#include "models/results.h" -class ModelRetinaFace - : public DetectionModel { +namespace ov { +class Model; +} // namespace ov +struct InferenceResult; +struct ResultBase; + +class ModelRetinaFace : public DetectionModel { public: struct Anchor { float left; @@ -30,10 +36,18 @@ class ModelRetinaFace float right; float bottom; - float getWidth() const { return (right - left) + 1.0f; } - float getHeight() const { return (bottom - top) + 1.0f; } - float getXCenter() const { return left + (getWidth() - 1.0f) / 2.0f; } - float getYCenter() const { return top + (getHeight() - 1.0f) / 2.0f; } + float getWidth() const { + return (right - left) + 1.0f; + } + float getHeight() const { + return (bottom - top) + 1.0f; + } + float getXCenter() const { + return left + (getWidth() - 1.0f) / 2.0f; + } + float getYCenter() const { + return top + (getHeight() - 1.0f) / 2.0f; + } }; static const int LANDMARKS_NUM = 5; @@ -45,8 +59,11 @@ class ModelRetinaFace /// @param useAutoResize - if true, image will be resized by openvino. /// @param boxIOUThreshold - threshold for NMS boxes filtering, varies in [0.0, 1.0] range. /// @param layout - model input layout - ModelRetinaFace(const std::string& model_name, float confidenceThreshold, bool useAutoResize, - float boxIOUThreshold, const std::string& layout = ""); + ModelRetinaFace(const std::string& model_name, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout = ""); std::unique_ptr postprocess(InferenceResult& infResult) override; protected: @@ -63,17 +80,11 @@ class ModelRetinaFace const float maskThreshold; float landmarkStd; - enum OutputType { - OUT_BOXES, - OUT_SCORES, - OUT_LANDMARKS, - OUT_MASKSCORES, - OUT_MAX - }; + enum OutputType { OUT_BOXES, OUT_SCORES, OUT_LANDMARKS, OUT_MASKSCORES, OUT_MAX }; - std::vector separateOutputsNames[OUT_MAX]; + std::vector separateOutputsNames[OUT_MAX]; const std::vector anchorCfg; - std::map> anchorsFpn; + std::map> anchorsFpn; std::vector> anchors; void generateAnchorsFpn(); diff --git a/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h b/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h index 1faa410b79c..5e9d402122d 100644 --- a/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h +++ b/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h @@ -15,11 +15,22 @@ */ #pragma once +#include + +#include #include #include -#include + +#include + #include "models/detection_model.h" -#include "models/results.h" + +namespace ov { +class Model; +class Tensor; +} // namespace ov +struct InferenceResult; +struct ResultBase; class ModelRetinaFacePT : public DetectionModel { public: @@ -36,10 +47,18 @@ class ModelRetinaFacePT : public DetectionModel { float right; float bottom; - float getWidth() const { return (right - left) + 1.0f; } - float getHeight() const { return (bottom - top) + 1.0f; } - float getXCenter() const { return left + 
(getWidth() - 1.0f) / 2.0f; } - float getYCenter() const { return top + (getHeight() - 1.0f) / 2.0f; } + float getWidth() const { + return (right - left) + 1.0f; + } + float getHeight() const { + return (bottom - top) + 1.0f; + } + float getXCenter() const { + return left + (getWidth() - 1.0f) / 2.0f; + } + float getYCenter() const { + return top + (getHeight() - 1.0f) / 2.0f; + } }; /// Loads model and performs required initialization @@ -49,31 +68,33 @@ class ModelRetinaFacePT : public DetectionModel { /// @param useAutoResize - if true, image will be resized by openvino. /// @param boxIOUThreshold - threshold for NMS boxes filtering, varies in [0.0, 1.0] range. /// @param layout - model input layout - ModelRetinaFacePT(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize, - float boxIOUThreshold, const std::string& layout = ""); + ModelRetinaFacePT(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout = ""); std::unique_ptr postprocess(InferenceResult& infResult) override; protected: size_t landmarksNum; const float boxIOUThreshold; - float variance[2] = { 0.1f, 0.2f }; + float variance[2] = {0.1f, 0.2f}; - enum OutputType { - OUT_BOXES, - OUT_SCORES, - OUT_LANDMARKS, - OUT_MAX - }; + enum OutputType { OUT_BOXES, OUT_SCORES, OUT_LANDMARKS, OUT_MAX }; std::vector priors; std::vector filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold); std::vector getFilteredScores(const ov::Tensor& scoresTensor, const std::vector& indicies); std::vector getFilteredLandmarks(const ov::Tensor& landmarksTensor, - const std::vector& indicies, int imgWidth, int imgHeight); + const std::vector& indicies, + int imgWidth, + int imgHeight); std::vector generatePriorData(); std::vector getFilteredProposals(const ov::Tensor& boxesTensor, - const std::vector& indicies, int imgWidth, int imgHeight); + const std::vector& indicies, + int imgWidth, + int imgHeight); void prepareInputsOutputs(std::shared_ptr& model) override; }; diff --git a/demos/common/cpp/models/include/models/detection_model_ssd.h b/demos/common/cpp/models/include/models/detection_model_ssd.h index dce5f0c74bc..646d7b045dc 100644 --- a/demos/common/cpp/models/include/models/detection_model_ssd.h +++ b/demos/common/cpp/models/include/models/detection_model_ssd.h @@ -15,11 +15,22 @@ */ #pragma once +#include + +#include #include #include -#include + #include "models/detection_model.h" -#include "models/results.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; class ModelSSD : public DetectionModel { public: @@ -33,12 +44,12 @@ class ModelSSD : public DetectionModel { /// than actual classes number, default "Label #N" will be shown for missing items. 
/// @param layout - model input layout ModelSSD(const std::string& modelFileName, - float confidenceThreshold, bool useAutoResize, - const std::vector& labels = std::vector(), - const std::string& layout = ""); + float confidenceThreshold, + bool useAutoResize, + const std::vector& labels = std::vector(), + const std::string& layout = ""); - std::shared_ptr preprocess( - const InputData& inputData, ov::InferRequest& request) override; + std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; std::unique_ptr postprocess(InferenceResult& infResult) override; protected: diff --git a/demos/common/cpp/models/include/models/detection_model_yolo.h b/demos/common/cpp/models/include/models/detection_model_yolo.h index 202a9cdfabc..38b0b64b5b5 100644 --- a/demos/common/cpp/models/include/models/detection_model_yolo.h +++ b/demos/common/cpp/models/include/models/detection_model_yolo.h @@ -15,12 +15,22 @@ */ #pragma once +#include +#include + +#include +#include #include #include -#include + #include +#include + #include "models/detection_model.h" -#include + +struct DetectedObject; +struct InferenceResult; +struct ResultBase; class ModelYolo : public DetectionModel { protected: @@ -34,17 +44,16 @@ class ModelYolo : public DetectionModel { size_t outputHeight = 0; Region(const std::shared_ptr& regionYolo); - Region(size_t classes, int coords, const std::vector& anchors, const std::vector& masks, size_t outputWidth, size_t outputHeight); + Region(size_t classes, + int coords, + const std::vector& anchors, + const std::vector& masks, + size_t outputWidth, + size_t outputHeight); }; public: - enum YoloVersion { - YOLO_V1V2, - YOLO_V3, - YOLO_V4, - YOLO_V4_TINY, - YOLOF - }; + enum YoloVersion { YOLO_V1V2, YOLO_V3, YOLO_V4, YOLO_V4_TINY, YOLOF }; /// Constructor. /// @param modelFileName name of model to load @@ -61,19 +70,28 @@ class ModelYolo : public DetectionModel { /// @param anchors - vector of anchors coordinates. Required for YOLOv4, for other versions it may be omitted. /// @param masks - vector of masks values. Required for YOLOv4, for other versions it may be omitted. 
/// @param layout - model input layout - ModelYolo(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize, - bool useAdvancedPostprocessing = true, float boxIOUThreshold = 0.5, const std::vector& labels = std::vector(), - const std::vector& anchors = std::vector(), const std::vector& masks = std::vector(), - const std::string& layout = ""); + ModelYolo(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + bool useAdvancedPostprocessing = true, + float boxIOUThreshold = 0.5, + const std::vector& labels = std::vector(), + const std::vector& anchors = std::vector(), + const std::vector& masks = std::vector(), + const std::string& layout = ""); std::unique_ptr postprocess(InferenceResult& infResult) override; protected: void prepareInputsOutputs(std::shared_ptr& model) override; - void parseYOLOOutput(const std::string& output_name, const ov::Tensor& tensor, - const unsigned long resized_im_h, const unsigned long resized_im_w, const unsigned long original_im_h, - const unsigned long original_im_w, std::vector& objects); + void parseYOLOOutput(const std::string& output_name, + const ov::Tensor& tensor, + const unsigned long resized_im_h, + const unsigned long resized_im_w, + const unsigned long original_im_h, + const unsigned long original_im_w, + std::vector& objects); static int calculateEntryIndex(int entriesNum, int lcoords, size_t lclasses, int location, int entry); static double intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2); diff --git a/demos/common/cpp/models/include/models/hpe_model_associative_embedding.h b/demos/common/cpp/models/include/models/hpe_model_associative_embedding.h index ec2e32b256b..6780225caf8 100644 --- a/demos/common/cpp/models/include/models/hpe_model_associative_embedding.h +++ b/demos/common/cpp/models/include/models/hpe_model_associative_embedding.h @@ -15,13 +15,22 @@ */ #pragma once +#include #include #include -#include + +#include #include + #include + #include "models/image_model.h" -#include + +struct HumanPose; +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; class HpeAssociativeEmbedding : public ImageModel { public: @@ -32,14 +41,17 @@ class HpeAssociativeEmbedding : public ImageModel { /// @param confidenceThreshold - threshold to eliminate low-confidence poses. /// Any pose with confidence lower than this threshold will be ignored. 
/// @param layout - model input layout - HpeAssociativeEmbedding(const std::string& modelFileName, double aspectRatio, int targetSize, - float confidenceThreshold, const std::string& layout = "", - float delta = 0.0, RESIZE_MODE resizeMode = RESIZE_KEEP_ASPECT); + HpeAssociativeEmbedding(const std::string& modelFileName, + double aspectRatio, + int targetSize, + float confidenceThreshold, + const std::string& layout = "", + float delta = 0.0, + RESIZE_MODE resizeMode = RESIZE_KEEP_ASPECT); std::unique_ptr postprocess(InferenceResult& infResult) override; - std::shared_ptr preprocess( - const InputData& inputData, ov::InferRequest& request) override; + std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; protected: void prepareInputsOutputs(std::shared_ptr& model) override; @@ -64,8 +76,7 @@ class HpeAssociativeEmbedding : public ImageModel { void changeInputSize(std::shared_ptr& model); - std::string findTensorByName(const std::string& tensorName, - const std::vector& outputsNames); + std::string findTensorByName(const std::string& tensorName, const std::vector& outputsNames); std::vector split(float* data, const ov::Shape& shape); diff --git a/demos/common/cpp/models/include/models/hpe_model_openpose.h b/demos/common/cpp/models/include/models/hpe_model_openpose.h index bb5430beda9..d5e1ce7e076 100644 --- a/demos/common/cpp/models/include/models/hpe_model_openpose.h +++ b/demos/common/cpp/models/include/models/hpe_model_openpose.h @@ -15,12 +15,25 @@ */ #pragma once +#include + +#include #include #include -#include -#include + +#include + #include "models/image_model.h" -#include "models/results.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct HumanPose; +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; class HPEOpenPose : public ImageModel { public: @@ -30,13 +43,15 @@ class HPEOpenPose : public ImageModel { /// @param targetSize - the height used for model reshaping. /// @param confidenceThreshold - threshold to eliminate low-confidence keypoints. 
/// @param layout - model input layout - HPEOpenPose(const std::string& modelFileName, double aspectRatio, int targetSize, - float confidenceThreshold, const std::string& layout = ""); + HPEOpenPose(const std::string& modelFileName, + double aspectRatio, + int targetSize, + float confidenceThreshold, + const std::string& layout = ""); std::unique_ptr postprocess(InferenceResult& infResult) override; - std::shared_ptr preprocess( - const InputData& inputData, ov::InferRequest& request) override; + std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; static const size_t keypointsNumber = 18; @@ -56,8 +71,7 @@ class HPEOpenPose : public ImageModel { int targetSize; float confidenceThreshold; - std::vector extractPoses(const std::vector& heatMaps, - const std::vector& pafs) const; + std::vector extractPoses(const std::vector& heatMaps, const std::vector& pafs) const; void resizeFeatureMaps(std::vector& featureMaps) const; void changeInputSize(std::shared_ptr& model); diff --git a/demos/common/cpp/models/include/models/image_model.h b/demos/common/cpp/models/include/models/image_model.h index 60f7d283e10..3215e4870f8 100644 --- a/demos/common/cpp/models/include/models/image_model.h +++ b/demos/common/cpp/models/include/models/image_model.h @@ -1,4 +1,4 @@ - /* +/* // Copyright (C) 2021-2022 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,9 +15,18 @@ */ #pragma once -#include +#include + +#include +#include + #include "models/model_base.h" -#include "models/internal_model_data.h" + +namespace ov { +class InferRequest; +} // namespace ov +struct InputData; +struct InternalModelData; class ImageModel : public ModelBase { public: @@ -27,7 +36,7 @@ class ImageModel : public ModelBase { /// @param layout - model input layout ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = ""); - virtual std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; + std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; protected: bool useAutoResize; diff --git a/demos/common/cpp/models/include/models/input_data.h b/demos/common/cpp/models/include/models/input_data.h index 6e0fa7f9097..bff9fa5dcfb 100644 --- a/demos/common/cpp/models/include/models/input_data.h +++ b/demos/common/cpp/models/include/models/input_data.h @@ -20,11 +20,13 @@ struct InputData { virtual ~InputData() {} - template T& asRef() { + template + T& asRef() { return dynamic_cast(*this); } - template const T& asRef() const { + template + const T& asRef() const { return dynamic_cast(*this); } }; diff --git a/demos/common/cpp/models/include/models/internal_model_data.h b/demos/common/cpp/models/include/models/internal_model_data.h index 6b408df67b3..61d7744e3f8 100644 --- a/demos/common/cpp/models/include/models/internal_model_data.h +++ b/demos/common/cpp/models/include/models/internal_model_data.h @@ -19,28 +19,29 @@ struct InternalModelData { virtual ~InternalModelData() {} - template T& asRef() { + template + T& asRef() { return dynamic_cast(*this); } - template const T& asRef() const { + template + const T& asRef() const { return dynamic_cast(*this); } }; struct InternalImageModelData : public InternalModelData { - InternalImageModelData(int width, int height) : - inputImgWidth(width), - inputImgHeight(height) {} + InternalImageModelData(int width, int height) : inputImgWidth(width), inputImgHeight(height) {} int inputImgWidth; int inputImgHeight; }; 
struct InternalScaleData : public InternalImageModelData { - InternalScaleData(int width, int height, float scaleX, float scaleY) : - InternalImageModelData(width, height), - scaleX(scaleX), scaleY(scaleY) {} + InternalScaleData(int width, int height, float scaleX, float scaleY) + : InternalImageModelData(width, height), + scaleX(scaleX), + scaleY(scaleY) {} float scaleX; float scaleY; diff --git a/demos/common/cpp/models/include/models/jpeg_restoration_model.h b/demos/common/cpp/models/include/models/jpeg_restoration_model.h index edeec699e73..08794578822 100644 --- a/demos/common/cpp/models/include/models/jpeg_restoration_model.h +++ b/demos/common/cpp/models/include/models/jpeg_restoration_model.h @@ -15,8 +15,12 @@ */ #pragma once + +#include #include + #include + #include "models/image_model.h" #include "models/results.h" @@ -27,11 +31,12 @@ class JPEGRestorationModel : public ImageModel { /// @param inputImgSize size of image to set model input shape /// @param jpegCompression flag allows to perform compression before the inference /// @param layout - model input layout - JPEGRestorationModel(const std::string& modelFileName, const cv::Size& inputImgSize, - bool jpegCompression, const std::string& layout = ""); + JPEGRestorationModel(const std::string& modelFileName, + const cv::Size& inputImgSize, + bool jpegCompression, + const std::string& layout = ""); - std::shared_ptr preprocess( - const InputData& inputData, ov::InferRequest& request) override; + std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; std::unique_ptr postprocess(InferenceResult& infResult) override; protected: diff --git a/demos/common/cpp/models/include/models/model_base.h b/demos/common/cpp/models/include/models/model_base.h index 10a33c492e0..c6d9cc103d5 100644 --- a/demos/common/cpp/models/include/models/model_base.h +++ b/demos/common/cpp/models/include/models/model_base.h @@ -15,17 +15,27 @@ */ #pragma once +#include +#include +#include +#include + #include + #include -#include #include -#include "models/input_data.h" -#include "models/results.h" +#include + +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; class ModelBase { public: ModelBase(const std::string& modelFileName, const std::string& layout = "") - : modelFileName(modelFileName), inputsLayouts(parseLayoutString(layout)) {} + : modelFileName(modelFileName), + inputsLayouts(parseLayoutString(layout)) {} virtual ~ModelBase() {} @@ -34,12 +44,20 @@ class ModelBase { virtual void onLoadCompleted(const std::vector& requests) {} virtual std::unique_ptr postprocess(InferenceResult& infResult) = 0; - const std::vector& getOutputsNames() const { return outputsNames; } - const std::vector& getInputsNames() const { return inputsNames; } + const std::vector& getOutputsNames() const { + return outputsNames; + } + const std::vector& getInputsNames() const { + return inputsNames; + } - std::string getModelFileName() { return modelFileName; } + std::string getModelFileName() { + return modelFileName; + } - void setInputsPreprocessing(bool reverseInputChannels, const std::string &meanValues, const std::string &scaleValues) { + void setInputsPreprocessing(bool reverseInputChannels, + const std::string& meanValues, + const std::string& scaleValues) { this->inputTransform = InputTransform(reverseInputChannels, meanValues, scaleValues); } diff --git a/demos/common/cpp/models/include/models/openpose_decoder.h b/demos/common/cpp/models/include/models/openpose_decoder.h index 
571a06b5f7b..d40e56e1e46 100644 --- a/demos/common/cpp/models/include/models/openpose_decoder.h +++ b/demos/common/cpp/models/include/models/openpose_decoder.h @@ -15,13 +15,16 @@ */ #pragma once +#include + +#include + #include -#include "models/results.h" + +struct HumanPose; struct Peak { - Peak(const int id = -1, - const cv::Point2f& pos = cv::Point2f(), - const float score = 0.0f); + Peak(const int id = -1, const cv::Point2f& pos = cv::Point2f(), const float score = 0.0f); int id; cv::Point2f pos; @@ -37,9 +40,7 @@ struct HumanPoseByPeaksIndices { }; struct TwoJointsConnection { - TwoJointsConnection(const int firstJointIdx, - const int secondJointIdx, - const float score); + TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score); int firstJointIdx; int secondJointIdx; @@ -49,13 +50,13 @@ struct TwoJointsConnection { void findPeaks(const std::vector& heatMaps, const float minPeaksDistance, std::vector>& allPeaks, - int heatMapId, float confidenceThreshold); - -std::vector groupPeaksToPoses( - const std::vector>& allPeaks, - const std::vector& pafs, - const size_t keypointsNumber, - const float midPointsScoreThreshold, - const float foundMidPointsRatioThreshold, - const int minJointsNumber, - const float minSubsetScore); + int heatMapId, + float confidenceThreshold); + +std::vector groupPeaksToPoses(const std::vector>& allPeaks, + const std::vector& pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore); diff --git a/demos/common/cpp/models/include/models/results.h b/demos/common/cpp/models/include/models/results.h index 9fc63b2bee8..6b3a89d4ef2 100644 --- a/demos/common/cpp/models/include/models/results.h +++ b/demos/common/cpp/models/include/models/results.h @@ -16,26 +16,36 @@ #pragma once #include +#include +#include +#include + #include #include + #include "internal_model_data.h" struct MetaData; struct ResultBase { - ResultBase(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) : - frameId(frameId), metaData(metaData) {} + ResultBase(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) + : frameId(frameId), + metaData(metaData) {} virtual ~ResultBase() {} int64_t frameId; std::shared_ptr metaData; - bool IsEmpty() { return frameId < 0; } + bool IsEmpty() { + return frameId < 0; + } - template T& asRef() { + template + T& asRef() { return dynamic_cast(*this); } - template const T& asRef() const { + template + const T& asRef() const { return dynamic_cast(*this); } }; @@ -56,20 +66,21 @@ struct InferenceResult : public ResultBase { /// Returns true if object contains no valid data /// @returns true if object contains no valid data - bool IsEmpty() { return outputsData.empty(); } + bool IsEmpty() { + return outputsData.empty(); + } }; struct ClassificationResult : public ResultBase { - ClassificationResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) : - ResultBase(frameId, metaData) {} + ClassificationResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) + : ResultBase(frameId, metaData) {} struct Classification { unsigned int id; std::string label; float score; - Classification(unsigned int id, const std::string& label, float score) : - id(id), label(label), score(score) {}; + Classification(unsigned int id, const std::string& label, float score) : id(id), label(label), score(score) {} }; std::vector topLabels; @@ -82,21 +93,20 @@ struct DetectedObject : public 
cv::Rect2f { }; struct DetectionResult : public ResultBase { - DetectionResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) : - ResultBase(frameId, metaData) {} + DetectionResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) + : ResultBase(frameId, metaData) {} std::vector objects; }; struct RetinaFaceDetectionResult : public DetectionResult { - RetinaFaceDetectionResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) : - DetectionResult(frameId, metaData) { - } + RetinaFaceDetectionResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) + : DetectionResult(frameId, metaData) {} std::vector landmarks; }; struct ImageResult : public ResultBase { - ImageResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) : - ResultBase(frameId, metaData) {} + ImageResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) + : ResultBase(frameId, metaData) {} cv::Mat resultImage; }; @@ -106,8 +116,7 @@ struct HumanPose { }; struct HumanPoseResult : public ResultBase { - HumanPoseResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) : - ResultBase(frameId, metaData) { - } + HumanPoseResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) + : ResultBase(frameId, metaData) {} std::vector poses; }; diff --git a/demos/common/cpp/models/include/models/segmentation_model.h b/demos/common/cpp/models/include/models/segmentation_model.h index 97978301c95..9d4d2cb1112 100644 --- a/demos/common/cpp/models/include/models/segmentation_model.h +++ b/demos/common/cpp/models/include/models/segmentation_model.h @@ -15,10 +15,17 @@ */ #pragma once +#include #include -#include +#include + #include "models/image_model.h" -#include "models/results.h" + +namespace ov { +class Model; +} // namespace ov +struct InferenceResult; +struct ResultBase; #pragma once class SegmentationModel : public ImageModel { diff --git a/demos/common/cpp/models/include/models/style_transfer_model.h b/demos/common/cpp/models/include/models/style_transfer_model.h index 84e376e2904..8bd4e16de48 100644 --- a/demos/common/cpp/models/include/models/style_transfer_model.h +++ b/demos/common/cpp/models/include/models/style_transfer_model.h @@ -15,9 +15,19 @@ */ #pragma once -#include +#include +#include + #include "models/image_model.h" -#include "models/results.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; class StyleTransferModel : public ImageModel { public: @@ -26,8 +36,7 @@ class StyleTransferModel : public ImageModel { /// @param layout - model input layout StyleTransferModel(const std::string& modelFileName, const std::string& layout = ""); - std::shared_ptr preprocess( - const InputData& inputData, ov::InferRequest& request) override; + std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; std::unique_ptr postprocess(InferenceResult& infResult) override; protected: diff --git a/demos/common/cpp/models/include/models/super_resolution_model.h b/demos/common/cpp/models/include/models/super_resolution_model.h index 55c7627d0ed..ac9805e4cb8 100644 --- a/demos/common/cpp/models/include/models/super_resolution_model.h +++ b/demos/common/cpp/models/include/models/super_resolution_model.h @@ -15,19 +15,32 @@ */ #pragma once -#include +#include +#include + +#include + #include "models/image_model.h" -#include "models/results.h" + +namespace ov { +class InferRequest; 
+class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; class SuperResolutionModel : public ImageModel { public: /// Constructor /// @param modelFileName name of model to load /// @param layout - model input layout - SuperResolutionModel(const std::string& modelFileName, const cv::Size& inputImgSize, const std::string& layout = ""); + SuperResolutionModel(const std::string& modelFileName, + const cv::Size& inputImgSize, + const std::string& layout = ""); - std::shared_ptr preprocess( - const InputData& inputData, ov::InferRequest& request) override; + std::shared_ptr preprocess(const InputData& inputData, ov::InferRequest& request) override; std::unique_ptr postprocess(InferenceResult& infResult) override; protected: diff --git a/demos/common/cpp/models/src/associative_embedding_decoder.cpp b/demos/common/cpp/models/src/associative_embedding_decoder.cpp index f2b3d8a6047..b1e828542d7 100644 --- a/demos/common/cpp/models/src/associative_embedding_decoder.cpp +++ b/demos/common/cpp/models/src/associative_embedding_decoder.cpp @@ -14,28 +14,34 @@ // limitations under the License. */ +#include "models/associative_embedding_decoder.h" + #include -#include +#include +#include +#include #include -#include -#include -#include "models/associative_embedding_decoder.h" +#include void findPeaks(const std::vector& nmsHeatMaps, const std::vector& aembdsMaps, std::vector>& allPeaks, - size_t jointId, size_t maxNumPeople, + size_t jointId, + size_t maxNumPeople, float detectionThreshold) { - const cv::Mat& nmsHeatMap = nmsHeatMaps[jointId]; const float* heatMapData = nmsHeatMap.ptr(); cv::Size outputSize = nmsHeatMap.size(); std::vector indices(outputSize.area()); std::iota(std::begin(indices), std::end(indices), 0); - std::partial_sort(std::begin(indices), std::begin(indices) + maxNumPeople, std::end(indices), - [heatMapData](int l, int r) { return heatMapData[l] > heatMapData[r]; }); + std::partial_sort(std::begin(indices), + std::begin(indices) + maxNumPeople, + std::end(indices), + [heatMapData](int l, int r) { + return heatMapData[l] > heatMapData[r]; + }); for (size_t personId = 0; personId < maxNumPeople; personId++) { int index = indices[personId]; @@ -45,21 +51,21 @@ void findPeaks(const std::vector& nmsHeatMaps, float score = heatMapData[index]; allPeaks[jointId].reserve(maxNumPeople); if (score > detectionThreshold) { - allPeaks[jointId].emplace_back(Peak{cv::Point2f(static_cast(x), static_cast(y)), - score, tag}); + allPeaks[jointId].emplace_back(Peak{cv::Point2f(static_cast(x), static_cast(y)), score, tag}); } } } std::vector matchByTag(std::vector>& allPeaks, - size_t maxNumPeople, size_t numJoints, + size_t maxNumPeople, + size_t numJoints, float tagThreshold) { - size_t jointOrder[] { 0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16 }; + size_t jointOrder[]{0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16}; std::vector allPoses; for (size_t jointId : jointOrder) { std::vector& jointPeaks = allPeaks[jointId]; std::vector tags; - for (auto& peak: jointPeaks) { + for (auto& peak : jointPeaks) { tags.push_back(peak.tag); } if (allPoses.empty()) { @@ -109,8 +115,14 @@ std::vector matchByTag(std::vector>& allPeaks, } if (numAdded > numGrouped) { - cv::copyMakeBorder(matchingCost, matchingCost, 0, 0, 0, numAdded - numGrouped, - cv::BORDER_CONSTANT, 10000000); + cv::copyMakeBorder(matchingCost, + matchingCost, + 0, + 0, + 0, + numAdded - numGrouped, + cv::BORDER_CONSTANT, + 10000000); } // Get pairs auto res = 
KuhnMunkres().Solve(matchingCost); @@ -118,8 +130,7 @@ std::vector matchByTag(std::vector>& allPeaks, size_t col = res[row]; if (row < numAdded && col < numGrouped && tagsDiff.at(row, col) < tagThreshold) { allPoses[col].add(jointId, jointPeaks[row]); - } - else { + } else { Pose pose = Pose(numJoints); pose.add(jointId, jointPeaks[row]); allPoses.push_back(pose); @@ -130,24 +141,25 @@ std::vector matchByTag(std::vector>& allPeaks, } namespace { - cv::Point2f adjustLocation(const int x, const int y, const cv::Mat& heatMap) { - cv::Point2f delta(0.f, 0.f); - int width = heatMap.cols; - int height = heatMap.rows; - if ((1 < x) && (x < width - 1) && (1 < y) && (y < height - 1)) { - auto diffX = heatMap.at(y, x + 1) - heatMap.at(y, x - 1); - auto diffY = heatMap.at(y + 1, x) - heatMap.at(y - 1, x); - delta.x = diffX > 0 ? 0.25f : -0.25f; - delta.y = diffY > 0 ? 0.25f : -0.25f; - } - return delta; +cv::Point2f adjustLocation(const int x, const int y, const cv::Mat& heatMap) { + cv::Point2f delta(0.f, 0.f); + int width = heatMap.cols; + int height = heatMap.rows; + if ((1 < x) && (x < width - 1) && (1 < y) && (y < height - 1)) { + auto diffX = heatMap.at(y, x + 1) - heatMap.at(y, x - 1); + auto diffY = heatMap.at(y + 1, x) - heatMap.at(y - 1, x); + delta.x = diffX > 0 ? 0.25f : -0.25f; + delta.y = diffY > 0 ? 0.25f : -0.25f; } + return delta; } +} // namespace void adjustAndRefine(std::vector& allPoses, const std::vector& heatMaps, const std::vector& aembdsMaps, - int poseId, const float delta) { + int poseId, + const float delta) { Pose& pose = allPoses[poseId]; float poseTag = pose.getPoseTag(); for (size_t jointId = 0; jointId < pose.size(); jointId++) { @@ -164,8 +176,7 @@ void adjustAndRefine(std::vector& allPoses, peak.keypoint.x += delta; peak.keypoint.y += delta; } - } - else { + } else { // Refine // Get position with the closest tag value to the pose tag cv::Mat diff = cv::abs(aembds - poseTag); diff --git a/demos/common/cpp/models/src/classification_model.cpp b/demos/common/cpp/models/src/classification_model.cpp index 17b72157b5b..90bc0d58da1 100644 --- a/demos/common/cpp/models/src/classification_model.cpp +++ b/demos/common/cpp/models/src/classification_model.cpp @@ -14,22 +14,33 @@ // limitations under the License. 
*/ +#include "models/classification_model.h" + +#include #include +#include +#include +#include #include +#include #include -#include + #include #include -#include +#include + #include -#include "models/classification_model.h" + #include "models/results.h" -ClassificationModel::ClassificationModel(const std::string& modelFileName, size_t nTop, bool useAutoResize, - const std::vector& labels, const std::string& layout) : - ImageModel(modelFileName, useAutoResize, layout), - nTop(nTop), - labels(labels) {} +ClassificationModel::ClassificationModel(const std::string& modelFileName, + size_t nTop, + bool useAutoResize, + const std::vector& labels, + const std::string& layout) + : ImageModel(modelFileName, useAutoResize, layout), + nTop(nTop), + labels(labels) {} std::unique_ptr ClassificationModel::postprocess(InferenceResult& infResult) { const ov::Tensor& indicesTensor = infResult.outputsData.find(outputsNames[0])->second; @@ -43,7 +54,7 @@ std::unique_ptr ClassificationModel::postprocess(InferenceResult& in result->topLabels.reserve(scoresTensor.get_size()); for (size_t i = 0; i < scoresTensor.get_size(); ++i) { int ind = indicesPtr[i]; - if (ind < 0 || ind >= (int)labels.size()) { + if (ind < 0 || ind >= static_cast(labels.size())) { throw std::runtime_error("Invalid index for the class label is found during postprocessing"); } result->topLabels.emplace_back(ind, labels[ind], scoresPtr[i]); @@ -94,22 +105,20 @@ void ClassificationModel::prepareInputsOutputs(std::shared_ptr& model const auto height = inputShape[ov::layout::height_idx(inputLayout)]; if (height != width) { throw std::logic_error("Model input has incorrect image shape. Must be NxN square." - " Got " + std::to_string(height) + - "x" + std::to_string(width) + "."); + " Got " + + std::to_string(height) + "x" + std::to_string(width) + "."); } ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_layout({ "NHWC" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); if (useAutoResize) { - ppp.input().tensor(). - set_spatial_dynamic_shape(); + ppp.input().tensor().set_spatial_dynamic_shape(); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). 
- resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); } ppp.input().model().set_layout(inputLayout); @@ -122,28 +131,27 @@ void ClassificationModel::prepareInputsOutputs(std::shared_ptr& model const ov::Shape& outputShape = model->output().get_shape(); if (outputShape.size() != 2 && outputShape.size() != 4) { throw std::logic_error("Classification model wrapper supports topologies only with" - " 2-dimensional or 4-dimensional output"); + " 2-dimensional or 4-dimensional output"); } const ov::Layout outputLayout("NCHW"); - if (outputShape.size() == 4 && (outputShape[ov::layout::height_idx(outputLayout)] != 1 - || outputShape[ov::layout::width_idx(outputLayout)] != 1)) { + if (outputShape.size() == 4 && (outputShape[ov::layout::height_idx(outputLayout)] != 1 || + outputShape[ov::layout::width_idx(outputLayout)] != 1)) { throw std::logic_error("Classification model wrapper supports topologies only" - " with 4-dimensional output which has last two dimensions of size 1"); + " with 4-dimensional output which has last two dimensions of size 1"); } size_t classesNum = outputShape[ov::layout::channels_idx(outputLayout)]; if (nTop > classesNum) { - throw std::logic_error("The model provides " + std::to_string(classesNum) + " classes, but " - + std::to_string(nTop) + " labels are requested to be predicted"); + throw std::logic_error("The model provides " + std::to_string(classesNum) + " classes, but " + + std::to_string(nTop) + " labels are requested to be predicted"); } if (classesNum == labels.size() + 1) { labels.insert(labels.begin(), "other"); slog::warn << "Inserted 'other' label as first." << slog::endl; - } - else if (classesNum != labels.size()) { - throw std::logic_error("Model's number of classes and parsed labels must match (" - + std::to_string(outputShape[1]) + " and " + std::to_string(labels.size()) + ')'); + } else if (classesNum != labels.size()) { + throw std::logic_error("Model's number of classes and parsed labels must match (" + + std::to_string(outputShape[1]) + " and " + std::to_string(labels.size()) + ')'); } ppp.output().tensor().set_element_type(ov::element::f32); @@ -151,33 +159,33 @@ void ClassificationModel::prepareInputsOutputs(std::shared_ptr& model // --------------------------- Adding softmax and topK output --------------------------- auto nodes = model->get_ops(); - auto softmaxNodeIt = std::find_if(std::begin(nodes), std::end(nodes), - [](const std::shared_ptr& op) { return std::string(op->get_type_name()) == "Softmax"; }); + auto softmaxNodeIt = std::find_if(std::begin(nodes), std::end(nodes), [](const std::shared_ptr& op) { + return std::string(op->get_type_name()) == "Softmax"; + }); std::shared_ptr softmaxNode; if (softmaxNodeIt == nodes.end()) { auto logitsNode = model->get_output_op(0)->input(0).get_source_output().get_node(); softmaxNode = std::make_shared(logitsNode->output(0), 1); - } - else { + } else { softmaxNode = *softmaxNodeIt; } - const auto k = std::make_shared(ov::element::i32, - ov::Shape{}, - std::vector{nTop}); - std::shared_ptr topkNode = std::make_shared(softmaxNode, k, 1, + const auto k = std::make_shared(ov::element::i32, ov::Shape{}, std::vector{nTop}); + std::shared_ptr topkNode = std::make_shared(softmaxNode, + k, + 1, ov::op::v3::TopK::Mode::MAX, ov::op::v3::TopK::SortType::SORT_VALUES); auto indices = std::make_shared(topkNode->output(0)); auto scores = std::make_shared(topkNode->output(1)); - 
ov::ResultVector res({ scores, indices }); + ov::ResultVector res({scores, indices}); model = std::make_shared(res, model->get_parameters(), "classification"); // manually set output tensors name for created topK node - model->outputs()[0].set_names({ "indices" }); + model->outputs()[0].set_names({"indices"}); outputsNames.push_back("indices"); - model->outputs()[1].set_names({ "scores" }); + model->outputs()[1].set_names({"scores"}); outputsNames.push_back("scores"); // set output precisions diff --git a/demos/common/cpp/models/src/deblurring_model.cpp b/demos/common/cpp/models/src/deblurring_model.cpp index a82f3cd9afa..261efb3f506 100644 --- a/demos/common/cpp/models/src/deblurring_model.cpp +++ b/demos/common/cpp/models/src/deblurring_model.cpp @@ -14,18 +14,29 @@ // limitations under the License. */ +#include "models/deblurring_model.h" + +#include +#include #include #include -#include + +#include #include + #include #include -#include "models/deblurring_model.h" -DeblurringModel::DeblurringModel(const std::string& modelFileName, const cv::Size& inputImgSize, const std::string& layout) : - ImageModel(modelFileName, false, layout) { - netInputHeight = inputImgSize.height; - netInputWidth = inputImgSize.width; +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" + +DeblurringModel::DeblurringModel(const std::string& modelFileName, + const cv::Size& inputImgSize, + const std::string& layout) + : ImageModel(modelFileName, false, layout) { + netInputHeight = inputImgSize.height; + netInputWidth = inputImgSize.width; } void DeblurringModel::prepareInputsOutputs(std::shared_ptr& model) { @@ -40,15 +51,13 @@ void DeblurringModel::prepareInputsOutputs(std::shared_ptr& model) { const ov::Shape& inputShape = model->input().get_shape(); const ov::Layout& inputLayout = getInputLayout(model->input()); - if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 - || inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 || + inputShape[ov::layout::channels_idx(inputLayout)] != 3) { throw std::logic_error("3-channel 4-dimensional model's input is expected"); } ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout("NHWC"); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); ppp.input().model().set_layout(inputLayout); @@ -61,8 +70,8 @@ void DeblurringModel::prepareInputsOutputs(std::shared_ptr& model) { const ov::Shape& outputShape = model->output().get_shape(); const ov::Layout outputLayout("NCHW"); - if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 - || outputShape[ov::layout::channels_idx(outputLayout)] != 3) { + if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 || + outputShape[ov::layout::channels_idx(outputLayout)] != 3) { throw std::logic_error("3-channel 4-dimensional model's output is expected"); } @@ -81,9 +90,9 @@ void DeblurringModel::changeInputSize(std::shared_ptr& model) { const auto widthId = ov::layout::width_idx(layout); if (inputShape[heightId] % stride || inputShape[widthId] % stride) { - throw std::logic_error("Model input shape HxW = " - + std::to_string(inputShape[heightId]) + "x" + std::to_string(inputShape[widthId]) - + "must be divisible by stride " + std::to_string(stride)); + throw std::logic_error("Model input shape HxW = " + std::to_string(inputShape[heightId]) + "x" + + std::to_string(inputShape[widthId]) + "must be divisible by stride " + + std::to_string(stride)); } netInputHeight = static_cast((netInputHeight + stride - 1) / stride) * stride; @@ -102,12 +111,10 @@ std::shared_ptr DeblurringModel::preprocess(const InputData& size_t w = image.cols; cv::Mat resizedImage; - if (netInputHeight - stride < h && h <= netInputHeight - && netInputWidth - stride < w && w <= netInputWidth) { + if (netInputHeight - stride < h && h <= netInputHeight && netInputWidth - stride < w && w <= netInputWidth) { int bottom = netInputHeight - h; int right = netInputWidth - w; - cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, - cv::BORDER_CONSTANT, 0); + cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, cv::BORDER_CONSTANT, 0); } else { slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl; cv::resize(image, resizedImage, cv::Size(netInputWidth, netInputHeight)); @@ -125,20 +132,21 @@ std::unique_ptr DeblurringModel::postprocess(InferenceResult& infRes const auto outputData = infResult.getFirstOutputTensor().data(); std::vector imgPlanes; - const ov::Shape& outputShape= infResult.getFirstOutputTensor().get_shape(); + const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape(); const ov::Layout outputLayout("NCHW"); - size_t outHeight = (int)(outputShape[ov::layout::height_idx(outputLayout)]); - size_t outWidth = (int)(outputShape[ov::layout::width_idx(outputLayout)]); + size_t outHeight = static_cast((outputShape[ov::layout::height_idx(outputLayout)])); + size_t outWidth = static_cast((outputShape[ov::layout::width_idx(outputLayout)])); size_t numOfPixels = outWidth * outHeight; - imgPlanes = std::vector{ - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])), - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))}; + imgPlanes = std::vector{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))}; cv::Mat resultImg; cv::merge(imgPlanes, resultImg); - if (netInputHeight - stride < static_cast(inputImgSize.inputImgHeight) && static_cast(inputImgSize.inputImgHeight) <= 
netInputHeight - && netInputWidth - stride < static_cast(inputImgSize.inputImgWidth) && static_cast(inputImgSize.inputImgWidth) <= netInputWidth) { + if (netInputHeight - stride < static_cast(inputImgSize.inputImgHeight) && + static_cast(inputImgSize.inputImgHeight) <= netInputHeight && + netInputWidth - stride < static_cast(inputImgSize.inputImgWidth) && + static_cast(inputImgSize.inputImgWidth) <= netInputWidth) { result->resultImage = resultImg(cv::Rect(0, 0, inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); } else { cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); diff --git a/demos/common/cpp/models/src/detection_model.cpp b/demos/common/cpp/models/src/detection_model.cpp index 72929f69f65..83e2d22b02c 100644 --- a/demos/common/cpp/models/src/detection_model.cpp +++ b/demos/common/cpp/models/src/detection_model.cpp @@ -14,18 +14,23 @@ // limitations under the License. */ +#include "models/detection_model.h" + #include +#include #include #include -#include "models/detection_model.h" + #include "models/image_model.h" -DetectionModel::DetectionModel(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize, - const std::vector& labels, const std::string& layout) : - ImageModel(modelFileName, useAutoResize, layout), - confidenceThreshold(confidenceThreshold), - labels(labels) { -} +DetectionModel::DetectionModel(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + const std::vector& labels, + const std::string& layout) + : ImageModel(modelFileName, useAutoResize, layout), + confidenceThreshold(confidenceThreshold), + labels(labels) {} std::vector DetectionModel::loadLabels(const std::string& labelFilename) { std::vector labelsList; diff --git a/demos/common/cpp/models/src/detection_model_centernet.cpp b/demos/common/cpp/models/src/detection_model_centernet.cpp index 4a4540e4f3b..eac42a79610 100644 --- a/demos/common/cpp/models/src/detection_model_centernet.cpp +++ b/demos/common/cpp/models/src/detection_model_centernet.cpp @@ -14,17 +14,33 @@ // limitations under the License. */ +#include "models/detection_model_centernet.h" + +#include + +#include +#include +#include +#include +#include + +#include #include #include + #include #include #include -#include "models/detection_model_centernet.h" + +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" ModelCenterNet::ModelCenterNet(const std::string& modelFileName, - float confidenceThreshold, const std::vector& labels, const std::string& layout) - : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) { -} + float confidenceThreshold, + const std::vector& labels, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {} void ModelCenterNet::prepareInputsOutputs(std::shared_ptr& model) { // --------------------------- Configure input & output ------------------------------------------------- @@ -42,8 +58,7 @@ void ModelCenterNet::prepareInputsOutputs(std::shared_ptr& model) { ov::preprocess::PrePostProcessor ppp(model); inputTransform.setPrecision(ppp, model->input().get_any_name()); - ppp.input().tensor(). 
- set_layout("NHWC"); + ppp.input().tensor().set_layout("NHWC"); ppp.input().model().set_layout(inputLayout); @@ -57,13 +72,11 @@ void ModelCenterNet::prepareInputsOutputs(std::shared_ptr& model) { throw std::logic_error("CenterNet model wrapper expects models that have 3 outputs"); } - const ov::Layout outLayout{ "NCHW" }; + const ov::Layout outLayout{"NCHW"}; for (const auto& output : model->outputs()) { auto outTensorName = output.get_any_name(); outputsNames.push_back(outTensorName); - ppp.output(outTensorName).tensor(). - set_element_type(ov::element::f32). - set_layout(outLayout); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outLayout); } std::sort(outputsNames.begin(), outputsNames.end()); model = ppp.build(); @@ -85,26 +98,31 @@ cv::Point2f get3rdPoint(const cv::Point2f& a, const cv::Point2f& b) { return b + cv::Point2f(-direct.y, direct.x); } -cv::Mat getAffineTransform(float centerX, float centerY, int srcW, float rot, size_t outputWidth, size_t outputHeight, bool inv = false) { - float rotRad = static_cast(CV_PI) * rot / 180.0f; - auto srcDir = getDir({ 0.0f, -0.5f * srcW }, rotRad); - cv::Point2f dstDir(0.0f, -0.5f * outputWidth); - std::vector src(3, { 0.0f, 0.0f }); - std::vector dst(3, { 0.0f, 0.0f }); - - src[0] = { centerX, centerY }; +cv::Mat getAffineTransform(float centerX, + float centerY, + int srcW, + float rot, + size_t outputWidth, + size_t outputHeight, + bool inv = false) { + float rotRad = static_cast(CV_PI) * rot / 180.0f; + auto srcDir = getDir({0.0f, -0.5f * srcW}, rotRad); + cv::Point2f dstDir(0.0f, -0.5f * outputWidth); + std::vector src(3, {0.0f, 0.0f}); + std::vector dst(3, {0.0f, 0.0f}); + + src[0] = {centerX, centerY}; src[1] = srcDir + src[0]; src[2] = get3rdPoint(src[0], src[1]); - dst[0] = { outputWidth * 0.5f, outputHeight * 0.5f }; + dst[0] = {outputWidth * 0.5f, outputHeight * 0.5f}; dst[1] = dst[0] + dstDir; dst[2] = get3rdPoint(dst[0], dst[1]); cv::Mat trans; if (inv) { trans = cv::getAffineTransform(dst, src); - } - else { + } else { trans = cv::getAffineTransform(src, dst); } @@ -139,7 +157,7 @@ std::vector> nms(float* scoresPtr, const ov::Shape& sha } // --------------------- store index and score------------------------------------ - scores.push_back({ chSize * ch + shape[2] * w + h, max }); + scores.push_back({chSize * ch + shape[2] * w + h, max}); bool next = true; // ---------------------- maxpool2d ----------------------------------------------- @@ -151,8 +169,7 @@ std::vector> nms(float* scoresPtr, const ov::Shape& sha next = false; break; } - } - else { + } else { if (max < 0) { scores.pop_back(); next = false; @@ -168,7 +185,6 @@ std::vector> nms(float* scoresPtr, const ov::Shape& sha return scores; } - static std::vector> filterScores(const ov::Tensor& scoresTensor, float threshold) { auto shape = scoresTensor.get_shape(); float* scoresPtr = scoresTensor.data(); @@ -176,30 +192,36 @@ static std::vector> filterScores(const ov::Tensor& scor return nms(scoresPtr, shape, threshold); } -std::vector> filterReg(const ov::Tensor& regressionTensor, const std::vector>& scores, size_t chSize) { +std::vector> filterReg(const ov::Tensor& regressionTensor, + const std::vector>& scores, + size_t chSize) { const float* regPtr = regressionTensor.data(); std::vector> reg; for (auto s : scores) { - reg.push_back({ regPtr[s.first % chSize], regPtr[chSize + s.first % chSize] }); + reg.push_back({regPtr[s.first % chSize], regPtr[chSize + s.first % chSize]}); } return reg; } -std::vector> filterWH(const ov::Tensor& 
whTensor, const std::vector>& scores, size_t chSize) { +std::vector> filterWH(const ov::Tensor& whTensor, + const std::vector>& scores, + size_t chSize) { const float* whPtr = whTensor.data(); std::vector> wh; for (auto s : scores) { - wh.push_back({ whPtr[s.first % chSize], whPtr[chSize + s.first % chSize] }); + wh.push_back({whPtr[s.first % chSize], whPtr[chSize + s.first % chSize]}); } return wh; } -std::vector calcBoxes(const std::vector>& scores, const std::vector>& reg, - const std::vector>& wh, const ov::Shape& shape) { +std::vector calcBoxes(const std::vector>& scores, + const std::vector>& reg, + const std::vector>& wh, + const ov::Shape& shape) { std::vector boxes(scores.size()); for (size_t i = 0; i < boxes.size(); ++i) { @@ -216,16 +238,20 @@ std::vector calcBoxes(const std::vector& boxes, const ov::Shape& shape, int scale, float centerX, float centerY) { +void transform(std::vector& boxes, + const ov::Shape& shape, + int scale, + float centerX, + float centerY) { cv::Mat1f trans = getAffineTransform(centerX, centerY, scale, 0, shape[2], shape[3], true); for (auto& b : boxes) { ModelCenterNet::BBox newbb; - newbb.left = trans.at(0, 0) * b.left + trans.at(0, 1) * b.top + trans.at(0, 2); - newbb.top = trans.at(1, 0) * b.left + trans.at(1, 1) * b.top + trans.at(1, 2); - newbb.right = trans.at(0, 0) * b.right + trans.at(0, 1) * b.bottom + trans.at(0, 2); - newbb.bottom = trans.at(1, 0) * b.right + trans.at(1, 1) * b.bottom + trans.at(1, 2); + newbb.left = trans.at(0, 0) * b.left + trans.at(0, 1) * b.top + trans.at(0, 2); + newbb.top = trans.at(1, 0) * b.left + trans.at(1, 1) * b.top + trans.at(1, 2); + newbb.right = trans.at(0, 0) * b.right + trans.at(0, 1) * b.bottom + trans.at(0, 2); + newbb.bottom = trans.at(1, 0) * b.right + trans.at(1, 1) * b.bottom + trans.at(1, 2); b = newbb; } @@ -244,7 +270,6 @@ std::unique_ptr ModelCenterNet::postprocess(InferenceResult& infResu const auto& whTensor = infResult.outputsData[outputsNames[2]]; const auto wh = filterWH(whTensor, scores, chSize); - // --------------------------- Calculate bounding boxes & apply inverse affine transform ---------- auto boxes = calcBoxes(scores, reg, wh, heatmapTensorShape); @@ -265,10 +290,10 @@ std::unique_ptr ModelCenterNet::postprocess(InferenceResult& infResu desc.confidence = scores[i].second; desc.labelID = scores[i].first / chSize; desc.label = getLabelName(desc.labelID); - desc.x = clamp(boxes[i].left, 0.f, (float)imgWidth); - desc.y = clamp(boxes[i].top, 0.f, (float)imgHeight); - desc.width = clamp(boxes[i].getWidth(), 0.f, (float)imgWidth); - desc.height = clamp(boxes[i].getHeight(), 0.f, (float)imgHeight); + desc.x = clamp(boxes[i].left, 0.f, static_cast(imgWidth)); + desc.y = clamp(boxes[i].top, 0.f, static_cast(imgHeight)); + desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast(imgWidth)); + desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast(imgHeight)); result->objects.push_back(desc); } diff --git a/demos/common/cpp/models/src/detection_model_faceboxes.cpp b/demos/common/cpp/models/src/detection_model_faceboxes.cpp index 6cfcf939032..f4c29a7f7c5 100644 --- a/demos/common/cpp/models/src/detection_model_faceboxes.cpp +++ b/demos/common/cpp/models/src/detection_model_faceboxes.cpp @@ -14,18 +14,34 @@ // limitations under the License. 
*/ +#include "models/detection_model_faceboxes.h" + +#include + #include +#include +#include + #include + #include #include -#include "models/detection_model_faceboxes.h" +#include + +#include "models/internal_model_data.h" +#include "models/results.h" ModelFaceBoxes::ModelFaceBoxes(const std::string& modelFileName, - float confidenceThreshold, bool useAutoResize, float boxIOUThreshold, const std::string& layout) + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout) : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), - maxProposalsCount(0), boxIOUThreshold(boxIOUThreshold), variance({0.1f, 0.2f}), - steps({32, 64, 128}), minSizes({ {32, 64, 128}, {256}, {512} }) { -} + maxProposalsCount(0), + boxIOUThreshold(boxIOUThreshold), + variance({0.1f, 0.2f}), + steps({32, 64, 128}), + minSizes({{32, 64, 128}, {256}, {512}}) {} void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr& model) { // --------------------------- Configure input & output ------------------------------------------------- @@ -43,16 +59,15 @@ void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr& model) { ov::preprocess::PrePostProcessor ppp(model); inputTransform.setPrecision(ppp, model->input().get_any_name()); - ppp.input().tensor(). - set_layout({ "NHWC" }); + ppp.input().tensor().set_layout({"NHWC"}); if (useAutoResize) { - ppp.input().tensor(). - set_spatial_dynamic_shape(); + ppp.input().tensor().set_spatial_dynamic_shape(); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). - resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); } ppp.input().model().set_layout(inputLayout); @@ -67,14 +82,12 @@ void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr& model) { throw std::logic_error("FaceBoxes model wrapper expects models that have 2 outputs"); } - const ov::Layout outputLayout{ "CHW" }; + const ov::Layout outputLayout{"CHW"}; maxProposalsCount = model->outputs().front().get_shape()[ov::layout::height_idx(outputLayout)]; for (const auto& output : model->outputs()) { const auto outTensorName = output.get_any_name(); outputsNames.push_back(outTensorName); - ppp.output(outTensorName).tensor(). - set_element_type(ov::element::f32). 
- set_layout(outputLayout); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout); } std::sort(outputsNames.begin(), outputsNames.end()); model = ppp.build(); @@ -82,14 +95,17 @@ void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr& model) { // --------------------------- Calculating anchors ---------------------------------------------------- std::vector> featureMaps; for (auto s : steps) { - featureMaps.push_back({ netInputHeight / s, netInputWidth / s }); + featureMaps.push_back({netInputHeight / s, netInputWidth / s}); } priorBoxes(featureMaps); } -void calculateAnchors(std::vector& anchors, const std::vector& vx, const std::vector& vy, - const int minSize, const int step) { +void calculateAnchors(std::vector& anchors, + const std::vector& vx, + const std::vector& vy, + const int minSize, + const int step) { float skx = static_cast(minSize); float sky = static_cast(minSize); @@ -105,15 +121,17 @@ void calculateAnchors(std::vector& anchors, const std::v for (auto cy : dense_cy) { for (auto cx : dense_cx) { - anchors.push_back({ cx - 0.5f * skx, cy - 0.5f * sky, - cx + 0.5f * skx, cy + 0.5f * sky }); // left top right bottom + anchors.push_back( + {cx - 0.5f * skx, cy - 0.5f * sky, cx + 0.5f * skx, cy + 0.5f * sky}); // left top right bottom } } - } -void calculateAnchorsZeroLevel(std::vector& anchors, const int fx, const int fy, - const std::vector& minSizes, const int step) { +void calculateAnchorsZeroLevel(std::vector& anchors, + const int fx, + const int fy, + const std::vector& minSizes, + const int step) { for (auto s : minSizes) { std::vector vx, vy; if (s == 32) { @@ -126,15 +144,13 @@ void calculateAnchorsZeroLevel(std::vector& anchors, con vy.push_back(fy + 0.25f); vy.push_back(fy + 0.5f); vy.push_back(fy + 0.75f); - } - else if (s == 64) { + } else if (s == 64) { vx.push_back(static_cast(fx)); vx.push_back(fx + 0.5f); vy.push_back(static_cast(fy)); vy.push_back(fy + 0.5f); - } - else { + } else { vx.push_back(fx + 0.5f); vy.push_back(fy + 0.5f); } @@ -150,17 +166,17 @@ void ModelFaceBoxes::priorBoxes(const std::vector>& fe for (size_t i = 0; i < featureMaps[k].first; ++i) { for (size_t j = 0; j < featureMaps[k].second; ++j) { if (k == 0) { - calculateAnchorsZeroLevel(anchors, j, i, minSizes[k], steps[k]); - } - else { - calculateAnchors(anchors, { j + 0.5f }, { i + 0.5f }, minSizes[k][0], steps[k]); + calculateAnchorsZeroLevel(anchors, j, i, minSizes[k], steps[k]); + } else { + calculateAnchors(anchors, {j + 0.5f}, {i + 0.5f}, minSizes[k][0], steps[k]); } } } } } -std::pair, std::vector> filterScores(const ov::Tensor& scoresTensor, const float confidenceThreshold) { +std::pair, std::vector> filterScores(const ov::Tensor& scoresTensor, + const float confidenceThreshold) { auto shape = scoresTensor.get_shape(); const float* scoresPtr = scoresTensor.data(); @@ -175,11 +191,13 @@ std::pair, std::vector> filterScores(const ov::Tensor } } - return { indices, scores }; + return {indices, scores}; } -std::vector filterBoxes(const ov::Tensor& boxesTensor, const std::vector& anchors, - const std::vector& validIndices, const std::vector& variance) { +std::vector filterBoxes(const ov::Tensor& boxesTensor, + const std::vector& anchors, + const std::vector& validIndices, + const std::vector& variance) { auto shape = boxesTensor.get_shape(); const float* boxesPtr = boxesTensor.data(); @@ -198,26 +216,32 @@ std::vector filterBoxes(const ov::Tensor& boxesTensor, c auto predW = exp(dw * variance[1]) * anchors[i].getWidth(); auto predH = exp(dh * 
variance[1]) * anchors[i].getHeight(); - boxes.push_back({ static_cast(predCtrX - 0.5f * predW), static_cast(predCtrY - 0.5f * predH), - static_cast(predCtrX + 0.5f * predW), static_cast(predCtrY + 0.5f * predH) }); + boxes.push_back({static_cast(predCtrX - 0.5f * predW), + static_cast(predCtrY - 0.5f * predH), + static_cast(predCtrX + 0.5f * predW), + static_cast(predCtrY + 0.5f * predH)}); } return boxes; } std::unique_ptr ModelFaceBoxes::postprocess(InferenceResult& infResult) { - // --------------------------- Filter scores and get valid indices for bounding boxes---------------------------------- + // --------------------------- Filter scores and get valid indices for bounding + // boxes---------------------------------- const auto scoresTensor = infResult.outputsData[outputsNames[1]]; const auto scores = filterScores(scoresTensor, confidenceThreshold); - // --------------------------- Filter bounding boxes on indices ------------------------------------------------------- + // --------------------------- Filter bounding boxes on indices + // ------------------------------------------------------- auto boxesTensor = infResult.outputsData[outputsNames[0]]; std::vector boxes = filterBoxes(boxesTensor, anchors, scores.first, variance); - // --------------------------- Apply Non-maximum Suppression ---------------------------------------------------------- + // --------------------------- Apply Non-maximum Suppression + // ---------------------------------------------------------- const std::vector keep = nms(boxes, scores.second, boxIOUThreshold); - // --------------------------- Create detection result objects -------------------------------------------------------- + // --------------------------- Create detection result objects + // -------------------------------------------------------- DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); const auto imgWidth = infResult.internalModelData->asRef().inputImgWidth; const auto imgHeight = infResult.internalModelData->asRef().inputImgHeight; @@ -228,11 +252,11 @@ std::unique_ptr ModelFaceBoxes::postprocess(InferenceResult& infResu for (auto i : keep) { DetectedObject desc; desc.confidence = scores.second[i]; - desc.x = clamp(boxes[i].left / scaleX, 0.f, (float)imgWidth); - desc.y = clamp(boxes[i].top / scaleY, 0.f, (float)imgHeight); - desc.width = clamp(boxes[i].getWidth() / scaleX, 0.f, (float)imgWidth); - desc.height = clamp(boxes[i].getHeight() / scaleY, 0.f, (float)imgHeight); - desc.labelID = 0; + desc.x = clamp(boxes[i].left / scaleX, 0.f, static_cast(imgWidth)); + desc.y = clamp(boxes[i].top / scaleY, 0.f, static_cast(imgHeight)); + desc.width = clamp(boxes[i].getWidth() / scaleX, 0.f, static_cast(imgWidth)); + desc.height = clamp(boxes[i].getHeight() / scaleY, 0.f, static_cast(imgHeight)); + desc.labelID = 0; desc.label = labels[0]; result->objects.push_back(desc); diff --git a/demos/common/cpp/models/src/detection_model_retinaface.cpp b/demos/common/cpp/models/src/detection_model_retinaface.cpp index a50a6fbf3ba..fa6716ac2f7 100644 --- a/demos/common/cpp/models/src/detection_model_retinaface.cpp +++ b/demos/common/cpp/models/src/detection_model_retinaface.cpp @@ -14,18 +14,35 @@ // limitations under the License. 
*/ +#include "models/detection_model_retinaface.h" + +#include +#include + +#include +#include + +#include #include + #include #include -#include "models/detection_model_retinaface.h" -ModelRetinaFace::ModelRetinaFace(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize, - float boxIOUThreshold, const std::string& layout) +#include "models/internal_model_data.h" +#include "models/results.h" + +ModelRetinaFace::ModelRetinaFace(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout) : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), // Default label is "Face" - shouldDetectMasks(false), shouldDetectLandmarks(false), boxIOUThreshold(boxIOUThreshold), maskThreshold(0.8f), landmarkStd(1.0f), - anchorCfg({ {32, { 32, 16 }, 16, { 1 }}, - { 16, { 8, 4 }, 16, { 1 }}, - { 8, { 2, 1 }, 16, { 1 }} }) { + shouldDetectMasks(false), + shouldDetectLandmarks(false), + boxIOUThreshold(boxIOUThreshold), + maskThreshold(0.8f), + landmarkStd(1.0f), + anchorCfg({{32, {32, 16}, 16, {1}}, {16, {8, 4}, 16, {1}}, {8, {2, 1}, 16, {1}}}) { generateAnchorsFpn(); } @@ -38,23 +55,20 @@ void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr& model) { const ov::Shape& inputShape = model->input().get_shape(); const ov::Layout& inputLayout = getInputLayout(model->input()); - if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) { throw std::logic_error("Expected 3-channel input"); } ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_layout({ "NHWC" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); if (useAutoResize) { - ppp.input().tensor(). - set_spatial_dynamic_shape(); + ppp.input().tensor().set_spatial_dynamic_shape(); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). - resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); } ppp.input().model().set_layout(inputLayout); @@ -71,35 +85,29 @@ void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr& model) { throw std::logic_error("RetinaFace model wrapper expects models that have 6, 9 or 12 outputs"); } - const ov::Layout outputLayout{ "NCHW" }; + const ov::Layout outputLayout{"NCHW"}; std::vector outputsSizes[OUT_MAX]; for (const auto& output : model->outputs()) { auto outTensorName = output.get_any_name(); outputsNames.push_back(outTensorName); - ppp.output(outTensorName).tensor(). - set_element_type(ov::element::f32). 
- set_layout(outputLayout); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout); OutputType type = OUT_MAX; if (outTensorName.find("box") != std::string::npos) { type = OUT_BOXES; - } - else if (outTensorName.find("cls") != std::string::npos) { + } else if (outTensorName.find("cls") != std::string::npos) { type = OUT_SCORES; - } - else if (outTensorName.find("landmark") != std::string::npos) { + } else if (outTensorName.find("landmark") != std::string::npos) { type = OUT_LANDMARKS; shouldDetectLandmarks = true; - } - else if (outTensorName.find("type") != std::string::npos) { + } else if (outTensorName.find("type") != std::string::npos) { type = OUT_MASKSCORES; labels.clear(); labels.push_back("No Mask"); labels.push_back("Mask"); shouldDetectMasks = true; landmarkStd = 0.2f; - } - else { + } else { continue; } @@ -112,7 +120,6 @@ void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr& model) { } separateOutputsNames[type].insert(separateOutputsNames[type].begin() + i, outTensorName); outputsSizes[type].insert(outputsSizes[type].begin() + i, num); - } model = ppp.build(); @@ -151,8 +158,10 @@ std::vector ratioEnum(const ModelRetinaFace::Anchor& an const auto sizeRatio = static_cast(size) / ratio; const auto ws = sqrt(sizeRatio); const auto hs = ws * ratio; - retVal.push_back({ static_cast(xCtr - 0.5f * (ws - 1.0f)), static_cast(yCtr - 0.5f * (hs - 1.0f)), - static_cast(xCtr + 0.5f * (ws - 1.0f)), static_cast(yCtr + 0.5f * (hs - 1.0f)) }); + retVal.push_back({static_cast(xCtr - 0.5f * (ws - 1.0f)), + static_cast(yCtr - 0.5f * (hs - 1.0f)), + static_cast(xCtr + 0.5f * (ws - 1.0f)), + static_cast(yCtr + 0.5f * (hs - 1.0f))}); } return retVal; } @@ -167,14 +176,18 @@ std::vector scaleEnum(const ModelRetinaFace::Anchor& an for (auto scale : scales) { const auto ws = w * scale; const auto hs = h * scale; - retVal.push_back({ static_cast(xCtr - 0.5f * (ws - 1.0f)), static_cast(yCtr - 0.5f * (hs - 1.0f)), - static_cast(xCtr + 0.5f * (ws - 1.0f)), static_cast(yCtr + 0.5f * (hs - 1.0f)) }); + retVal.push_back({static_cast(xCtr - 0.5f * (ws - 1.0f)), + static_cast(yCtr - 0.5f * (hs - 1.0f)), + static_cast(xCtr + 0.5f * (ws - 1.0f)), + static_cast(yCtr + 0.5f * (hs - 1.0f))}); } return retVal; } -std::vector generateAnchors(const int baseSize, const std::vector& ratios, const std::vector& scales) { - ModelRetinaFace::Anchor baseAnchor{ 0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f }; +std::vector generateAnchors(const int baseSize, + const std::vector& ratios, + const std::vector& scales) { + ModelRetinaFace::Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f}; auto ratioAnchors = ratioEnum(baseAnchor, ratios); std::vector retVal; @@ -187,14 +200,15 @@ std::vector generateAnchors(const int baseSize, const s void ModelRetinaFace::generateAnchorsFpn() { auto cfg = anchorCfg; - std::sort(cfg.begin(), cfg.end(), [](const AnchorCfgLine& x, const AnchorCfgLine& y) { return x.stride > y.stride; }); + std::sort(cfg.begin(), cfg.end(), [](const AnchorCfgLine& x, const AnchorCfgLine& y) { + return x.stride > y.stride; + }); for (const auto& cfgLine : cfg) { anchorsFpn.emplace(cfgLine.stride, generateAnchors(cfgLine.baseSize, cfgLine.ratios, cfgLine.scales)); } } - std::vector thresholding(const ov::Tensor& scoresTensor, const int anchorNum, const float confidenceThreshold) { std::vector indices; indices.reserve(ModelRetinaFace::INIT_VECTOR_SIZE); @@ -217,7 +231,10 @@ std::vector thresholding(const ov::Tensor& scoresTensor, const int ancho return 
indices; } -void filterScores(std::vector& scores, const std::vector& indices, const ov::Tensor& scoresTensor, const int anchorNum) { +void filterScores(std::vector& scores, + const std::vector& indices, + const ov::Tensor& scoresTensor, + const int anchorNum) { const auto& shape = scoresTensor.get_shape(); const float* scoresPtr = scoresTensor.data(); const auto start = shape[2] * shape[3] * anchorNum; @@ -228,14 +245,16 @@ void filterScores(std::vector& scores, const std::vector& indices } } -void filterBoxes(std::vector& boxes, const std::vector& indices, const ov::Tensor& boxesTensor, - int anchorNum, const std::vector& anchors) { +void filterBoxes(std::vector& boxes, + const std::vector& indices, + const ov::Tensor& boxesTensor, + int anchorNum, + const std::vector& anchors) { const auto& shape = boxesTensor.get_shape(); const float* boxesPtr = boxesTensor.data(); const auto boxPredLen = shape[1] / anchorNum; const auto blockWidth = shape[2] * shape[3]; - for (auto i : indices) { auto offset = blockWidth * boxPredLen * (i % anchorNum) + (i / anchorNum); @@ -249,14 +268,19 @@ void filterBoxes(std::vector& boxes, const std::vector< const auto predW = exp(dw) * anchors[i].getWidth(); const auto predH = exp(dh) * anchors[i].getHeight(); - boxes.push_back({ static_cast(predCtrX - 0.5f * (predW - 1.0f)), static_cast(predCtrY - 0.5f * (predH - 1.0f)), - static_cast(predCtrX + 0.5f * (predW - 1.0f)), static_cast(predCtrY + 0.5f * (predH - 1.0f)) }); + boxes.push_back({static_cast(predCtrX - 0.5f * (predW - 1.0f)), + static_cast(predCtrY - 0.5f * (predH - 1.0f)), + static_cast(predCtrX + 0.5f * (predW - 1.0f)), + static_cast(predCtrY + 0.5f * (predH - 1.0f))}); } } - -void filterLandmarks(std::vector& landmarks, const std::vector& indices, const ov::Tensor& landmarksTensor, - int anchorNum, const std::vector& anchors, const float landmarkStd) { +void filterLandmarks(std::vector& landmarks, + const std::vector& indices, + const ov::Tensor& landmarksTensor, + int anchorNum, + const std::vector& anchors, + const float landmarkStd) { const auto& shape = landmarksTensor.get_shape(); const float* landmarksPtr = landmarksTensor.data(); const auto landmarkPredLen = shape[1] / anchorNum; @@ -268,12 +292,15 @@ void filterLandmarks(std::vector& landmarks, const std::vector& masks, const std::vector& indices, const ov::Tensor& maskScoresTensor, const int anchorNum) { +void filterMasksScores(std::vector& masks, + const std::vector& indices, + const ov::Tensor& maskScoresTensor, + const int anchorNum) { auto shape = maskScoresTensor.get_shape(); const float* maskScoresPtr = maskScoresTensor.data(); auto start = shape[2] * shape[3] * anchorNum * 2; @@ -299,7 +326,8 @@ std::unique_ptr ModelRetinaFace::postprocess(InferenceResult& infRes masks.reserve(INIT_VECTOR_SIZE); } - // --------------------------- Gather & Filter output from all levels ---------------------------------------------------------- + // --------------------------- Gather & Filter output from all levels + // ---------------------------------------------------------- for (size_t idx = 0; idx < anchorCfg.size(); ++idx) { const auto boxRaw = infResult.outputsData[separateOutputsNames[OUT_BOXES][idx]]; const auto scoresRaw = infResult.outputsData[separateOutputsNames[OUT_SCORES][idx]]; @@ -318,11 +346,13 @@ std::unique_ptr ModelRetinaFace::postprocess(InferenceResult& infRes filterMasksScores(masks, validIndices, masksRaw, anchorNum); } } - // --------------------------- Apply Non-maximum Suppression 
---------------------------------------------------------- - // !shouldDetectLandmarks determines nms behavior, if true - boundaries are included in areas calculation + // --------------------------- Apply Non-maximum Suppression + // ---------------------------------------------------------- !shouldDetectLandmarks determines nms behavior, if + // true - boundaries are included in areas calculation const auto keep = nms(boxes, scores, boxIOUThreshold, !shouldDetectLandmarks); - // --------------------------- Create detection result objects -------------------------------------------------------- + // --------------------------- Create detection result objects + // -------------------------------------------------------- RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData); const auto imgWidth = infResult.internalModelData->asRef().inputImgWidth; @@ -341,10 +371,10 @@ std::unique_ptr ModelRetinaFace::postprocess(InferenceResult& infRes boxes[i].right /= scaleX; boxes[i].bottom /= scaleY; - desc.x = clamp(boxes[i].left, 0.f, (float)imgWidth); - desc.y = clamp(boxes[i].top, 0.f, (float)imgHeight); - desc.width = clamp(boxes[i].getWidth(), 0.f, (float)imgWidth); - desc.height = clamp(boxes[i].getHeight(), 0.f, (float)imgHeight); + desc.x = clamp(boxes[i].left, 0.f, static_cast(imgWidth)); + desc.y = clamp(boxes[i].top, 0.f, static_cast(imgHeight)); + desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast(imgWidth)); + desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast(imgHeight)); //--- Default label 0 - Face. If detecting masks then labels would be 0 - No Mask, 1 - Mask desc.labelID = shouldDetectMasks ? (masks[i] > maskThreshold) : 0; desc.label = labels[desc.labelID]; @@ -353,9 +383,9 @@ std::unique_ptr ModelRetinaFace::postprocess(InferenceResult& infRes //--- Scaling landmarks coordinates for (size_t l = 0; l < ModelRetinaFace::LANDMARKS_NUM && shouldDetectLandmarks; ++l) { landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x = - clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x / scaleX, 0.f, (float)imgWidth); + clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x / scaleX, 0.f, static_cast(imgWidth)); landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y = - clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y / scaleY, 0.f, (float)imgHeight); + clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y / scaleY, 0.f, static_cast(imgHeight)); result->landmarks.push_back(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l]); } } diff --git a/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp b/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp index 6313a5b8001..0a8bd1c52fd 100644 --- a/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp +++ b/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp @@ -14,21 +14,34 @@ // limitations under the License. 
*/ +#include "models/detection_model_retinaface_pt.h" + +#include +#include + +#include +#include +#include #include #include -#include + #include + #include #include -#include -#include "models/detection_model_retinaface_pt.h" +#include + +#include "models/internal_model_data.h" #include "models/results.h" -ModelRetinaFacePT::ModelRetinaFacePT(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize, - float boxIOUThreshold, const std::string& layout) +ModelRetinaFacePT::ModelRetinaFacePT(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout) : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), // Default label is "Face" - landmarksNum(0), boxIOUThreshold(boxIOUThreshold) { -} + landmarksNum(0), + boxIOUThreshold(boxIOUThreshold) {} void ModelRetinaFacePT::prepareInputsOutputs(std::shared_ptr& model) { // --------------------------- Configure input & output ------------------------------------------------- @@ -46,16 +59,15 @@ void ModelRetinaFacePT::prepareInputsOutputs(std::shared_ptr& model) ov::preprocess::PrePostProcessor ppp(model); inputTransform.setPrecision(ppp, model->input().get_any_name()); - ppp.input().tensor(). - set_layout({ "NHWC" }); + ppp.input().tensor().set_layout({"NHWC"}); if (useAutoResize) { - ppp.input().tensor(). - set_spatial_dynamic_shape(); + ppp.input().tensor().set_spatial_dynamic_shape(); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). - resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); } ppp.input().model().set_layout(inputLayout); @@ -79,24 +91,23 @@ void ModelRetinaFacePT::prepareInputsOutputs(std::shared_ptr& model) for (auto& output : model->outputs()) { auto outTensorName = output.get_any_name(); outputsNames.push_back(outTensorName); - ppp.output(outTensorName).tensor(). - set_element_type(ov::element::f32). - set_layout(output.get_shape().size() == 4 ? nchw : chw); + ppp.output(outTensorName) + .tensor() + .set_element_type(ov::element::f32) + .set_layout(output.get_shape().size() == 4 ? 
nchw : chw); if (outTensorName.find("bbox") != std::string::npos) { outputsNames[OUT_BOXES] = outTensorName; - } - else if (outTensorName.find("cls") != std::string::npos) { + } else if (outTensorName.find("cls") != std::string::npos) { outputsNames[OUT_SCORES] = outTensorName; - } - else if (outTensorName.find("landmark") != std::string::npos) { - // Landmarks might be optional, if it is present, resize names array to fit landmarks output name to the last item of array - // Considering that other outputs names are already filled in or will be filled later + } else if (outTensorName.find("landmark") != std::string::npos) { + // Landmarks might be optional, if it is present, resize names array to fit landmarks output name to the + // last item of array Considering that other outputs names are already filled in or will be filled later outputsNames.resize(std::max(outputsNames.size(), (size_t)OUT_LANDMARKS + 1)); outputsNames[OUT_LANDMARKS] = outTensorName; - landmarksNum = output.get_shape()[ov::layout::width_idx(chw)] / 2; // Each landmark consist of 2 variables (x and y) - } - else { + landmarksNum = + output.get_shape()[ov::layout::width_idx(chw)] / 2; // Each landmark consist of 2 variables (x and y) + } else { continue; } } @@ -109,7 +120,6 @@ void ModelRetinaFacePT::prepareInputsOutputs(std::shared_ptr& model) priors = generatePriorData(); } - std::vector ModelRetinaFacePT::filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold) { std::vector indicies; const auto& shape = scoresTensor.get_shape(); @@ -126,7 +136,8 @@ std::vector ModelRetinaFacePT::filterByScore(const ov::Tensor& scoresTen return indicies; } -std::vector ModelRetinaFacePT::getFilteredScores(const ov::Tensor& scoresTensor, const std::vector& indicies) { +std::vector ModelRetinaFacePT::getFilteredScores(const ov::Tensor& scoresTensor, + const std::vector& indicies) { const auto& shape = scoresTensor.get_shape(); const float* scoresPtr = scoresTensor.data(); @@ -134,38 +145,41 @@ std::vector ModelRetinaFacePT::getFilteredScores(const ov::Tensor& scores scores.reserve(indicies.size()); for (auto i : indicies) { - scores.push_back(scoresPtr[i*shape[2] + 1]); + scores.push_back(scoresPtr[i * shape[2] + 1]); } return scores; } std::vector ModelRetinaFacePT::getFilteredLandmarks(const ov::Tensor& landmarksTensor, - const std::vector& indicies, int imgWidth, int imgHeight) { + const std::vector& indicies, + int imgWidth, + int imgHeight) { const auto& shape = landmarksTensor.get_shape(); const float* landmarksPtr = landmarksTensor.data(); - std::vector landmarks(landmarksNum*indicies.size()); + std::vector landmarks(landmarksNum * indicies.size()); for (size_t i = 0; i < indicies.size(); i++) { const size_t idx = indicies[i]; const auto& prior = priors[idx]; for (size_t j = 0; j < landmarksNum; j++) { - landmarks[i*landmarksNum + j].x = - clamp(prior.cX + landmarksPtr[idx*shape[2] + j*2] * variance[0] * prior.width, 0.f, 1.f) * imgWidth; - landmarks[i*landmarksNum + j].y = - clamp(prior.cY + landmarksPtr[idx*shape[2] + j*2 + 1] * variance[0] * prior.height, 0.f, 1.f) * imgHeight; + landmarks[i * landmarksNum + j].x = + clamp(prior.cX + landmarksPtr[idx * shape[2] + j * 2] * variance[0] * prior.width, 0.f, 1.f) * imgWidth; + landmarks[i * landmarksNum + j].y = + clamp(prior.cY + landmarksPtr[idx * shape[2] + j * 2 + 1] * variance[0] * prior.height, 0.f, 1.f) * + imgHeight; } } return landmarks; } std::vector ModelRetinaFacePT::generatePriorData() { - const float globalMinSizes[][2] = { {16, 32}, {64, 128}, 
{256, 512} }; - const float steps[] = { 8., 16., 32. }; + const float globalMinSizes[][2] = {{16, 32}, {64, 128}, {256, 512}}; + const float steps[] = {8., 16., 32.}; std::vector anchors; for (size_t stepNum = 0; stepNum < arraySize(steps); stepNum++) { - const int featureW = (int)std::round(netInputWidth / steps[stepNum]); - const int featureH = (int)std::round(netInputHeight / steps[stepNum]); + const int featureW = static_cast(std::round(netInputWidth / steps[stepNum])); + const int featureH = static_cast(std::round(netInputHeight / steps[stepNum])); const auto& minSizes = globalMinSizes[stepNum]; for (int i = 0; i < featureH; i++) { @@ -184,14 +198,15 @@ std::vector ModelRetinaFacePT::generatePriorData() { } std::vector ModelRetinaFacePT::getFilteredProposals(const ov::Tensor& boxesTensor, - const std::vector& indicies,int imgWidth, int imgHeight) { + const std::vector& indicies, + int imgWidth, + int imgHeight) { std::vector rects; rects.reserve(indicies.size()); const auto& shape = boxesTensor.get_shape(); const float* boxesPtr = boxesTensor.data(); - if (shape[1] != priors.size()) { throw std::logic_error("rawBoxes size is not equal to priors size"); } @@ -203,18 +218,17 @@ std::vector ModelRetinaFacePT::getFilteredProposals(con const float cY = priors[i].cY + pRawBox->cY * variance[0] * prior.height; const float width = prior.width * exp(pRawBox->width * variance[1]); const float height = prior.height * exp(pRawBox->height * variance[1]); - rects.push_back(Rect{ - clamp(cX - width / 2, 0.f, 1.f) * imgWidth, - clamp(cY - height / 2, 0.f, 1.f) * imgHeight, - clamp(cX + width / 2, 0.f, 1.f) * imgWidth, - clamp(cY + height / 2, 0.f, 1.f) * imgHeight }); + rects.push_back(Rect{clamp(cX - width / 2, 0.f, 1.f) * imgWidth, + clamp(cY - height / 2, 0.f, 1.f) * imgHeight, + clamp(cX + width / 2, 0.f, 1.f) * imgWidth, + clamp(cY + height / 2, 0.f, 1.f) * imgHeight}); } return rects; } std::unique_ptr ModelRetinaFacePT::postprocess(InferenceResult& infResult) { - //(raw_output, scale_x, scale_y, face_prob_threshold, image_size): + // (raw_output, scale_x, scale_y, face_prob_threshold, image_size): const auto boxesTensor = infResult.outputsData[outputsNames[OUT_BOXES]]; const auto scoresTensor = infResult.outputsData[outputsNames[OUT_SCORES]]; @@ -222,17 +236,19 @@ std::unique_ptr ModelRetinaFacePT::postprocess(InferenceResult& infR const auto& scores = getFilteredScores(scoresTensor, validIndicies); const auto& internalData = infResult.internalModelData->asRef(); - const auto& landmarks = landmarksNum ? - getFilteredLandmarks(infResult.outputsData[outputsNames[OUT_LANDMARKS]], - validIndicies, internalData.inputImgWidth, internalData.inputImgHeight) : - std::vector(); + const auto& landmarks = landmarksNum ? 
getFilteredLandmarks(infResult.outputsData[outputsNames[OUT_LANDMARKS]], + validIndicies, + internalData.inputImgWidth, + internalData.inputImgHeight) + : std::vector(); - const auto& proposals = getFilteredProposals(boxesTensor, validIndicies, - internalData.inputImgWidth, internalData.inputImgHeight); + const auto& proposals = + getFilteredProposals(boxesTensor, validIndicies, internalData.inputImgWidth, internalData.inputImgHeight); const auto& keptIndicies = nms(proposals, scores, boxIOUThreshold, !landmarksNum); - // --------------------------- Create detection result objects -------------------------------------------------------- + // --------------------------- Create detection result objects + // -------------------------------------------------------- RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData); result->objects.reserve(keptIndicies.size()); @@ -253,9 +269,7 @@ std::unique_ptr ModelRetinaFacePT::postprocess(InferenceResult& infR //--- Filtering landmarks coordinates for (uint32_t l = 0; l < landmarksNum; ++l) { - result->landmarks.emplace_back(landmarks[i*landmarksNum +l].x, - landmarks[i*landmarksNum + l].y - ); + result->landmarks.emplace_back(landmarks[i * landmarksNum + l].x, landmarks[i * landmarksNum + l].y); } } diff --git a/demos/common/cpp/models/src/detection_model_ssd.cpp b/demos/common/cpp/models/src/detection_model_ssd.cpp index c7d78d1f426..ef741ee06a6 100644 --- a/demos/common/cpp/models/src/detection_model_ssd.cpp +++ b/demos/common/cpp/models/src/detection_model_ssd.cpp @@ -14,19 +14,31 @@ // limitations under the License. */ +#include "models/detection_model_ssd.h" + +#include +#include +#include #include +#include #include + #include + #include -#include "models/detection_model_ssd.h" +#include + +#include "models/internal_model_data.h" #include "models/results.h" +struct InputData; + ModelSSD::ModelSSD(const std::string& modelFileName, - float confidenceThreshold, bool useAutoResize, - const std::vector& labels, - const std::string& layout) : - DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) { -} + float confidenceThreshold, + bool useAutoResize, + const std::vector& labels, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {} std::shared_ptr ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) { if (inputsNames.size() > 1) { @@ -42,9 +54,7 @@ std::shared_ptr ModelSSD::preprocess(const InputData& inputDa } std::unique_ptr ModelSSD::postprocess(InferenceResult& infResult) { - return outputsNames.size() > 1 ? - postprocessMultipleOutputs(infResult) : - postprocessSingleOutput(infResult); + return outputsNames.size() > 1 ? 
postprocessMultipleOutputs(infResult) : postprocessSingleOutput(infResult); } std::unique_ptr ModelSSD::postprocessSingleOutput(InferenceResult& infResult) { @@ -74,13 +84,19 @@ std::unique_ptr ModelSSD::postprocessSingleOutput(InferenceResult& i desc.label = getLabelName(desc.labelID); desc.x = clamp(detections[i * objectSize + 3] * internalData.inputImgWidth, - 0.f, (float)internalData.inputImgWidth); + 0.f, + static_cast(internalData.inputImgWidth)); desc.y = clamp(detections[i * objectSize + 4] * internalData.inputImgHeight, - 0.f, (float)internalData.inputImgHeight); + 0.f, + static_cast(internalData.inputImgHeight)); desc.width = clamp(detections[i * objectSize + 5] * internalData.inputImgWidth, - 0.f, (float)internalData.inputImgWidth) - desc.x; + 0.f, + static_cast(internalData.inputImgWidth)) - + desc.x; desc.height = clamp(detections[i * objectSize + 6] * internalData.inputImgHeight, - 0.f, (float)internalData.inputImgHeight) - desc.y; + 0.f, + static_cast(internalData.inputImgHeight)) - + desc.y; result->objects.push_back(desc); } @@ -102,8 +118,8 @@ std::unique_ptr ModelSSD::postprocessMultipleOutputs(InferenceResult // In models with scores are stored in separate output, coordinates are normalized to [0,1] // In other multiple-outputs models coordinates are normalized to [0,netInputWidth] and [0,netInputHeight] - float widthScale = ((float)internalData.inputImgWidth) / (scores ? 1 : netInputWidth); - float heightScale = ((float)internalData.inputImgHeight) / (scores ? 1 : netInputHeight); + float widthScale = static_cast(internalData.inputImgWidth) / (scores ? 1 : netInputWidth); + float heightScale = static_cast(internalData.inputImgHeight) / (scores ? 1 : netInputHeight); for (size_t i = 0; i < detectionsNum; i++) { float confidence = scores ? scores[i] : boxes[i * objectSize + 4]; @@ -116,14 +132,15 @@ std::unique_ptr ModelSSD::postprocessMultipleOutputs(InferenceResult desc.labelID = static_cast(labels[i]); desc.label = getLabelName(desc.labelID); - desc.x = clamp(boxes[i * objectSize] * widthScale, - 0.f, (float)internalData.inputImgWidth); - desc.y = clamp(boxes[i * objectSize + 1] * heightScale, - 0.f, (float)internalData.inputImgHeight); - desc.width = clamp(boxes[i * objectSize + 2] * widthScale, - 0.f, (float)internalData.inputImgWidth) - desc.x; - desc.height = clamp(boxes[i * objectSize + 3] * heightScale, - 0.f, (float)internalData.inputImgHeight) - desc.y; + desc.x = clamp(boxes[i * objectSize] * widthScale, 0.f, static_cast(internalData.inputImgWidth)); + desc.y = + clamp(boxes[i * objectSize + 1] * heightScale, 0.f, static_cast(internalData.inputImgHeight)); + desc.width = + clamp(boxes[i * objectSize + 2] * widthScale, 0.f, static_cast(internalData.inputImgWidth)) - + desc.x; + desc.height = + clamp(boxes[i * objectSize + 3] * heightScale, 0.f, static_cast(internalData.inputImgHeight)) - + desc.y; result->objects.push_back(desc); } @@ -144,40 +161,37 @@ void ModelSSD::prepareInputsOutputs(std::shared_ptr& model) { if (shape.size() == 4) { // 1st input contains images if (inputsNames.empty()) { inputsNames.push_back(inputTensorName); - } - else { + } else { inputsNames[0] = inputTensorName; } inputTransform.setPrecision(ppp, inputTensorName); - ppp.input(inputTensorName).tensor(). - set_layout({ "NHWC" }); + ppp.input(inputTensorName).tensor().set_layout({"NHWC"}); if (useAutoResize) { - ppp.input(inputTensorName).tensor(). 
- set_spatial_dynamic_shape(); + ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape(); - ppp.input(inputTensorName).preprocess(). - convert_element_type(ov::element::f32). - resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input(inputTensorName) + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); } ppp.input(inputTensorName).model().set_layout(inputLayout); netInputWidth = shape[ov::layout::width_idx(inputLayout)]; netInputHeight = shape[ov::layout::height_idx(inputLayout)]; - } - else if (shape.size() == 2) { // 2nd input contains image info + } else if (shape.size() == 2) { // 2nd input contains image info inputsNames.resize(2); inputsNames[1] = inputTensorName; - ppp.input(inputTensorName).tensor(). - set_element_type(ov::element::f32); - } - else { - throw std::logic_error("Unsupported " + - std::to_string(input.get_shape().size()) + "D " - "input layer '" + input.get_any_name() + "'. " - "Only 2D and 4D input layers are supported"); + ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32); + } else { + throw std::logic_error("Unsupported " + std::to_string(input.get_shape().size()) + + "D " + "input layer '" + + input.get_any_name() + + "'. " + "Only 2D and 4D input layers are supported"); } } model = ppp.build(); @@ -185,8 +199,7 @@ void ModelSSD::prepareInputsOutputs(std::shared_ptr& model) { // --------------------------- Prepare output ----------------------------------------------------- if (model->outputs().size() == 1) { prepareSingleOutput(model); - } - else { + } else { prepareMultipleOutputs(model); } } @@ -203,12 +216,11 @@ void ModelSSD::prepareSingleOutput(std::shared_ptr& model) { detectionsNumId = ov::layout::height_idx(layout); objectSize = shape[ov::layout::width_idx(layout)]; if (objectSize != 7) { - throw std::logic_error("SSD single output must have 7 as a last dimension, but had " + std::to_string(objectSize)); + throw std::logic_error("SSD single output must have 7 as a last dimension, but had " + + std::to_string(objectSize)); } ov::preprocess::PrePostProcessor ppp(model); - ppp.output().tensor(). - set_element_type(ov::element::f32). 
- set_layout(layout); + ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout); model = ppp.build(); } @@ -220,19 +232,18 @@ void ModelSSD::prepareMultipleOutputs(std::shared_ptr& model) { if (name.find("boxes") != std::string::npos) { outputsNames.push_back(name); break; - } - else if (name.find("labels") != std::string::npos) { + } else if (name.find("labels") != std::string::npos) { outputsNames.push_back(name); break; - } - else if (name.find("scores") != std::string::npos) { + } else if (name.find("scores") != std::string::npos) { outputsNames.push_back(name); break; } } } if (outputsNames.size() != 2 && outputsNames.size() != 3) { - throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " + std::to_string(outputsNames.size())); + throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " + + std::to_string(outputsNames.size())); } std::sort(outputsNames.begin(), outputsNames.end()); @@ -248,8 +259,7 @@ void ModelSSD::prepareMultipleOutputs(std::shared_ptr& model) { if (objectSize != 5) { throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required"); } - } - else if (boxesShape.size() == 3) { + } else if (boxesShape.size() == 3) { boxesLayout = "CHW"; detectionsNumId = ov::layout::height_idx(boxesLayout); objectSize = boxesShape[ov::layout::width_idx(boxesLayout)]; @@ -257,18 +267,15 @@ void ModelSSD::prepareMultipleOutputs(std::shared_ptr& model) { if (objectSize != 4) { throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required"); } - } - else { - throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " - + std::to_string(boxesShape.size())); + } else { + throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " + + std::to_string(boxesShape.size())); } - ppp.output(outputsNames[0]).tensor(). - set_layout(boxesLayout); + ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout); for (const auto& outName : outputsNames) { - ppp.output(outName).tensor(). - set_element_type(ov::element::f32); + ppp.output(outName).tensor().set_element_type(ov::element::f32); } model = ppp.build(); } diff --git a/demos/common/cpp/models/src/detection_model_yolo.cpp b/demos/common/cpp/models/src/detection_model_yolo.cpp index 0a9eeda9c26..2c28dc4a71e 100644 --- a/demos/common/cpp/models/src/detection_model_yolo.cpp +++ b/demos/common/cpp/models/src/detection_model_yolo.cpp @@ -14,33 +14,70 @@ // limitations under the License. 
*/ +#include "models/detection_model_yolo.h" + +#include + +#include +#include +#include #include +#include #include + #include -#include + #include -#include -#include "models/detection_model_yolo.h" +#include + +#include "models/internal_model_data.h" #include "models/results.h" std::vector defaultAnchors[] = { // YOLOv1v2 - { 0.57273f, 0.677385f, 1.87446f, 2.06253f, 3.33843f, 5.47434f, 7.88282f, 3.52778f, 9.77052f, 9.16828f }, + {0.57273f, 0.677385f, 1.87446f, 2.06253f, 3.33843f, 5.47434f, 7.88282f, 3.52778f, 9.77052f, 9.16828f}, // YOLOv3 - { 10.0f, 13.0f, 16.0f, 30.0f, 33.0f, 23.0f, - 30.0f, 61.0f, 62.0f, 45.0f, 59.0f, 119.0f, - 116.0f, 90.0f, 156.0f, 198.0f, 373.0f, 326.0f}, + {10.0f, + 13.0f, + 16.0f, + 30.0f, + 33.0f, + 23.0f, + 30.0f, + 61.0f, + 62.0f, + 45.0f, + 59.0f, + 119.0f, + 116.0f, + 90.0f, + 156.0f, + 198.0f, + 373.0f, + 326.0f}, // YOLOv4 - { 12.0f, 16.0f, 19.0f, 36.0f, 40.0f, 28.0f, - 36.0f, 75.0f, 76.0f, 55.0f, 72.0f, 146.0f, - 142.0f, 110.0f, 192.0f, 243.0f, 459.0f, 401.0f}, + {12.0f, + 16.0f, + 19.0f, + 36.0f, + 40.0f, + 28.0f, + 36.0f, + 75.0f, + 76.0f, + 55.0f, + 72.0f, + 146.0f, + 142.0f, + 110.0f, + 192.0f, + 243.0f, + 459.0f, + 401.0f}, // YOLOv4_Tiny - { 10.0f, 14.0f, 23.0f, 27.0f, 37.0f, 58.0f, - 81.0f, 82.0f, 135.0f, 169.0f, 344.0f, 319.0f}, + {10.0f, 14.0f, 23.0f, 27.0f, 37.0f, 58.0f, 81.0f, 82.0f, 135.0f, 169.0f, 344.0f, 319.0f}, // YOLOF - { 16.0f, 16.0f, 32.0f, 32.0f, 64.0f, 64.0f, - 128.0f, 128.0f, 256.0f, 256.0f, 512.0f, 512.0f} -}; + {16.0f, 16.0f, 32.0f, 32.0f, 64.0f, 64.0f, 128.0f, 128.0f, 256.0f, 256.0f, 512.0f, 512.0f}}; const std::vector defaultMasks[] = { // YOLOv1v2 @@ -48,12 +85,11 @@ const std::vector defaultMasks[] = { // YOLOv3 {}, // YOLOv4 - {0, 1, 2, 3, 4, 5, 6, 7, 8 }, + {0, 1, 2, 3, 4, 5, 6, 7, 8}, // YOLOv4_Tiny {1, 2, 3, 3, 4, 5}, // YOLOF - {0, 1, 2, 3, 4, 5} -}; + {0, 1, 2, 3, 4, 5}}; static inline float sigmoid(float x) { return 1.f / (1.f + exp(-x)); @@ -63,18 +99,21 @@ static inline float linear(float x) { return x; } - -ModelYolo::ModelYolo(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize, - bool useAdvancedPostprocessing, float boxIOUThreshold, const std::vector& labels, - const std::vector& anchors, const std::vector& masks, - const std::string& layout) : - DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout), - boxIOUThreshold(boxIOUThreshold), - useAdvancedPostprocessing(useAdvancedPostprocessing), - yoloVersion(YOLO_V3), - presetAnchors(anchors), - presetMasks(masks) { -} +ModelYolo::ModelYolo(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + bool useAdvancedPostprocessing, + float boxIOUThreshold, + const std::vector& labels, + const std::vector& anchors, + const std::vector& masks, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout), + boxIOUThreshold(boxIOUThreshold), + useAdvancedPostprocessing(useAdvancedPostprocessing), + yoloVersion(YOLO_V3), + presetAnchors(anchors), + presetMasks(masks) {} void ModelYolo::prepareInputsOutputs(std::shared_ptr& model) { // --------------------------- Configure input & output ------------------------------------------------- @@ -92,17 +131,15 @@ void ModelYolo::prepareInputsOutputs(std::shared_ptr& model) { } ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout({ "NHWC" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); if (useAutoResize) { - ppp.input().tensor(). - set_spatial_dynamic_shape(); + ppp.input().tensor().set_spatial_dynamic_shape(); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). - resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); } ppp.input().model().set_layout(inputLayout); @@ -118,9 +155,10 @@ void ModelYolo::prepareInputsOutputs(std::shared_ptr& model) { for (auto& out : outputs) { ppp.output(out.get_any_name()).tensor().set_element_type(ov::element::f32); if (out.get_shape().size() == 4) { - if (out.get_shape()[ov::layout::height_idx(yoloRegionLayout)] != out.get_shape()[ov::layout::width_idx(yoloRegionLayout)] && - out.get_shape()[ov::layout::height_idx({ "NHWC" })] == out.get_shape()[ov::layout::width_idx({ "NHWC" })]) { - yoloRegionLayout = { "NHWC" }; + if (out.get_shape()[ov::layout::height_idx(yoloRegionLayout)] != + out.get_shape()[ov::layout::width_idx(yoloRegionLayout)] && + out.get_shape()[ov::layout::height_idx({"NHWC"})] == out.get_shape()[ov::layout::width_idx({"NHWC"})]) { + yoloRegionLayout = {"NHWC"}; } ppp.output(out.get_any_name()).tensor().set_layout(yoloRegionLayout); } @@ -152,17 +190,17 @@ void ModelYolo::prepareInputsOutputs(std::shared_ptr& model) { } } - if(!isRegionFound) { - switch(outputsNames.size()) { - case 1: - yoloVersion = YOLOF; - break; - case 2: - yoloVersion = YOLO_V4_TINY; - break; - case 3: - yoloVersion = YOLO_V4; - break; + if (!isRegionFound) { + switch (outputsNames.size()) { + case 1: + yoloVersion = YOLOF; + break; + case 2: + yoloVersion = YOLO_V4_TINY; + break; + case 3: + yoloVersion = YOLO_V4; + break; } int num = yoloVersion == YOLOF ? 6 : 3; @@ -170,34 +208,40 @@ void ModelYolo::prepareInputsOutputs(std::shared_ptr& model) { int i = 0; auto chosenMasks = presetMasks.size() ? presetMasks : defaultMasks[yoloVersion]; - if(chosenMasks.size() != num * outputs.size()) { - throw std::runtime_error(std::string("Invalid size of masks array, got ") + std::to_string(presetMasks.size()) + - ", should be " + std::to_string(num * outputs.size())); + if (chosenMasks.size() != num * outputs.size()) { + throw std::runtime_error(std::string("Invalid size of masks array, got ") + + std::to_string(presetMasks.size()) + ", should be " + + std::to_string(num * outputs.size())); } - std::sort(outputsNames.begin(), outputsNames.end(), - [&outShapes, this](const std::string& x, const std::string& y) - {return outShapes[x][ov::layout::height_idx(yoloRegionLayout)] > outShapes[y][ov::layout::height_idx(yoloRegionLayout)]; }); + std::sort(outputsNames.begin(), + outputsNames.end(), + [&outShapes, this](const std::string& x, const std::string& y) { + return outShapes[x][ov::layout::height_idx(yoloRegionLayout)] > + outShapes[y][ov::layout::height_idx(yoloRegionLayout)]; + }); for (const auto& name : outputsNames) { const auto& shape = outShapes[name]; if (shape[ov::layout::channels_idx(yoloRegionLayout)] % num != 0) { throw std::logic_error(std::string("Output tenosor ") + name + " has wrong 2nd dimension"); } - regions.emplace(name, Region( - shape[ov::layout::channels_idx(yoloRegionLayout)] / num - 4 - (isObjConf ? 1 : 0), - 4, - presetAnchors.size() ? 
presetAnchors : defaultAnchors[yoloVersion], - std::vector(chosenMasks.begin() + i * num, chosenMasks.begin() + (i + 1) * num), - shape[ov::layout::width_idx(yoloRegionLayout)], shape[ov::layout::height_idx(yoloRegionLayout)])); + regions.emplace( + name, + Region(shape[ov::layout::channels_idx(yoloRegionLayout)] / num - 4 - (isObjConf ? 1 : 0), + 4, + presetAnchors.size() ? presetAnchors : defaultAnchors[yoloVersion], + std::vector(chosenMasks.begin() + i * num, chosenMasks.begin() + (i + 1) * num), + shape[ov::layout::width_idx(yoloRegionLayout)], + shape[ov::layout::height_idx(yoloRegionLayout)])); i++; } - } - else { + } else { // Currently externally set anchors and masks are supported only for YoloV4 - if(presetAnchors.size() || presetMasks.size()) { + if (presetAnchors.size() || presetMasks.size()) { slog::warn << "Preset anchors and mask can be set for YoloV4 model only. " - "This model is not YoloV4, so these options will be ignored." << slog::endl; + "This model is not YoloV4, so these options will be ignored." + << slog::endl; } } } @@ -210,8 +254,13 @@ std::unique_ptr ModelYolo::postprocess(InferenceResult& infResult) { const auto& internalData = infResult.internalModelData->asRef(); for (auto& output : infResult.outputsData) { - this->parseYOLOOutput(output.first, output.second, netInputHeight, netInputWidth, - internalData.inputImgHeight, internalData.inputImgWidth, objects); + this->parseYOLOOutput(output.first, + output.second, + netInputHeight, + netInputWidth, + internalData.inputImgHeight, + internalData.inputImgWidth, + objects); } if (useAdvancedPostprocessing) { @@ -222,7 +271,9 @@ std::unique_ptr ModelYolo::postprocess(InferenceResult& infResult) { for (const auto& obj1 : objects) { bool isGoodResult = true; for (const auto& obj2 : objects) { - if (obj1.labelID == obj2.labelID && obj1.confidence < obj2.confidence && intersectionOverUnion(obj1, obj2) >= boxIOUThreshold) { // if obj1 is the same as obj2, condition expression will evaluate to false anyway + if (obj1.labelID == obj2.labelID && obj1.confidence < obj2.confidence && + intersectionOverUnion(obj1, obj2) >= boxIOUThreshold) { // if obj1 is the same as obj2, condition + // expression will evaluate to false anyway isGoodResult = false; break; } @@ -233,7 +284,9 @@ std::unique_ptr ModelYolo::postprocess(InferenceResult& infResult) { } } else { // Classic postprocessing - std::sort(objects.begin(), objects.end(), [](const DetectedObject& x, const DetectedObject& y) { return x.confidence > y.confidence; }); + std::sort(objects.begin(), objects.end(), [](const DetectedObject& x, const DetectedObject& y) { + return x.confidence > y.confidence; + }); for (size_t i = 0; i < objects.size(); ++i) { if (objects[i].confidence == 0) continue; @@ -248,10 +301,12 @@ std::unique_ptr ModelYolo::postprocess(InferenceResult& infResult) { } void ModelYolo::parseYOLOOutput(const std::string& output_name, - const ov::Tensor& tensor, const unsigned long resized_im_h, - const unsigned long resized_im_w, const unsigned long original_im_h, - const unsigned long original_im_w, - std::vector& objects) { + const ov::Tensor& tensor, + const unsigned long resized_im_h, + const unsigned long resized_im_w, + const unsigned long original_im_h, + const unsigned long original_im_w, + std::vector& objects) { // --------------------------- Extracting layer parameters ------------------------------------- auto it = regions.find(output_name); if (it == regions.end()) { @@ -263,28 +318,29 @@ void ModelYolo::parseYOLOOutput(const std::string& 
output_name, int sideH = 0; unsigned long scaleH; unsigned long scaleW; - switch(yoloVersion) { - case YOLO_V1V2: - sideH = region.outputHeight; - sideW = region.outputWidth; - scaleW = region.outputWidth; - scaleH = region.outputHeight; - break; - case YOLO_V3: - case YOLO_V4: - case YOLO_V4_TINY: - case YOLOF: - sideH = static_cast(tensor.get_shape()[ov::layout::height_idx(yoloRegionLayout)]); - sideW = static_cast(tensor.get_shape()[ov::layout::width_idx(yoloRegionLayout)]); - scaleW = resized_im_w; - scaleH = resized_im_h; - break; + switch (yoloVersion) { + case YOLO_V1V2: + sideH = region.outputHeight; + sideW = region.outputWidth; + scaleW = region.outputWidth; + scaleH = region.outputHeight; + break; + case YOLO_V3: + case YOLO_V4: + case YOLO_V4_TINY: + case YOLOF: + sideH = static_cast(tensor.get_shape()[ov::layout::height_idx(yoloRegionLayout)]); + sideW = static_cast(tensor.get_shape()[ov::layout::width_idx(yoloRegionLayout)]); + scaleW = resized_im_w; + scaleH = resized_im_h; + break; } auto entriesNum = sideW * sideH; const float* outData = tensor.data(); - auto postprocessRawData = (yoloVersion == YOLO_V4 || yoloVersion == YOLO_V4_TINY || yoloVersion == YOLOF) ? sigmoid : linear; + auto postprocessRawData = + (yoloVersion == YOLO_V4 || yoloVersion == YOLO_V4_TINY || yoloVersion == YOLOF) ? sigmoid : linear; // --------------------------- Parsing YOLO Region output ------------------------------------- for (int i = 0; i < entriesNum; ++i) { @@ -292,32 +348,49 @@ void ModelYolo::parseYOLOOutput(const std::string& output_name, int col = i % sideW; for (int n = 0; n < region.num; ++n) { //--- Getting region data - int obj_index = calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, region.coords); - int box_index = calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, 0); + int obj_index = calculateEntryIndex(entriesNum, + region.coords, + region.classes + isObjConf, + n * entriesNum + i, + region.coords); + int box_index = + calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, 0); float scale = isObjConf ? 
postprocessRawData(outData[obj_index]) : 1; //--- Preliminary check for confidence threshold conformance - if (scale >= confidenceThreshold){ + if (scale >= confidenceThreshold) { //--- Calculating scaled region's coordinates float x, y; if (yoloVersion == YOLOF) { - x = ((float)col / sideW + outData[box_index + 0 * entriesNum] * region.anchors[2 * n] / scaleW) * original_im_w; - y = ((float)row / sideH + outData[box_index + 1 * entriesNum] * region.anchors[2 * n + 1] / scaleH) * original_im_h; + x = (static_cast(col) / sideW + + outData[box_index + 0 * entriesNum] * region.anchors[2 * n] / scaleW) * + original_im_w; + y = (static_cast(row) / sideH + + outData[box_index + 1 * entriesNum] * region.anchors[2 * n + 1] / scaleH) * + original_im_h; } else { - x = (float)(col + postprocessRawData(outData[box_index + 0 * entriesNum])) / sideW * original_im_w; - y = (float)(row + postprocessRawData(outData[box_index + 1 * entriesNum])) / sideH * original_im_h; + x = static_cast((col + postprocessRawData(outData[box_index + 0 * entriesNum])) / sideW * + original_im_w); + y = static_cast((row + postprocessRawData(outData[box_index + 1 * entriesNum])) / sideH * + original_im_h); } - float height = (float)std::exp(outData[box_index + 3 * entriesNum]) * region.anchors[2 * n + 1] * original_im_h / scaleH; - float width = (float)std::exp(outData[box_index + 2 * entriesNum]) * region.anchors[2 * n] * original_im_w / scaleW; + float height = static_cast(std::exp(outData[box_index + 3 * entriesNum]) * + region.anchors[2 * n + 1] * original_im_h / scaleH); + float width = static_cast(std::exp(outData[box_index + 2 * entriesNum]) * region.anchors[2 * n] * + original_im_w / scaleW); DetectedObject obj; - obj.x = clamp(x-width/2, 0.f, (float)original_im_w); - obj.y = clamp(y-height/2, 0.f, (float)original_im_h); - obj.width = clamp(width, 0.f, (float)original_im_w - obj.x); - obj.height = clamp(height, 0.f, (float)original_im_h - obj.y); + obj.x = clamp(x - width / 2, 0.f, static_cast(original_im_w)); + obj.y = clamp(y - height / 2, 0.f, static_cast(original_im_h)); + obj.width = clamp(width, 0.f, static_cast(original_im_w - obj.x)); + obj.height = clamp(height, 0.f, static_cast(original_im_h - obj.y)); for (size_t j = 0; j < region.classes; ++j) { - int class_index = calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, region.coords + isObjConf + j); + int class_index = calculateEntryIndex(entriesNum, + region.coords, + region.classes + isObjConf, + n * entriesNum + i, + region.coords + isObjConf + j); float prob = scale * postprocessRawData(outData[class_index]); //--- Checking confidence threshold conformance and adding region to the list @@ -342,7 +415,8 @@ int ModelYolo::calculateEntryIndex(int totalCells, int lcoords, size_t lclasses, double ModelYolo::intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2) { double overlappingWidth = fmin(o1.x + o1.width, o2.x + o2.width) - fmax(o1.x, o2.x); double overlappingHeight = fmin(o1.y + o1.height, o2.y + o2.height) - fmax(o1.y, o2.y); - double intersectionArea = (overlappingWidth < 0 || overlappingHeight < 0) ? 0 : overlappingHeight * overlappingWidth; + double intersectionArea = + (overlappingWidth < 0 || overlappingHeight < 0) ? 
0 : overlappingHeight * overlappingWidth; double unionArea = o1.width * o1.height + o2.width * o2.height - intersectionArea; return intersectionArea / unionArea; } @@ -358,7 +432,6 @@ ModelYolo::Region::Region(const std::shared_ptr& regionY outputHeight = shape[2]; if (num) { - // Parsing YoloV3 parameters anchors.resize(num * 2); @@ -367,7 +440,6 @@ ModelYolo::Region::Region(const std::shared_ptr& regionY anchors[i * 2 + 1] = regionYolo->get_anchors()[mask[i] * 2 + 1]; } } else { - // Parsing YoloV2 parameters num = regionYolo->get_num_regions(); anchors = regionYolo->get_anchors(); @@ -378,9 +450,16 @@ ModelYolo::Region::Region(const std::shared_ptr& regionY } } -ModelYolo::Region::Region(size_t classes, int coords, const std::vector& anchors, const std::vector& masks, size_t outputWidth, size_t outputHeight) : - classes(classes), coords(coords), - outputWidth(outputWidth), outputHeight(outputHeight) { +ModelYolo::Region::Region(size_t classes, + int coords, + const std::vector& anchors, + const std::vector& masks, + size_t outputWidth, + size_t outputHeight) + : classes(classes), + coords(coords), + outputWidth(outputWidth), + outputHeight(outputHeight) { num = masks.size(); if (anchors.size() == 0 || anchors.size() % 2 != 0) { @@ -394,8 +473,7 @@ ModelYolo::Region::Region(size_t classes, int coords, const std::vector& this->anchors[i * 2] = anchors[masks[i] * 2]; this->anchors[i * 2 + 1] = anchors[masks[i] * 2 + 1]; } - } - else { + } else { this->anchors = anchors; num = anchors.size() / 2; } diff --git a/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp b/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp index 4a42db8d3b2..91976157883 100644 --- a/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp +++ b/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp @@ -14,29 +14,47 @@ // limitations under the License. 
*/ +#include "models/hpe_model_associative_embedding.h" + +#include +#include + #include +#include +#include #include +#include +#include #include -#include + #include -#include + +#include #include #include + #include "models/associative_embedding_decoder.h" -#include "models/hpe_model_associative_embedding.h" +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" const cv::Vec3f HpeAssociativeEmbedding::meanPixel = cv::Vec3f::all(128); const float HpeAssociativeEmbedding::detectionThreshold = 0.1f; const float HpeAssociativeEmbedding::tagThreshold = 1.0f; -HpeAssociativeEmbedding::HpeAssociativeEmbedding(const std::string& modelFileName, double aspectRatio, - int targetSize, float confidenceThreshold, const std::string& layout, float delta, RESIZE_MODE resizeMode) : - ImageModel(modelFileName, false, layout), - aspectRatio(aspectRatio), - targetSize(targetSize), - confidenceThreshold(confidenceThreshold), - delta(delta), - resizeMode(resizeMode) {} +HpeAssociativeEmbedding::HpeAssociativeEmbedding(const std::string& modelFileName, + double aspectRatio, + int targetSize, + float confidenceThreshold, + const std::string& layout, + float delta, + RESIZE_MODE resizeMode) + : ImageModel(modelFileName, false, layout), + aspectRatio(aspectRatio), + targetSize(targetSize), + confidenceThreshold(confidenceThreshold), + delta(delta), + resizeMode(resizeMode) {} void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr& model) { // --------------------------- Configure input & output ------------------------------------------------- @@ -49,15 +67,13 @@ void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr& m const ov::Shape& inputShape = model->input().get_shape(); const ov::Layout& inputLayout = getInputLayout(model->input()); - if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 - || inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 || + inputShape[ov::layout::channels_idx(inputLayout)] != 3) { throw std::logic_error("3-channel 4-dimensional model's input is expected"); } ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_layout({ "NHWC" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); ppp.input().model().set_layout(inputLayout); @@ -69,8 +85,7 @@ void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr& m for (const auto& output : model->outputs()) { const auto& outTensorName = output.get_any_name(); - ppp.output(outTensorName).tensor(). 
- set_element_type(ov::element::f32); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32); for (const auto& name : output.get_names()) { outputsNames.push_back(name); @@ -80,8 +95,7 @@ void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr& m if (outputShape.size() != 4 && outputShape.size() != 5) { throw std::logic_error("output tensors are expected to be 4-dimensional or 5-dimensional"); } - if (outputShape[ov::layout::batch_idx("NC...")] != 1 - || outputShape[ov::layout::channels_idx("NC...")] != 17) { + if (outputShape[ov::layout::batch_idx("NC...")] != 1 || outputShape[ov::layout::channels_idx("NC...")] != 17) { throw std::logic_error("output tensors are expected to have 1 batch size and 17 channels"); } } @@ -91,10 +105,7 @@ void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr& m heatmapsTensorName = findTensorByName("heatmaps", outputsNames); try { nmsHeatmapsTensorName = findTensorByName("nms_heatmaps", outputsNames); - } - catch (const std::runtime_error&) { - nmsHeatmapsTensorName = heatmapsTensorName; - } + } catch (const std::runtime_error&) { nmsHeatmapsTensorName = heatmapsTensorName; } changeInputSize(model); } @@ -107,7 +118,7 @@ void HpeAssociativeEmbedding::changeInputSize(std::shared_ptr& model) const auto widthId = ov::layout::width_idx(layout); if (!targetSize) { - targetSize = static_cast(std::min(inputShape[heightId], inputShape[widthId])); + targetSize = static_cast(std::min(inputShape[heightId], inputShape[widthId])); } int inputHeight = aspectRatio >= 1.0 ? targetSize : static_cast(std::round(targetSize / aspectRatio)); int inputWidth = aspectRatio >= 1.0 ? static_cast(std::round(targetSize * aspectRatio)) : targetSize; @@ -121,18 +132,20 @@ void HpeAssociativeEmbedding::changeInputSize(std::shared_ptr& model) model->reshape(inputShape); } -std::shared_ptr HpeAssociativeEmbedding::preprocess(const InputData& inputData, ov::InferRequest& request) { +std::shared_ptr HpeAssociativeEmbedding::preprocess(const InputData& inputData, + ov::InferRequest& request) { auto& image = inputData.asRef().inputImage; cv::Rect roi; auto paddedImage = resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, true, &roi); - if (inputLayerSize.height - stride >= roi.height - || inputLayerSize.width - stride >= roi.width) { + if (inputLayerSize.height - stride >= roi.height || inputLayerSize.width - stride >= roi.width) { slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl; } request.set_input_tensor(wrapMat2Tensor(paddedImage)); - return std::make_shared(paddedImage.cols, paddedImage.rows, - image.size().width / static_cast(roi.width), image.size().height / static_cast(roi.height)); + return std::make_shared(paddedImage.cols, + paddedImage.rows, + image.size().width / static_cast(roi.width), + image.size().height / static_cast(roi.height)); } std::unique_ptr HpeAssociativeEmbedding::postprocess(InferenceResult& infResult) { @@ -189,17 +202,16 @@ std::unique_ptr HpeAssociativeEmbedding::postprocess(InferenceResult } std::string HpeAssociativeEmbedding::findTensorByName(const std::string& tensorName, - const std::vector& outputsNames) { + const std::vector& outputsNames) { std::vector suitableLayers; - for (auto& outputName: outputsNames) { + for (auto& outputName : outputsNames) { if (outputName.rfind(tensorName, 0) == 0) { - suitableLayers.push_back(outputName); + suitableLayers.push_back(outputName); } } if (suitableLayers.empty()) { throw std::runtime_error("Suitable tensor for 
" + tensorName + " output is not found"); - } - else if (suitableLayers.size() > 1) { + } else if (suitableLayers.size() > 1) { throw std::runtime_error("More than 1 tensor matched to " + tensorName + " output"); } return suitableLayers[0]; @@ -213,11 +225,9 @@ std::vector HpeAssociativeEmbedding::split(float* data, const ov::Shape return flattenData; } -std::vector HpeAssociativeEmbedding::extractPoses( - std::vector& heatMaps, - const std::vector& aembdsMaps, - const std::vector& nmsHeatMaps) const { - +std::vector HpeAssociativeEmbedding::extractPoses(std::vector& heatMaps, + const std::vector& aembdsMaps, + const std::vector& nmsHeatMaps) const { std::vector> allPeaks(numJoints); for (int i = 0; i < numJoints; i++) { findPeaks(nmsHeatMaps, aembdsMaps, allPeaks, i, maxNumPeople, detectionThreshold); diff --git a/demos/common/cpp/models/src/hpe_model_openpose.cpp b/demos/common/cpp/models/src/hpe_model_openpose.cpp index 237cd472ae4..a0fc5a2cac2 100644 --- a/demos/common/cpp/models/src/hpe_model_openpose.cpp +++ b/demos/common/cpp/models/src/hpe_model_openpose.cpp @@ -14,17 +14,28 @@ // limitations under the License. */ +#include "models/hpe_model_openpose.h" + +#include + #include +#include +#include #include +#include #include -#include + +#include #include -#include + +#include #include #include -#include -#include "models/hpe_model_openpose.h" + +#include "models/input_data.h" +#include "models/internal_model_data.h" #include "models/openpose_decoder.h" +#include "models/results.h" const cv::Vec3f HPEOpenPose::meanPixel = cv::Vec3f::all(128); const float HPEOpenPose::minPeaksDistance = 3.0f; @@ -32,13 +43,15 @@ const float HPEOpenPose::midPointsScoreThreshold = 0.05f; const float HPEOpenPose::foundMidPointsRatioThreshold = 0.8f; const float HPEOpenPose::minSubsetScore = 0.2f; -HPEOpenPose::HPEOpenPose(const std::string& modelFileName, double aspectRatio, int targetSize, - float confidenceThreshold, const std::string& layout) : - ImageModel(modelFileName, false, layout), - aspectRatio(aspectRatio), - targetSize(targetSize), - confidenceThreshold(confidenceThreshold) { -} +HPEOpenPose::HPEOpenPose(const std::string& modelFileName, + double aspectRatio, + int targetSize, + float confidenceThreshold, + const std::string& layout) + : ImageModel(modelFileName, false, layout), + aspectRatio(aspectRatio), + targetSize(targetSize), + confidenceThreshold(confidenceThreshold) {} void HPEOpenPose::prepareInputsOutputs(std::shared_ptr& model) { // --------------------------- Configure input & output ------------------------------------------------- @@ -50,14 +63,12 @@ void HPEOpenPose::prepareInputsOutputs(std::shared_ptr& model) { const ov::Shape& inputShape = model->input().get_shape(); const ov::Layout& inputLayout = getInputLayout(model->input()); - if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 - || inputShape[ov::layout::channels_idx(inputLayout)] != 3) + if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 || + inputShape[ov::layout::channels_idx(inputLayout)] != 3) throw std::logic_error("3-channel 4-dimensional model's input is expected"); ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout({ "NHWC" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); ppp.input().model().set_layout(inputLayout); @@ -70,9 +81,7 @@ void HPEOpenPose::prepareInputsOutputs(std::shared_ptr& model) { const ov::Layout outputLayout("NCHW"); for (const auto& output : model->outputs()) { const auto& outTensorName = output.get_any_name(); - ppp.output(outTensorName).tensor(). - set_element_type(ov::element::f32) - .set_layout(outputLayout); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout); outputsNames.push_back(outTensorName); } model = ppp.build(); @@ -89,16 +98,18 @@ void HPEOpenPose::prepareInputsOutputs(std::shared_ptr& model) { std::swap(outputsNames[0], outputsNames[1]); } - if (heatmapsOutputShape.size() != 4 || heatmapsOutputShape[batchId] != 1 - || heatmapsOutputShape[ov::layout::channels_idx(outputLayout)] != keypointsNumber + 1) { - throw std::logic_error("1x" + std::to_string(keypointsNumber + 1) + "xHFMxWFM dimension of model's heatmap is expected"); + if (heatmapsOutputShape.size() != 4 || heatmapsOutputShape[batchId] != 1 || + heatmapsOutputShape[ov::layout::channels_idx(outputLayout)] != keypointsNumber + 1) { + throw std::logic_error("1x" + std::to_string(keypointsNumber + 1) + + "xHFMxWFM dimension of model's heatmap is expected"); } - if (pafsOutputShape.size() != 4 || pafsOutputShape[batchId] != 1 - || pafsOutputShape[channelsId] != 2 * (keypointsNumber + 1)) { - throw std::logic_error("1x" + std::to_string(2 * (keypointsNumber + 1)) + "xHFMxWFM dimension of model's output is expected"); + if (pafsOutputShape.size() != 4 || pafsOutputShape[batchId] != 1 || + pafsOutputShape[channelsId] != 2 * (keypointsNumber + 1)) { + throw std::logic_error("1x" + std::to_string(2 * (keypointsNumber + 1)) + + "xHFMxWFM dimension of model's output is expected"); } - if (pafsOutputShape[heightId] != heatmapsOutputShape[heightId] - || pafsOutputShape[widthId] != heatmapsOutputShape[widthId]) { + if (pafsOutputShape[heightId] != heatmapsOutputShape[heightId] || + pafsOutputShape[widthId] != heatmapsOutputShape[widthId]) { throw std::logic_error("output and heatmap are expected to have matching last two dimensions"); } @@ -128,7 +139,8 @@ void HPEOpenPose::changeInputSize(std::shared_ptr& model) { std::shared_ptr HPEOpenPose::preprocess(const InputData& inputData, ov::InferRequest& request) { auto& image = inputData.asRef().inputImage; cv::Rect roi; - auto paddedImage = resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, RESIZE_KEEP_ASPECT, true, &roi); + auto paddedImage = + resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, RESIZE_KEEP_ASPECT, true, &roi); if (inputLayerSize.width < roi.width) throw std::runtime_error("The image aspect ratio doesn't fit current model shape"); @@ -137,8 +149,10 @@ std::shared_ptr HPEOpenPose::preprocess(const InputData& inpu } request.set_input_tensor(wrapMat2Tensor(paddedImage)); - return std::make_shared(paddedImage.cols, paddedImage.rows, - image.cols / static_cast(roi.width), image.rows / static_cast(roi.height)); + return std::make_shared(paddedImage.cols, + paddedImage.rows, + image.cols / static_cast(roi.width), + image.rows / static_cast(roi.height)); } std::unique_ptr HPEOpenPose::postprocess(InferenceResult& infResult) { @@ -155,15 +169,15 @@ std::unique_ptr HPEOpenPose::postprocess(InferenceResult& infResult) std::vector heatMaps(keypointsNumber); for (size_t i = 0; i < heatMaps.size(); i++) { - heatMaps[i] = cv::Mat(heatMapShape[2], 
heatMapShape[3], CV_32FC1, - heats + i * heatMapShape[2] * heatMapShape[3]); + heatMaps[i] = + cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, heats + i * heatMapShape[2] * heatMapShape[3]); } resizeFeatureMaps(heatMaps); std::vector pafs(outputShape[1]); for (size_t i = 0; i < pafs.size(); i++) { - pafs[i] = cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, - predictions + i * heatMapShape[2] * heatMapShape[3]); + pafs[i] = + cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, predictions + i * heatMapShape[2] * heatMapShape[3]); } resizeFeatureMaps(pafs); @@ -189,15 +203,16 @@ std::unique_ptr HPEOpenPose::postprocess(InferenceResult& infResult) void HPEOpenPose::resizeFeatureMaps(std::vector& featureMaps) const { for (auto& featureMap : featureMaps) { - cv::resize(featureMap, featureMap, cv::Size(), - upsampleRatio, upsampleRatio, cv::INTER_CUBIC); + cv::resize(featureMap, featureMap, cv::Size(), upsampleRatio, upsampleRatio, cv::INTER_CUBIC); } } -class FindPeaksBody: public cv::ParallelLoopBody { +class FindPeaksBody : public cv::ParallelLoopBody { public: - FindPeaksBody(const std::vector& heatMaps, float minPeaksDistance, - std::vector >& peaksFromHeatMap, float confidenceThreshold) + FindPeaksBody(const std::vector& heatMaps, + float minPeaksDistance, + std::vector>& peaksFromHeatMap, + float confidenceThreshold) : heatMaps(heatMaps), minPeaksDistance(minPeaksDistance), peaksFromHeatMap(peaksFromHeatMap), @@ -212,17 +227,15 @@ class FindPeaksBody: public cv::ParallelLoopBody { private: const std::vector& heatMaps; float minPeaksDistance; - std::vector >& peaksFromHeatMap; + std::vector>& peaksFromHeatMap; float confidenceThreshold; }; -std::vector HPEOpenPose::extractPoses( - const std::vector& heatMaps, - const std::vector& pafs) const { +std::vector HPEOpenPose::extractPoses(const std::vector& heatMaps, + const std::vector& pafs) const { std::vector> peaksFromHeatMap(heatMaps.size()); FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance, peaksFromHeatMap, confidenceThreshold); - cv::parallel_for_(cv::Range(0, static_cast(heatMaps.size())), - findPeaksBody); + cv::parallel_for_(cv::Range(0, static_cast(heatMaps.size())), findPeaksBody); int peaksBefore = 0; for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) { peaksBefore += static_cast(peaksFromHeatMap[heatmapId - 1].size()); @@ -230,8 +243,12 @@ std::vector HPEOpenPose::extractPoses( peak.id += peaksBefore; } } - std::vector poses = groupPeaksToPoses( - peaksFromHeatMap, pafs, keypointsNumber, midPointsScoreThreshold, - foundMidPointsRatioThreshold, minJointsNumber, minSubsetScore); + std::vector poses = groupPeaksToPoses(peaksFromHeatMap, + pafs, + keypointsNumber, + midPointsScoreThreshold, + foundMidPointsRatioThreshold, + minJointsNumber, + minSubsetScore); return poses; } diff --git a/demos/common/cpp/models/src/image_model.cpp b/demos/common/cpp/models/src/image_model.cpp index 4fc489113da..da230e045d4 100644 --- a/demos/common/cpp/models/src/image_model.cpp +++ b/demos/common/cpp/models/src/image_model.cpp @@ -14,14 +14,23 @@ // limitations under the License. 
*/ +#include "models/image_model.h" + +#include +#include + +#include #include + #include -#include "models/image_model.h" +#include -ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout) : - ModelBase(modelFileName, layout), - useAutoResize(useAutoResize) { -} +#include "models/input_data.h" +#include "models/internal_model_data.h" + +ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout) + : ModelBase(modelFileName, layout), + useAutoResize(useAutoResize) {} std::shared_ptr ImageModel::preprocess(const InputData& inputData, ov::InferRequest& request) { const auto& origImg = inputData.asRef().inputImage; diff --git a/demos/common/cpp/models/src/jpeg_restoration.cpp b/demos/common/cpp/models/src/jpeg_restoration.cpp index b44b5b65ef0..8eb3ae19ba6 100644 --- a/demos/common/cpp/models/src/jpeg_restoration.cpp +++ b/demos/common/cpp/models/src/jpeg_restoration.cpp @@ -14,22 +14,36 @@ // limitations under the License. */ +#include + +#include +#include +#include #include #include -#include + +#include +#include +#include #include + #include #include + +#include "models/image_model.h" +#include "models/input_data.h" +#include "models/internal_model_data.h" #include "models/jpeg_restoration_model.h" #include "models/results.h" -JPEGRestorationModel::JPEGRestorationModel(const std::string& modelFileName, const cv::Size& inputImgSize, - bool _jpegCompression, const std::string& layout) : - ImageModel(modelFileName, false, layout) { - netInputHeight = inputImgSize.height; - netInputWidth = inputImgSize.width; - jpegCompression = _jpegCompression; - +JPEGRestorationModel::JPEGRestorationModel(const std::string& modelFileName, + const cv::Size& inputImgSize, + bool _jpegCompression, + const std::string& layout) + : ImageModel(modelFileName, false, layout) { + netInputHeight = inputImgSize.height; + netInputWidth = inputImgSize.width; + jpegCompression = _jpegCompression; } void JPEGRestorationModel::prepareInputsOutputs(std::shared_ptr& model) { @@ -49,9 +63,7 @@ void JPEGRestorationModel::prepareInputsOutputs(std::shared_ptr& mode } ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout("NHWC"); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); ppp.input().model().set_layout(inputLayout); @@ -61,9 +73,9 @@ void JPEGRestorationModel::prepareInputsOutputs(std::shared_ptr& mode throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 output"); } const ov::Shape& outputShape = model->output().get_shape(); - const ov::Layout outputLayout{ "NCHW" }; - if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 - || outputShape[ov::layout::channels_idx(outputLayout)] != 3) { + const ov::Layout outputLayout{"NCHW"}; + if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 || + outputShape[ov::layout::channels_idx(outputLayout)] != 3) { throw std::logic_error("3-channel 4-dimensional model's output is expected"); } @@ -96,7 +108,8 @@ void JPEGRestorationModel::changeInputSize(std::shared_ptr& model) { model->reshape(inputShape); } -std::shared_ptr JPEGRestorationModel::preprocess(const InputData& inputData, ov::InferRequest& request) { +std::shared_ptr JPEGRestorationModel::preprocess(const InputData& inputData, + ov::InferRequest& request) { cv::Mat image = inputData.asRef().inputImage; const size_t h = image.rows; const size_t w = image.cols; @@ -108,12 +121,10 @@ std::shared_ptr JPEGRestorationModel::preprocess(const InputD image = cv::imdecode(cv::Mat(encimg), 3); } - if (netInputHeight - stride < h && h <= netInputHeight - && netInputWidth - stride < w && w <= netInputWidth) { + if (netInputHeight - stride < h && h <= netInputHeight && netInputWidth - stride < w && w <= netInputWidth) { int bottom = netInputHeight - h; int right = netInputWidth - w; - cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, - cv::BORDER_CONSTANT, 0); + cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, cv::BORDER_CONSTANT, 0); } else { slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl; cv::resize(image, resizedImage, cv::Size(netInputWidth, netInputHeight)); @@ -132,18 +143,19 @@ std::unique_ptr JPEGRestorationModel::postprocess(InferenceResult& i std::vector imgPlanes; const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape(); - const size_t outHeight = (int)(outputShape[2]); - const size_t outWidth = (int)(outputShape[3]); + const size_t outHeight = static_cast(outputShape[2]); + const size_t outWidth = static_cast(outputShape[3]); const size_t numOfPixels = outWidth * outHeight; - imgPlanes = std::vector{ - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])), - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))}; + imgPlanes = std::vector{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))}; cv::Mat resultImg; cv::merge(imgPlanes, resultImg); - if (netInputHeight - stride < static_cast(inputImgSize.inputImgHeight) && static_cast(inputImgSize.inputImgHeight) <= netInputHeight - && netInputWidth - stride < static_cast(inputImgSize.inputImgWidth) && static_cast(inputImgSize.inputImgWidth) <= netInputWidth) { + if (netInputHeight - stride < static_cast(inputImgSize.inputImgHeight) && + static_cast(inputImgSize.inputImgHeight) <= netInputHeight && + netInputWidth - stride < static_cast(inputImgSize.inputImgWidth) && + static_cast(inputImgSize.inputImgWidth) 
<= netInputWidth) { result->resultImage = resultImg(cv::Rect(0, 0, inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); } else { cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); diff --git a/demos/common/cpp/models/src/model_base.cpp b/demos/common/cpp/models/src/model_base.cpp index 7a798468b55..c2ebd1bc882 100644 --- a/demos/common/cpp/models/src/model_base.cpp +++ b/demos/common/cpp/models/src/model_base.cpp @@ -14,16 +14,22 @@ // limitations under the License. */ +#include "models/model_base.h" + +#include + #include + #include +#include +#include #include -#include "models/model_base.h" std::shared_ptr ModelBase::prepareModel(ov::Core& core) { // --------------------------- Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ /** Read model **/ slog::info << "Reading model " << modelFileName << slog::endl; - std::shared_ptr model = core.read_model(modelFileName); + std::shared_ptr model = core.read_model(modelFileName); logBasicModelInfo(model); // -------------------------- Reading all outputs names and customizing I/O tensors (in inherited classes) prepareInputsOutputs(model); @@ -49,12 +55,10 @@ ov::Layout ModelBase::getInputLayout(const ov::Output& input) { if (inputsLayouts.empty()) { layout = getLayoutFromShape(inputShape); slog::warn << "Automatically detected layout '" << layout.to_string() << "' for input '" - << input.get_any_name() << "' will be used." << slog::endl; - } - else if (inputsLayouts.size() == 1) { + << input.get_any_name() << "' will be used." << slog::endl; + } else if (inputsLayouts.size() == 1) { layout = inputsLayouts.begin()->second; - } - else { + } else { layout = inputsLayouts[input.get_any_name()]; } } diff --git a/demos/common/cpp/models/src/openpose_decoder.cpp b/demos/common/cpp/models/src/openpose_decoder.cpp index daa1f33cdb1..723495f73a0 100644 --- a/demos/common/cpp/models/src/openpose_decoder.cpp +++ b/demos/common/cpp/models/src/openpose_decoder.cpp @@ -14,26 +14,27 @@ // limitations under the License. 
*/ +#include "models/openpose_decoder.h" + +#include + #include +#include #include #include + #include -#include "models/openpose_decoder.h" +#include "models/results.h" -Peak::Peak(const int id, const cv::Point2f& pos, const float score) - : id(id), - pos(pos), - score(score) {} +Peak::Peak(const int id, const cv::Point2f& pos, const float score) : id(id), pos(pos), score(score) {} HumanPoseByPeaksIndices::HumanPoseByPeaksIndices(const int keypointsNumber) : peaksIndices(std::vector(keypointsNumber, -1)), nJoints(0), score(0.0f) {} -TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, - const int secondJointIdx, - const float score) +TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score) : firstJointIdx(firstJointIdx), secondJointIdx(secondJointIdx), score(score) {} @@ -41,7 +42,8 @@ TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, void findPeaks(const std::vector& heatMaps, const float minPeaksDistance, std::vector>& allPeaks, - int heatMapId, float confidenceThreshold) { + int heatMapId, + float confidenceThreshold) { std::vector peaks; const cv::Mat& heatMap = heatMaps[heatMapId]; const float* heatMapData = heatMap.ptr(); @@ -49,50 +51,36 @@ void findPeaks(const std::vector& heatMaps, for (int y = -1; y < heatMap.rows + 1; y++) { for (int x = -1; x < heatMap.cols + 1; x++) { float val = 0; - if (x >= 0 - && y >= 0 - && x < heatMap.cols - && y < heatMap.rows) { + if (x >= 0 && y >= 0 && x < heatMap.cols && y < heatMap.rows) { val = heatMapData[y * heatMapStep + x]; val = val >= confidenceThreshold ? val : 0; } float left_val = 0; - if (y >= 0 - && x < (heatMap.cols - 1) - && y < heatMap.rows) { + if (y >= 0 && x < (heatMap.cols - 1) && y < heatMap.rows) { left_val = heatMapData[y * heatMapStep + x + 1]; left_val = left_val >= confidenceThreshold ? left_val : 0; } float right_val = 0; - if (x > 0 - && y >= 0 - && y < heatMap.rows) { + if (x > 0 && y >= 0 && y < heatMap.rows) { right_val = heatMapData[y * heatMapStep + x - 1]; right_val = right_val >= confidenceThreshold ? right_val : 0; } float top_val = 0; - if (x >= 0 - && x < heatMap.cols - && y < (heatMap.rows - 1)) { + if (x >= 0 && x < heatMap.cols && y < (heatMap.rows - 1)) { top_val = heatMapData[(y + 1) * heatMapStep + x]; top_val = top_val >= confidenceThreshold ? top_val : 0; } float bottom_val = 0; - if (x >= 0 - && y > 0 - && x < heatMap.cols) { + if (x >= 0 && y > 0 && x < heatMap.cols) { bottom_val = heatMapData[(y - 1) * heatMapStep + x]; bottom_val = bottom_val >= confidenceThreshold ? 
bottom_val : 0; } - if ((val > left_val) - && (val > right_val) - && (val > top_val) - && (val > bottom_val)) { + if ((val > left_val) && (val > right_val) && (val > top_val) && (val > bottom_val)) { peaks.push_back(cv::Point(x, y)); } } @@ -123,33 +111,62 @@ std::vector groupPeaksToPoses(const std::vector>& a const float foundMidPointsRatioThreshold, const int minJointsNumber, const float minSubsetScore) { - static const std::pair limbIdsHeatmap[] = { - {2, 3}, {2, 6}, {3, 4}, {4, 5}, {6, 7}, {7, 8}, {2, 9}, {9, 10}, {10, 11}, {2, 12}, {12, 13}, {13, 14}, - {2, 1}, {1, 15}, {15, 17}, {1, 16}, {16, 18}, {3, 17}, {6, 18} - }; - static const std::pair limbIdsPaf[] = { - {31, 32}, {39, 40}, {33, 34}, {35, 36}, {41, 42}, {43, 44}, {19, 20}, {21, 22}, {23, 24}, {25, 26}, - {27, 28}, {29, 30}, {47, 48}, {49, 50}, {53, 54}, {51, 52}, {55, 56}, {37, 38}, {45, 46} - }; + static const std::pair limbIdsHeatmap[] = {{2, 3}, + {2, 6}, + {3, 4}, + {4, 5}, + {6, 7}, + {7, 8}, + {2, 9}, + {9, 10}, + {10, 11}, + {2, 12}, + {12, 13}, + {13, 14}, + {2, 1}, + {1, 15}, + {15, 17}, + {1, 16}, + {16, 18}, + {3, 17}, + {6, 18}}; + static const std::pair limbIdsPaf[] = {{31, 32}, + {39, 40}, + {33, 34}, + {35, 36}, + {41, 42}, + {43, 44}, + {19, 20}, + {21, 22}, + {23, 24}, + {25, 26}, + {27, 28}, + {29, 30}, + {47, 48}, + {49, 50}, + {53, 54}, + {51, 52}, + {55, 56}, + {37, 38}, + {45, 46}}; std::vector candidates; for (const auto& peaks : allPeaks) { - candidates.insert(candidates.end(), peaks.begin(), peaks.end()); + candidates.insert(candidates.end(), peaks.begin(), peaks.end()); } std::vector subset(0, HumanPoseByPeaksIndices(keypointsNumber)); for (size_t k = 0; k < arraySize(limbIdsPaf); k++) { std::vector connections; const int mapIdxOffset = keypointsNumber + 1; - std::pair scoreMid = { pafs[limbIdsPaf[k].first - mapIdxOffset], - pafs[limbIdsPaf[k].second - mapIdxOffset] }; + std::pair scoreMid = {pafs[limbIdsPaf[k].first - mapIdxOffset], + pafs[limbIdsPaf[k].second - mapIdxOffset]}; const int idxJointA = limbIdsHeatmap[k].first - 1; const int idxJointB = limbIdsHeatmap[k].second - 1; const std::vector& candA = allPeaks[idxJointA]; const std::vector& candB = allPeaks[idxJointB]; const size_t nJointsA = candA.size(); const size_t nJointsB = candB.size(); - if (nJointsA == 0 - && nJointsB == 0) { + if (nJointsA == 0 && nJointsB == 0) { continue; } else if (nJointsA == 0) { for (size_t i = 0; i < nJointsB; i++) { @@ -201,7 +218,7 @@ std::vector groupPeaksToPoses(const std::vector>& a } vec /= norm_vec; float score = vec.x * scoreMid.first.at(mid) + vec.y * scoreMid.second.at(mid); - int height_n = pafs[0].rows / 2; + int height_n = pafs[0].rows / 2; float suc_ratio = 0.0f; float mid_score = 0.0f; const int mid_num = 10; @@ -209,13 +226,12 @@ std::vector groupPeaksToPoses(const std::vector>& a if (score > scoreThreshold) { float p_sum = 0; int p_count = 0; - cv::Size2f step((candB[j].pos.x - candA[i].pos.x)/(mid_num - 1), - (candB[j].pos.y - candA[i].pos.y)/(mid_num - 1)); + cv::Size2f step((candB[j].pos.x - candA[i].pos.x) / (mid_num - 1), + (candB[j].pos.y - candA[i].pos.y) / (mid_num - 1)); for (int n = 0; n < mid_num; n++) { cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width), cvRound(candA[i].pos.y + n * step.height)); - cv::Point2f pred(scoreMid.first.at(midPoint), - scoreMid.second.at(midPoint)); + cv::Point2f pred(scoreMid.first.at(midPoint), scoreMid.second.at(midPoint)); score = vec.x * pred.x + vec.y * pred.y; if (score > midPointsScoreThreshold) { p_sum += score; @@ -226,18 +242,17 @@ 
std::vector groupPeaksToPoses(const std::vector>& a float ratio = p_count > 0 ? p_sum / p_count : 0.0f; mid_score = ratio + static_cast(std::min(height_n / norm_vec - 1, 0.0)); } - if (mid_score > 0 - && suc_ratio > foundMidPointsRatioThreshold) { + if (mid_score > 0 && suc_ratio > foundMidPointsRatioThreshold) { tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score)); } } } if (!tempJointConnections.empty()) { - std::sort(tempJointConnections.begin(), tempJointConnections.end(), - [](const TwoJointsConnection& a, - const TwoJointsConnection& b) { - return (a.score > b.score); - }); + std::sort(tempJointConnections.begin(), + tempJointConnections.end(), + [](const TwoJointsConnection& a, const TwoJointsConnection& b) { + return (a.score > b.score); + }); } size_t num_limbs = std::min(nJointsA, nJointsB); size_t cnt = 0; @@ -250,8 +265,7 @@ std::vector groupPeaksToPoses(const std::vector>& a const int& indexA = tempJointConnections[row].firstJointIdx; const int& indexB = tempJointConnections[row].secondJointIdx; const float& score = tempJointConnections[row].score; - if (occurA[indexA] == 0 - && occurB[indexB] == 0) { + if (occurA[indexA] == 0 && occurB[indexB] == 0) { connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score)); cnt++; occurA[indexA] = 1; @@ -264,8 +278,7 @@ std::vector groupPeaksToPoses(const std::vector>& a bool extraJointConnections = (k == 17 || k == 18); if (k == 0) { - subset = std::vector( - connections.size(), HumanPoseByPeaksIndices(keypointsNumber)); + subset = std::vector(connections.size(), HumanPoseByPeaksIndices(keypointsNumber)); for (size_t i = 0; i < connections.size(); i++) { const int& indexA = connections[i].firstJointIdx; const int& indexB = connections[i].secondJointIdx; @@ -279,11 +292,9 @@ std::vector groupPeaksToPoses(const std::vector>& a const int& indexA = connections[i].firstJointIdx; const int& indexB = connections[i].secondJointIdx; for (size_t j = 0; j < subset.size(); j++) { - if (subset[j].peaksIndices[idxJointA] == indexA - && subset[j].peaksIndices[idxJointB] == -1) { + if (subset[j].peaksIndices[idxJointA] == indexA && subset[j].peaksIndices[idxJointB] == -1) { subset[j].peaksIndices[idxJointB] = indexB; - } else if (subset[j].peaksIndices[idxJointB] == indexB - && subset[j].peaksIndices[idxJointA] == -1) { + } else if (subset[j].peaksIndices[idxJointB] == indexB && subset[j].peaksIndices[idxJointA] == -1) { subset[j].peaksIndices[idxJointA] = indexA; } } @@ -315,8 +326,7 @@ std::vector groupPeaksToPoses(const std::vector>& a } std::vector poses; for (const auto& subsetI : subset) { - if (subsetI.nJoints < minJointsNumber - || subsetI.score / subsetI.nJoints < minSubsetScore) { + if (subsetI.nJoints < minJointsNumber || subsetI.score / subsetI.nJoints < minSubsetScore) { continue; } int position = -1; diff --git a/demos/common/cpp/models/src/segmentation_model.cpp b/demos/common/cpp/models/src/segmentation_model.cpp index 2667a027a6e..82a153b2fe5 100644 --- a/demos/common/cpp/models/src/segmentation_model.cpp +++ b/demos/common/cpp/models/src/segmentation_model.cpp @@ -14,19 +14,27 @@ // limitations under the License. 
*/ +#include "models/segmentation_model.h" + +#include +#include + #include +#include #include #include -#include + +#include +#include #include -#include -#include "models/segmentation_model.h" + +#include "models/internal_model_data.h" #include "models/results.h" -SegmentationModel::SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout) : - ImageModel(modelFileName, useAutoResize, layout) {} +SegmentationModel::SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout) + : ImageModel(modelFileName, useAutoResize, layout) {} -std::vector SegmentationModel::loadLabels(const std::string & labelFilename) { +std::vector SegmentationModel::loadLabels(const std::string& labelFilename) { std::vector labelsList; /* Read labels (if any) */ @@ -61,17 +69,15 @@ void SegmentationModel::prepareInputsOutputs(std::shared_ptr& model) } ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_layout({ "NHWC" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); if (useAutoResize) { - ppp.input().tensor(). - set_spatial_dynamic_shape(); + ppp.input().tensor().set_spatial_dynamic_shape(); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). - resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); } ppp.input().model().set_layout(inputLayout); @@ -87,20 +93,20 @@ void SegmentationModel::prepareInputsOutputs(std::shared_ptr& model) const ov::Shape& outputShape = output.get_shape(); ov::Layout outputLayout(""); switch (outputShape.size()) { - case 3: - outputLayout = "CHW"; - outChannels = 1; - outHeight = (int)(outputShape[ov::layout::height_idx(outputLayout)]); - outWidth = (int)(outputShape[ov::layout::width_idx(outputLayout)]); - break; - case 4: - outputLayout = "NCHW"; - outChannels = (int)(outputShape[ov::layout::channels_idx(outputLayout)]); - outHeight = (int)(outputShape[ov::layout::height_idx(outputLayout)]); - outWidth = (int)(outputShape[ov::layout::width_idx(outputLayout)]); - break; - default: - throw std::logic_error("Unexpected output tensor shape. Only 4D and 3D outputs are supported."); + case 3: + outputLayout = "CHW"; + outChannels = 1; + outHeight = static_cast(outputShape[ov::layout::height_idx(outputLayout)]); + outWidth = static_cast(outputShape[ov::layout::width_idx(outputLayout)]); + break; + case 4: + outputLayout = "NCHW"; + outChannels = static_cast(outputShape[ov::layout::channels_idx(outputLayout)]); + outHeight = static_cast(outputShape[ov::layout::height_idx(outputLayout)]); + outWidth = static_cast(outputShape[ov::layout::width_idx(outputLayout)]); + break; + default: + throw std::logic_error("Unexpected output tensor shape. 
Only 4D and 3D outputs are supported."); } } @@ -114,16 +120,14 @@ std::unique_ptr SegmentationModel::postprocess(InferenceResult& infR if (outChannels == 1 && outTensor.get_element_type() == ov::element::i32) { cv::Mat predictions(outHeight, outWidth, CV_32SC1, outTensor.data()); predictions.convertTo(result->resultImage, CV_8UC1); - } - else if (outChannels == 1 && outTensor.get_element_type() == ov::element::i64) { + } else if (outChannels == 1 && outTensor.get_element_type() == ov::element::i64) { cv::Mat predictions(outHeight, outWidth, CV_32SC1); const auto data = outTensor.data(); for (size_t i = 0; i < predictions.total(); ++i) { reinterpret_cast(predictions.data)[i] = int32_t(data[i]); } predictions.convertTo(result->resultImage, CV_8UC1); - } - else if (outTensor.get_element_type() == ov::element::f32) { + } else if (outTensor.get_element_type() == ov::element::f32) { const float* data = outTensor.data(); for (int rowId = 0; rowId < outHeight; ++rowId) { for (int colId = 0; colId < outWidth; ++colId) { @@ -135,16 +139,19 @@ std::unique_ptr SegmentationModel::postprocess(InferenceResult& infR classId = chId; maxProb = prob; } - } // nChannels + } // nChannels result->resultImage.at(rowId, colId) = classId; - } // width - } // height + } // width + } // height } - cv::resize(result->resultImage, result->resultImage, - cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight), - 0, 0, cv::INTER_NEAREST); + cv::resize(result->resultImage, + result->resultImage, + cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight), + 0, + 0, + cv::INTER_NEAREST); return std::unique_ptr(result); } diff --git a/demos/common/cpp/models/src/style_transfer_model.cpp b/demos/common/cpp/models/src/style_transfer_model.cpp index 1ea8599a310..ee2f29b42e1 100644 --- a/demos/common/cpp/models/src/style_transfer_model.cpp +++ b/demos/common/cpp/models/src/style_transfer_model.cpp @@ -14,20 +14,28 @@ // limitations under the License. 
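For context, SegmentationModel::postprocess above always ends up with an 8-bit single-channel image whose pixel values are class indices, resized back to the source resolution with nearest-neighbour interpolation. A minimal sketch of turning such a map into something displayable (the helper name and palette are illustrative only, not part of these demos):

    #include <opencv2/core.hpp>
    #include <vector>

    // Map class indices (CV_8UC1) to BGR colors for display; the palette is arbitrary.
    cv::Mat colorizeClassMap(const cv::Mat& classMap, const std::vector<cv::Vec3b>& palette) {
        cv::Mat colored(classMap.size(), CV_8UC3);
        for (int r = 0; r < classMap.rows; ++r) {
            for (int c = 0; c < classMap.cols; ++c) {
                const uchar classId = classMap.at<uchar>(r, c);
                colored.at<cv::Vec3b>(r, c) = palette[classId % palette.size()];
            }
        }
        return colored;
    }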
*/ +#include "models/style_transfer_model.h" + +#include + +#include +#include #include #include -#include -#include + +#include +#include #include + #include #include -#include -#include "models/style_transfer_model.h" + +#include "models/input_data.h" +#include "models/internal_model_data.h" #include "models/results.h" -StyleTransferModel::StyleTransferModel(const std::string& modelFileName, const std::string& layout) : - ImageModel(modelFileName, false, layout) { -} +StyleTransferModel::StyleTransferModel(const std::string& modelFileName, const std::string& layout) + : ImageModel(modelFileName, false, layout) {} void StyleTransferModel::prepareInputsOutputs(std::shared_ptr& model) { // --------------------------- Configure input & output --------------------------------------------- @@ -41,8 +49,8 @@ void StyleTransferModel::prepareInputsOutputs(std::shared_ptr& model) const ov::Shape& inputShape = model->input().get_shape(); ov::Layout inputLayout = getInputLayout(model->input()); - if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 - || inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 || + inputShape[ov::layout::channels_idx(inputLayout)] != 3) { throw std::logic_error("3-channel 4-dimensional model's input is expected"); } @@ -51,13 +59,10 @@ void StyleTransferModel::prepareInputsOutputs(std::shared_ptr& model) ov::preprocess::PrePostProcessor ppp(model); ppp.input().preprocess().convert_element_type(ov::element::f32); - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_layout("NHWC"); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); ppp.input().model().set_layout(inputLayout); - // --------------------------- Prepare output ----------------------------------------------------- const ov::OutputVector& outputs = model->outputs(); if (outputs.size() != 1) { @@ -66,9 +71,9 @@ void StyleTransferModel::prepareInputsOutputs(std::shared_ptr& model) outputsNames.push_back(model->output().get_any_name()); const ov::Shape& outputShape = model->output().get_shape(); - ov::Layout outputLayout{ "NCHW" }; - if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 - || outputShape[ov::layout::channels_idx(outputLayout)] != 3) { + ov::Layout outputLayout{"NCHW"}; + if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 || + outputShape[ov::layout::channels_idx(outputLayout)] != 3) { throw std::logic_error("3-channel 4-dimensional model's output is expected"); } @@ -76,7 +81,8 @@ void StyleTransferModel::prepareInputsOutputs(std::shared_ptr& model) model = ppp.build(); } -std::shared_ptr StyleTransferModel::preprocess(const InputData& inputData, ov::InferRequest& request) { +std::shared_ptr StyleTransferModel::preprocess(const InputData& inputData, + ov::InferRequest& request) { auto imgData = inputData.asRef(); auto& img = imgData.inputImage; @@ -87,7 +93,6 @@ std::shared_ptr StyleTransferModel::preprocess(const InputDat } std::unique_ptr StyleTransferModel::postprocess(InferenceResult& infResult) { - ImageResult* result = new ImageResult; *static_cast(result) = static_cast(infResult); @@ -95,15 +100,14 @@ std::unique_ptr StyleTransferModel::postprocess(InferenceResult& inf const auto outputData = infResult.getFirstOutputTensor().data(); const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape(); - size_t outHeight = (int)(outputShape[2]); - size_t outWidth = 
(int)(outputShape[3]); + size_t outHeight = static_cast(outputShape[2]); + size_t outWidth = static_cast(outputShape[3]); size_t numOfPixels = outWidth * outHeight; std::vector imgPlanes; - imgPlanes = std::vector{ - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2])), - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))}; + imgPlanes = std::vector{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))}; cv::Mat resultImg; cv::merge(imgPlanes, resultImg); cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); diff --git a/demos/common/cpp/models/src/super_resolution_model.cpp b/demos/common/cpp/models/src/super_resolution_model.cpp index c649d46c189..164991aa033 100644 --- a/demos/common/cpp/models/src/super_resolution_model.cpp +++ b/demos/common/cpp/models/src/super_resolution_model.cpp @@ -14,20 +14,33 @@ // limitations under the License. */ +#include "models/super_resolution_model.h" + +#include + +#include +#include #include +#include #include -#include + +#include #include + #include #include #include -#include "models/super_resolution_model.h" + +#include "models/input_data.h" +#include "models/internal_model_data.h" #include "models/results.h" -SuperResolutionModel::SuperResolutionModel(const std::string& modelFileName, const cv::Size& inputImgSize, const std::string& layout) : - ImageModel(modelFileName, false, layout) { - netInputHeight = inputImgSize.height; - netInputWidth = inputImgSize.width; +SuperResolutionModel::SuperResolutionModel(const std::string& modelFileName, + const cv::Size& inputImgSize, + const std::string& layout) + : ImageModel(modelFileName, false, layout) { + netInputHeight = inputImgSize.height; + netInputWidth = inputImgSize.width; } void SuperResolutionModel::prepareInputsOutputs(std::shared_ptr& model) { @@ -69,15 +82,13 @@ void SuperResolutionModel::prepareInputsOutputs(std::shared_ptr& mode inputsNames[0].swap(inputsNames[1]); } else if (!(lrShape[widthId] <= bicShape[widthId] && lrShape[heightId] <= bicShape[heightId])) { throw std::logic_error("Each spatial dimension of one input must surpass or be equal to a spatial" - "dimension of another input"); + "dimension of another input"); } } ov::preprocess::PrePostProcessor ppp(model); for (const auto& input : inputs) { - ppp.input(input.get_any_name()).tensor(). - set_element_type(ov::element::u8). 
- set_layout("NHWC"); + ppp.input(input.get_any_name()).tensor().set_element_type(ov::element::u8).set_layout("NHWC"); ppp.input(input.get_any_name()).model().set_layout(inputLayout); } @@ -132,14 +143,15 @@ void SuperResolutionModel::changeInputSize(std::shared_ptr& model, in model->reshape(shapes); } -std::shared_ptr SuperResolutionModel::preprocess(const InputData& inputData, ov::InferRequest& request) { +std::shared_ptr SuperResolutionModel::preprocess(const InputData& inputData, + ov::InferRequest& request) { auto imgData = inputData.asRef(); auto& img = imgData.inputImage; const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]); const ov::Layout layout("NHWC"); - if (img.channels() != (int)lrInputTensor.get_shape()[ov::layout::channels_idx(layout)]) { + if (img.channels() != static_cast(lrInputTensor.get_shape()[ov::layout::channels_idx(layout)])) { cv::cvtColor(img, img, cv::COLOR_BGR2GRAY); } @@ -153,8 +165,8 @@ std::shared_ptr SuperResolutionModel::preprocess(const InputD if (inputsNames.size() == 2) { const ov::Tensor bicInputTensor = request.get_tensor(inputsNames[1]); - const int h = (int)bicInputTensor.get_shape()[ov::layout::height_idx(layout)]; - const int w = (int)bicInputTensor.get_shape()[ov::layout::width_idx(layout)]; + const int h = static_cast(bicInputTensor.get_shape()[ov::layout::height_idx(layout)]); + const int w = static_cast(bicInputTensor.get_shape()[ov::layout::width_idx(layout)]); cv::Mat resized; cv::resize(img, resized, cv::Size(w, h), 0, 0, cv::INTER_CUBIC); request.set_tensor(inputsNames[1], wrapMat2Tensor(resized)); @@ -170,15 +182,14 @@ std::unique_ptr SuperResolutionModel::postprocess(InferenceResult& i std::vector imgPlanes; const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape(); - const size_t outChannels = (int)(outShape[1]); - const size_t outHeight = (int)(outShape[2]); - const size_t outWidth = (int)(outShape[3]); + const size_t outChannels = static_cast(outShape[1]); + const size_t outHeight = static_cast(outShape[2]); + const size_t outWidth = static_cast(outShape[3]); const size_t numOfPixels = outWidth * outHeight; if (outChannels == 3) { - imgPlanes = std::vector{ - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])), - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), - cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))}; + imgPlanes = std::vector{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))}; } else { imgPlanes = std::vector{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))}; // Post-processing for text-image-super-resolution models diff --git a/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h b/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h index f333acdd7b6..6661c00601e 100644 --- a/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h +++ b/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h @@ -15,23 +15,33 @@ */ #pragma once +#include + #include -#include -#include -#include +#include +#include +#include +#include + #include + #include -#include -#include #include + #include "pipelines/requests_pool.h" +class ModelBase; +struct InputData; +struct MetaData; +struct ModelConfig; + /// This is base class for asynchronous pipeline /// Derived classes should add functions for data submission and output processing class AsyncPipeline { public: /// Loads model and 
performs required initialization - /// @param modelInstance pointer to model object. Object it points to should not be destroyed manually after passing pointer to this function. + /// @param modelInstance pointer to model object. Object it points to should not be destroyed manually after passing + /// pointer to this function. /// @param config - fine tuning configuration for model /// @param core - reference to ov::Core instance to use. /// If it is omitted, new instance of ov::Core will be created inside. @@ -40,39 +50,55 @@ class AsyncPipeline { /// Waits until either output data becomes available or pipeline allows to submit more input data. /// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is - /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if any result is ready. + /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if + /// any result is ready. void waitForData(bool shouldKeepOrder = true); /// @returns true if there's available infer requests in the pool /// and next frame can be submitted for processing, false otherwise. - bool isReadyToProcess() { return requestsPool->isIdleRequestAvailable(); } + bool isReadyToProcess() { + return requestsPool->isIdleRequestAvailable(); + } /// Waits for all currently submitted requests to be completed. /// - void waitForTotalCompletion() { if (requestsPool) requestsPool->waitForTotalCompletion(); } + void waitForTotalCompletion() { + if (requestsPool) + requestsPool->waitForTotalCompletion(); + } /// Submits data to the model for inference /// @param inputData - input data to be submitted /// @param metaData - shared pointer to metadata container. /// Might be null. This pointer will be passed through pipeline and put to the final result structure. /// @returns -1 if image cannot be scheduled for processing (there's no free InferRequest available). - /// Otherwise returns unique sequential frame ID for this particular request. Same frame ID will be written in the result structure. + /// Otherwise returns unique sequential frame ID for this particular request. Same frame ID will be written in the + /// result structure. virtual int64_t submitData(const InputData& inputData, const std::shared_ptr& metaData); /// Gets available data from the queue /// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is - /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if any result is ready. + /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if + /// any result is ready. virtual std::unique_ptr getResult(bool shouldKeepOrder = true); - PerformanceMetrics getInferenceMetircs(){ return inferenceMetrics;} - PerformanceMetrics getPreprocessMetrics(){ return preprocessMetrics;} - PerformanceMetrics getPostprocessMetrics() { return postprocessMetrics;} + PerformanceMetrics getInferenceMetircs() { + return inferenceMetrics; + } + PerformanceMetrics getPreprocessMetrics() { + return preprocessMetrics; + } + PerformanceMetrics getPostprocessMetrics() { + return postprocessMetrics; + } protected: /// Returns processed result, if available /// @param shouldKeepOrder if true, function will return processed data sequentially, - /// keeping original frames order (as they were submitted). 
Otherwise, function will return processed data in random order. - /// @returns InferenceResult with processed information or empty InferenceResult (with negative frameID) if there's no any results yet. + /// keeping original frames order (as they were submitted). Otherwise, function will return processed data in random + /// order. + /// @returns InferenceResult with processed information or empty InferenceResult (with negative frameID) if there's + /// no any results yet. virtual InferenceResult getInferenceResult(bool shouldKeepOrder); std::unique_ptr requestsPool; diff --git a/demos/common/cpp/pipelines/include/pipelines/metadata.h b/demos/common/cpp/pipelines/include/pipelines/metadata.h index ae22964ebde..aca18ee36aa 100644 --- a/demos/common/cpp/pipelines/include/pipelines/metadata.h +++ b/demos/common/cpp/pipelines/include/pipelines/metadata.h @@ -20,11 +20,13 @@ struct MetaData { virtual ~MetaData() {} - template T& asRef() { + template + T& asRef() { return dynamic_cast(*this); } - template const T& asRef() const { + template + const T& asRef() const { return dynamic_cast(*this); } }; @@ -33,20 +35,17 @@ struct ImageMetaData : public MetaData { cv::Mat img; std::chrono::steady_clock::time_point timeStamp; - ImageMetaData() { - } + ImageMetaData() {} - ImageMetaData(cv::Mat img, std::chrono::steady_clock::time_point timeStamp) : - img(img), - timeStamp(timeStamp) { - } + ImageMetaData(cv::Mat img, std::chrono::steady_clock::time_point timeStamp) : img(img), timeStamp(timeStamp) {} }; struct ClassificationImageMetaData : public ImageMetaData { unsigned int groundTruthId; - ClassificationImageMetaData(cv::Mat img, std::chrono::steady_clock::time_point timeStamp, unsigned int groundTruthId) : - ImageMetaData(img, timeStamp), - groundTruthId(groundTruthId) { - } + ClassificationImageMetaData(cv::Mat img, + std::chrono::steady_clock::time_point timeStamp, + unsigned int groundTruthId) + : ImageMetaData(img, timeStamp), + groundTruthId(groundTruthId) {} }; diff --git a/demos/common/cpp/pipelines/include/pipelines/requests_pool.h b/demos/common/cpp/pipelines/include/pipelines/requests_pool.h index 0250417a1cc..d9b220e198c 100644 --- a/demos/common/cpp/pipelines/include/pipelines/requests_pool.h +++ b/demos/common/cpp/pipelines/include/pipelines/requests_pool.h @@ -16,11 +16,13 @@ #pragma once -#include -#include -#include -#include +#include + +#include +#include +#include +#include /// This is class storing requests pool for asynchronous pipeline /// @@ -29,8 +31,9 @@ class RequestsPool { RequestsPool(ov::CompiledModel& compiledModel, unsigned int size); ~RequestsPool(); - /// Returns idle request from the pool. Returned request is automatically marked as In Use (this status will be reset after request processing completion) - /// This function is thread safe as long as request is used only until setRequestIdle call + /// Returns idle request from the pool. Returned request is automatically marked as In Use (this status will be + /// reset after request processing completion) This function is thread safe as long as request is used only until + /// setRequestIdle call /// @returns pointer to request with idle state or nullptr if all requests are in use. ov::InferRequest getIdleRequest(); @@ -48,7 +51,8 @@ class RequestsPool { bool isIdleRequestAvailable(); /// Waits for completion of every non-idle requests in pool. 
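The comments above describe the pool's contract: a request obtained with getIdleRequest serves exactly one start_async call and is handed back with setRequestIdle from the completion callback, which is also what AsyncPipeline::submitData below does. A condensed, hypothetical helper showing that pairing (error handling and result bookkeeping omitted; it assumes only the RequestsPool declared in this header):

    #include <functional>
    #include <openvino/openvino.hpp>
    #include "pipelines/requests_pool.h"

    // Schedule one asynchronous inference if an idle request is available.
    bool scheduleOnce(RequestsPool& pool, const std::function<void(ov::InferRequest&)>& fillInputs) {
        if (!pool.isIdleRequestAvailable()) {
            return false;  // caller is expected to wait and retry
        }
        ov::InferRequest request = pool.getIdleRequest();
        fillInputs(request);  // set the input tensors for this frame
        request.set_callback([&pool, request](std::exception_ptr) {
            pool.setRequestIdle(request);  // the request becomes reusable only here
        });
        request.start_async();
        return true;
    }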
- /// getIdleRequest should not be called together with this function or after it to avoid race condition or invalid state + /// getIdleRequest should not be called together with this function or after it to avoid race condition or invalid + /// state /// @returns number of requests in use void waitForTotalCompletion(); diff --git a/demos/common/cpp/pipelines/src/async_pipeline.cpp b/demos/common/cpp/pipelines/src/async_pipeline.cpp index 8c1f4c47fe5..7460cf44192 100644 --- a/demos/common/cpp/pipelines/src/async_pipeline.cpp +++ b/demos/common/cpp/pipelines/src/async_pipeline.cpp @@ -14,15 +14,30 @@ // limitations under the License. */ +#include "pipelines/async_pipeline.h" + +#include +#include +#include #include +#include #include +#include +#include + #include -#include + +#include +#include +#include +#include #include -#include "pipelines/async_pipeline.h" -AsyncPipeline::AsyncPipeline(std::unique_ptr&& modelInstance, const ModelConfig& config, ov::Core& core) : - model(std::move(modelInstance)) { +struct InputData; +struct MetaData; + +AsyncPipeline::AsyncPipeline(std::unique_ptr&& modelInstance, const ModelConfig& config, ov::Core& core) + : model(std::move(modelInstance)) { compiledModel = model->compileModel(config, core); // --------------------------- Create infer requests ------------------------------------------------ unsigned int nireq = config.maxAsyncRequests; @@ -31,8 +46,10 @@ AsyncPipeline::AsyncPipeline(std::unique_ptr&& modelInstance, const M // +1 to use it as a buffer of the pipeline nireq = compiledModel.get_property(ov::optimal_number_of_infer_requests) + 1; } catch (const ov::Exception& ex) { - throw std::runtime_error(std::string("Every device used with the demo should support compiled model's property " - "'OPTIMAL_NUMBER_OF_INFER_REQUESTS'. Failed to query the property with error: ") + ex.what()); + throw std::runtime_error( + std::string("Every device used with the demo should support compiled model's property " + "'OPTIMAL_NUMBER_OF_INFER_REQUESTS'. Failed to query the property with error: ") + + ex.what()); } } slog::info << "\tNumber of inference requests: " << nireq << slog::endl; @@ -48,16 +65,11 @@ AsyncPipeline::~AsyncPipeline() { void AsyncPipeline::waitForData(bool shouldKeepOrder) { std::unique_lock lock(mtx); - condVar.wait( - lock, - [&]() - { - return callbackException != nullptr || - requestsPool->isIdleRequestAvailable() || - (shouldKeepOrder ? - completedInferenceResults.find(outputFrameId) != completedInferenceResults.end() : - !completedInferenceResults.empty()); - }); + condVar.wait(lock, [&]() { + return callbackException != nullptr || requestsPool->isIdleRequestAvailable() || + (shouldKeepOrder ? completedInferenceResults.find(outputFrameId) != completedInferenceResults.end() + : !completedInferenceResults.empty()); + }); if (callbackException) { std::rethrow_exception(callbackException); @@ -98,15 +110,14 @@ int64_t AsyncPipeline::submitData(const InputData& inputData, const std::shared_ completedInferenceResults.emplace(frameID, result); requestsPool->setRequestIdle(request); - } - catch (...) { + } catch (...) { if (!callbackException) { callbackException = std::current_exception(); } } } condVar.notify_one(); - }); + }); inputFrameId++; if (inputFrameId < 0) @@ -135,9 +146,8 @@ InferenceResult AsyncPipeline::getInferenceResult(bool shouldKeepOrder) { { const std::lock_guard lock(mtx); - const auto& it = shouldKeepOrder ? 
- completedInferenceResults.find(outputFrameId) : - completedInferenceResults.begin(); + const auto& it = + shouldKeepOrder ? completedInferenceResults.find(outputFrameId) : completedInferenceResults.begin(); if (it != completedInferenceResults.end()) { retVal = std::move(it->second); @@ -145,7 +155,7 @@ InferenceResult AsyncPipeline::getInferenceResult(bool shouldKeepOrder) { } } - if(!retVal.IsEmpty()) { + if (!retVal.IsEmpty()) { outputFrameId = retVal.frameId; outputFrameId++; if (outputFrameId < 0) { diff --git a/demos/common/cpp/pipelines/src/requests_pool.cpp b/demos/common/cpp/pipelines/src/requests_pool.cpp index 318c2a07327..93230c95995 100644 --- a/demos/common/cpp/pipelines/src/requests_pool.cpp +++ b/demos/common/cpp/pipelines/src/requests_pool.cpp @@ -14,12 +14,15 @@ // limitations under the License. */ +#include "pipelines/requests_pool.h" + +#include +#include #include + #include -#include "pipelines/requests_pool.h" -RequestsPool::RequestsPool(ov::CompiledModel& compiledModel, unsigned int size) : - numRequestsInUse(0) { +RequestsPool::RequestsPool(ov::CompiledModel& compiledModel, unsigned int size) : numRequestsInUse(0) { for (unsigned int infReqId = 0; infReqId < size; ++infReqId) { requests.emplace_back(compiledModel.create_infer_request(), false); } @@ -35,12 +38,12 @@ RequestsPool::~RequestsPool() { ov::InferRequest RequestsPool::getIdleRequest() { std::lock_guard lock(mtx); - const auto& it = std::find_if(requests.begin(), requests.end(), - [](const std::pair& x) {return !x.second; }); + const auto& it = std::find_if(requests.begin(), requests.end(), [](const std::pair& x) { + return !x.second; + }); if (it == requests.end()) { return ov::InferRequest(); - } - else { + } else { it->second = true; numRequestsInUse++; return it->first; @@ -49,8 +52,11 @@ ov::InferRequest RequestsPool::getIdleRequest() { void RequestsPool::setRequestIdle(const ov::InferRequest& request) { std::lock_guard lock(mtx); - const auto& it = std::find_if(this->requests.begin(), this->requests.end(), - [&request](const std::pair& x) {return x.first == request; }); + const auto& it = std::find_if(this->requests.begin(), + this->requests.end(), + [&request](const std::pair& x) { + return x.first == request; + }); it->second = false; numRequestsInUse--; } diff --git a/demos/common/cpp/utils/include/utils/config_factory.h b/demos/common/cpp/utils/include/utils/config_factory.h index 4fcdb9f426a..15fe4f4aa7b 100644 --- a/demos/common/cpp/utils/include/utils/config_factory.h +++ b/demos/common/cpp/utils/include/utils/config_factory.h @@ -15,12 +15,13 @@ */ #pragma once -#include -#include -#include +#include // for uint32_t -#include -#include "gflags/gflags.h" +#include // for map +#include // for set +#include // for string + +#include // for AnyMap struct ModelConfig { std::string deviceName; @@ -31,14 +32,17 @@ struct ModelConfig { std::set getDevices(); std::map getLegacyConfig(); + protected: std::set devices; }; class ConfigFactory { public: - static ModelConfig getUserConfig(const std::string& flags_d, uint32_t flags_nireq, - const std::string& flags_nstreams, uint32_t flags_nthreads); + static ModelConfig getUserConfig(const std::string& flags_d, + uint32_t flags_nireq, + const std::string& flags_nstreams, + uint32_t flags_nthreads); static ModelConfig getMinLatencyConfig(const std::string& flags_d, uint32_t flags_nireq); protected: diff --git a/demos/common/cpp/utils/src/config_factory.cpp b/demos/common/cpp/utils/src/config_factory.cpp index fa92d467974..7d0d9808e94 100644 --- 
a/demos/common/cpp/utils/src/config_factory.cpp +++ b/demos/common/cpp/utils/src/config_factory.cpp @@ -14,14 +14,17 @@ // limitations under the License. */ - -#include -#include -#include -#include "utils/args_helper.hpp" -#include "utils/common.hpp" #include "utils/config_factory.h" +#include // for set, set<>::iterator +#include // for string, operator==, basic_string +#include // for pair +#include // for vector + +#include // for intel_gpu properties + +#include "utils/args_helper.hpp" // for parseDevices, parseValuePerDevice + std::set ModelConfig::getDevices() { if (devices.empty()) { for (const std::string& device : parseDevices(deviceName)) { @@ -33,7 +36,9 @@ std::set ModelConfig::getDevices() { } ModelConfig ConfigFactory::getUserConfig(const std::string& flags_d, - uint32_t flags_nireq, const std::string& flags_nstreams, uint32_t flags_nthreads) { + uint32_t flags_nireq, + const std::string& flags_nstreams, + uint32_t flags_nthreads) { auto config = getCommonConfig(flags_d, flags_nireq); std::map deviceNstreams = parseValuePerDevice(config.getDevices(), flags_nstreams); @@ -45,16 +50,19 @@ ModelConfig ConfigFactory::getUserConfig(const std::string& flags_d, config.compiledModelConfig.emplace(ov::affinity.name(), ov::Affinity::NONE); - ov::streams::Num nstreams = deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO; + ov::streams::Num nstreams = + deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO; config.compiledModelConfig.emplace(ov::streams::num.name(), nstreams); } else if (device == "GPU") { - ov::streams::Num nstreams = deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO; + ov::streams::Num nstreams = + deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO; config.compiledModelConfig.emplace(ov::streams::num.name(), nstreams); - if (flags_d.find("MULTI") != std::string::npos && config.getDevices().find("CPU") != config.getDevices().end()) { + if (flags_d.find("MULTI") != std::string::npos && + config.getDevices().find("CPU") != config.getDevices().end()) { // multi-device execution with the CPU + GPU performs best with GPU throttling hint, // which releases another CPU thread (that is otherwise used by the GPU driver for active polling) config.compiledModelConfig.emplace(ov::intel_gpu::hint::queue_throttle.name(), - ov::intel_gpu::hint::ThrottleLevel(1)); + ov::intel_gpu::hint::ThrottleLevel(1)); } } } @@ -66,8 +74,7 @@ ModelConfig ConfigFactory::getMinLatencyConfig(const std::string& flags_d, uint3 for (const auto& device : config.getDevices()) { if (device == "CPU") { // CPU supports a few special performance-oriented keys config.compiledModelConfig.emplace(ov::streams::num.name(), 1); - } - else if (device == "GPU") { + } else if (device == "GPU") { config.compiledModelConfig.emplace(ov::streams::num.name(), 1); } } diff --git a/demos/human_pose_estimation_demo/cpp/main.cpp b/demos/human_pose_estimation_demo/cpp/main.cpp index ff54f15bf33..5abc7c45d95 100644 --- a/demos/human_pose_estimation_demo/cpp/main.cpp +++ b/demos/human_pose_estimation_demo/cpp/main.cpp @@ -14,49 +14,69 @@ // limitations under the License. 
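ConfigFactory above only fills a plain ModelConfig: a device string plus an ov::AnyMap of compile-time properties. A minimal sketch of applying it (this mirrors what the demos do through ModelBase::compileModel, which is outside this hunk; the helper name is illustrative):

    #include <memory>
    #include <string>

    #include <openvino/openvino.hpp>

    #include "utils/config_factory.h"

    ov::CompiledModel compileWithConfig(ov::Core& core, const std::string& modelPath, const ModelConfig& config) {
        std::shared_ptr<ov::Model> model = core.read_model(modelPath);
        // deviceName may be a single device ("CPU") or a composite one ("MULTI:GPU,CPU");
        // compiledModelConfig carries the per-device properties chosen by ConfigFactory.
        return core.compile_model(model, config.deviceName, config.compiledModelConfig);
    }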
*/ +#include +#include + +#include +#include +#include #include +#include +#include +#include #include +#include #include #include -#include +#include +#include +#include #include -#include #include #include +#include +#include +#include +#include #include #include -#include +#include +#include #include +#include #include #include #include #include - DEFINE_INPUT_FLAGS DEFINE_OUTPUT_FLAGS static const char help_message[] = "Print a usage message."; -static const char at_message[] = "Required. Type of the model, either 'ae' for Associative Embedding, 'higherhrnet' for HigherHRNet models based on ae " -"or 'openpose' for OpenPose."; +static const char at_message[] = "Required. Type of the model, either 'ae' for Associative Embedding, 'higherhrnet' " + "for HigherHRNet models based on ae " + "or 'openpose' for OpenPose."; static const char model_message[] = "Required. Path to an .xml file with a trained model."; static const char layout_message[] = "Optional. Specify inputs layouts." -" Ex. NCHW or input0:NCHW,input1:NC in case of more than one input."; + " Ex. NCHW or input0:NCHW,input1:NC in case of more than one input."; static const char target_size_message[] = "Optional. Target input size."; -static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). " -"Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " -"The demo will look for a suitable plugin for a specified device."; +static const char target_device_message[] = + "Optional. Specify the target device to infer on (the list of available devices is shown below). " + "Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " + "The demo will look for a suitable plugin for a specified device."; static const char thresh_output_message[] = "Optional. Probability threshold for poses filtering."; -static const char nireq_message[] = "Optional. Number of infer requests. If this option is omitted, number of infer requests is determined automatically."; +static const char nireq_message[] = "Optional. Number of infer requests. If this option is omitted, number of infer " + "requests is determined automatically."; static const char num_threads_message[] = "Optional. Number of threads."; static const char num_streams_message[] = "Optional. Number of streams to use for inference on the CPU or/and GPU in " -"throughput mode (for HETERO and MULTI device cases use format " -":,: or just )"; + "throughput mode (for HETERO and MULTI device cases use format " + ":,: or just )"; static const char no_show_message[] = "Optional. Don't show output."; static const char utilization_monitors_message[] = "Optional. List of monitors to show initially."; -static const char output_resolution_message[] = "Optional. Specify the maximum output window resolution " +static const char output_resolution_message[] = + "Optional. Specify the maximum output window resolution " "in (width x height) format. Example: 1280x720. 
Input frame size used by default."; DEFINE_bool(h, false, help_message); @@ -74,8 +94,8 @@ DEFINE_string(u, "", utilization_monitors_message); DEFINE_string(output_resolution, "", output_resolution_message); /** -* \brief This function shows a help message -*/ + * \brief This function shows a help message + */ static void showUsage() { std::cout << std::endl; std::cout << "human_pose_estimation_demo [OPTION]" << std::endl; @@ -100,7 +120,7 @@ static void showUsage() { std::cout << " -u " << utilization_monitors_message << std::endl; } -bool ParseAndCheckCommandLine(int argc, char *argv[]) { +bool ParseAndCheckCommandLine(int argc, char* argv[]) { // ---------------------------Parsing and validation of input args-------------------------------------- gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); if (FLAGS_h) { @@ -127,7 +147,6 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) { return true; } - cv::Mat renderHumanPose(HumanPoseResult& result, OutputTransform& outputTransform) { if (!result.metaData) { throw std::invalid_argument("Renderer: metadata is null"); @@ -139,24 +158,60 @@ cv::Mat renderHumanPose(HumanPoseResult& result, OutputTransform& outputTransfor throw std::invalid_argument("Renderer: image provided in metadata is empty"); } outputTransform.resize(outputImg); - static const cv::Scalar colors[HPEOpenPose::keypointsNumber] = { - cv::Scalar(255, 0, 0), cv::Scalar(255, 85, 0), cv::Scalar(255, 170, 0), - cv::Scalar(255, 255, 0), cv::Scalar(170, 255, 0), cv::Scalar(85, 255, 0), - cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 85), cv::Scalar(0, 255, 170), - cv::Scalar(0, 255, 255), cv::Scalar(0, 170, 255), cv::Scalar(0, 85, 255), - cv::Scalar(0, 0, 255), cv::Scalar(85, 0, 255), cv::Scalar(170, 0, 255), - cv::Scalar(255, 0, 255), cv::Scalar(255, 0, 170), cv::Scalar(255, 0, 85) - }; - static const std::pair keypointsOP[] = { - {1, 2}, {1, 5}, {2, 3}, {3, 4}, {5, 6}, {6, 7}, - {1, 8}, {8, 9}, {9, 10}, {1, 11}, {11, 12}, {12, 13}, - {1, 0}, {0, 14},{14, 16}, {0, 15}, {15, 17} - }; - static const std::pair keypointsAE[] = { - {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, - {6, 12}, {5, 6}, {5, 7}, {6, 8}, {7, 9}, {8, 10}, - {1, 2}, {0, 1}, {0, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6} - }; + static const cv::Scalar colors[HPEOpenPose::keypointsNumber] = {cv::Scalar(255, 0, 0), + cv::Scalar(255, 85, 0), + cv::Scalar(255, 170, 0), + cv::Scalar(255, 255, 0), + cv::Scalar(170, 255, 0), + cv::Scalar(85, 255, 0), + cv::Scalar(0, 255, 0), + cv::Scalar(0, 255, 85), + cv::Scalar(0, 255, 170), + cv::Scalar(0, 255, 255), + cv::Scalar(0, 170, 255), + cv::Scalar(0, 85, 255), + cv::Scalar(0, 0, 255), + cv::Scalar(85, 0, 255), + cv::Scalar(170, 0, 255), + cv::Scalar(255, 0, 255), + cv::Scalar(255, 0, 170), + cv::Scalar(255, 0, 85)}; + static const std::pair keypointsOP[] = {{1, 2}, + {1, 5}, + {2, 3}, + {3, 4}, + {5, 6}, + {6, 7}, + {1, 8}, + {8, 9}, + {9, 10}, + {1, 11}, + {11, 12}, + {12, 13}, + {1, 0}, + {0, 14}, + {14, 16}, + {0, 15}, + {15, 17}}; + static const std::pair keypointsAE[] = {{15, 13}, + {13, 11}, + {16, 14}, + {14, 12}, + {11, 12}, + {5, 11}, + {6, 12}, + {5, 6}, + {5, 7}, + {6, 8}, + {7, 9}, + {8, 10}, + {1, 2}, + {0, 1}, + {0, 2}, + {1, 3}, + {2, 4}, + {3, 5}, + {4, 6}}; const int stickWidth = 4; const cv::Point2f absentKeypoint(-1.0f, -1.0f); for (auto& pose : result.poses) { @@ -171,8 +226,7 @@ cv::Mat renderHumanPose(HumanPoseResult& result, OutputTransform& outputTransfor if (!result.poses.empty()) { if (result.poses[0].keypoints.size() == 
HPEOpenPose::keypointsNumber) { limbKeypointsIds.insert(limbKeypointsIds.begin(), std::begin(keypointsOP), std::end(keypointsOP)); - } - else { + } else { limbKeypointsIds.insert(limbKeypointsIds.begin(), std::begin(keypointsAE), std::end(keypointsAE)); } } @@ -180,9 +234,8 @@ cv::Mat renderHumanPose(HumanPoseResult& result, OutputTransform& outputTransfor for (auto pose : result.poses) { for (const auto& limbKeypointsId : limbKeypointsIds) { std::pair limbKeypoints(pose.keypoints[limbKeypointsId.first], - pose.keypoints[limbKeypointsId.second]); - if (limbKeypoints.first == absentKeypoint - || limbKeypoints.second == absentKeypoint) { + pose.keypoints[limbKeypointsId.second]); + if (limbKeypoints.first == absentKeypoint || limbKeypoints.second == absentKeypoint) { continue; } @@ -192,8 +245,7 @@ cv::Mat renderHumanPose(HumanPoseResult& result, OutputTransform& outputTransfor double length = std::sqrt(difference.x * difference.x + difference.y * difference.y); int angle = static_cast(std::atan2(difference.y, difference.x) * 180 / CV_PI); std::vector polygon; - cv::ellipse2Poly(cv::Point2d(meanX, meanY), cv::Size2d(length / 2, stickWidth), - angle, 0, 360, 1, polygon); + cv::ellipse2Poly(cv::Point2d(meanX, meanY), cv::Size2d(length / 2, stickWidth), angle, 0, 360, 1, polygon); cv::fillConvexPoly(pane, polygon, colors[limbKeypointsId.second]); } } @@ -201,7 +253,7 @@ cv::Mat renderHumanPose(HumanPoseResult& result, OutputTransform& outputTransfor return outputImg; } -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { try { PerformanceMetrics metrics, renderMetrics; @@ -221,29 +273,36 @@ int main(int argc, char *argv[]) { cv::Size outputResolution = curr_frame.size(); size_t found = FLAGS_output_resolution.find("x"); if (found != std::string::npos) { - outputResolution = cv::Size{ - std::stoi(FLAGS_output_resolution.substr(0, found)), - std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length())) - }; + outputResolution = + cv::Size{std::stoi(FLAGS_output_resolution.substr(0, found)), + std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length()))}; outputTransform = OutputTransform(curr_frame.size(), outputResolution); outputResolution = outputTransform.computeResolution(); } - //------------------------------ Running Human Pose Estimation routines ---------------------------------------------- + //------------------------------ Running Human Pose Estimation routines + //---------------------------------------------- double aspectRatio = curr_frame.cols / static_cast(curr_frame.rows); std::unique_ptr model; if (FLAGS_at == "openpose") { - model.reset(new HPEOpenPose(FLAGS_m, aspectRatio, FLAGS_tsize, (float)FLAGS_t, FLAGS_layout)); - } - else if (FLAGS_at == "ae") { - model.reset(new HpeAssociativeEmbedding(FLAGS_m, aspectRatio, FLAGS_tsize, (float)FLAGS_t, FLAGS_layout)); - } - else if (FLAGS_at == "higherhrnet") { + model.reset(new HPEOpenPose(FLAGS_m, aspectRatio, FLAGS_tsize, static_cast(FLAGS_t), FLAGS_layout)); + } else if (FLAGS_at == "ae") { + model.reset(new HpeAssociativeEmbedding(FLAGS_m, + aspectRatio, + FLAGS_tsize, + static_cast(FLAGS_t), + FLAGS_layout)); + } else if (FLAGS_at == "higherhrnet") { float delta = 0.5f; - model.reset(new HpeAssociativeEmbedding(FLAGS_m, aspectRatio, FLAGS_tsize, (float)FLAGS_t, FLAGS_layout, delta, RESIZE_KEEP_ASPECT_LETTERBOX)); - } - else { + model.reset(new HpeAssociativeEmbedding(FLAGS_m, + aspectRatio, + FLAGS_tsize, + static_cast(FLAGS_t), + FLAGS_layout, + delta, + 
RESIZE_KEEP_ASPECT_LETTERBOX)); + } else { slog::err << "No model type or invalid model type (-at) provided: " + FLAGS_at << slog::endl; return -1; } @@ -252,12 +311,12 @@ int main(int argc, char *argv[]) { ov::Core core; AsyncPipeline pipeline(std::move(model), - ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), - core); + ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), + core); Presenter presenter(FLAGS_u); - int64_t frameNum = pipeline.submitData(ImageInputData(curr_frame), - std::make_shared(curr_frame, startTime)); + int64_t frameNum = + pipeline.submitData(ImageInputData(curr_frame), std::make_shared(curr_frame, startTime)); uint32_t framesProcessed = 0; bool keepRunning = true; @@ -273,10 +332,11 @@ int main(int argc, char *argv[]) { break; } frameNum = pipeline.submitData(ImageInputData(curr_frame), - std::make_shared(curr_frame, startTime)); - } + std::make_shared(curr_frame, startTime)); + } - //--- Waiting for free input slot or output data available. Function will return immediately if any of them are available. + //--- Waiting for free input slot or output data available. Function will return immediately if any of them + // are available. pipeline.waitForData(); //--- Checking for results and rendering data if it's ready @@ -289,7 +349,10 @@ int main(int argc, char *argv[]) { presenter.drawGraphs(outFrame); renderMetrics.update(renderingStart); metrics.update(result->metaData->asRef().timeStamp, - outFrame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + outFrame, + {10, 22}, + cv::FONT_HERSHEY_COMPLEX, + 0.65); videoWriter.write(outFrame); framesProcessed++; if (!FLAGS_no_show) { @@ -298,8 +361,7 @@ int main(int argc, char *argv[]) { int key = cv::waitKey(1); if (27 == key || 'q' == key || 'Q' == key) { // Esc keepRunning = false; - } - else { + } else { presenter.handleKey(key); } } @@ -316,7 +378,10 @@ int main(int argc, char *argv[]) { presenter.drawGraphs(outFrame); renderMetrics.update(renderingStart); metrics.update(result->metaData->asRef().timeStamp, - outFrame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + outFrame, + {10, 22}, + cv::FONT_HERSHEY_COMPLEX, + 0.65); videoWriter.write(outFrame); if (!FLAGS_no_show) { cv::imshow("Human Pose Estimation Results", outFrame); @@ -327,17 +392,17 @@ int main(int argc, char *argv[]) { slog::info << "Metrics report:" << slog::endl; metrics.logTotal(); - logLatencyPerStage(cap->getMetrics().getTotal().latency, pipeline.getPreprocessMetrics().getTotal().latency, - pipeline.getInferenceMetircs().getTotal().latency, pipeline.getPostprocessMetrics().getTotal().latency, - renderMetrics.getTotal().latency); + logLatencyPerStage(cap->getMetrics().getTotal().latency, + pipeline.getPreprocessMetrics().getTotal().latency, + pipeline.getInferenceMetircs().getTotal().latency, + pipeline.getPostprocessMetrics().getTotal().latency, + renderMetrics.getTotal().latency); slog::info << presenter.reportMeans() << slog::endl; - } - catch (const std::exception& error) { + } catch (const std::exception& error) { slog::err << error.what() << slog::endl; return 1; - } - catch (...) { + } catch (...) { slog::err << "Unknown/internal exception happened." << slog::endl; return 1; } diff --git a/demos/image_processing_demo/cpp/main.cpp b/demos/image_processing_demo/cpp/main.cpp index 91535d54eba..bcfe3c8be6f 100644 --- a/demos/image_processing_demo/cpp/main.cpp +++ b/demos/image_processing_demo/cpp/main.cpp @@ -14,26 +14,40 @@ // limitations under the License. 
*/ +#include +#include + +#include +#include #include -#include +#include +#include #include -#include -#include +#include +#include #include +#include +#include +#include #include -#include #include +#include +#include #include +#include +#include #include #include +#include #include #include -#include +#include +#include #include -#include #include +#include #include #include @@ -43,24 +57,28 @@ DEFINE_INPUT_FLAGS DEFINE_OUTPUT_FLAGS static const char help_message[] = "Print a usage message."; -static const char at_message[] = "Required. Type of the model, either 'sr' for Super Resolution task, 'deblur' for Deblurring, 'jr' for JPEGRestoration, 'style' for Style Transfer task."; +static const char at_message[] = "Required. Type of the model, either 'sr' for Super Resolution task, 'deblur' for " + "Deblurring, 'jr' for JPEGRestoration, 'style' for Style Transfer task."; static const char model_message[] = "Required. Path to an .xml file with a trained model."; static const char layout_message[] = "Optional. Specify inputs layouts." -" Ex. NCHW or input0:NCHW,input1:NC in case of more than one input."; -static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). " -"Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " -"The demo will look for a suitable plugin for a specified device."; -static const char nireq_message[] = "Optional. Number of infer requests. If this option is omitted, number of infer requests is determined automatically."; + " Ex. NCHW or input0:NCHW,input1:NC in case of more than one input."; +static const char target_device_message[] = + "Optional. Specify the target device to infer on (the list of available devices is shown below). " + "Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " + "The demo will look for a suitable plugin for a specified device."; +static const char nireq_message[] = "Optional. Number of infer requests. If this option is omitted, number of infer " + "requests is determined automatically."; static const char num_threads_message[] = "Optional. Number of threads."; static const char num_streams_message[] = "Optional. Number of streams to use for inference on the CPU or/and GPU in " -"throughput mode (for HETERO and MULTI device cases use format " -":,: or just )"; + "throughput mode (for HETERO and MULTI device cases use format " + ":,: or just )"; static const char no_show_processed_video[] = "Optional. Do not show processed video."; static const char utilization_monitors_message[] = "Optional. List of monitors to show initially."; -static const char output_resolution_message[] = "Optional. Specify the maximum output window resolution " +static const char output_resolution_message[] = + "Optional. Specify the maximum output window resolution " "in (width x height) format. Example: 1280x720. Input frame size used by default."; static const char jc_message[] = "Optional. Flag of using compression for jpeg images. " - "Default value if false. Only for jr architecture type."; + "Default value if false. 
Only for jr architecture type."; DEFINE_bool(h, false, help_message); DEFINE_string(at, "", at_message); @@ -75,10 +93,9 @@ DEFINE_string(u, "", utilization_monitors_message); DEFINE_string(output_resolution, "", output_resolution_message); DEFINE_bool(jc, false, jc_message); - /** -* \brief This function shows a help message -*/ + * \brief This function shows a help message + */ static void showUsage() { std::cout << std::endl; std::cout << "image_processing_demo [OPTION]" << std::endl; @@ -102,7 +119,7 @@ static void showUsage() { std::cout << " -jc " << jc_message << std::endl; } -bool ParseAndCheckCommandLine(int argc, char *argv[]) { +bool ParseAndCheckCommandLine(int argc, char* argv[]) { // ---------------------------Parsing and validation of input args-------------------------------------- gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); if (FLAGS_h) { @@ -128,7 +145,7 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) { return true; } -std::unique_ptr getModel(const cv::Size& frameSize, const std::string& type, bool doCompression=false) { +std::unique_ptr getModel(const cv::Size& frameSize, const std::string& type, bool doCompression = false) { if (type == "sr") { return std::unique_ptr(new SuperResolutionModel(FLAGS_m, frameSize, FLAGS_layout)); } @@ -144,7 +161,7 @@ std::unique_ptr getModel(const cv::Size& frameSize, const std::strin throw std::invalid_argument("No model type or invalid model type (-at) provided: " + FLAGS_at); } -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { try { PerformanceMetrics metrics, renderMetrics; @@ -166,12 +183,12 @@ int main(int argc, char *argv[]) { std::unique_ptr model = getModel(cv::Size(curr_frame.cols, curr_frame.rows), FLAGS_at, FLAGS_jc); AsyncPipeline pipeline(std::move(model), - ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), - core); + ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), + core); Presenter presenter(FLAGS_u); - int64_t frameNum = pipeline.submitData(ImageInputData(curr_frame), - std::make_shared(curr_frame, startTime)); + int64_t frameNum = + pipeline.submitData(ImageInputData(curr_frame), std::make_shared(curr_frame, startTime)); bool keepRunning = true; std::unique_ptr result; @@ -194,12 +211,10 @@ int main(int argc, char *argv[]) { cv::Size viewSize = view.getSize(); if (outputResolution.height > viewSize.height || outputResolution.width > viewSize.width) outputResolution = viewSize; - } - else { - outputResolution = cv::Size{ - std::stoi(FLAGS_output_resolution.substr(0, found)), - std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length())) - }; + } else { + outputResolution = + cv::Size{std::stoi(FLAGS_output_resolution.substr(0, found)), + std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length()))}; } outputTransform = OutputTransform(result->asRef().resultImage.size(), outputResolution); outputResolution = outputTransform.computeResolution(); @@ -227,10 +242,11 @@ int main(int argc, char *argv[]) { } frameNum = pipeline.submitData(ImageInputData(curr_frame), - std::make_shared(curr_frame, startTime)); + std::make_shared(curr_frame, startTime)); } - //--- Waiting for free input slot or output data available. Function will return immediately if any of them are available. + //--- Waiting for free input slot or output data available. Function will return immediately if any of them + // are available. 
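            // (waitForData() is the loop's only blocking point. Its implementation in
            // AsyncPipeline::waitForData, shown earlier in this patch, is a condition-variable
            // wait whose predicate checks exactly these cases: a stored callback exception,
            // an idle infer request for the next frame, or a completed result to render.)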
pipeline.waitForData(); //--- Checking for results and rendering data if it's ready @@ -244,15 +260,14 @@ int main(int argc, char *argv[]) { cv::Size viewSize = view.getSize(); if (outputResolution.height > viewSize.height || outputResolution.width > viewSize.width) outputResolution = viewSize; - } - else { + } else { outputResolution = cv::Size{ std::stoi(FLAGS_output_resolution.substr(0, found)), - std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length())) - }; + std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length()))}; } - outputTransform = OutputTransform(result->asRef().resultImage.size(), outputResolution); + outputTransform = + OutputTransform(result->asRef().resultImage.size(), outputResolution); outputResolution = outputTransform.computeResolution(); } @@ -261,14 +276,17 @@ int main(int argc, char *argv[]) { presenter.drawGraphs(outFrame); renderMetrics.update(renderingStart); metrics.update(result->metaData->asRef().timeStamp, - outFrame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + outFrame, + {10, 22}, + cv::FONT_HERSHEY_COMPLEX, + 0.65); videoWriter.write(outFrame); if (!FLAGS_no_show) { view.show(outFrame); //--- Processing keyboard events auto key = cv::waitKey(1); - if (27 == key || 'q' == key || 'Q' == key) { // Esc + if (27 == key || 'q' == key || 'Q' == key) { // Esc keepRunning = false; } else { view.handleKey(key); @@ -277,7 +295,7 @@ int main(int argc, char *argv[]) { } framesProcessed++; } - } // while(keepRunning) + } // while(keepRunning) // ------------ Waiting for completion of data processing and rendering the rest of results --------- pipeline.waitForTotalCompletion(); @@ -291,14 +309,13 @@ int main(int argc, char *argv[]) { cv::Size viewSize = view.getSize(); if (outputResolution.height > viewSize.height || outputResolution.width > viewSize.width) outputResolution = viewSize; - } - else { + } else { outputResolution = cv::Size{ std::stoi(FLAGS_output_resolution.substr(0, found)), - std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length())) - }; + std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length()))}; } - outputTransform = OutputTransform(result->asRef().resultImage.size(), outputResolution); + outputTransform = + OutputTransform(result->asRef().resultImage.size(), outputResolution); outputResolution = outputTransform.computeResolution(); } @@ -306,7 +323,10 @@ int main(int argc, char *argv[]) { //--- Showing results and device information presenter.drawGraphs(outFrame); metrics.update(result->metaData->asRef().timeStamp, - outFrame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + outFrame, + {10, 22}, + cv::FONT_HERSHEY_COMPLEX, + 0.65); videoWriter.write(outFrame); if (!FLAGS_no_show) { view.show(outFrame); @@ -319,17 +339,16 @@ int main(int argc, char *argv[]) { slog::info << "Metrics report:" << slog::endl; metrics.logTotal(); - logLatencyPerStage(cap->getMetrics().getTotal().latency, pipeline.getPreprocessMetrics().getTotal().latency, - pipeline.getInferenceMetircs().getTotal().latency, pipeline.getPostprocessMetrics().getTotal().latency, - renderMetrics.getTotal().latency); + logLatencyPerStage(cap->getMetrics().getTotal().latency, + pipeline.getPreprocessMetrics().getTotal().latency, + pipeline.getInferenceMetircs().getTotal().latency, + pipeline.getPostprocessMetrics().getTotal().latency, + renderMetrics.getTotal().latency); slog::info << presenter.reportMeans() << slog::endl; - - } - catch (const std::exception& error) { + } catch 
(const std::exception& error) { slog::err << error.what() << slog::endl; return 1; - } - catch (...) { + } catch (...) { slog::err << "Unknown/internal exception happened." << slog::endl; return 1; } diff --git a/demos/image_processing_demo/cpp/visualizer.cpp b/demos/image_processing_demo/cpp/visualizer.cpp index 621bf3c473a..b0b61cdd615 100644 --- a/demos/image_processing_demo/cpp/visualizer.cpp +++ b/demos/image_processing_demo/cpp/visualizer.cpp @@ -14,11 +14,21 @@ // limitations under the License. */ -#include -#include -#include #include "visualizer.hpp" +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include Visualizer::Visualizer(const std::string& type) { if (type == "sr") @@ -74,11 +84,11 @@ cv::Mat Visualizer::renderResultData(ImageResult result, cv::Size& newResolution cv::resize(result.resultImage, result.resultImage, resolution); cv::resize(inputImg, inputImg, resolution); - if (inputImg.channels() != result.resultImage.channels()){ + if (inputImg.channels() != result.resultImage.channels()) { cv::cvtColor(result.resultImage, resultImg, cv::COLOR_GRAY2BGR); - } - else + } else { resultImg = result.resultImage; + } changeDisplayImg(); return displayImg; } @@ -95,8 +105,13 @@ void Visualizer::show(cv::Mat img) { int baseline = 0; int lineH = cv::getTextSize(helpMessage[0], cv::FONT_HERSHEY_COMPLEX_SMALL, 0.75, 1, &baseline).height + pad; for (size_t i = 0; i < 4; ++i) { - putHighlightedText(img, helpMessage[i], cv::Point(pad, margin + baseline + (i + 1) * lineH), cv::FONT_HERSHEY_COMPLEX, - 0.65, cv::Scalar(255, 0, 0), 2); + putHighlightedText(img, + helpMessage[i], + cv::Point(pad, margin + baseline + (i + 1) * lineH), + cv::FONT_HERSHEY_COMPLEX, + 0.65, + cv::Scalar(255, 0, 0), + 2); } } @@ -127,11 +142,21 @@ void Visualizer::changeDisplayImg() { void Visualizer::markImage(cv::Mat& image, const std::pair& marks, float alpha) { int pad = 25; std::pair positions(static_cast(image.cols) * alpha / 2.0f, - static_cast(image.cols) * (1 + alpha) / 2.0f); - putHighlightedText(image, marks.first, cv::Point(static_cast(positions.first) - pad, 25), - cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 0, 255), 2); - putHighlightedText(image, marks.second, cv::Point(static_cast(positions.second), 25), - cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 0, 255), 2); + static_cast(image.cols) * (1 + alpha) / 2.0f); + putHighlightedText(image, + marks.first, + cv::Point(static_cast(positions.first) - pad, 25), + cv::FONT_HERSHEY_COMPLEX, + 1, + cv::Scalar(0, 0, 255), + 2); + putHighlightedText(image, + marks.second, + cv::Point(static_cast(positions.second), 25), + cv::FONT_HERSHEY_COMPLEX, + 1, + cv::Scalar(0, 0, 255), + 2); } void Visualizer::drawSweepLine(cv::Mat& image) { diff --git a/demos/image_processing_demo/cpp/visualizer.hpp b/demos/image_processing_demo/cpp/visualizer.hpp index b19b251352b..6cb7bdb0c3b 100644 --- a/demos/image_processing_demo/cpp/visualizer.hpp +++ b/demos/image_processing_demo/cpp/visualizer.hpp @@ -14,12 +14,12 @@ // limitations under the License. 
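Both main.cpp files above repeat the same inline parsing of the -output_resolution flag. The pattern reduces to splitting the string on 'x' and converting both halves; a hypothetical standalone helper (not part of this patch) would look like:

    #include <opencv2/core.hpp>
    #include <string>

    // Parse a "WIDTHxHEIGHT" string such as "1280x720" into a cv::Size,
    // falling back to the given size (the demos use the current frame size) when no 'x' is present.
    cv::Size parseOutputResolution(const std::string& s, const cv::Size& fallback) {
        const size_t found = s.find('x');
        if (found == std::string::npos) {
            return fallback;
        }
        return cv::Size{std::stoi(s.substr(0, found)), std::stoi(s.substr(found + 1))};
    }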
*/ -#include -#include #include -#include -#include -#include +#include + +#include + +struct ImageResult; class Visualizer { private: @@ -51,15 +51,16 @@ class Visualizer { void markImage(cv::Mat& image, const std::pair& marks, float alpha); void drawSweepLine(cv::Mat& image); void changeDisplayImg(); + public: - Visualizer(const std::string& type=""); + Visualizer(const std::string& type = ""); cv::Size getSize(); // change display image for new input and result images cv::Mat renderResultData(ImageResult result, cv::Size& newResolution); // show display image or specified value - void show(cv::Mat img=cv::Mat()); + void show(cv::Mat img = cv::Mat()); void handleKey(int key); }; diff --git a/demos/object_detection_demo/cpp/main.cpp b/demos/object_detection_demo/cpp/main.cpp index 191bf0ee58c..42997a2352e 100644 --- a/demos/object_detection_demo/cpp/main.cpp +++ b/demos/object_detection_demo/cpp/main.cpp @@ -14,68 +14,94 @@ // limitations under the License. */ +#include +#include + +#include +#include +#include +#include +#include #include -#include +#include +#include +#include #include +#include #include -#include +#include +#include #include #include -#include +#include +#include +#include #include -#include +#include #include #include #include #include #include #include +#include +#include +#include +#include #include #include #include +#include +#include #include #include #include #include #include - DEFINE_INPUT_FLAGS DEFINE_OUTPUT_FLAGS static const char help_message[] = "Print a usage message."; -static const char at_message[] = "Required. Architecture type: centernet, faceboxes, retinaface, retinaface-pytorch, ssd or yolo"; +static const char at_message[] = + "Required. Architecture type: centernet, faceboxes, retinaface, retinaface-pytorch, ssd or yolo"; static const char model_message[] = "Required. Path to an .xml file with a trained model."; -static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). " -"Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " -"The demo will look for a suitable plugin for a specified device."; +static const char target_device_message[] = + "Optional. Specify the target device to infer on (the list of available devices is shown below). " + "Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " + "The demo will look for a suitable plugin for a specified device."; static const char labels_message[] = "Optional. Path to a file with labels mapping."; static const char layout_message[] = "Optional. Specify inputs layouts." -" Ex. NCHW or input0:NCHW,input1:NC in case of more than one input."; + " Ex. NCHW or input0:NCHW,input1:NC in case of more than one input."; static const char thresh_output_message[] = "Optional. Probability threshold for detections."; static const char raw_output_message[] = "Optional. Inference results as raw values."; -static const char input_resizable_message[] = "Optional. Enables resizable input with support of ROI crop & auto resize."; -static const char nireq_message[] = "Optional. Number of infer requests. If this option is omitted, number of infer requests is determined automatically."; +static const char input_resizable_message[] = + "Optional. Enables resizable input with support of ROI crop & auto resize."; +static const char nireq_message[] = "Optional. Number of infer requests. 
If this option is omitted, number of infer " + "requests is determined automatically."; static const char num_threads_message[] = "Optional. Number of threads."; static const char num_streams_message[] = "Optional. Number of streams to use for inference on the CPU or/and GPU in " -"throughput mode (for HETERO and MULTI device cases use format " -":,: or just )"; + "throughput mode (for HETERO and MULTI device cases use format " + ":,: or just )"; static const char no_show_message[] = "Optional. Don't show output."; static const char utilization_monitors_message[] = "Optional. List of monitors to show initially."; -static const char iou_thresh_output_message[] = "Optional. Filtering intersection over union threshold for overlapping boxes."; +static const char iou_thresh_output_message[] = + "Optional. Filtering intersection over union threshold for overlapping boxes."; static const char yolo_af_message[] = "Optional. Use advanced postprocessing/filtering algorithm for YOLO."; -static const char output_resolution_message[] = "Optional. Specify the maximum output window resolution " +static const char output_resolution_message[] = + "Optional. Specify the maximum output window resolution " "in (width x height) format. Example: 1280x720. Input frame size used by default."; static const char anchors_message[] = "Optional. A comma separated list of anchors. " - "By default used default anchors for model. Only for YOLOV4 architecture type."; + "By default used default anchors for model. Only for YOLOV4 architecture type."; static const char masks_message[] = "Optional. A comma separated list of mask for anchors. " - "By default used default masks for model. Only for YOLOV4 architecture type."; + "By default used default masks for model. Only for YOLOV4 architecture type."; static const char reverse_input_channels_message[] = "Optional. Switch the input channels order from BGR to RGB."; -static const char mean_values_message[] = "Optional. Normalize input by subtracting the mean values per channel. Example: \"255.0 255.0 255.0\""; +static const char mean_values_message[] = + "Optional. Normalize input by subtracting the mean values per channel. Example: \"255.0 255.0 255.0\""; static const char scale_values_message[] = "Optional. Divide input by scale values per channel. Division is applied " - "after mean values subtraction. Example: \"255.0 255.0 255.0\""; + "after mean values subtraction. 
Example: \"255.0 255.0 255.0\""; DEFINE_bool(h, false, help_message); DEFINE_string(at, "", at_message); @@ -101,8 +127,8 @@ DEFINE_string(mean_values, "", mean_values_message); DEFINE_string(scale_values, "", scale_values_message); /** -* \brief This function shows a help message -*/ + * \brief This function shows a help message + */ static void showUsage() { std::cout << std::endl; std::cout << "object_detection_demo [OPTION]" << std::endl; @@ -154,12 +180,15 @@ class ColorPalette { return dh * dh + ds * ds + dv * dv; } - static cv::Scalar maxMinDistance(const std::vector& colorSet, const std::vector& colorCandidates) { + static cv::Scalar maxMinDistance(const std::vector& colorSet, + const std::vector& colorCandidates) { std::vector distances; distances.reserve(colorCandidates.size()); for (auto& c1 : colorCandidates) { - auto min = *std::min_element(colorSet.begin(), colorSet.end(), - [&c1](const cv::Scalar& a, const cv::Scalar& b) { return distance(c1, a) < distance(c1, b); }); + auto min = + *std::min_element(colorSet.begin(), colorSet.end(), [&c1](const cv::Scalar& a, const cv::Scalar& b) { + return distance(c1, a) < distance(c1, b); + }); distances.push_back(distance(c1, min)); } auto max = std::max_element(distances.begin(), distances.end()); @@ -176,15 +205,16 @@ class ColorPalette { public: explicit ColorPalette(size_t n) { palette.reserve(n); - std::vector hsvColors(1, { 1., 1., 1. }); + std::vector hsvColors(1, {1., 1., 1.}); std::vector colorCandidates; size_t numCandidates = 100; hsvColors.reserve(n); colorCandidates.resize(numCandidates); for (size_t i = 1; i < n; ++i) { - std::generate(colorCandidates.begin(), colorCandidates.end(), - []() { return cv::Scalar{ getRandom(), getRandom(0.8, 1.0), getRandom(0.5, 1.0) }; }); + std::generate(colorCandidates.begin(), colorCandidates.end(), []() { + return cv::Scalar{getRandom(), getRandom(0.8, 1.0), getRandom(0.5, 1.0)}; + }); hsvColors.push_back(maxMinDistance(hsvColors, colorCandidates)); } @@ -198,12 +228,12 @@ class ColorPalette { } } - const cv::Scalar& operator[] (size_t index) const { + const cv::Scalar& operator[](size_t index) const { return palette[index % palette.size()]; } }; -bool ParseAndCheckCommandLine(int argc, char *argv[]) { +bool ParseAndCheckCommandLine(int argc, char* argv[]) { // ---------------------------Parsing and validation of input args-------------------------------------- gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); if (FLAGS_h) { @@ -250,21 +280,22 @@ cv::Mat renderDetectionData(DetectionResult& result, const ColorPalette& palette for (auto& obj : result.objects) { if (FLAGS_r) { - slog::debug << " " - << std::left << std::setw(9) << obj.label << " | " - << std::setw(10) << obj.confidence << " | " - << std::setw(4) << int(obj.x) << " | " - << std::setw(4) << int(obj.y) << " | " - << std::setw(4) << int(obj.x + obj.width) << " | " - << std::setw(4) << int(obj.y + obj.height) - << slog::endl; + slog::debug << " " << std::left << std::setw(9) << obj.label << " | " << std::setw(10) << obj.confidence + << " | " << std::setw(4) << int(obj.x) << " | " << std::setw(4) << int(obj.y) << " | " + << std::setw(4) << int(obj.x + obj.width) << " | " << std::setw(4) << int(obj.y + obj.height) + << slog::endl; } outputTransform.scaleRect(obj); std::ostringstream conf; conf << ":" << std::fixed << std::setprecision(1) << obj.confidence * 100 << '%'; const auto& color = palette[obj.labelID]; - putHighlightedText(outputImg, obj.label + conf.str(), - cv::Point2f(obj.x, obj.y - 5), 
cv::FONT_HERSHEY_COMPLEX_SMALL, 1, color, 2); + putHighlightedText(outputImg, + obj.label + conf.str(), + cv::Point2f(obj.x, obj.y - 5), + cv::FONT_HERSHEY_COMPLEX_SMALL, + 1, + color, + 2); cv::rectangle(outputImg, obj, color, 2); } @@ -273,14 +304,12 @@ cv::Mat renderDetectionData(DetectionResult& result, const ColorPalette& palette outputTransform.scaleCoord(lmark); cv::circle(outputImg, lmark, 2, cv::Scalar(0, 255, 255), -1); } - } - catch (const std::bad_cast&) {} + } catch (const std::bad_cast&) {} return outputImg; } - -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { try { PerformanceMetrics metrics; @@ -298,18 +327,13 @@ int main(int argc, char *argv[]) { for (auto& str : strAnchors) { anchors.push_back(std::stof(str)); } - } catch(...) { - throw std::runtime_error("Invalid anchors list is provided."); - } + } catch (...) { throw std::runtime_error("Invalid anchors list is provided."); } try { for (auto& str : strMasks) { masks.push_back(std::stoll(str)); } - } - catch (...) { - throw std::runtime_error("Invalid masks list is provided."); - } + } catch (...) { throw std::runtime_error("Invalid masks list is provided."); } //------------------------------- Preparing Input ------------------------------------------------------ auto cap = openImagesCapture(FLAGS_i, FLAGS_loop, FLAGS_nireq == 1 ? read_type::efficient : read_type::safe); @@ -323,25 +347,38 @@ int main(int argc, char *argv[]) { std::unique_ptr model; if (FLAGS_at == "centernet") { - model.reset(new ModelCenterNet(FLAGS_m, (float)FLAGS_t, labels, FLAGS_layout)); - } - else if (FLAGS_at == "faceboxes") { - model.reset(new ModelFaceBoxes(FLAGS_m, (float)FLAGS_t, FLAGS_auto_resize, (float)FLAGS_iou_t, FLAGS_layout)); - } - else if (FLAGS_at == "retinaface") { - model.reset(new ModelRetinaFace(FLAGS_m, (float)FLAGS_t, FLAGS_auto_resize, (float)FLAGS_iou_t, FLAGS_layout)); - } - else if (FLAGS_at == "retinaface-pytorch") { - model.reset(new ModelRetinaFacePT(FLAGS_m, (float)FLAGS_t, FLAGS_auto_resize, (float)FLAGS_iou_t, FLAGS_layout)); - } - else if (FLAGS_at == "ssd") { - model.reset(new ModelSSD(FLAGS_m, (float)FLAGS_t, FLAGS_auto_resize, labels, FLAGS_layout)); - } - else if (FLAGS_at == "yolo") { - model.reset(new ModelYolo(FLAGS_m, (float)FLAGS_t, FLAGS_auto_resize, FLAGS_yolo_af, (float)FLAGS_iou_t, - labels, anchors, masks, FLAGS_layout)); - } - else { + model.reset(new ModelCenterNet(FLAGS_m, static_cast(FLAGS_t), labels, FLAGS_layout)); + } else if (FLAGS_at == "faceboxes") { + model.reset(new ModelFaceBoxes(FLAGS_m, + static_cast(FLAGS_t), + FLAGS_auto_resize, + static_cast(FLAGS_iou_t), + FLAGS_layout)); + } else if (FLAGS_at == "retinaface") { + model.reset(new ModelRetinaFace(FLAGS_m, + static_cast(FLAGS_t), + FLAGS_auto_resize, + static_cast(FLAGS_iou_t), + FLAGS_layout)); + } else if (FLAGS_at == "retinaface-pytorch") { + model.reset(new ModelRetinaFacePT(FLAGS_m, + static_cast(FLAGS_t), + FLAGS_auto_resize, + static_cast(FLAGS_iou_t), + FLAGS_layout)); + } else if (FLAGS_at == "ssd") { + model.reset(new ModelSSD(FLAGS_m, static_cast(FLAGS_t), FLAGS_auto_resize, labels, FLAGS_layout)); + } else if (FLAGS_at == "yolo") { + model.reset(new ModelYolo(FLAGS_m, + static_cast(FLAGS_t), + FLAGS_auto_resize, + FLAGS_yolo_af, + static_cast(FLAGS_iou_t), + labels, + anchors, + masks, + FLAGS_layout)); + } else { slog::err << "No model type or invalid model type (-at) provided: " + FLAGS_at << slog::endl; return -1; } @@ -350,10 +387,9 @@ int main(int argc, char *argv[]) { ov::Core core; - 
AsyncPipeline pipeline( - std::move(model), - ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), - core); + AsyncPipeline pipeline(std::move(model), + ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), + core); Presenter presenter(FLAGS_u); bool keepRunning = true; @@ -382,7 +418,7 @@ int main(int argc, char *argv[]) { } frameNum = pipeline.submitData(ImageInputData(curr_frame), - std::make_shared(curr_frame, startTime)); + std::make_shared(curr_frame, startTime)); } if (frameNum == 0) { @@ -391,14 +427,14 @@ int main(int argc, char *argv[]) { } else { outputResolution = cv::Size{ std::stoi(FLAGS_output_resolution.substr(0, found)), - std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length())) - }; + std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length()))}; outputTransform = OutputTransform(curr_frame.size(), outputResolution); outputResolution = outputTransform.computeResolution(); } } - //--- Waiting for free input slot or output data available. Function will return immediately if any of them are available. + //--- Waiting for free input slot or output data available. Function will return immediately if any of them + // are available. pipeline.waitForData(); //--- Checking for results and rendering data if it's ready @@ -412,7 +448,10 @@ int main(int argc, char *argv[]) { presenter.drawGraphs(outFrame); renderMetrics.update(renderingStart); metrics.update(result->metaData->asRef().timeStamp, - outFrame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + outFrame, + {10, 22}, + cv::FONT_HERSHEY_COMPLEX, + 0.65); videoWriter.write(outFrame); framesProcessed++; @@ -428,22 +467,24 @@ int main(int argc, char *argv[]) { } } } - } // while(keepRunning) + } // while(keepRunning) // ------------ Waiting for completion of data processing and rendering the rest of results --------- pipeline.waitForTotalCompletion(); for (; framesProcessed <= frameNum; framesProcessed++) { result = pipeline.getResult(); - if (result != nullptr) - { + if (result != nullptr) { auto renderingStart = std::chrono::steady_clock::now(); cv::Mat outFrame = renderDetectionData(result->asRef(), palette, outputTransform); //--- Showing results and device information presenter.drawGraphs(outFrame); renderMetrics.update(renderingStart); metrics.update(result->metaData->asRef().timeStamp, - outFrame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + outFrame, + {10, 22}, + cv::FONT_HERSHEY_COMPLEX, + 0.65); videoWriter.write(outFrame); if (!FLAGS_no_show) { cv::imshow("Detection Results", outFrame); @@ -455,16 +496,16 @@ int main(int argc, char *argv[]) { slog::info << "Metrics report:" << slog::endl; metrics.logTotal(); - logLatencyPerStage(cap->getMetrics().getTotal().latency, pipeline.getPreprocessMetrics().getTotal().latency, - pipeline.getInferenceMetircs().getTotal().latency, pipeline.getPostprocessMetrics().getTotal().latency, - renderMetrics.getTotal().latency); + logLatencyPerStage(cap->getMetrics().getTotal().latency, + pipeline.getPreprocessMetrics().getTotal().latency, + pipeline.getInferenceMetircs().getTotal().latency, + pipeline.getPostprocessMetrics().getTotal().latency, + renderMetrics.getTotal().latency); slog::info << presenter.reportMeans() << slog::endl; - } - catch (const std::exception& error) { + } catch (const std::exception& error) { slog::err << error.what() << slog::endl; return 1; - } - catch (...) { + } catch (...) { slog::err << "Unknown/internal exception happened." 
<< slog::endl; return 1; } diff --git a/demos/segmentation_demo/cpp/main.cpp b/demos/segmentation_demo/cpp/main.cpp index c361d67fa1d..f13a4f715a3 100644 --- a/demos/segmentation_demo/cpp/main.cpp +++ b/demos/segmentation_demo/cpp/main.cpp @@ -14,19 +14,35 @@ // limitations under the License. */ +#include +#include + +#include +#include +#include #include #include -#include +#include #include +#include +#include +#include -#include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include -#include +#include +#include #include #include #include @@ -38,22 +54,26 @@ DEFINE_OUTPUT_FLAGS static const char help_message[] = "Print a usage message."; static const char model_message[] = "Required. Path to an .xml file with a trained model."; -static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). " -"Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " -"The demo will look for a suitable plugin for a specified device."; +static const char target_device_message[] = + "Optional. Specify the target device to infer on (the list of available devices is shown below). " + "Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " + "The demo will look for a suitable plugin for a specified device."; static const char labels_message[] = "Optional. Path to a file with labels mapping."; static const char layout_message[] = "Optional. Specify inputs layouts." -" Ex. NCHW or input0:NCHW,input1:NC in case of more than one input."; + " Ex. NCHW or input0:NCHW,input1:NC in case of more than one input."; static const char raw_output_message[] = "Optional. Output inference results as mask histogram."; -static const char nireq_message[] = "Optional. Number of infer requests. If this option is omitted, number of infer requests is determined automatically."; -static const char input_resizable_message[] = "Optional. Enables resizable input with support of ROI crop & auto resize."; +static const char nireq_message[] = "Optional. Number of infer requests. If this option is omitted, number of infer " + "requests is determined automatically."; +static const char input_resizable_message[] = + "Optional. Enables resizable input with support of ROI crop & auto resize."; static const char num_threads_message[] = "Optional. Number of threads."; static const char num_streams_message[] = "Optional. Number of streams to use for inference on the CPU or/and GPU in " -"throughput mode (for HETERO and MULTI device cases use format " -":,: or just )"; + "throughput mode (for HETERO and MULTI device cases use format " + ":,: or just )"; static const char no_show_message[] = "Optional. Don't show output."; static const char utilization_monitors_message[] = "Optional. List of monitors to show initially."; -static const char output_resolution_message[] = "Optional. Specify the maximum output window resolution " +static const char output_resolution_message[] = + "Optional. Specify the maximum output window resolution " "in (width x height) format. Example: 1280x720. Input frame size used by default."; static const char only_masks_message[] = "Optional. Display only masks. 
Could be switched by TAB key."; @@ -73,8 +93,8 @@ DEFINE_string(output_resolution, "", output_resolution_message); DEFINE_bool(only_masks, false, only_masks_message); /** -* \brief This function shows a help message -*/ + * \brief This function shows a help message + */ static void showUsage() { std::cout << std::endl; std::cout << "segmentation_demo [OPTION]" << std::endl; @@ -100,8 +120,7 @@ static void showUsage() { std::cout << " -only_masks " << only_masks_message << std::endl; } - -bool ParseAndCheckCommandLine(int argc, char *argv[]) { +bool ParseAndCheckCommandLine(int argc, char* argv[]) { // ---------------------------Parsing and validation of input args-------------------------------------- gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); if (FLAGS_h) { @@ -125,28 +144,9 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) { } static const Color PASCAL_VOC_COLORS[] = { - { 0, 0, 0 }, - { 128, 0, 0 }, - { 0, 128, 0 }, - { 128, 128, 0 }, - { 0, 0, 128 }, - { 128, 0, 128 }, - { 0, 128, 128 }, - { 128, 128, 128 }, - { 64, 0, 0 }, - { 192, 0, 0 }, - { 64, 128, 0 }, - { 192, 128, 0 }, - { 64, 0, 128 }, - { 192, 0, 128 }, - { 64, 128, 128 }, - { 192, 128, 128 }, - { 0, 64, 0 }, - { 128, 64, 0 }, - { 0, 192, 0 }, - { 128, 192, 0 }, - { 0, 64, 128 } -}; + {0, 0, 0}, {128, 0, 0}, {0, 128, 0}, {128, 128, 0}, {0, 0, 128}, {128, 0, 128}, {0, 128, 128}, + {128, 128, 128}, {64, 0, 0}, {192, 0, 0}, {64, 128, 0}, {192, 128, 0}, {64, 0, 128}, {192, 0, 128}, + {64, 128, 128}, {192, 128, 128}, {0, 64, 0}, {128, 64, 0}, {0, 192, 0}, {128, 192, 0}, {0, 64, 128}}; cv::Mat applyColorMap(cv::Mat input) { // Initializing colors array if needed @@ -158,7 +158,9 @@ cv::Mat applyColorMap(cv::Mat input) { colors = cv::Mat(256, 1, CV_8UC3); std::size_t i = 0; for (; i < arraySize(PASCAL_VOC_COLORS); ++i) { - colors.at(i, 0) = { PASCAL_VOC_COLORS[i].blue(), PASCAL_VOC_COLORS[i].green(), PASCAL_VOC_COLORS[i].red() }; + colors.at(i, 0) = {PASCAL_VOC_COLORS[i].blue(), + PASCAL_VOC_COLORS[i].green(), + PASCAL_VOC_COLORS[i].red()}; } for (; i < (std::size_t)colors.cols; ++i) { colors.at(i, 0) = cv::Vec3b(distr(rng), distr(rng), distr(rng)); @@ -184,7 +186,8 @@ cv::Mat renderSegmentationData(const ImageResult& result, OutputTransform& outpu } // Visualizing result data over source image - cv::Mat output = masks_only ? applyColorMap(result.resultImage) : inputImg / 2 + applyColorMap(result.resultImage) / 2; + cv::Mat output = + masks_only ? applyColorMap(result.resultImage) : inputImg / 2 + applyColorMap(result.resultImage) / 2; outputTransform.resize(output); return output; } @@ -195,9 +198,9 @@ void printRawResults(const ImageResult& result, std::vector labels) double min_val, max_val; cv::minMaxLoc(result.resultImage, &min_val, &max_val); - int max_classes = static_cast(max_val) + 1; // We use +1 for only background case - const float range[] = { 0, static_cast(max_classes) }; - const float * ranges[] = { range }; + int max_classes = static_cast(max_val) + 1; // We use +1 for only background case + const float range[] = {0, static_cast(max_classes)}; + const float* ranges[] = {range}; cv::Mat histogram; cv::calcHist(&result.resultImage, 1, 0, cv::Mat(), histogram, 1, &max_classes, ranges); @@ -206,11 +209,9 @@ void printRawResults(const ImageResult& result, std::vector labels) const int value = static_cast(histogram.at(i)); if (value > 0) { std::string label = (size_t)i < labels.size() ? 
labels[i] : "#" + std::to_string(i); - slog::debug << " " - << std::setw(16) << std::left << label << " | " - << std::setw(6) << value << " | " - << std::setw(5) << std::setprecision(2) << std::fixed << std::right << value / all * 100 << "%" - << slog::endl; + slog::debug << " " << std::setw(16) << std::left << label << " | " << std::setw(6) << value << " | " + << std::setw(5) << std::setprecision(2) << std::fixed << std::right << value / all * 100 << "%" + << slog::endl; } } } @@ -234,7 +235,8 @@ int main(int argc, char* argv[]) { ov::Core core; AsyncPipeline pipeline( std::unique_ptr(new SegmentationModel(FLAGS_m, FLAGS_auto_resize, FLAGS_layout)), - ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), core); + ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), + core); Presenter presenter(FLAGS_u); std::vector labels; @@ -267,24 +269,23 @@ int main(int argc, char* argv[]) { } frameNum = pipeline.submitData(ImageInputData(curr_frame), - std::make_shared(curr_frame, startTime)); + std::make_shared(curr_frame, startTime)); } if (frameNum == 0) { if (found == std::string::npos) { outputResolution = curr_frame.size(); - } - else { + } else { outputResolution = cv::Size{ std::stoi(FLAGS_output_resolution.substr(0, found)), - std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length())) - }; + std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length()))}; outputTransform = OutputTransform(curr_frame.size(), outputResolution); outputResolution = outputTransform.computeResolution(); } } - //--- Waiting for free input slot or output data available. Function will return immediately if any of them are available. + //--- Waiting for free input slot or output data available. Function will return immediately if any of them + // are available. 
pipeline.waitForData(); //--- Checking for results and rendering data if it's ready @@ -300,7 +301,10 @@ int main(int argc, char* argv[]) { presenter.drawGraphs(outFrame); renderMetrics.update(renderingStart); metrics.update(result->metaData->asRef().timeStamp, - outFrame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + outFrame, + {10, 22}, + cv::FONT_HERSHEY_COMPLEX, + 0.65); videoWriter.write(outFrame); framesProcessed++; if (!FLAGS_no_show) { @@ -308,7 +312,7 @@ int main(int argc, char* argv[]) { //--- Processing keyboard events auto key = cv::waitKey(1); - if (27 == key || 'q' == key || 'Q' == key) { // Esc + if (27 == key || 'q' == key || 'Q' == key) { // Esc keepRunning = false; } else if (9 == key) { only_masks = !only_masks; @@ -317,7 +321,7 @@ int main(int argc, char* argv[]) { } } } - } // while(keepRunning) + } // while(keepRunning) // ------------ Waiting for completion of data processing and rendering the rest of results --------- pipeline.waitForTotalCompletion(); @@ -332,7 +336,10 @@ int main(int argc, char* argv[]) { } presenter.drawGraphs(outFrame); metrics.update(result->metaData->asRef().timeStamp, - outFrame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + outFrame, + {10, 22}, + cv::FONT_HERSHEY_COMPLEX, + 0.65); videoWriter.write(outFrame); if (!FLAGS_no_show) { cv::imshow("Segmentation Results", outFrame); @@ -344,16 +351,16 @@ int main(int argc, char* argv[]) { slog::info << "Metrics report:" << slog::endl; metrics.logTotal(); - logLatencyPerStage(cap->getMetrics().getTotal().latency, pipeline.getPreprocessMetrics().getTotal().latency, - pipeline.getInferenceMetircs().getTotal().latency, pipeline.getPostprocessMetrics().getTotal().latency, - renderMetrics.getTotal().latency); + logLatencyPerStage(cap->getMetrics().getTotal().latency, + pipeline.getPreprocessMetrics().getTotal().latency, + pipeline.getInferenceMetircs().getTotal().latency, + pipeline.getPostprocessMetrics().getTotal().latency, + renderMetrics.getTotal().latency); slog::info << presenter.reportMeans() << slog::endl; - } - catch (const std::exception& error) { + } catch (const std::exception& error) { slog::err << error.what() << slog::endl; return 1; - } - catch (...) { + } catch (...) { slog::err << "Unknown/internal exception happened." 
<< slog::endl; return 1; } From 1e28cb92cdae93933befd534705e509eb8d92a54 Mon Sep 17 00:00:00 2001 From: ivikhrev Date: Thu, 31 Mar 2022 18:43:04 +0300 Subject: [PATCH 2/6] upd pedestrian trakcer demo --- .../cpp/include/cnn.hpp | 33 +- .../cpp/include/core.hpp | 29 +- .../cpp/include/descriptor.hpp | 43 +- .../cpp/include/distance.hpp | 36 +- .../cpp/include/logging.hpp | 3 +- .../cpp/include/pedestrian_tracker_demo.hpp | 60 +-- .../cpp/include/tracker.hpp | 213 +++++----- .../cpp/include/utils.hpp | 51 ++- demos/pedestrian_tracker_demo/cpp/main.cpp | 150 ++++--- demos/pedestrian_tracker_demo/cpp/src/cnn.cpp | 58 +-- .../cpp/src/distance.cpp | 22 +- .../cpp/src/tracker.cpp | 382 +++++++++--------- .../pedestrian_tracker_demo/cpp/src/utils.cpp | 36 +- 13 files changed, 577 insertions(+), 539 deletions(-) diff --git a/demos/pedestrian_tracker_demo/cpp/include/cnn.hpp b/demos/pedestrian_tracker_demo/cpp/include/cnn.hpp index 98767ba7516..4f3a0433f24 100644 --- a/demos/pedestrian_tracker_demo/cpp/include/cnn.hpp +++ b/demos/pedestrian_tracker_demo/cpp/include/cnn.hpp @@ -4,21 +4,20 @@ #pragma once -#include -#include +#include + +#include #include #include -#include -#include + +#include #include -#include /** * @brief Base class of config for model */ struct ModelConfigTracker { - explicit ModelConfigTracker(const std::string& path_to_model) - : path_to_model(path_to_model) {} + explicit ModelConfigTracker(const std::string& path_to_model) : path_to_model(path_to_model) {} /** @brief Path to model description */ std::string path_to_model; @@ -36,9 +35,7 @@ class BaseModel { /** * @brief Constructor */ - BaseModel(const Config& config, - const ov::Core& core, - const std::string & deviceName); + BaseModel(const Config& config, const ov::Core& core, const std::string& deviceName); /** * @brief Descructor @@ -68,7 +65,7 @@ class BaseModel { ov::Core core; /** @brief device */ std::string device_name; - /** @brief Model input layout */ + /** @brief Model input layout */ ov::Layout input_layout; /** @brief Compiled model */ ov::CompiledModel compiled_model; @@ -86,16 +83,16 @@ class BaseModel { class VectorCNN : public BaseModel { public: - VectorCNN(const ModelConfigTracker& config, - const ov::Core & core, - const std::string & deviceName); + VectorCNN(const ModelConfigTracker& config, const ov::Core& core, const std::string& deviceName); - void Compute(const cv::Mat& image, - cv::Mat* vector, cv::Size outp_shape = cv::Size()) const; + void Compute(const cv::Mat& image, cv::Mat* vector, cv::Size outp_shape = cv::Size()) const; void Compute(const std::vector& images, - std::vector* vectors, cv::Size outp_shape = cv::Size()) const; + std::vector* vectors, + cv::Size outp_shape = cv::Size()) const; - int size() const { return result_size; } + int size() const { + return result_size; + } private: int result_size; // Length of result diff --git a/demos/pedestrian_tracker_demo/cpp/include/core.hpp b/demos/pedestrian_tracker_demo/cpp/include/core.hpp index a784b9bc757..a2de09d2424 100644 --- a/demos/pedestrian_tracker_demo/cpp/include/core.hpp +++ b/demos/pedestrian_tracker_demo/cpp/include/core.hpp @@ -4,31 +4,27 @@ #pragma once -#include - #include #include #include #include +#include + /// /// \brief The TrackedObject struct defines properties of detected object. /// struct TrackedObject { - cv::Rect rect; ///< Detected object ROI (zero area if N/A). - double confidence; ///< Detection confidence level (-1 if N/A). - int64_t frame_idx; ///< Frame index where object was detected (-1 if N/A). 
- int object_id; ///< Unique object identifier (-1 if N/A). + cv::Rect rect; ///< Detected object ROI (zero area if N/A). + double confidence; ///< Detection confidence level (-1 if N/A). + int64_t frame_idx; ///< Frame index where object was detected (-1 if N/A). + int object_id; ///< Unique object identifier (-1 if N/A). uint64_t timestamp; ///< Timestamp in milliseconds. /// /// \brief Default constructor. /// - TrackedObject() - : confidence(-1), - frame_idx(-1), - object_id(-1), - timestamp(0) {} + TrackedObject() : confidence(-1), frame_idx(-1), object_id(-1), timestamp(0) {} /// /// \brief Constructor with parameters. @@ -37,13 +33,12 @@ struct TrackedObject { /// \param frame_idx Index of frame. /// \param object_id Object ID. /// - TrackedObject(const cv::Rect &rect, float confidence, int64_t frame_idx, - int object_id) + TrackedObject(const cv::Rect& rect, float confidence, int64_t frame_idx, int object_id) : rect(rect), - confidence(confidence), - frame_idx(frame_idx), - object_id(object_id), - timestamp(0) {} + confidence(confidence), + frame_idx(frame_idx), + object_id(object_id), + timestamp(0) {} }; using TrackedObjects = std::deque; diff --git a/demos/pedestrian_tracker_demo/cpp/include/descriptor.hpp b/demos/pedestrian_tracker_demo/cpp/include/descriptor.hpp index 0c510988472..fe23d6b95b2 100644 --- a/demos/pedestrian_tracker_demo/cpp/include/descriptor.hpp +++ b/demos/pedestrian_tracker_demo/cpp/include/descriptor.hpp @@ -6,8 +6,10 @@ #include #include #include + #include #include + #include "cnn.hpp" #include "core.hpp" #include "logging.hpp" @@ -29,20 +31,18 @@ class IImageDescriptor { /// \param[in] mat Color image. /// \param[out] descr Computed descriptor. /// - virtual void Compute(const cv::Mat &mat, cv::Mat *descr) = 0; + virtual void Compute(const cv::Mat& mat, cv::Mat* descr) = 0; /// /// \brief Computes image descriptors in batches. /// \param[in] mats Images of interest. /// \param[out] descrs Matrices to store the computed descriptors. /// - virtual void Compute(const std::vector &mats, - std::vector *descrs) = 0; + virtual void Compute(const std::vector& mats, std::vector* descrs) = 0; virtual ~IImageDescriptor() {} }; - /// /// \brief Uses resized image as descriptor. /// @@ -53,25 +53,27 @@ class ResizedImageDescriptor : public IImageDescriptor { /// \param[in] descr_size Size of the descriptor (resized image). /// \param[in] interpolation Interpolation algorithm. /// - explicit ResizedImageDescriptor(const cv::Size &descr_size, - const cv::InterpolationFlags interpolation) - : descr_size_(descr_size), interpolation_(interpolation) { - PT_CHECK_GT(descr_size.width, 0); - PT_CHECK_GT(descr_size.height, 0); - } + explicit ResizedImageDescriptor(const cv::Size& descr_size, const cv::InterpolationFlags interpolation) + : descr_size_(descr_size), + interpolation_(interpolation) { + PT_CHECK_GT(descr_size.width, 0); + PT_CHECK_GT(descr_size.height, 0); + } /// /// \brief Returns descriptor size. /// \return Number of elements in the descriptor. /// - cv::Size size() const override { return descr_size_; } + cv::Size size() const override { + return descr_size_; + } /// /// \brief Computes image descriptor. /// \param[in] mat Frame containing the image of interest. /// \param[out] descr Matrix to store the computed descriptor. 
/// - void Compute(const cv::Mat &mat, cv::Mat *descr) override { + void Compute(const cv::Mat& mat, cv::Mat* descr) override { PT_CHECK(descr != nullptr); PT_CHECK(!mat.empty()); cv::resize(mat, *descr, descr_size_, 0, 0, interpolation_); @@ -82,11 +84,10 @@ class ResizedImageDescriptor : public IImageDescriptor { /// \param[in] mats Frames containing images of interest. /// \param[out] descrs Matrices to store the computed descriptors. // - void Compute(const std::vector &mats, - std::vector *descrs) override { + void Compute(const std::vector& mats, std::vector* descrs) override { PT_CHECK(descrs != nullptr); descrs->resize(mats.size()); - for (size_t i = 0; i < mats.size(); i++) { + for (size_t i = 0; i < mats.size(); i++) { Compute(mats[i], &(descrs[i])); } } @@ -97,16 +98,13 @@ class ResizedImageDescriptor : public IImageDescriptor { cv::InterpolationFlags interpolation_; }; - class Descriptor : public IImageDescriptor { private: VectorCNN handler; public: - Descriptor(const ModelConfigTracker& config, - const ov::Core& core, - const std::string& deviceName): - handler(config, core, deviceName) {} + Descriptor(const ModelConfigTracker& config, const ov::Core& core, const std::string& deviceName) + : handler(config, core, deviceName) {} /// /// \brief Descriptor size getter. @@ -121,7 +119,7 @@ class Descriptor : public IImageDescriptor { /// \param[in] mat Color image. /// \param[out] descr Computed descriptor. /// - void Compute(const cv::Mat &mat, cv::Mat *descr) override { + void Compute(const cv::Mat& mat, cv::Mat* descr) override { handler.Compute(mat, descr); } @@ -130,8 +128,7 @@ class Descriptor : public IImageDescriptor { /// \param[in] mats Images of interest. /// \param[out] descrs Matrices to store the computed descriptors. /// - void Compute(const std::vector &mats, - std::vector *descrs) override { + void Compute(const std::vector& mats, std::vector* descrs) override { handler.Compute(mats, descrs); } }; diff --git a/demos/pedestrian_tracker_demo/cpp/include/distance.hpp b/demos/pedestrian_tracker_demo/cpp/include/distance.hpp index 505e0eb87f4..28bc4ade12c 100644 --- a/demos/pedestrian_tracker_demo/cpp/include/distance.hpp +++ b/demos/pedestrian_tracker_demo/cpp/include/distance.hpp @@ -4,9 +4,9 @@ #pragma once -#include #include -#include + +#include #include /// @@ -21,7 +21,7 @@ class IDescriptorDistance { /// \param[in] descr2 Second descriptor. /// \return Distance between two descriptors. /// - virtual float Compute(const cv::Mat &descr1, const cv::Mat &descr2) = 0; + virtual float Compute(const cv::Mat& descr1, const cv::Mat& descr2) = 0; /// /// \brief Computes distances between two descriptors in batches. @@ -29,8 +29,7 @@ class IDescriptorDistance { /// \param[in] descrs2 Batch of second descriptors. /// \return Distances between descriptors. /// - virtual std::vector Compute(const std::vector &descrs1, - const std::vector &descrs2) = 0; + virtual std::vector Compute(const std::vector& descrs1, const std::vector& descrs2) = 0; virtual ~IDescriptorDistance() {} }; @@ -45,7 +44,7 @@ class CosDistance : public IDescriptorDistance { /// \brief CosDistance constructor. /// \param[in] descriptor_size Descriptor size. /// - explicit CosDistance(const cv::Size &descriptor_size); + explicit CosDistance(const cv::Size& descriptor_size); /// /// \brief Computes distance between two descriptors. @@ -53,7 +52,7 @@ class CosDistance : public IDescriptorDistance { /// \param descr2 Second descriptor. /// \return Distance between two descriptors. 
/// - float Compute(const cv::Mat &descr1, const cv::Mat &descr2) override; + float Compute(const cv::Mat& descr1, const cv::Mat& descr2) override; /// /// \brief Computes distances between two descriptors in batches. @@ -61,15 +60,12 @@ class CosDistance : public IDescriptorDistance { /// \param[in] descrs2 Batch of second descriptors. /// \return Distances between descriptors. /// - std::vector Compute( - const std::vector &descrs1, - const std::vector &descrs2) override; + std::vector Compute(const std::vector& descrs1, const std::vector& descrs2) override; private: cv::Size descriptor_size_; }; - /// /// \brief Computes distance between images /// using MatchTemplate function from OpenCV library @@ -88,30 +84,30 @@ class MatchTemplateDistance : public IDescriptorDistance { /// Final distance is computed as: /// scale * distance + offset. /// - MatchTemplateDistance(int type = cv::TemplateMatchModes::TM_CCORR_NORMED, - float scale = -1, float offset = 1) - : type_(type), scale_(scale), offset_(offset) {} + MatchTemplateDistance(int type = cv::TemplateMatchModes::TM_CCORR_NORMED, float scale = -1, float offset = 1) + : type_(type), + scale_(scale), + offset_(offset) {} /// /// \brief Computes distance between image descriptors. /// \param[in] descr1 First image descriptor. /// \param[in] descr2 Second image descriptor. /// \return Distance between image descriptors. /// - float Compute(const cv::Mat &descr1, const cv::Mat &descr2) override; + float Compute(const cv::Mat& descr1, const cv::Mat& descr2) override; /// /// \brief Computes distances between two descriptors in batches. /// \param[in] descrs1 Batch of first descriptors. /// \param[in] descrs2 Batch of second descriptors. /// \return Distances between descriptors. /// - std::vector Compute(const std::vector &descrs1, - const std::vector &descrs2) override; + std::vector Compute(const std::vector& descrs1, const std::vector& descrs2) override; virtual ~MatchTemplateDistance() {} private: - int type_; ///< Method of MatchTemplate function computation. - float scale_; ///< Scale parameter for the distance. Final distance is - /// computed as: scale * distance + offset. + int type_; ///< Method of MatchTemplate function computation. + float scale_; ///< Scale parameter for the distance. Final distance is + /// computed as: scale * distance + offset. float offset_; ///< Offset parameter for the distance. Final distance is /// computed as: scale * distance + offset. }; diff --git a/demos/pedestrian_tracker_demo/cpp/include/logging.hpp b/demos/pedestrian_tracker_demo/cpp/include/logging.hpp index 4397ca122a3..6bfac58cd29 100644 --- a/demos/pedestrian_tracker_demo/cpp/include/logging.hpp +++ b/demos/pedestrian_tracker_demo/cpp/include/logging.hpp @@ -9,8 +9,7 @@ #define PT_CHECK(cond) IE_ASSERT(cond) << " " #define PT_CHECK_BINARY(actual, expected, op) \ - IE_ASSERT(actual op expected) << ". " \ - << actual << " vs " << expected << ". " + IE_ASSERT(actual op expected) << ". " << actual << " vs " << expected << ". 
" #define PT_CHECK_EQ(actual, expected) PT_CHECK_BINARY(actual, expected, ==) #define PT_CHECK_NE(actual, expected) PT_CHECK_BINARY(actual, expected, !=) diff --git a/demos/pedestrian_tracker_demo/cpp/include/pedestrian_tracker_demo.hpp b/demos/pedestrian_tracker_demo/cpp/include/pedestrian_tracker_demo.hpp index 6ce72ba2629..2bbb63e2f7e 100644 --- a/demos/pedestrian_tracker_demo/cpp/include/pedestrian_tracker_demo.hpp +++ b/demos/pedestrian_tracker_demo/cpp/include/pedestrian_tracker_demo.hpp @@ -5,8 +5,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once +#include #include #include + #include #include @@ -16,45 +18,54 @@ DEFINE_OUTPUT_FLAGS static const char help_message[] = "Print a usage message."; static const char first_frame_message[] = "Optional. The index of the first frame of the input to process. " - "The actual first frame captured depends on cv::VideoCapture implementation " - "and may have slightly different number."; + "The actual first frame captured depends on cv::VideoCapture implementation " + "and may have slightly different number."; static const char read_limit_message[] = "Optional. Read length limit before stopping or restarting reading the input."; -static const char pedestrian_detection_model_message[] = "Required. Path to the Pedestrian Detection Retail model (.xml) file."; -static const char pedestrian_reid_model_message[] = "Required. Path to the Pedestrian Reidentification Retail model (.xml) file."; -static const char target_device_detection_message[] = "Optional. Specify the target device for pedestrian detection " - "(the list of available devices is shown below). Default value is CPU. " - "Use \"-d HETERO:\" format to specify HETERO plugin."; -static const char target_device_reid_message[] = "Optional. Specify the target device for pedestrian reidentification " - "(the list of available devices is shown below). Default value is CPU. " - "Use \"-d HETERO:\" format to specify HETERO plugin."; +static const char pedestrian_detection_model_message[] = + "Required. Path to the Pedestrian Detection Retail model (.xml) file."; +static const char pedestrian_reid_model_message[] = + "Required. Path to the Pedestrian Reidentification Retail model (.xml) file."; +static const char target_device_detection_message[] = + "Optional. Specify the target device for pedestrian detection " + "(the list of available devices is shown below). Default value is CPU. " + "Use \"-d HETERO:\" format to specify HETERO plugin."; +static const char target_device_reid_message[] = + "Optional. Specify the target device for pedestrian reidentification " + "(the list of available devices is shown below). Default value is CPU. " + "Use \"-d HETERO:\" format to specify HETERO plugin."; static const char layout_det_model_message[] = "Optional. Specify inputs layouts." " Ex. NCHW or input0:NCHW,input1:NC in case of more than one input."; static const char raw_output_message[] = "Optional. Output pedestrian tracking results in a raw format " - "(compatible with MOTChallenge format)."; + "(compatible with MOTChallenge format)."; static const char no_show_message[] = "Optional. Don't show output."; static const char delay_message[] = "Optional. Delay between frames used for visualization. " - "If negative, the visualization is turned off (like with the option 'no_show'). " - "If zero, the visualization is made frame-by-frame."; -static const char output_log_message[] = "Optional. 
The file name to write output log file with results of pedestrian tracking. " - "The format of the log file is compatible with MOTChallenge format."; + "If negative, the visualization is turned off (like with the option 'no_show'). " + "If zero, the visualization is made frame-by-frame."; +static const char output_log_message[] = + "Optional. The file name to write output log file with results of pedestrian tracking. " + "The format of the log file is compatible with MOTChallenge format."; static const char utilization_monitors_message[] = "Optional. List of monitors to show initially."; static const char at_message[] = "Required. Architecture type for detector model: centernet, ssd or yolo."; static const char thresh_output_message[] = "Optional. Probability threshold for detections."; -static const char input_resizable_message[] = "Optional. Enables resizable input with support of ROI crop & auto resize."; -static const char iou_thresh_output_message[] = "Optional. Filtering intersection over union threshold for overlapping boxes."; +static const char input_resizable_message[] = + "Optional. Enables resizable input with support of ROI crop & auto resize."; +static const char iou_thresh_output_message[] = + "Optional. Filtering intersection over union threshold for overlapping boxes."; static const char yolo_af_message[] = "Optional. Use advanced postprocessing/filtering algorithm for YOLO."; static const char labels_message[] = "Optional. Path to a file with labels mapping."; -static const char nireq_message[] = "Optional. Number of infer requests for detector model. If this option is omitted, number of infer requests is determined automatically."; +static const char nireq_message[] = "Optional. Number of infer requests for detector model. If this option is omitted, " + "number of infer requests is determined automatically."; static const char num_threads_message[] = "Optional. Number of threads for detector model."; -static const char num_streams_message[] = "Optional. Number of streams to use for inference on the CPU or/and GPU in " -"throughput mode for detector model (for HETERO and MULTI device cases use format " -":,: or just )"; -static const char person_label_message[] = "Optional. Label of class person for detector. Default -1 for tracking all objects"; - +static const char num_streams_message[] = + "Optional. Number of streams to use for inference on the CPU or/and GPU in " + "throughput mode for detector model (for HETERO and MULTI device cases use format " + ":,: or just )"; +static const char person_label_message[] = + "Optional. Label of class person for detector. 
Default -1 for tracking all objects"; DEFINE_bool(h, false, help_message); DEFINE_uint32(first, 0, first_frame_message); -DEFINE_uint32(read_limit, gflags::uint32(std::numeric_limits::max()), read_limit_message); +DEFINE_uint32(read_limit, static_cast(std::numeric_limits::max()), read_limit_message); DEFINE_string(m_det, "", pedestrian_detection_model_message); DEFINE_string(m_reid, "", pedestrian_reid_model_message); DEFINE_string(d_det, "CPU", target_device_detection_message); @@ -76,7 +87,6 @@ DEFINE_uint32(nthreads, 0, num_threads_message); DEFINE_string(nstreams, "", num_streams_message); DEFINE_int32(person_label, -1, person_label_message); - /** * @brief This function show a help message */ diff --git a/demos/pedestrian_tracker_demo/cpp/include/tracker.hpp b/demos/pedestrian_tracker_demo/cpp/include/tracker.hpp index 984c125380e..9c9cf19e82b 100644 --- a/demos/pedestrian_tracker_demo/cpp/include/tracker.hpp +++ b/demos/pedestrian_tracker_demo/cpp/include/tracker.hpp @@ -4,20 +4,25 @@ #pragma once -#include "core.hpp" +#include -#include -#include -#include +#include #include -#include +#include #include +#include #include #include +#include + +#include +#include "core.hpp" +#include "logging.hpp" #include "utils.hpp" -#include "descriptor.hpp" -#include "distance.hpp" + +class IDescriptorDistance; +class IImageDescriptor; /// /// \brief The TrackerParams struct stores parameters of PedestrianTracker @@ -82,30 +87,36 @@ struct Track { /// \param descriptor_fast Fast descriptor. /// \param descriptor_strong Strong descriptor (reid embedding). /// - Track(const TrackedObjects &objs, const cv::Mat &last_image, - const cv::Mat &descriptor_fast, const cv::Mat &descriptor_strong) + Track(const TrackedObjects& objs, + const cv::Mat& last_image, + const cv::Mat& descriptor_fast, + const cv::Mat& descriptor_strong) : objects(objs), - predicted_rect(!objs.empty() ? objs.back().rect : cv::Rect()), - last_image(last_image), - descriptor_fast(descriptor_fast), - descriptor_strong(descriptor_strong), - lost(0), - length(1) { - PT_CHECK(!objs.empty()); - first_object = objs[0]; - } + predicted_rect(!objs.empty() ? objs.back().rect : cv::Rect()), + last_image(last_image), + descriptor_fast(descriptor_fast), + descriptor_strong(descriptor_strong), + lost(0), + length(1) { + PT_CHECK(!objs.empty()); + first_object = objs[0]; + } /// /// \brief empty returns if track does not contain objects. /// \return true if track does not contain objects. /// - bool empty() const { return objects.empty(); } + bool empty() const { + return objects.empty(); + } /// /// \brief size returns number of detected objects in a track. /// \return number of detected objects in a track. /// - size_t size() const { return objects.size(); } + size_t size() const { + return objects.size(); + } /// /// \brief operator [] return const reference to detected object with @@ -113,7 +124,9 @@ struct Track { /// \param i Index of object. /// \return const reference to detected object with specified index. /// - const TrackedObject &operator[](size_t i) const { return objects[i]; } + const TrackedObject& operator[](size_t i) const { + return objects[i]; + } /// /// \brief operator [] return non-const reference to detected object with @@ -121,13 +134,15 @@ struct Track { /// \param i Index of object. /// \return non-const reference to detected object with specified index. 
/// - TrackedObject &operator[](size_t i) { return objects[i]; } + TrackedObject& operator[](size_t i) { + return objects[i]; + } /// /// \brief back returns const reference to last object in track. /// \return const reference to last object in track. /// - const TrackedObject &back() const { + const TrackedObject& back() const { PT_CHECK(!empty()); return objects.back(); } @@ -136,18 +151,18 @@ struct Track { /// \brief back returns non-const reference to last object in track. /// \return non-const reference to last object in track. /// - TrackedObject &back() { + TrackedObject& back() { PT_CHECK(!empty()); return objects.back(); } - TrackedObjects objects; ///< Detected objects; + TrackedObjects objects; ///< Detected objects; cv::Rect predicted_rect; ///< Rectangle that represents predicted position /// and size of bounding box if track has been lost. - cv::Mat last_image; ///< Image of last detected object in track. + cv::Mat last_image; ///< Image of last detected object in track. cv::Mat descriptor_fast; ///< Fast descriptor. cv::Mat descriptor_strong; ///< Strong descriptor (reid embedding). - size_t lost; ///< How many frames ago track has been lost. + size_t lost; ///< How many frames ago track has been lost. TrackedObject first_object; ///< First object in track. size_t length; ///< Length of a track including number of objects that were @@ -181,7 +196,7 @@ class PedestrianTracker { /// parameters. /// \param[in] params - the pedestrian tracker parameters. /// - explicit PedestrianTracker(const TrackerParams ¶ms = TrackerParams()); + explicit PedestrianTracker(const TrackerParams& params = TrackerParams()); virtual ~PedestrianTracker() {} /// @@ -191,62 +206,61 @@ class PedestrianTracker { /// \param[in] timestamp Timestamp must be positive and measured in /// milliseconds /// - void Process(const cv::Mat &frame, const TrackedObjects &detections, - uint64_t timestamp); + void Process(const cv::Mat& frame, const TrackedObjects& detections, uint64_t timestamp); /// /// \brief Pipeline parameters getter. /// \return Parameters of pipeline. /// - const TrackerParams ¶ms() const; + const TrackerParams& params() const; /// /// \brief Fast descriptor getter. /// \return Fast descriptor used in pipeline. /// - const Descriptor &descriptor_fast() const; + const Descriptor& descriptor_fast() const; /// /// \brief Fast descriptor setter. /// \param[in] val Fast descriptor used in pipeline. /// - void set_descriptor_fast(const Descriptor &val); + void set_descriptor_fast(const Descriptor& val); /// /// \brief Strong descriptor getter. /// \return Strong descriptor used in pipeline. /// - const Descriptor &descriptor_strong() const; + const Descriptor& descriptor_strong() const; /// /// \brief Strong descriptor setter. /// \param[in] val Strong descriptor used in pipeline. /// - void set_descriptor_strong(const Descriptor &val); + void set_descriptor_strong(const Descriptor& val); /// /// \brief Fast distance getter. /// \return Fast distance used in pipeline. /// - const Distance &distance_fast() const; + const Distance& distance_fast() const; /// /// \brief Fast distance setter. /// \param[in] val Fast distance used in pipeline. /// - void set_distance_fast(const Distance &val); + void set_distance_fast(const Distance& val); /// /// \brief Strong distance getter. /// \return Strong distance used in pipeline. /// - const Distance &distance_strong() const; + const Distance& distance_strong() const; /// /// \brief Strong distance setter. /// \param[in] val Strong distance used in pipeline. 
/// - void set_distance_strong(const Distance &val); + void set_distance_strong(const Distance& val); /// /// \brief Returns a detection log which is used for tracks saving. @@ -259,7 +273,7 @@ class PedestrianTracker { /// \brief Get active tracks to draw /// \return Active tracks. /// - std::unordered_map > GetActiveTracks() const; + std::unordered_map> GetActiveTracks() const; /// /// \brief Get tracked detections. @@ -272,7 +286,7 @@ class PedestrianTracker { /// \param[in] frame Colored image (CV_8UC3). /// \return Colored image with drawn active tracks. /// - cv::Mat DrawActiveTracks(const cv::Mat &frame); + cv::Mat DrawActiveTracks(const cv::Mat& frame); /// /// \brief IsTrackForgotten returns true if track is forgotten. @@ -286,7 +300,7 @@ class PedestrianTracker { /// ago). /// \return Set of tracks {id, track}. /// - const std::unordered_map &tracks() const; + const std::unordered_map& tracks() const; /// /// \brief IsTrackValid Checks whether track is valid (duration > threshold). @@ -313,30 +327,27 @@ class PedestrianTracker { Match() {} - Match(const TrackedObject &a, const cv::Rect &a_pr_rect, - const TrackedObject &b, bool pr_label) + Match(const TrackedObject& a, const cv::Rect& a_pr_rect, const TrackedObject& b, bool pr_label) : frame_idx1(a.frame_idx), - frame_idx2(b.frame_idx), - rect1(a.rect), - rect2(b.rect), - pr_rect1(a_pr_rect), - pr_label(pr_label), - gt_label(a.object_id == b.object_id) { - PT_CHECK_NE(frame_idx1, frame_idx2); - } + frame_idx2(b.frame_idx), + rect1(a.rect), + rect2(b.rect), + pr_rect1(a_pr_rect), + pr_label(pr_label), + gt_label(a.object_id == b.object_id) { + PT_CHECK_NE(frame_idx1, frame_idx2); + } }; - const ObjectTracks all_tracks(bool valid_only) const; // Returns shape affinity. - static float ShapeAffinity(float w, const cv::Rect &trk, const cv::Rect &det); + static float ShapeAffinity(float w, const cv::Rect& trk, const cv::Rect& det); // Returns motion affinity. - static float MotionAffinity(float w, const cv::Rect &trk, - const cv::Rect &det); + static float MotionAffinity(float w, const cv::Rect& trk, const cv::Rect& det); // Returns time affinity. 
- static float TimeAffinity(float w, const float &trk, const float &det); + static float TimeAffinity(float w, const float& trk, const float& det); cv::Rect PredictRect(size_t id, size_t k, size_t s) const; @@ -344,56 +355,62 @@ class PedestrianTracker { cv::Rect PredictRectSimple(size_t id, size_t k, size_t s) const; - void SolveAssignmentProblem( - const std::set &track_ids, const TrackedObjects &detections, - const std::vector &descriptors, float thr, - std::set *unmatched_tracks, - std::set *unmatched_detections, - std::set> *matches); + void SolveAssignmentProblem(const std::set& track_ids, + const TrackedObjects& detections, + const std::vector& descriptors, + float thr, + std::set* unmatched_tracks, + std::set* unmatched_detections, + std::set>* matches); - void ComputeFastDesciptors(const cv::Mat &frame, - const TrackedObjects &detections, - std::vector *descriptors); + void ComputeFastDesciptors(const cv::Mat& frame, + const TrackedObjects& detections, + std::vector* descriptors); - void ComputeDissimilarityMatrix(const std::set &active_track_ids, - const TrackedObjects &detections, - const std::vector &fast_descriptors, - cv::Mat *dissimilarity_matrix); + void ComputeDissimilarityMatrix(const std::set& active_track_ids, + const TrackedObjects& detections, + const std::vector& fast_descriptors, + cv::Mat* dissimilarity_matrix); - std::vector ComputeDistances( - const cv::Mat &frame, - const TrackedObjects& detections, - const std::vector> &track_and_det_ids, - std::map *det_id_to_descriptor); + std::vector ComputeDistances(const cv::Mat& frame, + const TrackedObjects& detections, + const std::vector>& track_and_det_ids, + std::map* det_id_to_descriptor); std::map> StrongMatching( - const cv::Mat &frame, + const cv::Mat& frame, const TrackedObjects& detections, - const std::vector> &track_and_det_ids); + const std::vector>& track_and_det_ids); std::vector> GetTrackToDetectionIds( - const std::set> &matches); + const std::set>& matches); - float AffinityFast(const cv::Mat &descriptor1, const TrackedObject &obj1, - const cv::Mat &descriptor2, const TrackedObject &obj2); + float AffinityFast(const cv::Mat& descriptor1, + const TrackedObject& obj1, + const cv::Mat& descriptor2, + const TrackedObject& obj2); - float Affinity(const TrackedObject &obj1, const TrackedObject &obj2); + float Affinity(const TrackedObject& obj1, const TrackedObject& obj2); - void AddNewTrack(const cv::Mat &frame, const TrackedObject &detection, - const cv::Mat &fast_descriptor, - const cv::Mat &descriptor_strong = cv::Mat()); + void AddNewTrack(const cv::Mat& frame, + const TrackedObject& detection, + const cv::Mat& fast_descriptor, + const cv::Mat& descriptor_strong = cv::Mat()); - void AddNewTracks(const cv::Mat &frame, const TrackedObjects &detections, - const std::vector &descriptors_fast); + void AddNewTracks(const cv::Mat& frame, + const TrackedObjects& detections, + const std::vector& descriptors_fast); - void AddNewTracks(const cv::Mat &frame, const TrackedObjects &detections, - const std::vector &descriptors_fast, - const std::set &ids); + void AddNewTracks(const cv::Mat& frame, + const TrackedObjects& detections, + const std::vector& descriptors_fast, + const std::set& ids); - void AppendToTrack(const cv::Mat &frame, size_t track_id, - const TrackedObject &detection, - const cv::Mat &descriptor_fast, - const cv::Mat &descriptor_strong); + void AppendToTrack(const cv::Mat& frame, + size_t track_id, + const TrackedObject& detection, + const cv::Mat& descriptor_fast, + const cv::Mat& descriptor_strong); 
bool EraseTrackIfBBoxIsOutOfFrame(size_t track_id); @@ -401,12 +418,12 @@ class PedestrianTracker { bool UpdateLostTrackAndEraseIfItsNeeded(size_t track_id); - void UpdateLostTracks(const std::set &track_ids); + void UpdateLostTracks(const std::set& track_ids); - const std::set &active_track_ids() const; + const std::set& active_track_ids() const; - TrackedObjects FilterDetections(const TrackedObjects &detections) const; - bool IsTrackForgotten(const Track &track) const; + TrackedObjects FilterDetections(const TrackedObjects& detections) const; + bool IsTrackForgotten(const Track& track) const; // Parameters of the pipeline. TrackerParams params_; @@ -433,7 +450,7 @@ class PedestrianTracker { cv::Size prev_frame_size_; struct pair_hash { - std::size_t operator()(const std::pair &p) const { + std::size_t operator()(const std::pair& p) const { PT_CHECK(p.first < 1e6 && p.second < 1e6); return static_cast(p.first * 1e6 + p.second); } diff --git a/demos/pedestrian_tracker_demo/cpp/include/utils.hpp b/demos/pedestrian_tracker_demo/cpp/include/utils.hpp index 7dfc3cde256..5489717f12f 100644 --- a/demos/pedestrian_tracker_demo/cpp/include/utils.hpp +++ b/demos/pedestrian_tracker_demo/cpp/include/utils.hpp @@ -3,17 +3,17 @@ // #pragma once -#include +#include + #include -#include +#include #include -#include -#include #include -#include -#include +#include + +#include + #include -#include "logging.hpp" /// /// \brief The DetectionLogEntry struct @@ -22,8 +22,8 @@ /// struct DetectionLogEntry { TrackedObjects objects; ///< Detected objects. - int64_t frame_idx; ///< Processed frame index (-1 if N/A). - double time_ms; ///< Frame processing time in ms (-1 if N/A). + int64_t frame_idx; ///< Processed frame index (-1 if N/A). + double time_ms; ///< Frame processing time in ms (-1 if N/A). /// /// \brief DetectionLogEntry default constructor. @@ -34,33 +34,33 @@ struct DetectionLogEntry { /// \brief DetectionLogEntry copy constructor. /// \param other Detection entry. /// - DetectionLogEntry(const DetectionLogEntry &other) + DetectionLogEntry(const DetectionLogEntry& other) : objects(other.objects), - frame_idx(other.frame_idx), - time_ms(other.time_ms) {} + frame_idx(other.frame_idx), + time_ms(other.time_ms) {} /// /// \brief DetectionLogEntry move constructor. /// \param other Detection entry. /// - DetectionLogEntry(DetectionLogEntry &&other) + DetectionLogEntry(DetectionLogEntry&& other) : objects(std::move(other.objects)), - frame_idx(other.frame_idx), - time_ms(other.time_ms) {} + frame_idx(other.frame_idx), + time_ms(other.time_ms) {} /// /// \brief Assignment operator. /// \param other Detection entry. /// \return Detection entry. /// - DetectionLogEntry &operator=(const DetectionLogEntry &other) = default; + DetectionLogEntry& operator=(const DetectionLogEntry& other) = default; /// /// \brief Move assignment operator. /// \param other Detection entry. /// \return Detection entry. 
/// - DetectionLogEntry &operator=(DetectionLogEntry &&other) { + DetectionLogEntry& operator=(DetectionLogEntry&& other) { if (this != &other) { objects = std::move(other.objects); frame_idx = other.frame_idx; @@ -80,8 +80,7 @@ using DetectionLog = std::vector; /// \param[in] path -- path to a file to store /// \param[in] log -- detection log to store /// -void SaveDetectionLogToTrajFile(const std::string& path, - const DetectionLog& log); +void SaveDetectionLogToTrajFile(const std::string& path, const DetectionLog& log); /// /// \brief Print DetectionLog to stdout in the format @@ -98,9 +97,7 @@ void PrintDetectionLog(const DetectionLog& log); /// \param[in,out] image Frame. /// \param[in] lwd Line width. /// -void DrawPolyline(const std::vector& polyline, - const cv::Scalar& color, cv::Mat* image, - int lwd = 5); +void DrawPolyline(const std::vector& polyline, const cv::Scalar& color, cv::Mat* image, int lwd = 5); /// /// \brief Stream output operator for deque of elements. @@ -108,12 +105,13 @@ void DrawPolyline(const std::vector& polyline, /// \param[in] v Vector of elements. /// template -std::ostream &operator<<(std::ostream &os, const std::deque &v) { +std::ostream& operator<<(std::ostream& os, const std::deque& v) { os << "[\n"; if (!v.empty()) { auto itr = v.begin(); os << *itr; - for (++itr; itr != v.end(); ++itr) os << ",\n" << *itr; + for (++itr; itr != v.end(); ++itr) + os << ",\n" << *itr; } os << "\n]"; return os; @@ -125,12 +123,13 @@ std::ostream &operator<<(std::ostream &os, const std::deque &v) { /// \param[in] v Vector of elements. /// template -std::ostream &operator<<(std::ostream &os, const std::vector &v) { +std::ostream& operator<<(std::ostream& os, const std::vector& v) { os << "[\n"; if (!v.empty()) { auto itr = v.begin(); os << *itr; - for (++itr; itr != v.end(); ++itr) os << ",\n" << *itr; + for (++itr; itr != v.end(); ++itr) + os << ",\n" << *itr; } os << "\n]"; return os; diff --git a/demos/pedestrian_tracker_demo/cpp/main.cpp b/demos/pedestrian_tracker_demo/cpp/main.cpp index c355d9e043d..a417301071f 100644 --- a/demos/pedestrian_tracker_demo/cpp/main.cpp +++ b/demos/pedestrian_tracker_demo/cpp/main.cpp @@ -2,25 +2,44 @@ // SPDX-License-Identifier: Apache-2.0 // +#include +#include + +#include +#include +#include +#include #include #include -#include +#include #include #include #include #include -#include +#include +#include +#include #include +#include #include #include #include +#include +#include +#include +#include #include #include +#include +#include #include +#include +#include #include +#include "cnn.hpp" #include "core.hpp" #include "descriptor.hpp" #include "distance.hpp" @@ -30,11 +49,10 @@ using ImageWithFrameIndex = std::pair; -std::unique_ptr -CreatePedestrianTracker(const std::string& reid_model, - const ov::Core & core, - const std::string & deviceName, - bool should_keep_tracking_info) { +std::unique_ptr CreatePedestrianTracker(const std::string& reid_model, + const ov::Core& core, + const std::string& deviceName, + bool should_keep_tracking_info) { TrackerParams params; if (should_keep_tracking_info) { @@ -46,38 +64,35 @@ CreatePedestrianTracker(const std::string& reid_model, // Load reid-model. 
std::shared_ptr descriptor_fast = - std::make_shared( - cv::Size(16, 32), cv::InterpolationFlags::INTER_LINEAR); - std::shared_ptr distance_fast = - std::make_shared(); + std::make_shared(cv::Size(16, 32), cv::InterpolationFlags::INTER_LINEAR); + std::shared_ptr distance_fast = std::make_shared(); tracker->set_descriptor_fast(descriptor_fast); tracker->set_distance_fast(distance_fast); if (!reid_model.empty()) { ModelConfigTracker reid_config(reid_model); - reid_config.max_batch_size = 16; // defaulting to 16 + reid_config.max_batch_size = 16; // defaulting to 16 std::shared_ptr descriptor_strong = std::make_shared(reid_config, core, deviceName); if (descriptor_strong == nullptr) { throw std::runtime_error("[SAMPLES] internal error - invalid descriptor"); } - std::shared_ptr distance_strong = - std::make_shared(descriptor_strong->size()); + std::shared_ptr distance_strong = std::make_shared(descriptor_strong->size()); tracker->set_descriptor_strong(descriptor_strong); tracker->set_distance_strong(distance_strong); } else { slog::warn << "Reid model " - << "was not specified. " - << "Only fast reidentification approach will be used." << slog::endl; + << "was not specified. " + << "Only fast reidentification approach will be used." << slog::endl; } return tracker; } -bool ParseAndCheckCommandLine(int argc, char *argv[]) { +bool ParseAndCheckCommandLine(int argc, char* argv[]) { // ---------------------------Parsing and validation of input args-------------------------------------- gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); @@ -106,7 +121,7 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) { return true; } -int main(int argc, char **argv) { +int main(int argc, char** argv) { try { PerformanceMetrics metrics; @@ -139,34 +154,44 @@ int main(int argc, char **argv) { std::unique_ptr detectionModel; if (FLAGS_at == "centernet") { - detectionModel.reset(new ModelCenterNet(det_model, (float)FLAGS_t, labels, FLAGS_layout_det)); - } - else if (FLAGS_at == "ssd") { - detectionModel.reset(new ModelSSD(det_model, (float)FLAGS_t, FLAGS_auto_resize, labels, FLAGS_layout_det)); - } - else if (FLAGS_at == "yolo") { - detectionModel.reset(new ModelYolo(det_model, (float)FLAGS_t, FLAGS_auto_resize, FLAGS_yolo_af, (float)FLAGS_iou_t, labels, {}, {}, FLAGS_layout_det)); - } - else { + detectionModel.reset(new ModelCenterNet(det_model, static_cast(FLAGS_t), labels, FLAGS_layout_det)); + } else if (FLAGS_at == "ssd") { + detectionModel.reset( + new ModelSSD(det_model, static_cast(FLAGS_t), FLAGS_auto_resize, labels, FLAGS_layout_det)); + } else if (FLAGS_at == "yolo") { + detectionModel.reset(new ModelYolo(det_model, + static_cast(FLAGS_t), + FLAGS_auto_resize, + FLAGS_yolo_af, + static_cast(FLAGS_iou_t), + labels, + {}, + {}, + FLAGS_layout_det)); + } else { slog::err << "No model type or invalid model type (-at) provided: " + FLAGS_at << slog::endl; return -1; } std::vector devices{detector_mode, reid_mode}; - slog::info << ov::get_openvino_version() << slog::endl; + slog::info << ov::get_openvino_version() << slog::endl; ov::Core core; auto model = detectionModel->compileModel( - ConfigFactory::getUserConfig(FLAGS_d_det, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), core); + ConfigFactory::getUserConfig(FLAGS_d_det, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads), + core); auto req = model.create_infer_request(); bool should_keep_tracking_info = should_save_det_log || should_print_out; std::unique_ptr tracker = - CreatePedestrianTracker(reid_model, core, reid_mode, - 
should_keep_tracking_info); - - std::unique_ptr cap = openImagesCapture(FLAGS_i, FLAGS_loop, - FLAGS_nireq == 1 ? read_type::efficient : read_type::safe, FLAGS_first, FLAGS_read_limit); + CreatePedestrianTracker(reid_model, core, reid_mode, should_keep_tracking_info); + + std::unique_ptr cap = + openImagesCapture(FLAGS_i, + FLAGS_loop, + FLAGS_nireq == 1 ? read_type::efficient : read_type::safe, + FLAGS_first, + FLAGS_read_limit); double video_fps = cap->fps(); if (0.0 == video_fps) { // the default frame rate for DukeMTMC dataset @@ -181,7 +206,7 @@ int main(int argc, char **argv) { cv::Size graphSize{static_cast(frame.cols / 4), 60}; Presenter presenter(FLAGS_u, 10, graphSize); - for (unsigned frameIdx = 0; ; ++frameIdx) { + for (unsigned frameIdx = 0;; ++frameIdx) { detectionModel->preprocess(ImageInputData(frame), req); req.infer(); @@ -197,13 +222,11 @@ int main(int argc, char **argv) { if (ov::element::i32 == outTensor.get_element_type()) { res.outputsData.emplace(outName, outTensor); - } - else { + } else { res.outputsData.emplace(outName, outTensor); } } - auto result = (detectionModel->postprocess(res))->asRef(); TrackedObjects detections; @@ -216,48 +239,54 @@ int main(int argc, char **argv) { const float frame_height_ = static_cast(frame.rows); object.frame_idx = result.frameId; - const float x0 = - std::min(std::max(0.0f, result.objects[i].x / frame_width_), 1.0f) * frame_width_; - const float y0 = - std::min(std::max(0.0f, result.objects[i].y / frame_height_), 1.0f) * frame_height_; + const float x0 = std::min(std::max(0.0f, result.objects[i].x / frame_width_), 1.0f) * frame_width_; + const float y0 = std::min(std::max(0.0f, result.objects[i].y / frame_height_), 1.0f) * frame_height_; const float x1 = - std::min(std::max(0.0f, (result.objects[i].x + result.objects[i].width) / frame_width_), 1.0f) * frame_width_; + std::min(std::max(0.0f, (result.objects[i].x + result.objects[i].width) / frame_width_), 1.0f) * + frame_width_; const float y1 = - std::min(std::max(0.0f, (result.objects[i].y + result.objects[i].height) / frame_height_), 1.0f) * frame_height_; + std::min(std::max(0.0f, (result.objects[i].y + result.objects[i].height) / frame_height_), 1.0f) * + frame_height_; object.rect = cv::Rect2f(cv::Point(static_cast(round(static_cast(x0))), - static_cast(round(static_cast(y0)))), - cv::Point(static_cast(round(static_cast(x1))), - static_cast(round(static_cast(y1))))); + static_cast(round(static_cast(y0)))), + cv::Point(static_cast(round(static_cast(x1))), + static_cast(round(static_cast(y1))))); - if (object.rect.area() > 0 && ((int)result.objects[i].labelID== FLAGS_person_label || FLAGS_person_label == -1)) { + if (object.rect.area() > 0 && + (static_cast(result.objects[i].labelID) == FLAGS_person_label || FLAGS_person_label == -1)) { detections.emplace_back(object); } } // timestamp in milliseconds - uint64_t cur_timestamp = static_cast(1000.0 / video_fps * frameIdx); + uint64_t cur_timestamp = static_cast(1000.0 / video_fps * frameIdx); tracker->Process(frame, detections, cur_timestamp); // Drawing colored "worms" (tracks). frame = tracker->DrawActiveTracks(frame); // Drawing all detected objects on a frame by BLUE COLOR - for (const auto &detection : detections) { + for (const auto& detection : detections) { cv::rectangle(frame, detection.rect, cv::Scalar(255, 0, 0), 3); } // Drawing tracked detections only by RED color and print ID and detection // confidence level. 
- for (const auto &detection : tracker->TrackedDetections()) { + for (const auto& detection : tracker->TrackedDetections()) { cv::rectangle(frame, detection.rect, cv::Scalar(0, 0, 255), 3); - std::string text = std::to_string(detection.object_id) + - " conf: " + std::to_string(detection.confidence); - putHighlightedText(frame, text, detection.rect.tl() - cv::Point{10, 10}, cv::FONT_HERSHEY_COMPLEX, - 0.65, cv::Scalar(0, 0, 255), 2); + std::string text = + std::to_string(detection.object_id) + " conf: " + std::to_string(detection.confidence); + putHighlightedText(frame, + text, + detection.rect.tl() - cv::Point{10, 10}, + cv::FONT_HERSHEY_COMPLEX, + 0.65, + cv::Scalar(0, 0, 255), + 2); } presenter.drawGraphs(frame); - metrics.update(startTime, frame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + metrics.update(startTime, frame, {10, 22}, cv::FONT_HERSHEY_COMPLEX, 0.65); videoWriter.write(frame); if (should_show) { @@ -274,7 +303,8 @@ int main(int argc, char **argv) { } startTime = std::chrono::steady_clock::now(); frame = cap->read(); - if (!frame.data) break; + if (!frame.data) + break; if (frame.size() != firstFrameSize) throw std::runtime_error("Can't track objects on images of different size"); } @@ -291,12 +321,10 @@ int main(int argc, char **argv) { slog::info << "Metrics report:" << slog::endl; metrics.logTotal(); slog::info << presenter.reportMeans() << slog::endl; - } - catch (const std::exception& error) { + } catch (const std::exception& error) { slog::err << error.what() << slog::endl; return 1; - } - catch (...) { + } catch (...) { slog::err << "Unknown/internal exception happened." << slog::endl; return 1; } diff --git a/demos/pedestrian_tracker_demo/cpp/src/cnn.cpp b/demos/pedestrian_tracker_demo/cpp/src/cnn.cpp index 8c235428f12..c09729bca5e 100644 --- a/demos/pedestrian_tracker_demo/cpp/src/cnn.cpp +++ b/demos/pedestrian_tracker_demo/cpp/src/cnn.cpp @@ -2,21 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "cnn.hpp" + #include #include +#include +#include #include +#include #include #include -#include + #include + #include +#include #include -#include "cnn.hpp" -BaseModel::BaseModel(const Config& config, - const ov::Core& core, - const std::string& device_name) : - config(config), core(core), device_name(device_name) {} +BaseModel::BaseModel(const Config& config, const ov::Core& core, const std::string& device_name) + : config(config), + core(core), + device_name(device_name) {} void BaseModel::Load() { slog::info << "Reading model: " << config.path_to_model << slog::endl; @@ -29,9 +35,7 @@ void BaseModel::Load() { ov::preprocess::PrePostProcessor ppp(model); input_layout = getLayoutFromShape(model->input().get_shape()); - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout({ "NCHW" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NCHW"}); ppp.input().model().set_layout(input_layout); @@ -45,7 +49,7 @@ void BaseModel::Load() { input_shape = model->input().get_shape(); input_shape[ov::layout::batch_idx(input_layout)] = config.max_batch_size; output_shape = model->output().get_shape(); - ov::set_batch(model, { 1, int64(config.max_batch_size) }); + ov::set_batch(model, {1, static_cast(config.max_batch_size)}); compiled_model = core.compile_model(model, device_name); logCompiledModelInfo(compiled_model, config.path_to_model, device_name, modelType); @@ -55,9 +59,8 @@ void BaseModel::Load() { output_tensor = infer_request.get_output_tensor(); } -void BaseModel::InferBatch( - const std::vector& frames, - const std::function& fetch_results) const { +void BaseModel::InferBatch(const std::vector& frames, + const std::function& fetch_results) const { size_t num_imgs = frames.size(); input_tensor.set_shape(input_shape); for (size_t batch_i = 0; batch_i < num_imgs;) { @@ -65,31 +68,31 @@ void BaseModel::InferBatch( for (size_t b = 0; b < batch_size; ++b) { matToTensor(frames[batch_i + b], input_tensor, b); } - infer_request.set_input_tensor(ov::Tensor(input_tensor, { 0, 0, 0, 0 }, { batch_size, input_shape[ov::layout::channels_idx(input_layout)], - input_shape[ov::layout::height_idx(input_layout)], input_shape[ov::layout::width_idx(input_layout)] })); + infer_request.set_input_tensor(ov::Tensor(input_tensor, + {0, 0, 0, 0}, + {batch_size, + input_shape[ov::layout::channels_idx(input_layout)], + input_shape[ov::layout::height_idx(input_layout)], + input_shape[ov::layout::width_idx(input_layout)]})); infer_request.infer(); fetch_results(infer_request.get_output_tensor(), batch_size); batch_i += batch_size; } } -VectorCNN::VectorCNN(const Config& config, - const ov::Core& core, - const std::string& deviceName) +VectorCNN::VectorCNN(const Config& config, const ov::Core& core, const std::string& deviceName) : BaseModel(config, core, deviceName) { Load(); result_size = std::accumulate(std::next(output_shape.begin(), 1), output_shape.end(), 1, std::multiplies()); } -void VectorCNN::Compute(const cv::Mat& frame, - cv::Mat* vector, cv::Size out_shape) const { +void VectorCNN::Compute(const cv::Mat& frame, cv::Mat* vector, cv::Size out_shape) const { std::vector output; - Compute({ frame }, &output, out_shape); + Compute({frame}, &output, out_shape); *vector = output[0]; } -void VectorCNN::Compute(const std::vector& images, std::vector* vectors, - cv::Size out_shape) const { +void VectorCNN::Compute(const std::vector& images, std::vector* vectors, cv::Size out_shape) const { if (images.empty()) { return; } @@ -102,15 +105,16 @@ void VectorCNN::Compute(const std::vector& images, std::vector } cv::Mat out_tensor(tensor_sizes, CV_32F, tensor.data()); for (size_t b = 0; b < batch_size; b++) { - cv::Mat tensor_wrapper(out_tensor.size[1], 1, CV_32F, - reinterpret_cast((out_tensor.ptr(0) + b * out_tensor.size[1]))); + cv::Mat tensor_wrapper(out_tensor.size[1], + 1, + CV_32F, + reinterpret_cast((out_tensor.ptr(0) + b * out_tensor.size[1]))); vectors->emplace_back(); if (out_shape != cv::Size()) { - tensor_wrapper = tensor_wrapper.reshape(1, { out_shape.height, out_shape.width }); + tensor_wrapper = tensor_wrapper.reshape(1, {out_shape.height, out_shape.width}); } tensor_wrapper.copyTo(vectors->back()); } - }; InferBatch(images, results_fetcher); } diff --git a/demos/pedestrian_tracker_demo/cpp/src/distance.cpp 
b/demos/pedestrian_tracker_demo/cpp/src/distance.cpp index 0257a456686..74efe976345 100644 --- a/demos/pedestrian_tracker_demo/cpp/src/distance.cpp +++ b/demos/pedestrian_tracker_demo/cpp/src/distance.cpp @@ -3,16 +3,19 @@ // #include "distance.hpp" -#include "logging.hpp" + +#include +#include #include -CosDistance::CosDistance(const cv::Size &descriptor_size) - : descriptor_size_(descriptor_size) { +#include "logging.hpp" + +CosDistance::CosDistance(const cv::Size& descriptor_size) : descriptor_size_(descriptor_size) { PT_CHECK(descriptor_size.area() != 0); } -float CosDistance::Compute(const cv::Mat &descr1, const cv::Mat &descr2) { +float CosDistance::Compute(const cv::Mat& descr1, const cv::Mat& descr2) { PT_CHECK(!descr1.empty()); PT_CHECK(!descr2.empty()); PT_CHECK(descr1.size() == descriptor_size_); @@ -25,8 +28,7 @@ float CosDistance::Compute(const cv::Mat &descr1, const cv::Mat &descr2) { return 0.5f * static_cast(1.0 - xy / norm); } -std::vector CosDistance::Compute(const std::vector &descrs1, - const std::vector &descrs2) { +std::vector CosDistance::Compute(const std::vector& descrs1, const std::vector& descrs2) { PT_CHECK(descrs1.size() != 0); PT_CHECK(descrs1.size() == descrs2.size()); @@ -38,9 +40,7 @@ std::vector CosDistance::Compute(const std::vector &descrs1, return distances; } - -float MatchTemplateDistance::Compute(const cv::Mat &descr1, - const cv::Mat &descr2) { +float MatchTemplateDistance::Compute(const cv::Mat& descr1, const cv::Mat& descr2) { PT_CHECK(!descr1.empty() && !descr2.empty()); PT_CHECK_EQ(descr1.size(), descr2.size()); PT_CHECK_EQ(descr1.type(), descr2.type()); @@ -51,8 +51,8 @@ float MatchTemplateDistance::Compute(const cv::Mat &descr1, return scale_ * dist + offset_; } -std::vector MatchTemplateDistance::Compute(const std::vector &descrs1, - const std::vector &descrs2) { +std::vector MatchTemplateDistance::Compute(const std::vector& descrs1, + const std::vector& descrs2) { std::vector result; for (size_t i = 0; i < descrs1.size(); i++) { result.push_back(Compute(descrs1[i], descrs2[i])); diff --git a/demos/pedestrian_tracker_demo/cpp/src/tracker.cpp b/demos/pedestrian_tracker_demo/cpp/src/tracker.cpp index 1abdb0973f1..38e95b54bca 100644 --- a/demos/pedestrian_tracker_demo/cpp/src/tracker.cpp +++ b/demos/pedestrian_tracker_demo/cpp/src/tracker.cpp @@ -2,28 +2,38 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "tracker.hpp" + +#include +#include + +#include +#include +#include #include +#include #include -#include #include #include -#include #include -#include -#include +#include + +#include + +#include +#include #include "core.hpp" -#include "tracker.hpp" +#include "descriptor.hpp" +#include "distance.hpp" #include "utils.hpp" -#include namespace { cv::Point Center(const cv::Rect& rect) { - return cv::Point(static_cast(rect.x + rect.width * 0.5), - static_cast(rect.y + rect.height * 0.5)); + return cv::Point(static_cast(rect.x + rect.width * 0.5), static_cast(rect.y + rect.height * 0.5)); } -std::vector Centers(const TrackedObjects &detections) { +std::vector Centers(const TrackedObjects& detections) { std::vector centers(detections.size()); for (size_t i = 0; i < detections.size(); i++) { centers[i] = Center(detections[i].rect); @@ -77,22 +87,22 @@ std::vector GenRandomColors(int colors_num) { TrackerParams::TrackerParams() : min_track_duration(1000), - forget_delay(150), - aff_thr_fast(0.8f), - aff_thr_strong(0.75f), - shape_affinity_w(0.5f), - motion_affinity_w(0.2f), - time_affinity_w(0.0f), - min_det_conf(0.65f), - 
bbox_aspect_ratios_range(0.666f, 5.0f), - bbox_heights_range(40, 1000), - predict(25), - strong_affinity_thr(0.2805f), - reid_thr(0.61f), - drop_forgotten_tracks(true), - max_num_objects_in_track(300) {} - -void ValidateParams(const TrackerParams &p) { + forget_delay(150), + aff_thr_fast(0.8f), + aff_thr_strong(0.75f), + shape_affinity_w(0.5f), + motion_affinity_w(0.2f), + time_affinity_w(0.0f), + min_det_conf(0.65f), + bbox_aspect_ratios_range(0.666f, 5.0f), + bbox_heights_range(40, 1000), + predict(25), + strong_affinity_thr(0.2805f), + reid_thr(0.61f), + drop_forgotten_tracks(true), + max_num_objects_in_track(300) {} + +void ValidateParams(const TrackerParams& p) { PT_CHECK_GE(p.min_track_duration, static_cast(500)); PT_CHECK_LE(p.min_track_duration, static_cast(10000)); @@ -133,7 +143,6 @@ void ValidateParams(const TrackerParams &p) { PT_CHECK_GE(p.reid_thr, 0.0f); PT_CHECK_LE(p.reid_thr, 1.0f); - if (p.max_num_objects_in_track > 0) { int min_required_track_length = static_cast(p.forget_delay); PT_CHECK_GE(p.max_num_objects_in_track, min_required_track_length); @@ -141,75 +150,81 @@ void ValidateParams(const TrackerParams &p) { } } -PedestrianTracker::PedestrianTracker(const TrackerParams ¶ms) +PedestrianTracker::PedestrianTracker(const TrackerParams& params) : params_(params), - descriptor_strong_(nullptr), - distance_strong_(nullptr), - tracks_counter_(0), - frame_size_(0, 0), - prev_timestamp_(std::numeric_limits::max()) { - ValidateParams(params); - } + descriptor_strong_(nullptr), + distance_strong_(nullptr), + tracks_counter_(0), + frame_size_(0, 0), + prev_timestamp_(std::numeric_limits::max()) { + ValidateParams(params); +} // Pipeline parameters getter. -const TrackerParams &PedestrianTracker::params() const { return params_; } +const TrackerParams& PedestrianTracker::params() const { + return params_; +} // Descriptor fast getter. -const PedestrianTracker::Descriptor &PedestrianTracker::descriptor_fast() const { +const PedestrianTracker::Descriptor& PedestrianTracker::descriptor_fast() const { return descriptor_fast_; } // Descriptor fast setter. -void PedestrianTracker::set_descriptor_fast(const Descriptor &val) { +void PedestrianTracker::set_descriptor_fast(const Descriptor& val) { descriptor_fast_ = val; } // Descriptor strong getter. -const PedestrianTracker::Descriptor &PedestrianTracker::descriptor_strong() const { +const PedestrianTracker::Descriptor& PedestrianTracker::descriptor_strong() const { return descriptor_strong_; } // Descriptor strong setter. -void PedestrianTracker::set_descriptor_strong(const Descriptor &val) { +void PedestrianTracker::set_descriptor_strong(const Descriptor& val) { descriptor_strong_ = val; } // Distance fast getter. -const PedestrianTracker::Distance &PedestrianTracker::distance_fast() const { return distance_fast_; } +const PedestrianTracker::Distance& PedestrianTracker::distance_fast() const { + return distance_fast_; +} // Distance fast setter. -void PedestrianTracker::set_distance_fast(const Distance &val) { distance_fast_ = val; } +void PedestrianTracker::set_distance_fast(const Distance& val) { + distance_fast_ = val; +} // Distance strong getter. -const PedestrianTracker::Distance &PedestrianTracker::distance_strong() const { return distance_strong_; } +const PedestrianTracker::Distance& PedestrianTracker::distance_strong() const { + return distance_strong_; +} // Distance strong setter. 
-void PedestrianTracker::set_distance_strong(const Distance &val) { distance_strong_ = val; } +void PedestrianTracker::set_distance_strong(const Distance& val) { + distance_strong_ = val; +} // Returns all tracks including forgotten (lost too many frames ago). -const std::unordered_map & -PedestrianTracker::tracks() const { +const std::unordered_map& PedestrianTracker::tracks() const { return tracks_; } // Returns indexes of active tracks only. -const std::set &PedestrianTracker::active_track_ids() const { +const std::set& PedestrianTracker::active_track_ids() const { return active_track_ids_; } - // Returns detection log which is used for tracks saving. DetectionLog PedestrianTracker::GetDetectionLog(const bool valid_only) const { return ConvertTracksToDetectionLog(all_tracks(valid_only)); } -TrackedObjects PedestrianTracker::FilterDetections( - const TrackedObjects &detections) const { +TrackedObjects PedestrianTracker::FilterDetections(const TrackedObjects& detections) const { TrackedObjects filtered_detections; - for (const auto &det : detections) { + for (const auto& det : detections) { float aspect_ratio = static_cast(det.rect.height) / det.rect.width; - if (det.confidence > params_.min_det_conf && - IsInRange(aspect_ratio, params_.bbox_aspect_ratios_range) && + if (det.confidence > params_.min_det_conf && IsInRange(aspect_ratio, params_.bbox_aspect_ratios_range) && IsInRange(static_cast(det.rect.height), params_.bbox_heights_range)) { filtered_detections.emplace_back(det); } @@ -217,11 +232,13 @@ TrackedObjects PedestrianTracker::FilterDetections( return filtered_detections; } -void PedestrianTracker::SolveAssignmentProblem( - const std::set &track_ids, const TrackedObjects &detections, - const std::vector &descriptors, float thr, - std::set *unmatched_tracks, std::set *unmatched_detections, - std::set> *matches) { +void PedestrianTracker::SolveAssignmentProblem(const std::set& track_ids, + const TrackedObjects& detections, + const std::vector& descriptors, + float thr, + std::set* unmatched_tracks, + std::set* unmatched_detections, + std::set>* matches) { PT_CHECK(unmatched_tracks); PT_CHECK(unmatched_detections); unmatched_tracks->clear(); @@ -234,8 +251,7 @@ void PedestrianTracker::SolveAssignmentProblem( matches->clear(); cv::Mat dissimilarity; - ComputeDissimilarityMatrix(track_ids, detections, descriptors, - &dissimilarity); + ComputeDissimilarityMatrix(track_ids, detections, descriptors, &dissimilarity); std::vector res = KuhnMunkres().Solve(dissimilarity); @@ -259,14 +275,14 @@ const ObjectTracks PedestrianTracker::all_tracks(bool valid_only) const { size_t counter = 0; std::set sorted_ids; - for (const auto &pair : tracks()) { + for (const auto& pair : tracks()) { sorted_ids.emplace(pair.first); } for (size_t id : sorted_ids) { if (!valid_only || IsTrackValid(id)) { TrackedObjects filtered_objects; - for (const auto &object : tracks().at(id).objects) { + for (const auto& object : tracks().at(id).objects) { filtered_objects.emplace_back(object); filtered_objects.back().object_id = counter; } @@ -276,9 +292,8 @@ const ObjectTracks PedestrianTracker::all_tracks(bool valid_only) const { return all_objects; } -cv::Rect PedestrianTracker::PredictRect(size_t id, size_t k, - size_t s) const { - const auto &track = tracks_.at(id); +cv::Rect PedestrianTracker::PredictRect(size_t id, size_t k, size_t s) const { + const auto& track = tracks_.at(id); PT_CHECK(!track.empty()); if (track.size() == 1) { @@ -318,13 +333,12 @@ cv::Rect PedestrianTracker::PredictRect(size_t id, size_t k, 
static_cast(height)); } - bool PedestrianTracker::EraseTrackIfBBoxIsOutOfFrame(size_t track_id) { - if (tracks_.find(track_id) == tracks_.end()) return true; + if (tracks_.find(track_id) == tracks_.end()) + return true; auto c = Center(tracks_.at(track_id).predicted_rect); if (!prev_frame_size_.empty() && - (c.x < 0 || c.y < 0 || c.x > prev_frame_size_.width || - c.y > prev_frame_size_.height)) { + (c.x < 0 || c.y < 0 || c.x > prev_frame_size_.width || c.y > prev_frame_size_.height)) { tracks_.at(track_id).lost = params_.forget_delay + 1; for (auto id : active_track_ids()) { size_t min_id = std::min(id, track_id); @@ -337,9 +351,9 @@ bool PedestrianTracker::EraseTrackIfBBoxIsOutOfFrame(size_t track_id) { return false; } -bool PedestrianTracker::EraseTrackIfItWasLostTooManyFramesAgo( - size_t track_id) { - if (tracks_.find(track_id) == tracks_.end()) return true; +bool PedestrianTracker::EraseTrackIfItWasLostTooManyFramesAgo(size_t track_id) { + if (tracks_.find(track_id) == tracks_.end()) + return true; if (tracks_.at(track_id).lost > params_.forget_delay) { for (auto id : active_track_ids()) { size_t min_id = std::min(id, track_id); @@ -353,27 +367,23 @@ bool PedestrianTracker::EraseTrackIfItWasLostTooManyFramesAgo( return false; } -bool PedestrianTracker::UpdateLostTrackAndEraseIfItsNeeded( - size_t track_id) { +bool PedestrianTracker::UpdateLostTrackAndEraseIfItsNeeded(size_t track_id) { tracks_.at(track_id).lost++; - tracks_.at(track_id).predicted_rect = - PredictRect(track_id, params().predict, tracks_.at(track_id).lost); + tracks_.at(track_id).predicted_rect = PredictRect(track_id, params().predict, tracks_.at(track_id).lost); bool erased = EraseTrackIfBBoxIsOutOfFrame(track_id); - if (!erased) erased = EraseTrackIfItWasLostTooManyFramesAgo(track_id); + if (!erased) + erased = EraseTrackIfItWasLostTooManyFramesAgo(track_id); return erased; } -void PedestrianTracker::UpdateLostTracks( - const std::set &track_ids) { +void PedestrianTracker::UpdateLostTracks(const std::set& track_ids) { for (auto track_id : track_ids) { UpdateLostTrackAndEraseIfItsNeeded(track_id); } } -void PedestrianTracker::Process(const cv::Mat &frame, - const TrackedObjects &input_detections, - uint64_t timestamp) { +void PedestrianTracker::Process(const cv::Mat& frame, const TrackedObjects& input_detections, uint64_t timestamp) { if (prev_timestamp_ != std::numeric_limits::max()) PT_CHECK_LT(prev_timestamp_, timestamp); @@ -384,7 +394,7 @@ void PedestrianTracker::Process(const cv::Mat &frame, } TrackedObjects detections = FilterDetections(input_detections); - for (auto &obj : detections) { + for (auto& obj : detections) { obj.timestamp = timestamp; } @@ -397,20 +407,22 @@ void PedestrianTracker::Process(const cv::Mat &frame, std::set unmatched_tracks, unmatched_detections; std::set> matches; - SolveAssignmentProblem(active_tracks, detections, descriptors_fast, - params_.aff_thr_fast, &unmatched_tracks, - &unmatched_detections, &matches); + SolveAssignmentProblem(active_tracks, + detections, + descriptors_fast, + params_.aff_thr_fast, + &unmatched_tracks, + &unmatched_detections, + &matches); std::map> is_matching_to_track; if (distance_strong_) { - std::vector> reid_track_and_det_ids = - GetTrackToDetectionIds(matches); - is_matching_to_track = StrongMatching( - frame, detections, reid_track_and_det_ids); + std::vector> reid_track_and_det_ids = GetTrackToDetectionIds(matches); + is_matching_to_track = StrongMatching(frame, detections, reid_track_and_det_ids); } - for (const auto &match : matches) { + for (const 
auto& match : matches) { size_t track_id = std::get<0>(match); size_t det_id = std::get<1>(match); float conf = std::get<2>(match); @@ -419,21 +431,22 @@ void PedestrianTracker::Process(const cv::Mat &frame, last_det.rect = tracks_.at(track_id).predicted_rect; if (conf > params_.aff_thr_fast) { - AppendToTrack(frame, track_id, detections[det_id], - descriptors_fast[det_id], cv::Mat()); + AppendToTrack(frame, track_id, detections[det_id], descriptors_fast[det_id], cv::Mat()); unmatched_detections.erase(det_id); } else { if (conf > params_.strong_affinity_thr) { if (distance_strong_ && is_matching_to_track[track_id].first) { - AppendToTrack(frame, track_id, detections[det_id], + AppendToTrack(frame, + track_id, + detections[det_id], descriptors_fast[det_id], is_matching_to_track[track_id].second.clone()); } else { if (UpdateLostTrackAndEraseIfItsNeeded(track_id)) { - AddNewTrack(frame, detections[det_id], descriptors_fast[det_id], - distance_strong_ - ? is_matching_to_track[track_id].second.clone() - : cv::Mat()); + AddNewTrack(frame, + detections[det_id], + descriptors_fast[det_id], + distance_strong_ ? is_matching_to_track[track_id].second.clone() : cv::Mat()); } } @@ -456,7 +469,8 @@ void PedestrianTracker::Process(const cv::Mat &frame, } prev_frame_size_ = frame.size(); - if (params_.drop_forgotten_tracks) DropForgottenTracks(); + if (params_.drop_forgotten_tracks) + DropForgottenTracks(); tracks_dists_.clear(); prev_timestamp_ = timestamp; @@ -468,14 +482,13 @@ void PedestrianTracker::DropForgottenTracks() { size_t max_id = 0; if (!active_track_ids_.empty()) - max_id = - *std::max_element(active_track_ids_.begin(), active_track_ids_.end()); + max_id = *std::max_element(active_track_ids_.begin(), active_track_ids_.end()); const size_t kMaxTrackID = 10000; bool reassign_id = max_id > kMaxTrackID; size_t counter = 0; - for (const auto &pair : tracks_) { + for (const auto& pair : tracks_) { if (!IsTrackForgotten(pair.first)) { new_tracks.emplace(reassign_id ? counter : pair.first, pair.second); new_active_tracks.emplace(reassign_id ? counter : pair.first); @@ -488,41 +501,35 @@ void PedestrianTracker::DropForgottenTracks() { tracks_counter_ = reassign_id ? 
counter : tracks_counter_; } -float PedestrianTracker::ShapeAffinity(float weight, const cv::Rect &trk, - const cv::Rect &det) { +float PedestrianTracker::ShapeAffinity(float weight, const cv::Rect& trk, const cv::Rect& det) { float w_dist = static_cast(std::abs(trk.width - det.width) / (trk.width + det.width)); float h_dist = static_cast(std::abs(trk.height - det.height) / (trk.height + det.height)); return static_cast(exp(static_cast(-weight * (w_dist + h_dist)))); } -float PedestrianTracker::MotionAffinity(float weight, const cv::Rect &trk, - const cv::Rect &det) { - float x_dist = static_cast(trk.x - det.x) * (trk.x - det.x) / - (det.width * det.width); - float y_dist = static_cast(trk.y - det.y) * (trk.y - det.y) / - (det.height * det.height); +float PedestrianTracker::MotionAffinity(float weight, const cv::Rect& trk, const cv::Rect& det) { + float x_dist = static_cast(trk.x - det.x) * (trk.x - det.x) / (det.width * det.width); + float y_dist = static_cast(trk.y - det.y) * (trk.y - det.y) / (det.height * det.height); return static_cast(exp(static_cast(-weight * (x_dist + y_dist)))); } -float PedestrianTracker::TimeAffinity(float weight, const float &trk_time, - const float &det_time) { +float PedestrianTracker::TimeAffinity(float weight, const float& trk_time, const float& det_time) { return static_cast(exp(static_cast(-weight * std::fabs(trk_time - det_time)))); } -void PedestrianTracker::ComputeFastDesciptors( - const cv::Mat &frame, const TrackedObjects &detections, - std::vector *descriptors) { +void PedestrianTracker::ComputeFastDesciptors(const cv::Mat& frame, + const TrackedObjects& detections, + std::vector* descriptors) { *descriptors = std::vector(detections.size(), cv::Mat()); for (size_t i = 0; i < detections.size(); i++) { - descriptor_fast_->Compute(frame(detections[i].rect).clone(), - &((*descriptors)[i])); + descriptor_fast_->Compute(frame(detections[i].rect).clone(), &((*descriptors)[i])); } } -void PedestrianTracker::ComputeDissimilarityMatrix( - const std::set &active_tracks, const TrackedObjects &detections, - const std::vector &descriptors_fast, - cv::Mat *dissimilarity_matrix) { +void PedestrianTracker::ComputeDissimilarityMatrix(const std::set& active_tracks, + const TrackedObjects& detections, + const std::vector& descriptors_fast, + cv::Mat* dissimilarity_matrix) { cv::Mat am(active_tracks.size(), detections.size(), CV_32F, cv::Scalar(0)); size_t i = 0; for (auto id : active_tracks) { @@ -530,19 +537,17 @@ void PedestrianTracker::ComputeDissimilarityMatrix( for (size_t j = 0; j < descriptors_fast.size(); j++) { auto last_det = tracks_.at(id).objects.back(); last_det.rect = tracks_.at(id).predicted_rect; - ptr[j] = AffinityFast(tracks_.at(id).descriptor_fast, last_det, - descriptors_fast[j], detections[j]); + ptr[j] = AffinityFast(tracks_.at(id).descriptor_fast, last_det, descriptors_fast[j], detections[j]); } i++; } *dissimilarity_matrix = 1.0 - am; } -std::vector PedestrianTracker::ComputeDistances( - const cv::Mat &frame, - const TrackedObjects& detections, - const std::vector> &track_and_det_ids, - std::map *det_id_to_descriptor) { +std::vector PedestrianTracker::ComputeDistances(const cv::Mat& frame, + const TrackedObjects& detections, + const std::vector>& track_and_det_ids, + std::map* det_id_to_descriptor) { std::map det_to_batch_ids; std::map track_to_batch_ids; @@ -572,8 +577,7 @@ std::vector PedestrianTracker::ComputeDistances( size_t det_id = track_and_det_ids[i].second; if (tracks_.at(track_id).descriptor_strong.empty()) { - 
tracks_.at(track_id).descriptor_strong = - descriptors[track_to_batch_ids[track_id]].clone(); + tracks_.at(track_id).descriptor_strong = descriptors[track_to_batch_ids[track_id]].clone(); } (*det_id_to_descriptor)[det_id] = descriptors[det_to_batch_ids[det_id]]; @@ -581,18 +585,16 @@ std::vector PedestrianTracker::ComputeDistances( descriptors2.push_back(tracks_.at(track_id).descriptor_strong); } - std::vector distances = - distance_strong_->Compute(descriptors1, descriptors2); + std::vector distances = distance_strong_->Compute(descriptors1, descriptors2); return distances; } -std::vector> -PedestrianTracker::GetTrackToDetectionIds( - const std::set> &matches) { +std::vector> PedestrianTracker::GetTrackToDetectionIds( + const std::set>& matches) { std::vector> track_and_det_ids; - for (const auto &match : matches) { + for (const auto& match : matches) { size_t track_id = std::get<0>(match); size_t det_id = std::get<1>(match); float conf = std::get<2>(match); @@ -603,11 +605,10 @@ PedestrianTracker::GetTrackToDetectionIds( return track_and_det_ids; } -std::map> -PedestrianTracker::StrongMatching( - const cv::Mat &frame, +std::map> PedestrianTracker::StrongMatching( + const cv::Mat& frame, const TrackedObjects& detections, - const std::vector> &track_and_det_ids) { + const std::vector>& track_and_det_ids) { std::map> is_matching; if (track_and_det_ids.size() == 0) { @@ -615,9 +616,7 @@ PedestrianTracker::StrongMatching( } std::map det_ids_to_descriptors; - std::vector distances = - ComputeDistances(frame, detections, - track_and_det_ids, &det_ids_to_descriptors); + std::vector distances = ComputeDistances(frame, detections, track_and_det_ids, &det_ids_to_descriptors); for (size_t i = 0; i < track_and_det_ids.size(); i++) { auto reid_affinity = 1.0 - distances[i]; @@ -633,27 +632,26 @@ PedestrianTracker::StrongMatching( float affinity = static_cast(reid_affinity) * Affinity(last_det, detection); - bool is_detection_matching = - reid_affinity > params_.reid_thr && affinity > params_.aff_thr_strong; + bool is_detection_matching = reid_affinity > params_.reid_thr && affinity > params_.aff_thr_strong; - is_matching[track_id] = std::pair( - is_detection_matching, det_ids_to_descriptors[det_id]); + is_matching[track_id] = std::pair(is_detection_matching, det_ids_to_descriptors[det_id]); } return is_matching; } -void PedestrianTracker::AddNewTracks( - const cv::Mat &frame, const TrackedObjects &detections, - const std::vector &descriptors_fast) { +void PedestrianTracker::AddNewTracks(const cv::Mat& frame, + const TrackedObjects& detections, + const std::vector& descriptors_fast) { PT_CHECK(detections.size() == descriptors_fast.size()); for (size_t i = 0; i < detections.size(); i++) { AddNewTrack(frame, detections[i], descriptors_fast[i]); } } -void PedestrianTracker::AddNewTracks( - const cv::Mat &frame, const TrackedObjects &detections, - const std::vector &descriptors_fast, const std::set &ids) { +void PedestrianTracker::AddNewTracks(const cv::Mat& frame, + const TrackedObjects& detections, + const std::vector& descriptors_fast, + const std::set& ids) { PT_CHECK(detections.size() == descriptors_fast.size()); for (size_t i : ids) { PT_CHECK(i < detections.size()); @@ -661,37 +659,35 @@ void PedestrianTracker::AddNewTracks( } } -void PedestrianTracker::AddNewTrack(const cv::Mat &frame, - const TrackedObject &detection, - const cv::Mat &descriptor_fast, - const cv::Mat &descriptor_strong) { +void PedestrianTracker::AddNewTrack(const cv::Mat& frame, + const TrackedObject& detection, + const cv::Mat& 
descriptor_fast, + const cv::Mat& descriptor_strong) { auto detection_with_id = detection; detection_with_id.object_id = tracks_counter_; tracks_.emplace(std::pair( - tracks_counter_, - Track({detection_with_id}, frame(detection.rect).clone(), - descriptor_fast.clone(), descriptor_strong.clone()))); + tracks_counter_, + Track({detection_with_id}, frame(detection.rect).clone(), descriptor_fast.clone(), descriptor_strong.clone()))); for (size_t id : active_track_ids_) { - tracks_dists_.emplace(std::pair(id, tracks_counter_), - std::numeric_limits::max()); + tracks_dists_.emplace(std::pair(id, tracks_counter_), std::numeric_limits::max()); } active_track_ids_.insert(tracks_counter_); tracks_counter_++; } -void PedestrianTracker::AppendToTrack(const cv::Mat &frame, +void PedestrianTracker::AppendToTrack(const cv::Mat& frame, size_t track_id, - const TrackedObject &detection, - const cv::Mat &descriptor_fast, - const cv::Mat &descriptor_strong) { + const TrackedObject& detection, + const cv::Mat& descriptor_fast, + const cv::Mat& descriptor_strong) { PT_CHECK(!IsTrackForgotten(track_id)); auto detection_with_id = detection; detection_with_id.object_id = track_id; - auto &cur_track = tracks_.at(track_id); + auto& cur_track = tracks_.at(track_id); cur_track.objects.emplace_back(detection_with_id); cur_track.predicted_rect = detection.rect; cur_track.lost = 0; @@ -702,45 +698,42 @@ void PedestrianTracker::AppendToTrack(const cv::Mat &frame, if (cur_track.descriptor_strong.empty()) { cur_track.descriptor_strong = descriptor_strong.clone(); } else if (!descriptor_strong.empty()) { - cur_track.descriptor_strong = - 0.5 * (descriptor_strong + cur_track.descriptor_strong); + cur_track.descriptor_strong = 0.5 * (descriptor_strong + cur_track.descriptor_strong); } - if (params_.max_num_objects_in_track > 0) { - while (cur_track.size() > - static_cast(params_.max_num_objects_in_track)) { + while (cur_track.size() > static_cast(params_.max_num_objects_in_track)) { cur_track.objects.erase(cur_track.objects.begin()); } } } -float PedestrianTracker::AffinityFast(const cv::Mat &descriptor1, - const TrackedObject &obj1, - const cv::Mat &descriptor2, - const TrackedObject &obj2) { +float PedestrianTracker::AffinityFast(const cv::Mat& descriptor1, + const TrackedObject& obj1, + const cv::Mat& descriptor2, + const TrackedObject& obj2) { const float eps = 1e-6f; float shp_aff = ShapeAffinity(params_.shape_affinity_w, obj1.rect, obj2.rect); - if (shp_aff < eps) return 0.0f; + if (shp_aff < eps) + return 0.0f; - float mot_aff = - MotionAffinity(params_.motion_affinity_w, obj1.rect, obj2.rect); - if (mot_aff < eps) return 0.0f; + float mot_aff = MotionAffinity(params_.motion_affinity_w, obj1.rect, obj2.rect); + if (mot_aff < eps) + return 0.0f; float time_aff = TimeAffinity(params_.time_affinity_w, static_cast(obj1.frame_idx), static_cast(obj2.frame_idx)); - if (time_aff < eps) return 0.0f; + if (time_aff < eps) + return 0.0f; float app_aff = 1.0f - distance_fast_->Compute(descriptor1, descriptor2); return shp_aff * mot_aff * app_aff * time_aff; } -float PedestrianTracker::Affinity(const TrackedObject &obj1, - const TrackedObject &obj2) { +float PedestrianTracker::Affinity(const TrackedObject& obj1, const TrackedObject& obj2) { float shp_aff = ShapeAffinity(params_.shape_affinity_w, obj1.rect, obj2.rect); - float mot_aff = - MotionAffinity(params_.motion_affinity_w, obj1.rect, obj2.rect); + float mot_aff = MotionAffinity(params_.motion_affinity_w, obj1.rect, obj2.rect); float time_aff = 
TimeAffinity(params_.time_affinity_w, static_cast(obj1.frame_idx), static_cast(obj2.frame_idx)); return shp_aff * mot_aff * time_aff; @@ -748,7 +741,7 @@ float PedestrianTracker::Affinity(const TrackedObject &obj1, bool PedestrianTracker::IsTrackValid(size_t id) const { const auto& track = tracks_.at(id); - const auto &objects = track.objects; + const auto& objects = track.objects; if (objects.empty()) { return false; } @@ -762,12 +755,11 @@ bool PedestrianTracker::IsTrackForgotten(size_t id) const { return IsTrackForgotten(tracks_.at(id)); } -bool PedestrianTracker::IsTrackForgotten(const Track &track) const { +bool PedestrianTracker::IsTrackForgotten(const Track& track) const { return (track.lost > params_.forget_delay); } -std::unordered_map> -PedestrianTracker::GetActiveTracks() const { +std::unordered_map> PedestrianTracker::GetActiveTracks() const { std::unordered_map> active_tracks; for (size_t idx : active_track_ids()) { auto track = tracks().at(idx); @@ -789,7 +781,7 @@ TrackedObjects PedestrianTracker::TrackedDetections() const { return detections; } -cv::Mat PedestrianTracker::DrawActiveTracks(const cv::Mat &frame) { +cv::Mat PedestrianTracker::DrawActiveTracks(const cv::Mat& frame) { cv::Mat out_frame = frame.clone(); if (colors_.empty()) { @@ -804,12 +796,16 @@ cv::Mat PedestrianTracker::DrawActiveTracks(const cv::Mat &frame) { DrawPolyline(centers, colors_[idx % colors_.size()], &out_frame); std::stringstream ss; ss << idx; - putHighlightedText(out_frame, ss.str(), centers.back(), cv::FONT_HERSHEY_SCRIPT_COMPLEX, 0.95, - colors_[idx % colors_.size()], 2); + putHighlightedText(out_frame, + ss.str(), + centers.back(), + cv::FONT_HERSHEY_SCRIPT_COMPLEX, + 0.95, + colors_[idx % colors_.size()], + 2); auto track = tracks().at(idx); if (track.lost) { - cv::line(out_frame, active_track.second.back(), - Center(track.predicted_rect), cv::Scalar(0, 0, 0), 4); + cv::line(out_frame, active_track.second.back(), Center(track.predicted_rect), cv::Scalar(0, 0, 0), 4); } } diff --git a/demos/pedestrian_tracker_demo/cpp/src/utils.cpp b/demos/pedestrian_tracker_demo/cpp/src/utils.cpp index 87b90519551..e125683feb3 100644 --- a/demos/pedestrian_tracker_demo/cpp/src/utils.cpp +++ b/demos/pedestrian_tracker_demo/cpp/src/utils.cpp @@ -2,41 +2,42 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "utils.hpp" + +#include + #include #include -#include -#include #include #include + #include + #include -#include "utils.hpp" + +#include "core.hpp" +#include "logging.hpp" namespace { template -void SaveDetectionLogToStream(StreamType& stream, const EndlType& endl, - const DetectionLog& log) { +void SaveDetectionLogToStream(StreamType& stream, const EndlType& endl, const DetectionLog& log) { for (const auto& entry : log) { - std::vector objects(entry.objects.begin(), - entry.objects.end()); - std::sort(objects.begin(), objects.end(), - [](const TrackedObject& a, - const TrackedObject& b) - { return a.object_id < b.object_id; }); + std::vector objects(entry.objects.begin(), entry.objects.end()); + std::sort(objects.begin(), objects.end(), [](const TrackedObject& a, const TrackedObject& b) { + return a.object_id < b.object_id; + }); for (const auto& object : objects) { auto frame_idx_to_save = entry.frame_idx; stream << frame_idx_to_save << ','; - stream << object.object_id << ',' - << object.rect.x << ',' << object.rect.y << ',' - << object.rect.width << ',' << object.rect.height; + stream << object.object_id << ',' << object.rect.x << ',' << object.rect.y << ',' << object.rect.width + << ',' << 
object.rect.height; stream << endl; } } } } // anonymous namespace -void DrawPolyline(const std::vector& polyline, - const cv::Scalar& color, cv::Mat* image, int lwd) { +void DrawPolyline(const std::vector& polyline, const cv::Scalar& color, cv::Mat* image, int lwd) { PT_CHECK(image); PT_CHECK(!image->empty()); PT_CHECK_EQ(image->type(), CV_8UC3); @@ -48,8 +49,7 @@ void DrawPolyline(const std::vector& polyline, } } -void SaveDetectionLogToTrajFile(const std::string& path, - const DetectionLog& log) { +void SaveDetectionLogToTrajFile(const std::string& path, const DetectionLog& log) { std::ofstream file(path.c_str()); PT_CHECK(file.is_open()); SaveDetectionLogToStream(file, '\n', log); From 0d99138ed6c640b6d704fe0347958f6204d6df81 Mon Sep 17 00:00:00 2001 From: ivikhrev Date: Thu, 31 Mar 2022 19:41:17 +0300 Subject: [PATCH 3/6] cmath include ordering --- demos/common/cpp/models/src/detection_model_faceboxes.cpp | 3 +-- demos/common/cpp/models/src/detection_model_retinaface.cpp | 2 +- demos/common/cpp/models/src/detection_model_retinaface_pt.cpp | 2 +- demos/common/cpp/models/src/detection_model_yolo.cpp | 3 +-- .../common/cpp/models/src/hpe_model_associative_embedding.cpp | 2 +- demos/common/cpp/models/src/hpe_model_openpose.cpp | 3 +-- demos/common/cpp/models/src/openpose_decoder.cpp | 3 +-- demos/object_detection_demo/cpp/main.cpp | 2 +- demos/pedestrian_tracker_demo/cpp/main.cpp | 2 +- demos/pedestrian_tracker_demo/cpp/src/distance.cpp | 4 ++-- demos/pedestrian_tracker_demo/cpp/src/tracker.cpp | 2 +- 11 files changed, 12 insertions(+), 16 deletions(-) diff --git a/demos/common/cpp/models/src/detection_model_faceboxes.cpp b/demos/common/cpp/models/src/detection_model_faceboxes.cpp index f4c29a7f7c5..28d81ed26f6 100644 --- a/demos/common/cpp/models/src/detection_model_faceboxes.cpp +++ b/demos/common/cpp/models/src/detection_model_faceboxes.cpp @@ -16,9 +16,8 @@ #include "models/detection_model_faceboxes.h" -#include - #include +#include #include #include diff --git a/demos/common/cpp/models/src/detection_model_retinaface.cpp b/demos/common/cpp/models/src/detection_model_retinaface.cpp index fa6716ac2f7..ff6e36f1031 100644 --- a/demos/common/cpp/models/src/detection_model_retinaface.cpp +++ b/demos/common/cpp/models/src/detection_model_retinaface.cpp @@ -16,10 +16,10 @@ #include "models/detection_model_retinaface.h" -#include #include #include +#include #include #include diff --git a/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp b/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp index 0a8bd1c52fd..bb8321e103c 100644 --- a/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp +++ b/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp @@ -16,10 +16,10 @@ #include "models/detection_model_retinaface_pt.h" -#include #include #include +#include #include #include #include diff --git a/demos/common/cpp/models/src/detection_model_yolo.cpp b/demos/common/cpp/models/src/detection_model_yolo.cpp index 2c28dc4a71e..6b36d8048b9 100644 --- a/demos/common/cpp/models/src/detection_model_yolo.cpp +++ b/demos/common/cpp/models/src/detection_model_yolo.cpp @@ -16,9 +16,8 @@ #include "models/detection_model_yolo.h" -#include - #include +#include #include #include #include diff --git a/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp b/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp index 91976157883..2a8562f01d2 100644 --- a/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp +++ 
b/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp @@ -16,10 +16,10 @@ #include "models/hpe_model_associative_embedding.h" -#include #include #include +#include #include #include #include diff --git a/demos/common/cpp/models/src/hpe_model_openpose.cpp b/demos/common/cpp/models/src/hpe_model_openpose.cpp index a0fc5a2cac2..179fb0a2953 100644 --- a/demos/common/cpp/models/src/hpe_model_openpose.cpp +++ b/demos/common/cpp/models/src/hpe_model_openpose.cpp @@ -16,9 +16,8 @@ #include "models/hpe_model_openpose.h" -#include - #include +#include #include #include #include diff --git a/demos/common/cpp/models/src/openpose_decoder.cpp b/demos/common/cpp/models/src/openpose_decoder.cpp index 723495f73a0..6d516070363 100644 --- a/demos/common/cpp/models/src/openpose_decoder.cpp +++ b/demos/common/cpp/models/src/openpose_decoder.cpp @@ -16,9 +16,8 @@ #include "models/openpose_decoder.h" -#include - #include +#include #include #include #include diff --git a/demos/object_detection_demo/cpp/main.cpp b/demos/object_detection_demo/cpp/main.cpp index 42997a2352e..645425f8f59 100644 --- a/demos/object_detection_demo/cpp/main.cpp +++ b/demos/object_detection_demo/cpp/main.cpp @@ -14,11 +14,11 @@ // limitations under the License. */ -#include #include #include #include +#include #include #include #include diff --git a/demos/pedestrian_tracker_demo/cpp/main.cpp b/demos/pedestrian_tracker_demo/cpp/main.cpp index a417301071f..7384ceb7e84 100644 --- a/demos/pedestrian_tracker_demo/cpp/main.cpp +++ b/demos/pedestrian_tracker_demo/cpp/main.cpp @@ -6,8 +6,8 @@ #include #include -#include #include +#include #include #include #include diff --git a/demos/pedestrian_tracker_demo/cpp/src/distance.cpp b/demos/pedestrian_tracker_demo/cpp/src/distance.cpp index 74efe976345..98a5c79c6f6 100644 --- a/demos/pedestrian_tracker_demo/cpp/src/distance.cpp +++ b/demos/pedestrian_tracker_demo/cpp/src/distance.cpp @@ -1,12 +1,12 @@ -// Copyright (C) 2018-2019 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "distance.hpp" -#include #include +#include #include #include "logging.hpp" diff --git a/demos/pedestrian_tracker_demo/cpp/src/tracker.cpp b/demos/pedestrian_tracker_demo/cpp/src/tracker.cpp index 38e95b54bca..0367ee87cc9 100644 --- a/demos/pedestrian_tracker_demo/cpp/src/tracker.cpp +++ b/demos/pedestrian_tracker_demo/cpp/src/tracker.cpp @@ -4,10 +4,10 @@ #include "tracker.hpp" -#include #include #include +#include #include #include #include From 587a203e1d32d531836e79c6d627d766f4df6518 Mon Sep 17 00:00:00 2001 From: ivikhrev Date: Thu, 31 Mar 2022 20:08:12 +0300 Subject: [PATCH 4/6] rename src for jpeg_restoration_model; fix includes --- .../cpp/models/include/models/deblurring_model.h | 2 +- .../include/models/detection_model_retinaface_pt.h | 2 +- .../include/models/hpe_model_associative_embedding.h | 6 +++++- .../models/include/models/jpeg_restoration_model.h | 11 +++++++++-- .../models/include/models/super_resolution_model.h | 2 +- ...peg_restoration.cpp => jpeg_restoration_model.cpp} | 0 6 files changed, 17 insertions(+), 6 deletions(-) rename demos/common/cpp/models/src/{jpeg_restoration.cpp => jpeg_restoration_model.cpp} (100%) diff --git a/demos/common/cpp/models/include/models/deblurring_model.h b/demos/common/cpp/models/include/models/deblurring_model.h index 860390d1504..33f55422deb 100644 --- a/demos/common/cpp/models/include/models/deblurring_model.h +++ b/demos/common/cpp/models/include/models/deblurring_model.h @@ -20,7 +20,7 @@ 
#include #include -#include +#include #include "models/image_model.h" diff --git a/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h b/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h index 5e9d402122d..79b16f8db59 100644 --- a/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h +++ b/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h @@ -21,7 +21,7 @@ #include #include -#include +#include #include "models/detection_model.h" diff --git a/demos/common/cpp/models/include/models/hpe_model_associative_embedding.h b/demos/common/cpp/models/include/models/hpe_model_associative_embedding.h index 6780225caf8..60561d4a336 100644 --- a/demos/common/cpp/models/include/models/hpe_model_associative_embedding.h +++ b/demos/common/cpp/models/include/models/hpe_model_associative_embedding.h @@ -20,12 +20,16 @@ #include #include -#include #include #include "models/image_model.h" +namespace ov { +class InferRequest; +class Model; +class Shape; +} // namespace ov struct HumanPose; struct InferenceResult; struct InputData; diff --git a/demos/common/cpp/models/include/models/jpeg_restoration_model.h b/demos/common/cpp/models/include/models/jpeg_restoration_model.h index 08794578822..8b22ac2c090 100644 --- a/demos/common/cpp/models/include/models/jpeg_restoration_model.h +++ b/demos/common/cpp/models/include/models/jpeg_restoration_model.h @@ -19,11 +19,18 @@ #include #include -#include +#include #include "models/image_model.h" -#include "models/results.h" +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; class JPEGRestorationModel : public ImageModel { public: /// Constructor diff --git a/demos/common/cpp/models/include/models/super_resolution_model.h b/demos/common/cpp/models/include/models/super_resolution_model.h index ac9805e4cb8..773b5c33e4b 100644 --- a/demos/common/cpp/models/include/models/super_resolution_model.h +++ b/demos/common/cpp/models/include/models/super_resolution_model.h @@ -18,7 +18,7 @@ #include #include -#include +#include #include "models/image_model.h" diff --git a/demos/common/cpp/models/src/jpeg_restoration.cpp b/demos/common/cpp/models/src/jpeg_restoration_model.cpp similarity index 100% rename from demos/common/cpp/models/src/jpeg_restoration.cpp rename to demos/common/cpp/models/src/jpeg_restoration_model.cpp From 8b040788886f43a3779dffcf8872e5917a640820 Mon Sep 17 00:00:00 2001 From: Ivan Vikhrev Date: Thu, 31 Mar 2022 20:26:52 +0300 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Zlobin Vladimir --- .../classification_benchmark_demo/cpp/grid_mat.hpp | 14 ++++++-------- .../cpp/models/src/detection_model_faceboxes.cpp | 12 ++++-------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/demos/classification_benchmark_demo/cpp/grid_mat.hpp b/demos/classification_benchmark_demo/cpp/grid_mat.hpp index 03d6073591f..1d3619844be 100644 --- a/demos/classification_benchmark_demo/cpp/grid_mat.hpp +++ b/demos/classification_benchmark_demo/cpp/grid_mat.hpp @@ -101,14 +101,14 @@ class GridMat { cv::Scalar textColor; switch (predictionResul) { case PredictionResult::Correct: - textColor = cv::Scalar(75, 255, 75); - break; // green + textColor = cv::Scalar(75, 255, 75); // green + break; case PredictionResult::Incorrect: - textColor = cv::Scalar(50, 50, 255); - break; // red + textColor = cv::Scalar(50, 50, 255); // red + break; case PredictionResult::Unknown: - 
textColor = cv::Scalar(200, 10, 10); - break; // blue + textColor = cv::Scalar(200, 10, 10); // blue + break; default: throw std::runtime_error("Undefined type of prediction result"); } @@ -150,6 +150,4 @@ class GridMat { cv::Size testMessageSize; }; -// defenition of the static member needed to remove clang build error -// https://stackoverflow.com/questions/8016780/undefined-reference-to-static-constexpr-char constexpr const char GridMat::testMessage[]; diff --git a/demos/common/cpp/models/src/detection_model_faceboxes.cpp b/demos/common/cpp/models/src/detection_model_faceboxes.cpp index 28d81ed26f6..e7671d50573 100644 --- a/demos/common/cpp/models/src/detection_model_faceboxes.cpp +++ b/demos/common/cpp/models/src/detection_model_faceboxes.cpp @@ -225,22 +225,18 @@ std::vector filterBoxes(const ov::Tensor& boxesTensor, } std::unique_ptr ModelFaceBoxes::postprocess(InferenceResult& infResult) { - // --------------------------- Filter scores and get valid indices for bounding - // boxes---------------------------------- + // Filter scores and get valid indices for bounding boxes const auto scoresTensor = infResult.outputsData[outputsNames[1]]; const auto scores = filterScores(scoresTensor, confidenceThreshold); - // --------------------------- Filter bounding boxes on indices - // ------------------------------------------------------- + // Filter bounding boxes on indices auto boxesTensor = infResult.outputsData[outputsNames[0]]; std::vector boxes = filterBoxes(boxesTensor, anchors, scores.first, variance); - // --------------------------- Apply Non-maximum Suppression - // ---------------------------------------------------------- + // Apply Non-maximum Suppression const std::vector keep = nms(boxes, scores.second, boxIOUThreshold); - // --------------------------- Create detection result objects - // -------------------------------------------------------- + // Create detection result objects DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); const auto imgWidth = infResult.internalModelData->asRef().inputImgWidth; const auto imgHeight = infResult.internalModelData->asRef().inputImgHeight; From fce7ed3b4196974a04ba4d6036a4c786aadcb6ed Mon Sep 17 00:00:00 2001 From: ivikhrev Date: Thu, 31 Mar 2022 20:41:17 +0300 Subject: [PATCH 6/6] remove remaining comments --- .../common/cpp/utils/include/utils/config_factory.h | 10 +++++----- demos/common/cpp/utils/src/config_factory.cpp | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/demos/common/cpp/utils/include/utils/config_factory.h b/demos/common/cpp/utils/include/utils/config_factory.h index 15fe4f4aa7b..9e9b01e01a7 100644 --- a/demos/common/cpp/utils/include/utils/config_factory.h +++ b/demos/common/cpp/utils/include/utils/config_factory.h @@ -15,13 +15,13 @@ */ #pragma once -#include // for uint32_t +#include -#include // for map -#include // for set -#include // for string +#include +#include +#include -#include // for AnyMap +#include struct ModelConfig { std::string deviceName; diff --git a/demos/common/cpp/utils/src/config_factory.cpp b/demos/common/cpp/utils/src/config_factory.cpp index 7d0d9808e94..a3fd49fd1dd 100644 --- a/demos/common/cpp/utils/src/config_factory.cpp +++ b/demos/common/cpp/utils/src/config_factory.cpp @@ -16,14 +16,14 @@ #include "utils/config_factory.h" -#include // for set, set<>::iterator -#include // for string, operator==, basic_string -#include // for pair -#include // for vector +#include +#include +#include +#include -#include // for 
intel_gpu properties
+#include <openvino/runtime/intel_gpu/properties.hpp>

-#include "utils/args_helper.hpp"  // for parseDevices, parseValuePerDevice
+#include "utils/args_helper.hpp"

 std::set<std::string> ModelConfig::getDevices() {
     if (devices.empty()) {
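
Patch 4 above trims the model headers down to forward declarations of ov::InferRequest, ov::Model and ov::Shape instead of pulling in the full OpenVINO headers. A minimal sketch of that pattern follows; the Runner class and file names are illustrative only and are not part of the demos:

// runner.h - the header only names the ov type, so it does not include OpenVINO headers
#pragma once

#include <memory>
#include <string>

namespace ov {
class Model;
}  // namespace ov

class Runner {
public:
    explicit Runner(const std::string& modelPath);
    std::shared_ptr<ov::Model> getModel() const { return model; }

private:
    std::shared_ptr<ov::Model> model;
};

// runner.cpp - only the source file includes the full OpenVINO definitions
#include "runner.h"

#include <openvino/openvino.hpp>

Runner::Runner(const std::string& modelPath) {
    ov::Core core;
    model = core.read_model(modelPath);
}

The header compiles against the bare forward declaration because it only stores the type behind a smart pointer; any translation unit that actually calls into the model includes the complete OpenVINO headers itself, which keeps rebuilds of dependent demos cheaper.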
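
Patch 5 also shortens the comments that mark the FaceBoxes postprocessing stages: score filtering, box filtering, non-maximum suppression and result assembly. For reference, the greedy IoU-based suppression that stage performs can be sketched as below; this is a generic illustration, not the demos' nms() helper, and the Box struct and threshold parameter are assumptions:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Box {
    float left, top, right, bottom, confidence;
};

static float iou(const Box& a, const Box& b) {
    const float ix = std::max(0.f, std::min(a.right, b.right) - std::max(a.left, b.left));
    const float iy = std::max(0.f, std::min(a.bottom, b.bottom) - std::max(a.top, b.top));
    const float inter = ix * iy;
    const float areaA = (a.right - a.left) * (a.bottom - a.top);
    const float areaB = (b.right - b.left) * (b.bottom - b.top);
    return inter / (areaA + areaB - inter);
}

// Keep the highest-confidence box, suppress every remaining box that overlaps it
// more than the threshold, then repeat with the next unsuppressed box.
std::vector<size_t> greedyNms(const std::vector<Box>& boxes, float iouThreshold) {
    std::vector<size_t> order(boxes.size());
    for (size_t i = 0; i < order.size(); ++i)
        order[i] = i;
    std::sort(order.begin(), order.end(), [&boxes](size_t l, size_t r) {
        return boxes[l].confidence > boxes[r].confidence;
    });
    std::vector<bool> suppressed(boxes.size(), false);
    std::vector<size_t> keep;
    for (size_t i = 0; i < order.size(); ++i) {
        if (suppressed[order[i]])
            continue;
        keep.push_back(order[i]);
        for (size_t j = i + 1; j < order.size(); ++j) {
            if (!suppressed[order[j]] && iou(boxes[order[i]], boxes[order[j]]) > iouThreshold)
                suppressed[order[j]] = true;
        }
    }
    return keep;
}

Because the keep list holds indices into the original vector, the caller can copy the surviving boxes and their scores straight into the detection results, which is what the postprocess method in the hunk above does after its suppression step.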