From 3980b1e9fa72b178bad3e29bfbb79582efdf28ff Mon Sep 17 00:00:00 2001 From: Fabien SERVANT Date: Thu, 8 Jun 2023 10:30:44 +0200 Subject: [PATCH 1/9] new app for onnx based segmentation --- src/software/pipeline/CMakeLists.txt | 14 +++ .../pipeline/main_imageSegmentation.cpp | 113 ++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 src/software/pipeline/main_imageSegmentation.cpp diff --git a/src/software/pipeline/CMakeLists.txt b/src/software/pipeline/CMakeLists.txt index f788d242a5..cd1dbe9712 100644 --- a/src/software/pipeline/CMakeLists.txt +++ b/src/software/pipeline/CMakeLists.txt @@ -620,3 +620,17 @@ if(ALICEVISION_HAVE_OPENCV AND ALICEVISION_HAVE_ONNX) ${OpenCV_LIBRARIES} ) endif() + +if(ALICEVISION_HAVE_ONNX) + # SphereDetection + alicevision_add_software(aliceVision_imageSegmentation + SOURCE main_imageSegmentation.cpp + FOLDER ${FOLDER_SOFTWARE_PIPELINE} + LINKS aliceVision_image + aliceVision_cmdline + aliceVision_system + aliceVision_sfmData + aliceVision_sfmDataIO + ONNXRuntime::ONNXRuntime + ) +endif() diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp new file mode 100644 index 0000000000..2ffa591921 --- /dev/null +++ b/src/software/pipeline/main_imageSegmentation.cpp @@ -0,0 +1,113 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2023 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +// Input and geometry +#include +#include + +// Image +#include +#include + +// System +#include +#include + +// Reading command line options +#include +#include +#include + +// IO +#include +#include + +// ONNXRuntime +#include + +// These constants define the current software version. +// They must be updated when the command line is changed. 
+#define ALICEVISION_SOFTWARE_VERSION_MAJOR 1 +#define ALICEVISION_SOFTWARE_VERSION_MINOR 0 + +using namespace aliceVision; + +namespace po = boost::program_options; + +int aliceVision_main(int argc, char** argv) +{ + std::string sfmDataFilepath; + + // Description of mandatory parameters + po::options_description requiredParams("Required parameters"); + requiredParams.add_options() + ("input,i", po::value(&sfmDataFilepath)->required(), "Input sfmData."); + + CmdLine cmdline( + "AliceVision imageSegmentation"); + cmdline.add(requiredParams); + if (!cmdline.execute(argc, argv)) + { + return EXIT_FAILURE; + } + + // load input scene + sfmData::SfMData sfmData; + if(!sfmDataIO::Load(sfmData, sfmDataFilepath, sfmDataIO::ESfMData(sfmDataIO::VIEWS))) + { + ALICEVISION_LOG_ERROR("The input file '" + sfmDataFilepath + "' cannot be read"); + return EXIT_FAILURE; + } + + + const OrtApi * ortObject = OrtGetApiBase()->GetApi(ORT_API_VERSION); + if (!ortObject) { + ALICEVISION_LOG_ERROR("ONNX runtime failed to initialize"); + return EXIT_FAILURE; + } + + OrtEnv * ortEnvironment; + ortObject->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "imageSegmentation", &ortEnvironment); + if (ortEnvironment == nullptr) + { + ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX environment"); + return EXIT_FAILURE; + } + + OrtSessionOptions * ortSessionOptions; + OrtStatus * ortStatus = ortObject->CreateSessionOptions(&ortSessionOptions); + + if (ortStatus != nullptr) + { + ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX session options"); + ortObject->ReleaseStatus(ortStatus); + return EXIT_FAILURE; + } + + OrtSession* ortSession; + ortStatus = ortObject->CreateSession(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions, &ortSession); + if (ortStatus != nullptr) + { + ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX session"); + ortObject->ReleaseStatus(ortStatus); + return EXIT_FAILURE; + } + + 
OrtMemoryInfo* ortMemoryInfo; + ortStatus = ortObject->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &ortMemoryInfo); + if (ortStatus != nullptr) + { + ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX Memory info"); + ortObject->ReleaseStatus(ortStatus); + return EXIT_FAILURE; + } + + ortObject->ReleaseSessionOptions(ortSessionOptions); + ortObject->ReleaseSession(ortSession); + ortObject->ReleaseEnv(ortEnvironment); + + return EXIT_SUCCESS; +} From 9c8dcbe4d933351c294336c8369b87319295df73 Mon Sep 17 00:00:00 2001 From: Fabien SERVANT Date: Tue, 13 Jun 2023 08:40:20 +0200 Subject: [PATCH 2/9] stash --- src/aliceVision/image/imageAlgo.cpp | 9 + src/aliceVision/image/imageAlgo.hpp | 15 ++ .../pipeline/main_imageSegmentation.cpp | 156 +++++++++++++----- 3 files changed, 141 insertions(+), 39 deletions(-) diff --git a/src/aliceVision/image/imageAlgo.cpp b/src/aliceVision/image/imageAlgo.cpp index 6123961baf..1d910c2bb9 100644 --- a/src/aliceVision/image/imageAlgo.cpp +++ b/src/aliceVision/image/imageAlgo.cpp @@ -344,6 +344,15 @@ void resizeImage(int downscale, const image::Image &inImage, inImage.data(), outImage.data(), filter, filterSize); } +void resizeImage(const int outWidth, const int outHeight, const image::Image &inImage, + image::Image &outImage, const std::string &filter, + float filterSize) +{ + outImage.resize(outWidth, outHeight); + resizeImage(oiio::TypeDesc::FLOAT, inImage.Width(), inImage.Height(), outWidth, outHeight, 3, + inImage.data(), outImage.data(), filter, filterSize); +} + void resizeImage(int downscale, image::Image& inoutImage, const std::string& filter, float filterSize) { diff --git a/src/aliceVision/image/imageAlgo.hpp b/src/aliceVision/image/imageAlgo.hpp index 0a11ae74e9..b5edafcc11 100644 --- a/src/aliceVision/image/imageAlgo.hpp +++ b/src/aliceVision/image/imageAlgo.hpp @@ -75,6 +75,21 @@ void resizeImage(int downscale, const image::Image& inImage, image::Image& outImage, const std::string& filter = "", float 
filterSize = 0); +/** + * @brief Resize a given image buffer. + * @param[in] newWidth The destination width + * @param[in] newHeight The destination height + * @param[in] inImage The input image buffer + * @param[out] outImage The output image buffer + * @param[in] filter The name of a high-quality filter to use when resampling + * Default is bilinear resampling + * See openImageIO documentation "ImageBufAlgo filtername" + * @param[in] filterSize The resize filter size + */ +void resizeImage(int newWidth, int newHeight, const image::Image& inImage, + image::Image& outImage, + const std::string& filter = "", float filterSize = 0); + /** * @brief Resize a given image buffer in place. * @param[in] downscale The resize downscale diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp index 2ffa591921..6d3464988d 100644 --- a/src/software/pipeline/main_imageSegmentation.cpp +++ b/src/software/pipeline/main_imageSegmentation.cpp @@ -37,14 +37,42 @@ using namespace aliceVision; namespace po = boost::program_options; +void imageToPlanes(std::vector & output, const image::Image & source) +{ + size_t planeSize = source.Width() * source.Height(); + + output.resize(planeSize * 3); + + float * planeR = output.data(); + float * planeG = planeR + planeSize; + float * planeB = planeG + planeSize; + + size_t pos = 0; + for (int i = 0; i < source.Height(); i++) + { + for (int j = 0; j < source.Width(); j++) + { + const image::RGBfColor & rgb = source(i, j); + planeR[pos] = rgb.r(); + planeG[pos] = rgb.g(); + planeB[pos] = rgb.b(); + + pos++; + } + } +} + + int aliceVision_main(int argc, char** argv) { std::string sfmDataFilepath; + std::string outputPath; // Description of mandatory parameters po::options_description requiredParams("Required parameters"); requiredParams.add_options() - ("input,i", po::value(&sfmDataFilepath)->required(), "Input sfmData."); + ("input,i", po::value(&sfmDataFilepath)->required(), "Input 
sfmData.") + ("output,o", po::value(&outputPath)->required(), "output folder."); CmdLine cmdline( "AliceVision imageSegmentation"); @@ -63,51 +91,101 @@ int aliceVision_main(int argc, char** argv) } - const OrtApi * ortObject = OrtGetApiBase()->GetApi(ORT_API_VERSION); - if (!ortObject) { - ALICEVISION_LOG_ERROR("ONNX runtime failed to initialize"); - return EXIT_FAILURE; - } + Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation"); + Ort::SessionOptions ortSessionOptions; + Ort::Session ortSession = Ort::Session(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions); - OrtEnv * ortEnvironment; - ortObject->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "imageSegmentation", &ortEnvironment); - if (ortEnvironment == nullptr) - { - ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX environment"); - return EXIT_FAILURE; - } + Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); - OrtSessionOptions * ortSessionOptions; - OrtStatus * ortStatus = ortObject->CreateSessionOptions(&ortSessionOptions); + std::vector inputNames{"input"}; + std::vector outputNames{"output"}; - if (ortStatus != nullptr) - { - ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX session options"); - ortObject->ReleaseStatus(ortStatus); - return EXIT_FAILURE; - } + std::vector inputDimensions = {1, 3, 720, 1280}; + std::vector outputDimensions = {1, 21, 720, 1280}; - OrtSession* ortSession; - ortStatus = ortObject->CreateSession(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions, &ortSession); - if (ortStatus != nullptr) + for (const auto & pv : sfmData.getViews()) { - ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX session"); - ortObject->ReleaseStatus(ortStatus); - return EXIT_FAILURE; - } - - OrtMemoryInfo* ortMemoryInfo; - ortStatus = 
ortObject->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &ortMemoryInfo); - if (ortStatus != nullptr) - { - ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX Memory info"); - ortObject->ReleaseStatus(ortStatus); - return EXIT_FAILURE; + std::string path = pv.second->getImagePath(); + + image::Image image; + image::readImage(path, image, image::EImageColorSpace::NO_CONVERSION); + + if (image.Height() > image.Width()) + { + + } + + + /*//Normalize + for (int i = 0; i < 720; i++) + { + for (int j = 0; j < 1280;j++) + { + image::RGBfColor value = image(i, j); + image(i, j).r() = (value.r() - 0.485) / 0.229; + image(i, j).g() = (value.g() - 0.456) / 0.224; + image(i, j).b() = (value.b() - 0.406) / 0.225; + } + } + + std::vector transformedInput; + imageToPlanes(transformedInput, image); + + std::vector output(21 * 720 * 1280); + + Ort::Value inputTensors = Ort::Value::CreateTensor( + mem_info, + transformedInput.data(), transformedInput.size(), + inputDimensions.data(), inputDimensions.size() + ); + + Ort::Value outputTensors = Ort::Value::CreateTensor( + mem_info, + output.data(), output.size(), + outputDimensions.data(), outputDimensions.size() + ); + + try + { + std::cout << "Before Running\n"; + ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1); + std::cout << "Done!" 
<< std::endl; + } + catch (const Ort::Exception& exception) + { + std::cout << "ERROR running model inference: " << exception.what() << std::endl; + exit(-1); + } + + + image::Image dest(1280, 720, true); + for (int i = 0; i < 720; i++) + { + for (int j = 0; j < 1280; j++) + { + int maxClasse = 0; + int maxVal = 0; + + for (int classe = 0; classe < 21; classe++) + { + int classPos = classe * 1280 * 720; + int pos = classPos + i * 1280 + j; + + float val = output[pos]; + if (val > maxVal) + { + maxVal = val; + maxClasse = classe; + } + } + + dest(i, j) = maxClasse / 21.0; + } + } + + image::writeImage("/s/prods/mvg/_source_global/users/servantf/toto.png", dest, image::ImageWriteOptions());*/ } - ortObject->ReleaseSessionOptions(ortSessionOptions); - ortObject->ReleaseSession(ortSession); - ortObject->ReleaseEnv(ortEnvironment); return EXIT_SUCCESS; } From d311a9eac502642018a735961da4e9d1bbe689ac Mon Sep 17 00:00:00 2001 From: Fabien SERVANT Date: Wed, 14 Jun 2023 10:40:17 +0200 Subject: [PATCH 3/9] Working semantic segmentation --- src/aliceVision/CMakeLists.txt | 5 + src/aliceVision/image/imageAlgo.cpp | 53 +++- src/aliceVision/image/imageAlgo.hpp | 15 ++ src/aliceVision/image/pixelTypes.hpp | 12 + src/aliceVision/segmentation/CMakeLists.txt | 24 ++ src/aliceVision/segmentation/segmentation.cpp | 235 ++++++++++++++++++ src/aliceVision/segmentation/segmentation.hpp | 86 +++++++ src/software/pipeline/CMakeLists.txt | 2 +- .../pipeline/main_imageSegmentation.cpp | 131 ++++------ 9 files changed, 464 insertions(+), 99 deletions(-) create mode 100644 src/aliceVision/segmentation/CMakeLists.txt create mode 100644 src/aliceVision/segmentation/segmentation.cpp create mode 100644 src/aliceVision/segmentation/segmentation.hpp diff --git a/src/aliceVision/CMakeLists.txt b/src/aliceVision/CMakeLists.txt index bc1e446315..ebb97aeebc 100644 --- a/src/aliceVision/CMakeLists.txt +++ b/src/aliceVision/CMakeLists.txt @@ -56,9 +56,14 @@ if(ALICEVISION_BUILD_MVS) 
if(ALICEVISION_HAVE_CUDA) add_subdirectory(depthMap) endif() + + if(ALICEVISION_HAVE_ONNX) + add_subdirectory(segmentation) + endif() endif() + if(ALICEVISION_BUILD_SFM AND ALICEVISION_BUILD_MVS) add_subdirectory(sfmMvsUtils) endif() diff --git a/src/aliceVision/image/imageAlgo.cpp b/src/aliceVision/image/imageAlgo.cpp index 1d910c2bb9..f6ca2eb929 100644 --- a/src/aliceVision/image/imageAlgo.cpp +++ b/src/aliceVision/image/imageAlgo.cpp @@ -344,15 +344,6 @@ void resizeImage(int downscale, const image::Image &inImage, inImage.data(), outImage.data(), filter, filterSize); } -void resizeImage(const int outWidth, const int outHeight, const image::Image &inImage, - image::Image &outImage, const std::string &filter, - float filterSize) -{ - outImage.resize(outWidth, outHeight); - resizeImage(oiio::TypeDesc::FLOAT, inImage.Width(), inImage.Height(), outWidth, outHeight, 3, - inImage.data(), outImage.data(), filter, filterSize); -} - void resizeImage(int downscale, image::Image& inoutImage, const std::string& filter, float filterSize) { @@ -432,6 +423,50 @@ void resizeImage(int downscale, image::Image& inoutImage, inoutImage.swap(rescaled); } +void resizeImage(const int outWidth, const int outHeight, const image::Image &inImage, + image::Image &outImage, const std::string &filter, + float filterSize) +{ + outImage.resize(outWidth, outHeight); + resizeImage(oiio::TypeDesc::FLOAT, inImage.Width(), inImage.Height(), outWidth, outHeight, 3, + inImage.data(), outImage.data(), filter, filterSize); +} + +void resizeImage(const int outWidth, const int outHeight, const image::Image &inImage, + image::Image &outImage, const std::string &filter, + float filterSize) +{ + outImage.resize(outWidth, outHeight); + resizeImage(oiio::TypeDesc::UINT32, inImage.Width(), inImage.Height(), outWidth, outHeight, 1, + inImage.data(), outImage.data(), filter, filterSize); +} + +template +void resampleImage(oiio::TypeDesc typeDesc, + int inWidth, + int inHeight, + int outWidth, + int outHeight, + 
int nchannels, + const T* inBuffer, + T* outBuffer, + bool interpolate) +{ + const oiio::ImageBuf inBuf(oiio::ImageSpec(inWidth, inHeight, nchannels, typeDesc), + const_cast(inBuffer)); + oiio::ImageBuf outBuf(oiio::ImageSpec(outWidth, outHeight, nchannels, typeDesc), outBuffer); + + oiio::ImageBufAlgo::resample(outBuf, inBuf, interpolate); +} + +void resampleImage(int outWidth, int outHeight, const image::Image& inImage, + image::Image& outImage, bool interpolate) +{ + outImage.resize(outWidth, outHeight); + resampleImage(oiio::TypeDesc::UINT32, inImage.Width(), inImage.Height(), outWidth, outHeight, 1, + inImage.data(), outImage.data(), interpolate); +} + template void convolveImage(oiio::TypeDesc typeDesc, int inWidth, diff --git a/src/aliceVision/image/imageAlgo.hpp b/src/aliceVision/image/imageAlgo.hpp index b5edafcc11..27783eea90 100644 --- a/src/aliceVision/image/imageAlgo.hpp +++ b/src/aliceVision/image/imageAlgo.hpp @@ -86,6 +86,9 @@ void resizeImage(int downscale, const image::Image& inImage, * See openImageIO documentation "ImageBufAlgo filtername" * @param[in] filterSize The resize filter size */ +void resizeImage(int newWidth, int newHeight, const image::Image& inImage, + image::Image& outImage, + const std::string& filter = "", float filterSize = 0); void resizeImage(int newWidth, int newHeight, const image::Image& inImage, image::Image& outImage, const std::string& filter = "", float filterSize = 0); @@ -112,6 +115,18 @@ void resizeImage(int downscale, image::Image& inoutImage, void resizeImage(int downscale, image::Image& inoutImage, const std::string& filter = "", float filterSize = 0); + +/** + * @brief resample a given image buffer. + * @param[in] newWidth The destination width + * @param[in] newHeight The destination height + * @param[in] inImage The input image buffer + * @param[out] outImage The output image buffer + * @param[in] interpolate use interpolation (bilinear) ? 
+ */ +void resampleImage(int newWidth, int newHeight, const image::Image& inImage, + image::Image& outImage, bool interpolate); + /** * @brief convolve a given image buffer * @param[in] inBuffer The input image buffer diff --git a/src/aliceVision/image/pixelTypes.hpp b/src/aliceVision/image/pixelTypes.hpp index fcb5170896..aeacf82ce1 100644 --- a/src/aliceVision/image/pixelTypes.hpp +++ b/src/aliceVision/image/pixelTypes.hpp @@ -169,6 +169,17 @@ namespace aliceVision return Rgb( ((*this)(0) + other(0)), ((*this)(1) + other(1)), ((*this)(2) + other(2))); } + /** + * @brief Elementwise multiplication + * @param other the other element to multiply + * @return Rgb color after multiply + * @note This does not modify the Rgb value (ie: only return a modified copy) + */ + inline Rgb operator *( const Rgb& other ) const + { + return Rgb( ((*this)(0) * other(0)), ((*this)(1) * other(1)), ((*this)(2) * other(2))); + } + /** * @brief scalar division @@ -183,6 +194,7 @@ namespace aliceVision T( ( Z )( ( *this )( 1 ) ) / val ), T( ( Z )( ( *this )( 2 ) ) / val ) ); } + /** * @brief scalar multiplication diff --git a/src/aliceVision/segmentation/CMakeLists.txt b/src/aliceVision/segmentation/CMakeLists.txt new file mode 100644 index 0000000000..10c39524e6 --- /dev/null +++ b/src/aliceVision/segmentation/CMakeLists.txt @@ -0,0 +1,24 @@ +# Headers +set(segmentation_files_headers + segmentation.hpp +) + +# Sources +set(segmentation_files_sources + segmentation.cpp +) + +alicevision_add_library(aliceVision_segmentation + SOURCES ${segmentation_files_headers} ${segmentation_files_sources} + PUBLIC_LINKS + aliceVision_system + aliceVision_numeric + aliceVision_image + ONNXRuntime::ONNXRuntime + #penImageIO::OpenImageIO +) + + + + + diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp new file mode 100644 index 0000000000..d0d87fca02 --- /dev/null +++ b/src/aliceVision/segmentation/segmentation.cpp @@ -0,0 +1,235 @@ +// This file is 
part of the AliceVision project. +// Copyright (c) 2023 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#include "segmentation.hpp" + +#include +#include +#include + +namespace aliceVision { +namespace segmentation { + +void imageToPlanes(std::vector & output, const image::Image::Base & source) +{ + size_t planeSize = source.rows() * source.cols(); + + output.resize(planeSize * 3); + + float * planeR = output.data(); + float * planeG = planeR + planeSize; + float * planeB = planeG + planeSize; + + size_t pos = 0; + for (int i = 0; i < source.rows(); i++) + { + for (int j = 0; j < source.cols(); j++) + { + const image::RGBfColor & rgb = source(i, j); + planeR[pos] = rgb.r(); + planeG[pos] = rgb.g(); + planeB[pos] = rgb.b(); + + pos++; + } + } +} + +bool Segmentation::processImage(image::Image &labels, const image::Image & source) +{ + //Todo : handle orientation and small images smaller than model input + + // Compute the optimal resized size such that at last one dimension fit the model + int resizedHeight = 0; + int resizedWidth = 0; + if (source.Height() < source.Width()) + { + resizedHeight = _modelHeight; + resizedWidth = double(source.Width()) * double(_modelHeight) / double(source.Height()); + } + else + { + resizedWidth = _modelWidth; + resizedHeight = double(source.Height()) * double(_modelWidth) / double(source.Width()); + } + + //Resize image + image::Image resized; + imageAlgo::resizeImage(resizedWidth, resizedHeight, source, resized); + + //Normalize image to fit model statistics + for (int i = 0; i < resizedHeight; i++) + { + for (int j = 0; j < resizedWidth;j++) + { + image::RGBfColor value = resized(i, j); + resized(i, j) = (value - _center) * _scale; + } + } + + image::Image resizedLabels; + if (!tiledProcess(resizedLabels, resized)) + { + return false; + } + + 
imageAlgo::resampleImage(source.Width(), source.Height(), resizedLabels, labels, false); + + return true; +} + +bool Segmentation::tiledProcess(image::Image & labels, const image::Image & source) +{ + //Compute the theorical tiles count + int cwidth = divideRoundUp(source.Width(), _modelWidth); + int cheight = divideRoundUp(source.Height(), _modelHeight); + + image::Image scoredLabels(source.Width(), source.Height(), true, {0, 0.0f}); + + //Loop over tiles + for (int i = 0; i < cheight; i++) + { + //Compute starting point with overlap on previous + int y = std::max(0, int(i * _modelHeight - _overlapRatio * _modelHeight)); + int ly = y + _modelHeight; + + //If we are on the end border, shift on the other side + int shifty = source.Height() - ly; + if (shifty < 0) + { + y = std::max(0, y + shifty); + } + + for (int j = 0; j < cwidth; j++) + { + //Compute starting point with overlap on previous + int x = std::max(0, int(j * _modelWidth - _overlapRatio * _modelWidth)); + int lx = x + _modelWidth; + + //If we are on the end border, shift on the other side + int shiftx = source.Width() - lx; + if (shiftx < 0) + { + x = std::max(0, x + shiftx); + } + + //x and y contains the position of the tile in the input image + auto & block = source.block(y, x, _modelHeight, _modelWidth); + + //Compute tile + image::Image tileLabels(_modelWidth, _modelHeight, true, {0, 0.0f}); + processTile(tileLabels, block); + + + //Update the global labeling + mergeLabels(scoredLabels, tileLabels, x, y); + } + } + + labels = scoredLabels.cast(); + + return true; +} + +bool Segmentation::mergeLabels(image::Image & labels, image::Image & tileLabels, int tileX, int tileY) +{ + for (int i = 0; i < tileLabels.Height(); i++) + { + int y = i + tileY; + for (int j = 0; j < tileLabels.Width(); j++) + { + int x = j + tileX; + + if (tileLabels(i, j).score > labels(y, x).score) + { + labels(y, x) = tileLabels(i, j); + } + } + } + + return true; +} + +bool Segmentation::labelsFromModelOutput(image::Image & 
labels, const std::vector & modelOutput) +{ + for (int outputY = 0; outputY < _modelHeight; outputY++) + { + for (int outputX = 0; outputX < _modelWidth; outputX++) + { + int maxClasse = 0; + int maxVal = 0; + + for (int classe = 0; classe < _classes.size(); classe++) + { + int classPos = classe * _modelWidth * _modelHeight; + int pos = classPos + outputY * _modelWidth + outputX; + + float val = modelOutput[pos]; + if (val > maxVal) + { + maxVal = val; + maxClasse = classe; + } + } + + labels(outputY, outputX) = {maxClasse, maxVal}; + } + } + + return true; +} + +bool Segmentation::processTile(image::Image & labels, const image::Image::Base & source) +{ + Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation"); + Ort::SessionOptions ortSessionOptions; + Ort::Session ortSession = Ort::Session(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions); + + Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); + + std::vector inputNames{"input"}; + std::vector outputNames{"output"}; + std::vector inputDimensions = {1, 3, _modelHeight, _modelWidth}; + std::vector outputDimensions = {1, _classes.size(), _modelHeight, _modelWidth}; + + std::vector output(_classes.size() * _modelHeight * _modelWidth); + Ort::Value outputTensors = Ort::Value::CreateTensor( + mem_info, + output.data(), output.size(), + outputDimensions.data(), outputDimensions.size() + ); + + std::vector transformedInput; + imageToPlanes(transformedInput, source); + + Ort::Value inputTensors = Ort::Value::CreateTensor( + mem_info, + transformedInput.data(), transformedInput.size(), + inputDimensions.data(), inputDimensions.size() + ); + + try + { + ALICEVISION_LOG_INFO("test"); + ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1); + ALICEVISION_LOG_INFO("test2"); + } + catch (const 
Ort::Exception& exception) + { + std::cout << "ERROR running model inference: " << exception.what() << std::endl; + return false; + } + + if (!labelsFromModelOutput(labels, output)) + { + return false; + } + + return true; +} + +} //aliceVision +} //segmentation \ No newline at end of file diff --git a/src/aliceVision/segmentation/segmentation.hpp b/src/aliceVision/segmentation/segmentation.hpp new file mode 100644 index 0000000000..553894dbe4 --- /dev/null +++ b/src/aliceVision/segmentation/segmentation.hpp @@ -0,0 +1,86 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2023 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#include +#include + +#include +#include + +// ONNXRuntime +#include + +namespace aliceVision { +namespace segmentation { + +struct ScoredLabel +{ + IndexT label; + float score; + + operator IndexT() const { return label; } +}; + +class Segmentation +{ +public: + const std::vector & getClasses() + { + return _classes; + } + +public: + /** + * Process an input image to estimate segmentation + * @param labels the labels image resulting from the process + * @param source is the input image to process + */ + bool processImage(image::Image &labels, const image::Image & source); + +private: + /** + * Assume the source image is the correct size + * @param labels the output label image + * @param source the input image to process + */ + bool tiledProcess(image::Image &labels, const image::Image & source); + + /** + * Transform model output to a label image + * @param labels the output labels imaage + * @param modeloutput the model output vector + */ + bool labelsFromModelOutput(image::Image & labels, const std::vector & modelOutput); + + /** + * Process effectively a buffer of the model input size + * param labels the output labels + * @param source 
the source tile + */ + bool processTile(image::Image & labels, const image::Image::Base & source); + + /** + * Merge tile labels with global labels image + * @param labels the global labels image + * @param tileLabels the local tile labels image + * @param tileX the position of the tile in the global image + * @param tileY the position of the tile in the global image + */ + bool mergeLabels(image::Image & labels, image::Image & tileLabels, int tileX, int tileY); + +protected: + std::vector _classes = {"__background__", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", + "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", + "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + image::RGBfColor _center= {0.485, 0.456, 0.406}; + image::RGBfColor _scale= {1.0 / 0.229, 1.0 / 0.224, 1.0 / 0.225}; + int _modelWidth = 1280; + int _modelHeight = 720; + double _overlapRatio = 0.3; +}; + +} //aliceVision +} //segmentation \ No newline at end of file diff --git a/src/software/pipeline/CMakeLists.txt b/src/software/pipeline/CMakeLists.txt index cd1dbe9712..e43a4f5073 100644 --- a/src/software/pipeline/CMakeLists.txt +++ b/src/software/pipeline/CMakeLists.txt @@ -631,6 +631,6 @@ if(ALICEVISION_HAVE_ONNX) aliceVision_system aliceVision_sfmData aliceVision_sfmDataIO - ONNXRuntime::ONNXRuntime + aliceVision_segmentation ) endif() diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp index 6d3464988d..9ff85630ac 100644 --- a/src/software/pipeline/main_imageSegmentation.cpp +++ b/src/software/pipeline/main_imageSegmentation.cpp @@ -24,9 +24,10 @@ // IO #include #include +#include -// ONNXRuntime -#include + +#include // These constants define the current software version. // They must be updated when the command line is changed. 
@@ -37,9 +38,9 @@ using namespace aliceVision; namespace po = boost::program_options; -void imageToPlanes(std::vector & output, const image::Image & source) +void imageToPlanes(std::vector & output, const image::Image::Base & source) { - size_t planeSize = source.Width() * source.Height(); + size_t planeSize = source.rows() * source.cols(); output.resize(planeSize * 3); @@ -48,9 +49,9 @@ void imageToPlanes(std::vector & output, const image::Image & output, const image::Image validClasses = {"person"}; // Description of mandatory parameters po::options_description requiredParams("Required parameters"); @@ -74,9 +77,14 @@ int aliceVision_main(int argc, char** argv) ("input,i", po::value(&sfmDataFilepath)->required(), "Input sfmData.") ("output,o", po::value(&outputPath)->required(), "output folder."); - CmdLine cmdline( - "AliceVision imageSegmentation"); + po::options_description optionalParams("Optional parameters"); + optionalParams.add_options() + ("validClasses,c", po::value>(&validClasses)->multitoken(), + "Names of classes which are to be considered"); + + CmdLine cmdline("AliceVision imageSegmentation"); cmdline.add(requiredParams); + cmdline.add(optionalParams); if (!cmdline.execute(argc, argv)) { return EXIT_FAILURE; @@ -90,102 +98,47 @@ int aliceVision_main(int argc, char** argv) return EXIT_FAILURE; } - - Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation"); - Ort::SessionOptions ortSessionOptions; - Ort::Session ortSession = Ort::Session(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions); - - Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); - - std::vector inputNames{"input"}; - std::vector outputNames{"output"}; + aliceVision::segmentation::Segmentation seg; + const auto & classes = seg.getClasses(); - std::vector inputDimensions = {1, 3, 720, 1280}; - std::vector 
outputDimensions = {1, 21, 720, 1280}; - - for (const auto & pv : sfmData.getViews()) + std::set validClassesIndices; + for (const auto & s : validClasses) { - std::string path = pv.second->getImagePath(); - - image::Image image; - image::readImage(path, image, image::EImageColorSpace::NO_CONVERSION); + std::string classInput = boost::to_lower_copy(s); + boost::trim(classInput); - if (image.Height() > image.Width()) + for (int idc = 0; idc < classes.size(); idc++) { + std::string classCompare = boost::to_lower_copy(classes[idc]); + boost::trim(classCompare); - } - - - /*//Normalize - for (int i = 0; i < 720; i++) - { - for (int j = 0; j < 1280;j++) + if (classCompare.compare(classInput) == 0) { - image::RGBfColor value = image(i, j); - image(i, j).r() = (value.r() - 0.485) / 0.229; - image(i, j).g() = (value.g() - 0.456) / 0.224; - image(i, j).b() = (value.b() - 0.406) / 0.225; + validClassesIndices.insert(idc); + break; } } - - std::vector transformedInput; - imageToPlanes(transformedInput, image); - - std::vector output(21 * 720 * 1280); - - Ort::Value inputTensors = Ort::Value::CreateTensor( - mem_info, - transformedInput.data(), transformedInput.size(), - inputDimensions.data(), inputDimensions.size() - ); - - Ort::Value outputTensors = Ort::Value::CreateTensor( - mem_info, - output.data(), output.size(), - outputDimensions.data(), outputDimensions.size() - ); + } - try - { - std::cout << "Before Running\n"; - ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1); - std::cout << "Done!" 
<< std::endl; - } - catch (const Ort::Exception& exception) - { - std::cout << "ERROR running model inference: " << exception.what() << std::endl; - exit(-1); - } + for (const auto & pv : sfmData.getViews()) + { + std::string path = pv.second->getImagePath(); + image::Image image; + image::readImage(path, image, image::EImageColorSpace::NO_CONVERSION); - image::Image dest(1280, 720, true); - for (int i = 0; i < 720; i++) + image::Image labels; + if (!seg.processImage(labels, image)) { - for (int j = 0; j < 1280; j++) - { - int maxClasse = 0; - int maxVal = 0; - - for (int classe = 0; classe < 21; classe++) - { - int classPos = classe * 1280 * 720; - int pos = classPos + i * 1280 + j; - - float val = output[pos]; - if (val > maxVal) - { - maxVal = val; - maxClasse = classe; - } - } - - dest(i, j) = maxClasse / 21.0; - } + ALICEVISION_LOG_INFO("Failed to segment image " << path); } - image::writeImage("/s/prods/mvg/_source_global/users/servantf/toto.png", dest, image::ImageWriteOptions());*/ + //Store image + std::stringstream ss; + ss << outputPath << "/" << pv.first << ".exr"; + image::writeImage(ss.str(), labels, image::ImageWriteOptions()); } - + return EXIT_SUCCESS; } From 4753e3668a9d905ee81e08fb25fede5e760e3914 Mon Sep 17 00:00:00 2001 From: Fabien SERVANT Date: Tue, 20 Jun 2023 14:48:04 +0200 Subject: [PATCH 4/9] add model path to parameters --- src/aliceVision/segmentation/segmentation.cpp | 46 +++++++++---------- src/aliceVision/segmentation/segmentation.hpp | 27 +++++++---- .../pipeline/main_imageSegmentation.cpp | 38 +++++++++++++-- 3 files changed, 74 insertions(+), 37 deletions(-) diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp index d0d87fca02..c43b0e40e0 100644 --- a/src/aliceVision/segmentation/segmentation.cpp +++ b/src/aliceVision/segmentation/segmentation.cpp @@ -47,13 +47,13 @@ bool Segmentation::processImage(image::Image &labels, const image::Image int resizedWidth = 0; if 
(source.Height() < source.Width()) { - resizedHeight = _modelHeight; - resizedWidth = double(source.Width()) * double(_modelHeight) / double(source.Height()); + resizedHeight = _parameters.modelHeight; + resizedWidth = double(source.Width()) * double(_parameters.modelHeight) / double(source.Height()); } else { - resizedWidth = _modelWidth; - resizedHeight = double(source.Height()) * double(_modelWidth) / double(source.Width()); + resizedWidth = _parameters.modelWidth; + resizedHeight = double(source.Height()) * double(_parameters.modelWidth) / double(source.Width()); } //Resize image @@ -66,7 +66,7 @@ bool Segmentation::processImage(image::Image &labels, const image::Image for (int j = 0; j < resizedWidth;j++) { image::RGBfColor value = resized(i, j); - resized(i, j) = (value - _center) * _scale; + resized(i, j) = (value - _parameters.center) * _parameters.scale; } } @@ -84,8 +84,8 @@ bool Segmentation::processImage(image::Image &labels, const image::Image bool Segmentation::tiledProcess(image::Image & labels, const image::Image & source) { //Compute the theorical tiles count - int cwidth = divideRoundUp(source.Width(), _modelWidth); - int cheight = divideRoundUp(source.Height(), _modelHeight); + int cwidth = divideRoundUp(source.Width(), _parameters.modelWidth); + int cheight = divideRoundUp(source.Height(), _parameters.modelHeight); image::Image scoredLabels(source.Width(), source.Height(), true, {0, 0.0f}); @@ -93,8 +93,8 @@ bool Segmentation::tiledProcess(image::Image & labels, const image::Imag for (int i = 0; i < cheight; i++) { //Compute starting point with overlap on previous - int y = std::max(0, int(i * _modelHeight - _overlapRatio * _modelHeight)); - int ly = y + _modelHeight; + int y = std::max(0, int(i * _parameters.modelHeight - _parameters.overlapRatio * _parameters.modelHeight)); + int ly = y + _parameters.modelHeight; //If we are on the end border, shift on the other side int shifty = source.Height() - ly; @@ -106,8 +106,8 @@ bool 
Segmentation::tiledProcess(image::Image & labels, const image::Imag for (int j = 0; j < cwidth; j++) { //Compute starting point with overlap on previous - int x = std::max(0, int(j * _modelWidth - _overlapRatio * _modelWidth)); - int lx = x + _modelWidth; + int x = std::max(0, int(j * _parameters.modelWidth - _parameters.overlapRatio * _parameters.modelWidth)); + int lx = x + _parameters.modelWidth; //If we are on the end border, shift on the other side int shiftx = source.Width() - lx; @@ -117,10 +117,10 @@ bool Segmentation::tiledProcess(image::Image & labels, const image::Imag } //x and y contains the position of the tile in the input image - auto & block = source.block(y, x, _modelHeight, _modelWidth); + auto & block = source.block(y, x, _parameters.modelHeight, _parameters.modelWidth); //Compute tile - image::Image tileLabels(_modelWidth, _modelHeight, true, {0, 0.0f}); + image::Image tileLabels(_parameters.modelWidth, _parameters.modelHeight, true, {0, 0.0f}); processTile(tileLabels, block); @@ -155,17 +155,17 @@ bool Segmentation::mergeLabels(image::Image & labels, image::Image< bool Segmentation::labelsFromModelOutput(image::Image & labels, const std::vector & modelOutput) { - for (int outputY = 0; outputY < _modelHeight; outputY++) + for (int outputY = 0; outputY < _parameters.modelHeight; outputY++) { - for (int outputX = 0; outputX < _modelWidth; outputX++) + for (int outputX = 0; outputX < _parameters.modelWidth; outputX++) { int maxClasse = 0; int maxVal = 0; - for (int classe = 0; classe < _classes.size(); classe++) + for (int classe = 0; classe < _parameters.classes.size(); classe++) { - int classPos = classe * _modelWidth * _modelHeight; - int pos = classPos + outputY * _modelWidth + outputX; + int classPos = classe * _parameters.modelWidth * _parameters.modelHeight; + int pos = classPos + outputY * _parameters.modelWidth + outputX; float val = modelOutput[pos]; if (val > maxVal) @@ -186,16 +186,16 @@ bool Segmentation::processTile(image::Image & 
labels, const image:: { Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation"); Ort::SessionOptions ortSessionOptions; - Ort::Session ortSession = Ort::Session(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions); + Ort::Session ortSession = Ort::Session(ortEnvironment, _parameters.modelWeights.c_str(), ortSessionOptions); Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); std::vector inputNames{"input"}; std::vector outputNames{"output"}; - std::vector inputDimensions = {1, 3, _modelHeight, _modelWidth}; - std::vector outputDimensions = {1, _classes.size(), _modelHeight, _modelWidth}; + std::vector inputDimensions = {1, 3, _parameters.modelHeight, _parameters.modelWidth}; + std::vector outputDimensions = {1, _parameters.classes.size(), _parameters.modelHeight, _parameters.modelWidth}; - std::vector output(_classes.size() * _modelHeight * _modelWidth); + std::vector output(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); Ort::Value outputTensors = Ort::Value::CreateTensor( mem_info, output.data(), output.size(), @@ -213,9 +213,7 @@ bool Segmentation::processTile(image::Image & labels, const image:: try { - ALICEVISION_LOG_INFO("test"); ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1); - ALICEVISION_LOG_INFO("test2"); } catch (const Ort::Exception& exception) { diff --git a/src/aliceVision/segmentation/segmentation.hpp b/src/aliceVision/segmentation/segmentation.hpp index 553894dbe4..15ee50246f 100644 --- a/src/aliceVision/segmentation/segmentation.hpp +++ b/src/aliceVision/segmentation/segmentation.hpp @@ -29,10 +29,26 @@ class Segmentation public: const std::vector & getClasses() { - return _classes; + return _parameters.classes; } + struct Parameters + { + std::string modelWeights; + std::vector 
classes; + image::RGBfColor center; + image::RGBfColor scale; + int modelWidth; + int modelHeight; + double overlapRatio; + }; + public: + Segmentation(const Parameters & parameters) : _parameters(parameters) + { + + } + /** * Process an input image to estimate segmentation * @param labels the labels image resulting from the process @@ -72,14 +88,7 @@ class Segmentation bool mergeLabels(image::Image & labels, image::Image & tileLabels, int tileX, int tileY); protected: - std::vector _classes = {"__background__", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", - "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", - "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; - image::RGBfColor _center= {0.485, 0.456, 0.406}; - image::RGBfColor _scale= {1.0 / 0.229, 1.0 / 0.224, 1.0 / 0.225}; - int _modelWidth = 1280; - int _modelHeight = 720; - double _overlapRatio = 0.3; + Parameters _parameters; }; } //aliceVision diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp index 9ff85630ac..0fdec2bdd9 100644 --- a/src/software/pipeline/main_imageSegmentation.cpp +++ b/src/software/pipeline/main_imageSegmentation.cpp @@ -63,18 +63,30 @@ void imageToPlanes(std::vector & output, const image::Image & mask, const image::Image & labels, const std::set & validClasses) +{ + for (int i = 0; i < mask.Height(); i++) + { + for (int j = 0; j < mask.Width(); j++) + { + IndexT label = labels(i, j); + mask(i, j) = (validClasses.find(label) != validClasses.end())?255:0; + } + } +} int aliceVision_main(int argc, char** argv) { std::string sfmDataFilepath; std::string outputPath; - std::vector validClasses = {"person"}; + std::string modelWeightsPath; + std::vector validClasses; // Description of mandatory parameters po::options_description requiredParams("Required parameters"); requiredParams.add_options() ("input,i", po::value(&sfmDataFilepath)->required(), "Input sfmData.") + ("modelPath,m", 
po::value(&modelWeightsPath)->required(), "Input Model weights file.") ("output,o", po::value(&outputPath)->required(), "output folder."); po::options_description optionalParams("Optional parameters"); @@ -98,9 +110,23 @@ int aliceVision_main(int argc, char** argv) return EXIT_FAILURE; } - aliceVision::segmentation::Segmentation seg; + aliceVision::segmentation::Segmentation::Parameters parameters; + + parameters.modelWeights = modelWeightsPath; + parameters.classes = {"__background__", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", + "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", + "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + parameters.center= {0.485, 0.456, 0.406}; + parameters.scale= {1.0 / 0.229, 1.0 / 0.224, 1.0 / 0.225}; + parameters.modelWidth = 1280; + parameters.modelHeight = 720; + parameters.overlapRatio = 0.3; + + aliceVision::segmentation::Segmentation seg(parameters); const auto & classes = seg.getClasses(); + + //Compute the set of valid classes given parameters std::set validClassesIndices; for (const auto & s : validClasses) { @@ -133,10 +159,14 @@ int aliceVision_main(int argc, char** argv) ALICEVISION_LOG_INFO("Failed to segment image " << path); } + + image::Image mask(labels.Width(), labels.Height()); + labelsToMask(mask, labels, validClassesIndices); + //Store image std::stringstream ss; ss << outputPath << "/" << pv.first << ".exr"; - image::writeImage(ss.str(), labels, image::ImageWriteOptions()); + image::writeImage(ss.str(), mask, image::ImageWriteOptions()); } From 6da307614d2a1dfe62f34df77068d97e549d712f Mon Sep 17 00:00:00 2001 From: Fabien SERVANT Date: Wed, 21 Jun 2023 12:05:23 +0200 Subject: [PATCH 5/9] Adding cuda support to segmentation --- src/aliceVision/segmentation/CMakeLists.txt | 12 +- src/aliceVision/segmentation/segmentation.cpp | 115 ++++++++++++++++-- src/aliceVision/segmentation/segmentation.hpp | 38 ++++++ .../pipeline/main_imageSegmentation.cpp | 4 +- 4 files 
changed, 158 insertions(+), 11 deletions(-) diff --git a/src/aliceVision/segmentation/CMakeLists.txt b/src/aliceVision/segmentation/CMakeLists.txt index 10c39524e6..3c37b41926 100644 --- a/src/aliceVision/segmentation/CMakeLists.txt +++ b/src/aliceVision/segmentation/CMakeLists.txt @@ -8,6 +8,13 @@ set(segmentation_files_sources segmentation.cpp ) +set(SEGMENTATION_PRIVATE_LINKS "") +set(SEGMENTATION_PRIVATE_INCLUDE_DIRS "") +if(ALICEVISION_HAVE_CUDA) + set(SEGMENTATION_PRIVATE_LINKS ${CUDA_LIBRARIES}) + set(SEGMENTATION_PRIVATE_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) +endif() + alicevision_add_library(aliceVision_segmentation SOURCES ${segmentation_files_headers} ${segmentation_files_sources} PUBLIC_LINKS @@ -15,7 +22,10 @@ alicevision_add_library(aliceVision_segmentation aliceVision_numeric aliceVision_image ONNXRuntime::ONNXRuntime - #penImageIO::OpenImageIO + PRIVATE_LINKS + ${SEGMENTATION_PRIVATE_LINKS} + PRIVATE_INCLUDE_DIRS + ${SEGMENTATION_PRIVATE_INCLUDE_DIRS} ) diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp index c43b0e40e0..3862cd70ac 100644 --- a/src/aliceVision/segmentation/segmentation.cpp +++ b/src/aliceVision/segmentation/segmentation.cpp @@ -6,6 +6,10 @@ #include "segmentation.hpp" +#if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA) +#include +#endif + #include #include #include @@ -13,6 +17,7 @@ namespace aliceVision { namespace segmentation { + void imageToPlanes(std::vector & output, const image::Image::Base & source) { size_t planeSize = source.rows() * source.cols(); @@ -38,6 +43,47 @@ void imageToPlanes(std::vector & output, const image::Image(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation"); + + Ort::SessionOptions ortSessionOptions; + + #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA) + OrtCUDAProviderOptionsV2* cuda_options = nullptr; + api.CreateCUDAProviderOptions(&cuda_options); + api.SessionOptionsAppendExecutionProvider_CUDA_V2(static_cast(ortSessionOptions), 
cuda_options); + api.ReleaseCUDAProviderOptions(cuda_options); + + _ortSession = std::make_unique(*_ortEnvironment, _parameters.modelWeights.c_str(), ortSessionOptions); + + Ort::MemoryInfo memInfoCuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault); + Ort::Allocator cudaAllocator(*_ortSession, memInfoCuda); + + _output.resize(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); + _cudaInput = cudaAllocator.Alloc(_output.size() * sizeof(float)); + _cudaOutput = cudaAllocator.Alloc(_output.size() * sizeof(float)); + #else + _ortSession = std::make_unique(*_ortEnvironment, _parameters.modelWeights.c_str(), ortSessionOptions); + #endif + + return true; +} + +bool Segmentation::terminate() +{ + #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA) + Ort::MemoryInfo mem_info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault); + Ort::Allocator cudaAllocator(*_ortSession, mem_info_cuda); + cudaAllocator.Free(_cudaInput); + cudaAllocator.Free(_cudaOutput); + #endif + + return true; +} + bool Segmentation::processImage(image::Image &labels, const image::Image & source) { //Todo : handle orientation and small images smaller than model input @@ -121,7 +167,12 @@ bool Segmentation::tiledProcess(image::Image & labels, const image::Imag //Compute tile image::Image tileLabels(_parameters.modelWidth, _parameters.modelHeight, true, {0, 0.0f}); + + #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA) + processTileGPU(tileLabels, block); + #else processTile(tileLabels, block); + #endif //Update the global labeling @@ -184,11 +235,7 @@ bool Segmentation::labelsFromModelOutput(image::Image & labels, con bool Segmentation::processTile(image::Image & labels, const image::Image::Base & source) { - Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation"); - Ort::SessionOptions ortSessionOptions; - Ort::Session ortSession = Ort::Session(ortEnvironment,
_parameters.modelWeights.c_str(), ortSessionOptions); - - Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); + Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); std::vector inputNames{"input"}; std::vector outputNames{"output"}; @@ -197,7 +244,7 @@ bool Segmentation::processTile(image::Image & labels, const image:: std::vector output(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); Ort::Value outputTensors = Ort::Value::CreateTensor( - mem_info, + memInfo, output.data(), output.size(), outputDimensions.data(), outputDimensions.size() ); @@ -206,18 +253,18 @@ bool Segmentation::processTile(image::Image & labels, const image:: imageToPlanes(transformedInput, source); Ort::Value inputTensors = Ort::Value::CreateTensor( - mem_info, + memInfo, transformedInput.data(), transformedInput.size(), inputDimensions.data(), inputDimensions.size() ); try { - ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1); + _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1); } catch (const Ort::Exception& exception) { - std::cout << "ERROR running model inference: " << exception.what() << std::endl; + ALICEVISION_LOG_ERROR("ERROR running model inference: " << exception.what()); return false; } @@ -229,5 +276,55 @@ bool Segmentation::processTile(image::Image & labels, const image:: return true; } +#if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA) +bool Segmentation::processTileGPU(image::Image & labels, const image::Image::Base & source) +{ + Ort::MemoryInfo mem_info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault); + Ort::Allocator cudaAllocator(*_ortSession, mem_info_cuda); + + std::vector inputNames{"input"}; + std::vector outputNames{"output"}; + 
std::vector inputDimensions = {1, 3, _parameters.modelHeight, _parameters.modelWidth}; + std::vector outputDimensions = {1, _parameters.classes.size(), _parameters.modelHeight, _parameters.modelWidth}; + + + Ort::Value outputTensors = Ort::Value::CreateTensor( + mem_info_cuda, + reinterpret_cast(_cudaOutput), _output.size(), + outputDimensions.data(), outputDimensions.size() + ); + + std::vector transformedInput; + imageToPlanes(transformedInput, source); + + cudaMemcpy(_cudaInput, transformedInput.data(), sizeof(float) * transformedInput.size(), cudaMemcpyHostToDevice); + + Ort::Value inputTensors = Ort::Value::CreateTensor( + mem_info_cuda, + reinterpret_cast(_cudaInput), transformedInput.size(), + inputDimensions.data(), inputDimensions.size() + ); + + try + { + _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1); + } + catch (const Ort::Exception& exception) + { + ALICEVISION_LOG_ERROR("ERROR running model inference: " << exception.what()); + return false; + } + + cudaMemcpy(_output.data(), _cudaOutput, sizeof(float) * _output.size(), cudaMemcpyDeviceToHost); + + if (!labelsFromModelOutput(labels, _output)) + { + return false; + } + + return true; +} +#endif + } //aliceVision } //segmentation \ No newline at end of file diff --git a/src/aliceVision/segmentation/segmentation.hpp b/src/aliceVision/segmentation/segmentation.hpp index 15ee50246f..9e99955003 100644 --- a/src/aliceVision/segmentation/segmentation.hpp +++ b/src/aliceVision/segmentation/segmentation.hpp @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -46,7 +47,15 @@ class Segmentation public: Segmentation(const Parameters & parameters) : _parameters(parameters) { + if (!initialize()) + { + throw std::runtime_error("Error on segmentation initialization"); + } + } + virtual ~Segmentation() + { + terminate(); } /** @@ -57,6 +66,17 @@ class Segmentation bool processImage(image::Image &labels, const image::Image & source); 
private: + + /** + * Onnx creation code + */ + bool initialize(); + + /** + * Onnx destruction code + */ + bool terminate(); + /** * Assume the source image is the correct size * @param labels the output label image @@ -78,6 +98,15 @@ class Segmentation */ bool processTile(image::Image & labels, const image::Image::Base & source); + /** + * Process effectively a buffer of the model input size on the GPU + * @param labels the output labels + * @param source the source tile + */ + #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA) + bool processTileGPU(image::Image & labels, const image::Image::Base & source); + #endif + /** * Merge tile labels with global labels image * @param labels the global labels image @@ -89,6 +118,15 @@ class Segmentation protected: Parameters _parameters; + std::unique_ptr _ortEnvironment; + std::unique_ptr _ortSession; + + std::vector _output; + + #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA) + void * _cudaOutput; + void * _cudaInput; + #endif }; } //aliceVision diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp index 0fdec2bdd9..9be1612d7c 100644 --- a/src/software/pipeline/main_imageSegmentation.cpp +++ b/src/software/pipeline/main_imageSegmentation.cpp @@ -123,9 +123,10 @@ int aliceVision_main(int argc, char** argv) parameters.overlapRatio = 0.3; aliceVision::segmentation::Segmentation seg(parameters); - const auto & classes = seg.getClasses(); + const auto & classes = seg.getClasses(); + //Compute the set of valid classes given parameters std::set validClassesIndices; for (const auto & s : validClasses) @@ -149,6 +150,7 @@ int aliceVision_main(int argc, char** argv) for (const auto & pv : sfmData.getViews()) { std::string path = pv.second->getImagePath(); + ALICEVISION_LOG_INFO("processing " << path); image::Image image; image::readImage(path, image, image::EImageColorSpace::NO_CONVERSION); From f05a70accc0c29383b0bdb45b0d8494f1ade33d0 Mon Sep 17 00:00:00 2001 From:
=?UTF-8?q?Lo=C3=AFc=20Vital?= Date: Wed, 21 Jun 2023 17:53:54 +0200 Subject: [PATCH 6/9] [image] add missing copyright --- src/aliceVision/image/imageAlgo.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/aliceVision/image/imageAlgo.cpp b/src/aliceVision/image/imageAlgo.cpp index f6ca2eb929..2bdc5ef62a 100644 --- a/src/aliceVision/image/imageAlgo.cpp +++ b/src/aliceVision/image/imageAlgo.cpp @@ -1,3 +1,9 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2019 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + #include "imageAlgo.hpp" #include From b919f217a5ae2faca7339437b566118f7aa2d158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Vital?= Date: Wed, 21 Jun 2023 17:55:43 +0200 Subject: [PATCH 7/9] [software] imageSegmentation: minor cosmetic changes --- src/software/pipeline/CMakeLists.txt | 2 +- src/software/pipeline/main_imageSegmentation.cpp | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/software/pipeline/CMakeLists.txt b/src/software/pipeline/CMakeLists.txt index e43a4f5073..4e8cf0012a 100644 --- a/src/software/pipeline/CMakeLists.txt +++ b/src/software/pipeline/CMakeLists.txt @@ -622,7 +622,7 @@ if(ALICEVISION_HAVE_OPENCV AND ALICEVISION_HAVE_ONNX) endif() if(ALICEVISION_HAVE_ONNX) - # SphereDetection + # Image Segmentation alicevision_add_software(aliceVision_imageSegmentation SOURCE main_imageSegmentation.cpp FOLDER ${FOLDER_SOFTWARE_PIPELINE} diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp index 9be1612d7c..85e1e5cf4a 100644 --- a/src/software/pipeline/main_imageSegmentation.cpp +++ b/src/software/pipeline/main_imageSegmentation.cpp @@ -70,7 +70,7 @@ void labelsToMask(image::Image & mask, const image::Image for (int j = 0; j < 
mask.Width(); j++) { IndexT label = labels(i, j); - mask(i, j) = (validClasses.find(label) != validClasses.end())?255:0; + mask(i, j) = (validClasses.find(label) != validClasses.end()) ? 255 : 0; } } } @@ -113,9 +113,17 @@ int aliceVision_main(int argc, char** argv) aliceVision::segmentation::Segmentation::Parameters parameters; parameters.modelWeights = modelWeightsPath; - parameters.classes = {"__background__", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", - "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", - "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + parameters.classes = { + "__background__", + "aeroplane", + "bicycle", "bird", "boat", "bottle", "bus", + "car", "cat", "chair", "cow", + "diningtable", "dog", + "horse", + "motorbike", + "person", "pottedplant", + "sheep", "sofa", + "train", "tvmonitor"}; parameters.center= {0.485, 0.456, 0.406}; parameters.scale= {1.0 / 0.229, 1.0 / 0.224, 1.0 / 0.225}; parameters.modelWidth = 1280; From 6059eb506dd1f34be750adcefc5419db81910239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Vital?= Date: Wed, 21 Jun 2023 18:11:12 +0200 Subject: [PATCH 8/9] [software] featureExtraction: add maskExtension and maskInvert params --- src/aliceVision/feature/FeatureExtractor.cpp | 6 +++--- src/aliceVision/feature/FeatureExtractor.hpp | 6 +++++- src/software/pipeline/main_featureExtraction.cpp | 10 ++++++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/aliceVision/feature/FeatureExtractor.cpp b/src/aliceVision/feature/FeatureExtractor.cpp index a54288f0a4..6de3607ec9 100644 --- a/src/aliceVision/feature/FeatureExtractor.cpp +++ b/src/aliceVision/feature/FeatureExtractor.cpp @@ -182,9 +182,9 @@ void FeatureExtractor::computeViewJob(const FeatureExtractorViewJob& job, bool u { const auto masksFolder = fs::path(_masksFolder); const auto idMaskPath = masksFolder / - fs::path(std::to_string(job.view().getViewId())).replace_extension("png"); + 
fs::path(std::to_string(job.view().getViewId())).replace_extension(_maskExtension); const auto nameMaskPath = masksFolder / - fs::path(job.view().getImagePath()).filename().replace_extension("png"); + fs::path(job.view().getImagePath()).filename().replace_extension(_maskExtension); if (fs::exists(idMaskPath)) { @@ -233,7 +233,7 @@ void FeatureExtractor::computeViewJob(const FeatureExtractorViewJob& job, bool u bool masked = false; if (x < mask.Width() && y < mask.Height()) { - if (mask(y, x) == 0) + if ((mask(y, x) == 0 && !_maskInvert) || (mask(y, x) != 0 && _maskInvert)) { masked = true; } diff --git a/src/aliceVision/feature/FeatureExtractor.hpp b/src/aliceVision/feature/FeatureExtractor.hpp index 182c80e9dc..c124ad8bcc 100644 --- a/src/aliceVision/feature/FeatureExtractor.hpp +++ b/src/aliceVision/feature/FeatureExtractor.hpp @@ -79,9 +79,11 @@ class FeatureExtractor _rangeSize = rangeSize; } - void setMasksFolder(const std::string& folder) + void setMasksFolder(const std::string& folder, const std::string& ext = "png", bool invert = false) { _masksFolder = folder; + _maskExtension = ext; + _maskInvert = invert; } void setOutputFolder(const std::string& folder) @@ -103,6 +105,8 @@ class FeatureExtractor const sfmData::SfMData& _sfmData; std::vector> _imageDescribers; std::string _masksFolder; + std::string _maskExtension = "png"; + bool _maskInvert = false; std::string _outputFolder; int _rangeStart = -1; int _rangeSize = -1; diff --git a/src/software/pipeline/main_featureExtraction.cpp b/src/software/pipeline/main_featureExtraction.cpp index b06cdd7274..662817d7cd 100644 --- a/src/software/pipeline/main_featureExtraction.cpp +++ b/src/software/pipeline/main_featureExtraction.cpp @@ -32,7 +32,7 @@ // These constants define the current software version. // They must be updated when the command line is changed.
#define ALICEVISION_SOFTWARE_VERSION_MAJOR 1 -#define ALICEVISION_SOFTWARE_VERSION_MINOR 1 +#define ALICEVISION_SOFTWARE_VERSION_MINOR 2 using namespace aliceVision; @@ -57,6 +57,8 @@ int aliceVision_main(int argc, char **argv) int maxThreads = 0; bool forceCpuExtraction = false; image::EImageColorSpace workingColorSpace = image::EImageColorSpace::SRGB; + std::string maskExtension = "png"; + bool maskInvert = false; po::options_description requiredParams("Required parameters"); requiredParams.add_options() @@ -88,6 +90,10 @@ int aliceVision_main(int argc, char **argv) "Use only CPU feature extraction methods.") ("masksFolder", po::value(&masksFolder), "Masks folder.") + ("maskExtension", po::value(&maskExtension)->default_value(maskExtension), + "File extension for masks.") + ("maskInvert", po::value(&maskInvert)->default_value(maskInvert), + "Invert mask values.") ("rangeStart", po::value(&rangeStart)->default_value(rangeStart), "Range image index start.") ("rangeSize", po::value(&rangeSize)->default_value(rangeSize), @@ -135,7 +141,7 @@ int aliceVision_main(int argc, char **argv) // create feature extractor feature::FeatureExtractor extractor(sfmData); - extractor.setMasksFolder(masksFolder); + extractor.setMasksFolder(masksFolder, maskExtension, maskInvert); extractor.setOutputFolder(outputFolder); // set maxThreads From 935f7b23e747ddcff570937bd22e665826f08207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Vital?= Date: Thu, 22 Jun 2023 00:14:01 -0700 Subject: [PATCH 9/9] [software] imageSegmentation: add range arguments --- .../pipeline/main_imageSegmentation.cpp | 51 +++++++++++++++++-- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp index 85e1e5cf4a..884da12a84 100644 --- a/src/software/pipeline/main_imageSegmentation.cpp +++ b/src/software/pipeline/main_imageSegmentation.cpp @@ -24,7 +24,8 @@ // IO #include #include -#include +#include 
+#include #include @@ -81,6 +82,8 @@ int aliceVision_main(int argc, char** argv) std::string outputPath; std::string modelWeightsPath; std::vector validClasses; + int rangeStart = -1; + int rangeSize = 1; // Description of mandatory parameters po::options_description requiredParams("Required parameters"); @@ -92,7 +95,11 @@ int aliceVision_main(int argc, char** argv) po::options_description optionalParams("Optional parameters"); optionalParams.add_options() ("validClasses,c", po::value>(&validClasses)->multitoken(), - "Names of classes which are to be considered"); + "Names of classes which are to be considered") + ("rangeStart", po::value(&rangeStart)->default_value(rangeStart), + "Range start for processing views (ordered by image filepath). Set to -1 to process all images.") + ("rangeSize", po::value(&rangeSize)->default_value(rangeSize), + "Range size for processing views (ordered by image filepath)."); CmdLine cmdline("AliceVision imageSegmentation"); cmdline.add(requiredParams); @@ -110,6 +117,40 @@ int aliceVision_main(int argc, char** argv) return EXIT_FAILURE; } + // Order views by their image names + std::vector> viewsOrderedByName; + for(auto& viewIt : sfmData.getViews()) + { + viewsOrderedByName.push_back(viewIt.second); + } + std::sort(viewsOrderedByName.begin(), viewsOrderedByName.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if(a == nullptr || b == nullptr) + return true; + return (a->getImagePath() < b->getImagePath()); + }); + + // Define range to compute + if(rangeStart != -1) + { + if(rangeStart < 0 || rangeSize < 0 || static_cast(rangeStart) > viewsOrderedByName.size()) + { + ALICEVISION_LOG_ERROR("Range is incorrect"); + return EXIT_FAILURE; + } + + if(static_cast(rangeStart + rangeSize) > viewsOrderedByName.size()) + { + rangeSize = static_cast(viewsOrderedByName.size()) - rangeStart; + } + } + else + { + rangeStart = 0; + rangeSize = static_cast(viewsOrderedByName.size()); + } + 
aliceVision::segmentation::Segmentation::Parameters parameters; parameters.modelWeights = modelWeightsPath; @@ -155,9 +196,9 @@ int aliceVision_main(int argc, char** argv) } } - for (const auto & pv : sfmData.getViews()) + for (const auto & view : viewsOrderedByName) { - std::string path = pv.second->getImagePath(); + std::string path = view->getImagePath(); ALICEVISION_LOG_INFO("processing " << path); image::Image image; @@ -175,7 +216,7 @@ int aliceVision_main(int argc, char** argv) //Store image std::stringstream ss; - ss << outputPath << "/" << pv.first << ".exr"; + ss << outputPath << "/" << view->getViewId() << ".exr"; image::writeImage(ss.str(), mask, image::ImageWriteOptions()); }