From 3980b1e9fa72b178bad3e29bfbb79582efdf28ff Mon Sep 17 00:00:00 2001
From: Fabien SERVANT <fabien.servant@technicolor.com>
Date: Thu, 8 Jun 2023 10:30:44 +0200
Subject: [PATCH 1/9] new app for onnx based segmentation

---
 src/software/pipeline/CMakeLists.txt          |  14 +++
 .../pipeline/main_imageSegmentation.cpp       | 113 ++++++++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 src/software/pipeline/main_imageSegmentation.cpp
diff --git a/src/software/pipeline/CMakeLists.txt b/src/software/pipeline/CMakeLists.txt
index f788d242a5..cd1dbe9712 100644
--- a/src/software/pipeline/CMakeLists.txt
+++ b/src/software/pipeline/CMakeLists.txt
@@ -620,3 +620,17 @@ if(ALICEVISION_HAVE_OPENCV AND ALICEVISION_HAVE_ONNX)
           ${OpenCV_LIBRARIES}
     )
 endif()
+
+if(ALICEVISION_HAVE_ONNX)
+    # Image segmentation (ONNX based)
+    alicevision_add_software(aliceVision_imageSegmentation
+    SOURCE main_imageSegmentation.cpp
+    FOLDER ${FOLDER_SOFTWARE_PIPELINE}
+    LINKS aliceVision_image
+          aliceVision_cmdline
+          aliceVision_system
+          aliceVision_sfmData
+          aliceVision_sfmDataIO
+          ONNXRuntime::ONNXRuntime
+    )
+endif()
diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp
new file mode 100644
index 0000000000..2ffa591921
--- /dev/null
+++ b/src/software/pipeline/main_imageSegmentation.cpp
@@ -0,0 +1,113 @@
+// This file is part of the AliceVision project.
+// Copyright (c) 2023 AliceVision contributors.
+// This Source Code Form is subject to the terms of the Mozilla Public License,
+// v. 2.0. If a copy of the MPL was not distributed with this file,
+// You can obtain one at https://mozilla.org/MPL/2.0/.
+
+// Input and geometry
+#include <aliceVision/sfmData/SfMData.hpp>
+#include <aliceVision/sfmDataIO/sfmDataIO.hpp>
+
+// Image
+#include <aliceVision/image/all.hpp>
+#include <aliceVision/image/imageAlgo.hpp>
+
+// System
+#include <aliceVision/system/MemoryInfo.hpp>
+#include <aliceVision/system/Logger.hpp>
+
+// Reading command line options
+#include <boost/program_options.hpp>
+#include <aliceVision/cmdline/cmdline.hpp>
+#include <aliceVision/system/main.hpp>
+
+// IO
+#include <fstream>
+#include <algorithm>
+
+// ONNXRuntime
+#include <onnxruntime_cxx_api.h>
+
+// These constants define the current software version.
+// They must be updated when the command line is changed.
+#define ALICEVISION_SOFTWARE_VERSION_MAJOR 1
+#define ALICEVISION_SOFTWARE_VERSION_MINOR 0
+
+using namespace aliceVision;
+
+namespace po = boost::program_options;
+
+int aliceVision_main(int argc, char** argv)
+{
+    std::string sfmDataFilepath;
+    
+    // Description of mandatory parameters
+    po::options_description requiredParams("Required parameters");
+    requiredParams.add_options()
+        ("input,i", po::value<std::string>(&sfmDataFilepath)->required(), "Input sfmData.");
+
+    CmdLine cmdline(
+        "AliceVision imageSegmentation");
+    cmdline.add(requiredParams);
+    if (!cmdline.execute(argc, argv))
+    {
+        return EXIT_FAILURE;
+    }
+
+    // load input scene
+    sfmData::SfMData sfmData;
+    if(!sfmDataIO::Load(sfmData, sfmDataFilepath, sfmDataIO::ESfMData(sfmDataIO::VIEWS)))
+    {
+        ALICEVISION_LOG_ERROR("The input file '" + sfmDataFilepath + "' cannot be read");
+        return EXIT_FAILURE;
+    }
+
+    
+    const OrtApi * ortObject = OrtGetApiBase()->GetApi(ORT_API_VERSION);
+    if (!ortObject) {
+        ALICEVISION_LOG_ERROR("ONNX runtime failed to initialize");
+        return EXIT_FAILURE;
+    }
+
+    OrtEnv * ortEnvironment;
+    ortObject->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "imageSegmentation", &ortEnvironment);
+    if (ortEnvironment == nullptr)
+    {
+        ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX environment");
+        return EXIT_FAILURE;
+    }
+
+    OrtSessionOptions * ortSessionOptions;
+    OrtStatus * ortStatus = ortObject->CreateSessionOptions(&ortSessionOptions);
+
+    if (ortStatus != nullptr)
+    {
+        ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX session options");
+        ortObject->ReleaseStatus(ortStatus);
+        return EXIT_FAILURE;
+    }
+
+    OrtSession* ortSession;
+    ortStatus = ortObject->CreateSession(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions, &ortSession);
+    if (ortStatus != nullptr)
+    {
+        ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX session");
+        ortObject->ReleaseStatus(ortStatus);
+        return EXIT_FAILURE;
+    }
+
+    OrtMemoryInfo* ortMemoryInfo;
+    ortStatus = ortObject->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &ortMemoryInfo);
+    if (ortStatus != nullptr)
+    {
+        ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX Memory info");
+        ortObject->ReleaseStatus(ortStatus);
+        return EXIT_FAILURE;
+    }
+
+    ortObject->ReleaseSessionOptions(ortSessionOptions);
+    ortObject->ReleaseSession(ortSession);
+    ortObject->ReleaseEnv(ortEnvironment);
+
+    return EXIT_SUCCESS;
+}

From 9c8dcbe4d933351c294336c8369b87319295df73 Mon Sep 17 00:00:00 2001
From: Fabien SERVANT <fabien.servant@technicolor.com>
Date: Tue, 13 Jun 2023 08:40:20 +0200
Subject: [PATCH 2/9] stash

---
 src/aliceVision/image/imageAlgo.cpp           |   9 +
 src/aliceVision/image/imageAlgo.hpp           |  15 ++
 .../pipeline/main_imageSegmentation.cpp       | 156 +++++++++++++-----
 3 files changed, 141 insertions(+), 39 deletions(-)

diff --git a/src/aliceVision/image/imageAlgo.cpp b/src/aliceVision/image/imageAlgo.cpp
index 6123961baf..1d910c2bb9 100644
--- a/src/aliceVision/image/imageAlgo.cpp
+++ b/src/aliceVision/image/imageAlgo.cpp
@@ -344,6 +344,15 @@ void resizeImage(int downscale, const image::Image<image::RGBfColor> &inImage,
                 inImage.data(), outImage.data(), filter, filterSize);
 }
 
+void resizeImage(const int outWidth, const int outHeight, const image::Image<image::RGBfColor> &inImage,
+                 image::Image<image::RGBfColor> &outImage, const std::string &filter,
+                 float filterSize)
+{
+    outImage.resize(outWidth, outHeight);
+    resizeImage(oiio::TypeDesc::FLOAT, inImage.Width(), inImage.Height(), outWidth, outHeight, 3,
+                inImage.data(), outImage.data(), filter, filterSize);
+}
+
 void resizeImage(int downscale, image::Image<image::RGBfColor>& inoutImage,
                  const std::string& filter, float filterSize)
 {
diff --git a/src/aliceVision/image/imageAlgo.hpp b/src/aliceVision/image/imageAlgo.hpp
index 0a11ae74e9..b5edafcc11 100644
--- a/src/aliceVision/image/imageAlgo.hpp
+++ b/src/aliceVision/image/imageAlgo.hpp
@@ -75,6 +75,21 @@ void resizeImage(int downscale, const image::Image<image::RGBAfColor>& inImage,
                  image::Image<image::RGBAfColor>& outImage,
                  const std::string& filter = "", float filterSize = 0);
 
+/**
+ * @brief Resize a given image buffer.
+ * @param[in] newWidth The destination width
+ * @param[in] newHeight The destination height
+ * @param[in] inImage The input image buffer
+ * @param[out] outImage The output image buffer
+ * @param[in] filter The name of a high-quality filter to use when resampling
+ *            Default is bilinear resampling
+ *            See openImageIO documentation "ImageBufAlgo filtername"
+ * @param[in] filterSize The resize filter size
+ */
+void resizeImage(int newWidth, int newHeight, const image::Image<image::RGBfColor>& inImage,
+                 image::Image<image::RGBfColor>& outImage,
+                 const std::string& filter = "", float filterSize = 0);
+
 /**
  * @brief Resize a given image buffer in place.
  * @param[in] downscale The resize downscale
diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp
index 2ffa591921..6d3464988d 100644
--- a/src/software/pipeline/main_imageSegmentation.cpp
+++ b/src/software/pipeline/main_imageSegmentation.cpp
@@ -37,14 +37,42 @@ using namespace aliceVision;
 
 namespace po = boost::program_options;
 
+void imageToPlanes(std::vector<float> & output, const image::Image<image::RGBfColor> & source)
+{
+    size_t planeSize = source.Width() * source.Height();
+    
+    output.resize(planeSize * 3);
+
+    float * planeR = output.data();
+    float * planeG = planeR + planeSize;
+    float * planeB = planeG + planeSize;
+
+    size_t pos = 0;
+    for (int i = 0; i < source.Height(); i++)
+    {
+        for (int j = 0; j < source.Width(); j++)
+        {
+            const image::RGBfColor & rgb = source(i, j);
+            planeR[pos] = rgb.r();
+            planeG[pos] = rgb.g();
+            planeB[pos] = rgb.b();
+
+            pos++;
+        }
+    }
+}
+
+
 int aliceVision_main(int argc, char** argv)
 {
     std::string sfmDataFilepath;
+    std::string outputPath;
     
     // Description of mandatory parameters
     po::options_description requiredParams("Required parameters");
     requiredParams.add_options()
-        ("input,i", po::value<std::string>(&sfmDataFilepath)->required(), "Input sfmData.");
+        ("input,i", po::value<std::string>(&sfmDataFilepath)->required(), "Input sfmData.")
+        ("output,o", po::value<std::string>(&outputPath)->required(), "output folder.");
 
     CmdLine cmdline(
         "AliceVision imageSegmentation");
@@ -63,51 +91,101 @@ int aliceVision_main(int argc, char** argv)
     }
 
     
-    const OrtApi * ortObject = OrtGetApiBase()->GetApi(ORT_API_VERSION);
-    if (!ortObject) {
-        ALICEVISION_LOG_ERROR("ONNX runtime failed to initialize");
-        return EXIT_FAILURE;
-    }
+    Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation");
+    Ort::SessionOptions ortSessionOptions;
+    Ort::Session ortSession = Ort::Session(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions);
 
-    OrtEnv * ortEnvironment;
-    ortObject->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "imageSegmentation", &ortEnvironment);
-    if (ortEnvironment == nullptr)
-    {
-        ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX environment");
-        return EXIT_FAILURE;
-    }
+    Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
 
-    OrtSessionOptions * ortSessionOptions;
-    OrtStatus * ortStatus = ortObject->CreateSessionOptions(&ortSessionOptions);
+    std::vector<const char*> inputNames{"input"};
+    std::vector<const char*> outputNames{"output"};
 
-    if (ortStatus != nullptr)
-    {
-        ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX session options");
-        ortObject->ReleaseStatus(ortStatus);
-        return EXIT_FAILURE;
-    }
+    std::vector<int64_t> inputDimensions = {1, 3, 720, 1280};
+    std::vector<int64_t> outputDimensions = {1, 21, 720, 1280};
 
-    OrtSession* ortSession;
-    ortStatus = ortObject->CreateSession(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions, &ortSession);
-    if (ortStatus != nullptr)
+    for (const auto & pv : sfmData.getViews())
     {
-        ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX session");
-        ortObject->ReleaseStatus(ortStatus);
-        return EXIT_FAILURE;
-    }
-
-    OrtMemoryInfo* ortMemoryInfo;
-    ortStatus = ortObject->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &ortMemoryInfo);
-    if (ortStatus != nullptr)
-    {
-        ALICEVISION_LOG_ERROR("ONNX runtime failed to create ONNX Memory info");
-        ortObject->ReleaseStatus(ortStatus);
-        return EXIT_FAILURE;
+        std::string path = pv.second->getImagePath();
+
+        image::Image<image::RGBfColor> image;
+        image::readImage(path, image, image::EImageColorSpace::NO_CONVERSION);
+
+        if (image.Height() > image.Width())
+        {
+
+        }
+        
+        
+        /*//Normalize
+        for (int i = 0; i < 720; i++)
+        {
+            for (int j = 0; j < 1280;j++)
+            {
+                image::RGBfColor value = image(i, j);
+                image(i, j).r() = (value.r() - 0.485) / 0.229;
+                image(i, j).g() = (value.g() - 0.456) / 0.224;
+                image(i, j).b() = (value.b() - 0.406) / 0.225;
+            }
+        }
+       
+        std::vector<float> transformedInput;
+        imageToPlanes(transformedInput, image);
+
+        std::vector<float> output(21 * 720 * 1280);
+
+        Ort::Value inputTensors = Ort::Value::CreateTensor<float>(
+            mem_info, 
+            transformedInput.data(), transformedInput.size(), 
+            inputDimensions.data(), inputDimensions.size()
+        );
+
+        Ort::Value outputTensors = Ort::Value::CreateTensor<float>(
+            mem_info, 
+            output.data(), output.size(), 
+            outputDimensions.data(), outputDimensions.size()
+        );
+
+        try 
+        {
+            std::cout << "Before Running\n";
+            ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1);
+            std::cout << "Done!" << std::endl;
+        } 
+        catch (const Ort::Exception& exception) 
+        {
+            std::cout << "ERROR running model inference: " << exception.what() << std::endl;
+            exit(-1);
+        }
+
+
+        image::Image<float> dest(1280, 720, true);
+        for (int i = 0; i < 720; i++)
+        {
+            for (int j = 0; j < 1280; j++)
+            {
+                int maxClasse = 0;
+                int maxVal = 0;
+
+                for (int classe = 0; classe < 21; classe++)
+                {
+                    int classPos = classe * 1280 * 720;
+                    int pos = classPos + i * 1280  + j;
+
+                    float val = output[pos];
+                    if (val > maxVal)
+                    {
+                        maxVal = val;
+                        maxClasse = classe;
+                    }
+                }
+
+                dest(i, j) = maxClasse / 21.0;
+            }
+        }
+
+        image::writeImage("/s/prods/mvg/_source_global/users/servantf/toto.png", dest, image::ImageWriteOptions());*/
     }
 
-    ortObject->ReleaseSessionOptions(ortSessionOptions);
-    ortObject->ReleaseSession(ortSession);
-    ortObject->ReleaseEnv(ortEnvironment);
 
     return EXIT_SUCCESS;
 }

From d311a9eac502642018a735961da4e9d1bbe689ac Mon Sep 17 00:00:00 2001
From: Fabien SERVANT <fabien.servant@technicolor.com>
Date: Wed, 14 Jun 2023 10:40:17 +0200
Subject: [PATCH 3/9] Working semantic segmentation

---
 src/aliceVision/CMakeLists.txt                |   5 +
 src/aliceVision/image/imageAlgo.cpp           |  53 +++-
 src/aliceVision/image/imageAlgo.hpp           |  15 ++
 src/aliceVision/image/pixelTypes.hpp          |  12 +
 src/aliceVision/segmentation/CMakeLists.txt   |  24 ++
 src/aliceVision/segmentation/segmentation.cpp | 235 ++++++++++++++++++
 src/aliceVision/segmentation/segmentation.hpp |  86 +++++++
 src/software/pipeline/CMakeLists.txt          |   2 +-
 .../pipeline/main_imageSegmentation.cpp       | 131 ++++------
 9 files changed, 464 insertions(+), 99 deletions(-)
 create mode 100644 src/aliceVision/segmentation/CMakeLists.txt
 create mode 100644 src/aliceVision/segmentation/segmentation.cpp
 create mode 100644 src/aliceVision/segmentation/segmentation.hpp

diff --git a/src/aliceVision/CMakeLists.txt b/src/aliceVision/CMakeLists.txt
index bc1e446315..ebb97aeebc 100644
--- a/src/aliceVision/CMakeLists.txt
+++ b/src/aliceVision/CMakeLists.txt
@@ -56,9 +56,14 @@ if(ALICEVISION_BUILD_MVS)
   if(ALICEVISION_HAVE_CUDA)
     add_subdirectory(depthMap)
   endif()
+
+  if(ALICEVISION_HAVE_ONNX)
+    add_subdirectory(segmentation)
+  endif()
 endif()
 
 
+
 if(ALICEVISION_BUILD_SFM AND ALICEVISION_BUILD_MVS)
   add_subdirectory(sfmMvsUtils)
 endif()
diff --git a/src/aliceVision/image/imageAlgo.cpp b/src/aliceVision/image/imageAlgo.cpp
index 1d910c2bb9..f6ca2eb929 100644
--- a/src/aliceVision/image/imageAlgo.cpp
+++ b/src/aliceVision/image/imageAlgo.cpp
@@ -344,15 +344,6 @@ void resizeImage(int downscale, const image::Image<image::RGBfColor> &inImage,
                 inImage.data(), outImage.data(), filter, filterSize);
 }
 
-void resizeImage(const int outWidth, const int outHeight, const image::Image<image::RGBfColor> &inImage,
-                 image::Image<image::RGBfColor> &outImage, const std::string &filter,
-                 float filterSize)
-{
-    outImage.resize(outWidth, outHeight);
-    resizeImage(oiio::TypeDesc::FLOAT, inImage.Width(), inImage.Height(), outWidth, outHeight, 3,
-                inImage.data(), outImage.data(), filter, filterSize);
-}
-
 void resizeImage(int downscale, image::Image<image::RGBfColor>& inoutImage,
                  const std::string& filter, float filterSize)
 {
@@ -432,6 +423,50 @@ void resizeImage(int downscale, image::Image<image::RGBAfColor>& inoutImage,
     inoutImage.swap(rescaled);
 }
 
+void resizeImage(const int outWidth, const int outHeight, const image::Image<image::RGBfColor> &inImage,
+                 image::Image<image::RGBfColor> &outImage, const std::string &filter,
+                 float filterSize)
+{
+    outImage.resize(outWidth, outHeight); // give the destination the requested size before it is filled
+    resizeImage(oiio::TypeDesc::FLOAT, inImage.Width(), inImage.Height(), outWidth, outHeight, 3, // 3: presumably the channel count (R, G, B)
+                inImage.data(), outImage.data(), filter, filterSize);
+}
+
+void resizeImage(const int outWidth, const int outHeight, const image::Image<IndexT> &inImage,
+                 image::Image<IndexT> &outImage, const std::string &filter,
+                 float filterSize)
+{
+    outImage.resize(outWidth, outHeight); // give the destination the requested size before it is filled
+    resizeImage(oiio::TypeDesc::UINT32, inImage.Width(), inImage.Height(), outWidth, outHeight, 1, // 1: presumably the channel count
+                inImage.data(), outImage.data(), filter, filterSize); // NOTE(review): a filtered resize may blend neighbouring index values - confirm this is wanted for index images
+}
+
+template<typename T>
+void resampleImage(oiio::TypeDesc typeDesc,   // OpenImageIO type of one channel value
+                 int inWidth,
+                 int inHeight,
+                 int outWidth,
+                 int outHeight,
+                 int nchannels,
+                 const T* inBuffer,
+                 T* outBuffer,              // must already be allocated by the caller
+                 bool interpolate)
+{
+    const oiio::ImageBuf inBuf(oiio::ImageSpec(inWidth, inHeight, nchannels, typeDesc),
+                               const_cast<T*>(inBuffer)); // ImageBuf takes a non-const pointer; inBuf itself is declared const
+    oiio::ImageBuf outBuf(oiio::ImageSpec(outWidth, outHeight, nchannels, typeDesc), outBuffer);
+    // Resample inBuf into outBuf; 'interpolate' is forwarded unchanged and the returned status is not checked
+    oiio::ImageBufAlgo::resample(outBuf, inBuf, interpolate);
+}
+
+void resampleImage(int outWidth, int outHeight, const image::Image<IndexT>& inImage,
+                 image::Image<IndexT>& outImage, bool interpolate)
+{
+    outImage.resize(outWidth, outHeight); // allocate the destination before handing its buffer to the generic version
+    resampleImage(oiio::TypeDesc::UINT32, inImage.Width(), inImage.Height(), outWidth, outHeight, 1, // one UINT32 channel
+        inImage.data(), outImage.data(), interpolate);
+}
+
 template<typename T>
 void convolveImage(oiio::TypeDesc typeDesc,
                    int inWidth,
diff --git a/src/aliceVision/image/imageAlgo.hpp b/src/aliceVision/image/imageAlgo.hpp
index b5edafcc11..27783eea90 100644
--- a/src/aliceVision/image/imageAlgo.hpp
+++ b/src/aliceVision/image/imageAlgo.hpp
@@ -86,6 +86,9 @@ void resizeImage(int downscale, const image::Image<image::RGBAfColor>& inImage,
  *            See openImageIO documentation "ImageBufAlgo filtername"
  * @param[in] filterSize The resize filter size
  */
+void resizeImage(int newWidth, int newHeight, const image::Image<IndexT>& inImage,
+                 image::Image<IndexT>& outImage,
+                 const std::string& filter = "", float filterSize = 0);
 void resizeImage(int newWidth, int newHeight, const image::Image<image::RGBfColor>& inImage,
                  image::Image<image::RGBfColor>& outImage,
                  const std::string& filter = "", float filterSize = 0);
@@ -112,6 +115,18 @@ void resizeImage(int downscale, image::Image<image::RGBAColor>& inoutImage,
 void resizeImage(int downscale, image::Image<image::RGBAfColor>& inoutImage,
                  const std::string& filter = "", float filterSize = 0);
 
+
+/**
+ * @brief resample a given image buffer.
+ * @param[in] newWidth The destination width
+ * @param[in] newHeight The destination height
+ * @param[in] inImage The input image buffer
+ * @param[out] outImage The output image buffer
+ * @param[in] interpolate true to use (bilinear) interpolation, false to disable interpolation
+ */
+void resampleImage(int newWidth, int newHeight, const image::Image<IndexT>& inImage,
+                 image::Image<IndexT>& outImage, bool interpolate);
+
 /**
  * @brief convolve a given image buffer
  * @param[in] inBuffer The input image buffer
diff --git a/src/aliceVision/image/pixelTypes.hpp b/src/aliceVision/image/pixelTypes.hpp
index fcb5170896..aeacf82ce1 100644
--- a/src/aliceVision/image/pixelTypes.hpp
+++ b/src/aliceVision/image/pixelTypes.hpp
@@ -169,6 +169,17 @@ namespace aliceVision
         return Rgb( ((*this)(0) + other(0)), ((*this)(1) + other(1)), ((*this)(2) + other(2)));
       }
 
+      /**
+      * @brief Elementwise multiplication: each of the three channels is multiplied by the same channel of other
+      * @param other the color whose channels are used as factors
+      * @return a new Rgb holding the three channel products
+      * @note Neither this color nor other is modified (the operator is const and returns a copy)
+      */
+      inline Rgb operator *( const Rgb& other ) const
+      {
+        return Rgb( ((*this)(0) * other(0)), ((*this)(1) * other(1)), ((*this)(2) * other(2)));
+      }
+
     
       /**
       * @brief scalar division
@@ -183,6 +194,7 @@ namespace aliceVision
                     T( ( Z )( ( *this )( 1 ) ) / val ),
                     T( ( Z )( ( *this )( 2 ) ) / val ) );
       }
+      
 
       /**
       * @brief scalar multiplication
diff --git a/src/aliceVision/segmentation/CMakeLists.txt b/src/aliceVision/segmentation/CMakeLists.txt
new file mode 100644
index 0000000000..10c39524e6
--- /dev/null
+++ b/src/aliceVision/segmentation/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Headers
+set(segmentation_files_headers
+    segmentation.hpp
+)
+
+# Sources
+set(segmentation_files_sources
+    segmentation.cpp
+)
+
+alicevision_add_library(aliceVision_segmentation
+  SOURCES ${segmentation_files_headers} ${segmentation_files_sources}
+  PUBLIC_LINKS
+    aliceVision_system
+    aliceVision_numeric
+    aliceVision_image
+    ONNXRuntime::ONNXRuntime
+    #penImageIO::OpenImageIO
+)
+
+
+
+
+
diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp
new file mode 100644
index 0000000000..d0d87fca02
--- /dev/null
+++ b/src/aliceVision/segmentation/segmentation.cpp
@@ -0,0 +1,235 @@
+// This file is part of the AliceVision project.
+// Copyright (c) 2023 AliceVision contributors.
+// This Source Code Form is subject to the terms of the Mozilla Public License,
+// v. 2.0. If a copy of the MPL was not distributed with this file,
+// You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "segmentation.hpp"
+
+#include <aliceVision/image/all.hpp>
+#include <aliceVision/image/imageAlgo.hpp>
+#include <aliceVision/numeric/numeric.hpp>
+
+namespace aliceVision {
+namespace segmentation {
+
+// Split an interleaved RGB image into three consecutive float planes: all R, then all G, then all B.
+void imageToPlanes(std::vector<float> & output, const image::Image<image::RGBfColor>::Base & source)
+{
+    const size_t pixelCount = source.rows() * source.cols();
+    output.resize(pixelCount * 3);
+
+    // Each plane is written row after row, the three planes following each other in the same buffer
+    float * const redPlane = output.data();
+    float * const greenPlane = redPlane + pixelCount;
+    float * const bluePlane = greenPlane + pixelCount;
+
+    size_t index = 0;
+    for (int row = 0; row < source.rows(); ++row)
+    {
+        for (int col = 0; col < source.cols(); ++col, ++index)
+        {
+            const image::RGBfColor & pixel = source(row, col);
+
+            redPlane[index] = pixel.r();
+            greenPlane[index] = pixel.g();
+            bluePlane[index] = pixel.b();
+        }
+    }
+}
+
+bool Segmentation::processImage(image::Image<IndexT> &labels, const image::Image<image::RGBfColor> & source)
+{
+    //Todo : handle orientation
+    if (source.Width() <= 0 || source.Height() <= 0) return false; //Nothing to segment, and avoids a division by zero
+    // Compute the resized size such that one dimension fits the model exactly and the other one is at least as
+    // large as the model, so that every tile extracted by tiledProcess() lies inside the resized image.
+    int resizedHeight = 0;
+    int resizedWidth = 0;
+    if (double(source.Width()) * double(_modelHeight) >= double(source.Height()) * double(_modelWidth))
+    {
+        //Source is at least as wide as the model aspect ratio: fit the height, the width cannot get smaller than the model
+        resizedHeight = _modelHeight;
+        resizedWidth = double(source.Width()) * double(_modelHeight) / double(source.Height());
+    }
+    else
+    {
+        //Source is narrower than the model aspect ratio: fit the width, the height cannot get smaller than the model
+        resizedWidth = _modelWidth;
+        resizedHeight = double(source.Height()) * double(_modelWidth) / double(source.Width());
+    }
+
+    //Resize image
+    image::Image<image::RGBfColor> resized;
+    imageAlgo::resizeImage(resizedWidth, resizedHeight, source, resized);
+
+    //Normalize image to fit model statistics
+    for (int i = 0; i < resizedHeight; i++)
+    {
+        for (int j = 0; j < resizedWidth;j++)
+        {
+            resized(i, j) = (resized(i, j) - _center) * _scale;
+        }
+    }
+
+    image::Image<IndexT> resizedLabels;
+    if (!tiledProcess(resizedLabels, resized)) return false;
+
+    //Bring the labels back to the source size, without interpolation
+    imageAlgo::resampleImage(source.Width(), source.Height(), resizedLabels, labels, false);
+
+    return true;
+}
+
+bool Segmentation::tiledProcess(image::Image<IndexT> & labels, const image::Image<image::RGBfColor> & source)
+{
+    //A tile always has the model size: with a smaller source the block extraction below would read
+    //outside of the image, so such an input is rejected instead.
+    if (source.Width() < _modelWidth || source.Height() < _modelHeight)
+    {
+        ALICEVISION_LOG_ERROR("Image is smaller than the segmentation model input");
+        return false;
+    }
+
+    //Compute the theoretical tiles count
+    int cwidth = divideRoundUp(source.Width(), _modelWidth);
+    int cheight = divideRoundUp(source.Height(), _modelHeight);
+
+    image::Image<ScoredLabel> scoredLabels(source.Width(), source.Height(), true, {0, 0.0f});
+
+    //Loop over tiles
+    //NOTE(review): tiles after the first one start earlier because of the overlap, but their count is not
+    //increased. For some sizes (e.g. a dimension equal to twice the model size) the last rows/columns are
+    //covered by no tile and keep the initial label 0 - to be confirmed and fixed separately.
+    for (int i = 0; i < cheight; i++)
+    {
+        //Compute starting point with overlap on previous; on the end border, shift back so the tile ends on the last row
+        int y = std::max(0, int(i * _modelHeight - _overlapRatio * _modelHeight));
+        y = std::max(0, std::min(y, source.Height() - _modelHeight));
+
+        for (int j = 0; j < cwidth; j++)
+        {
+            //Compute starting point with overlap on previous; on the end border, shift back so the tile ends on the last column
+            int x = std::max(0, int(j * _modelWidth - _overlapRatio * _modelWidth));
+            x = std::max(0, std::min(x, source.Width() - _modelWidth));
+
+            //x and y contain the position of the tile in the input image
+            auto & block = source.block(y, x, _modelHeight, _modelWidth);
+
+            //Compute tile, giving up on the whole image as soon as one tile fails
+            image::Image<ScoredLabel> tileLabels(_modelWidth, _modelHeight, true, {0, 0.0f});
+            if (!processTile(tileLabels, block))
+            {
+                return false;
+            }
+
+            //Update the global labeling
+            mergeLabels(scoredLabels, tileLabels, x, y);
+        }
+    }
+
+    //Drop the scores and keep the label of each pixel (ScoredLabel converts to IndexT)
+    labels = scoredLabels.cast<IndexT>();
+
+    return true;
+}
+
+bool Segmentation::mergeLabels(image::Image<ScoredLabel> & labels, image::Image<ScoredLabel> & tileLabels, int tileX, int tileY)
+{
+    //For every pixel covered by the tile, keep the tile label only when its score is strictly higher
+    for (int row = 0; row < tileLabels.Height(); ++row)
+    {
+        for (int col = 0; col < tileLabels.Width(); ++col)
+        {
+            const ScoredLabel & candidate = tileLabels(row, col);
+            ScoredLabel & current = labels(row + tileY, col + tileX);
+
+            if (candidate.score > current.score)
+            {
+                current = candidate;
+            }
+        }
+    }
+    return true;
+}
+
+bool Segmentation::labelsFromModelOutput(image::Image<ScoredLabel> & labels, const std::vector<float> & modelOutput)
+{
+    //modelOutput holds one plane of scores per class, each plane being _modelHeight rows of _modelWidth values
+    for (int outputY = 0; outputY < _modelHeight; outputY++)
+    {
+        for (int outputX = 0; outputX < _modelWidth; outputX++)
+        {
+            //The best score is kept as a float: an integer would truncate it and corrupt the comparisons below
+            IndexT maxClasse = 0;
+            float maxVal = 0.0f;
+
+            for (size_t classe = 0; classe < _classes.size(); classe++)
+            {
+                const size_t pos = (classe * _modelHeight + outputY) * _modelWidth + outputX;
+
+                const float val = modelOutput[pos];
+                if (val > maxVal)
+                {
+                    maxVal = val;
+                    maxClasse = static_cast<IndexT>(classe);
+                }
+            }
+
+            labels(outputY, outputX) = {maxClasse, maxVal};
+        }
+    }
+    return true;
+}
+
+bool Segmentation::processTile(image::Image<ScoredLabel> & labels, const image::Image<image::RGBfColor>::Base & source)
+{
+    //The tensors below are declared with the model size, so the tile must have exactly that size
+    if (source.rows() != _modelHeight || source.cols() != _modelWidth) return false;
+
+    //Explicit conversion: a size_t inside the int64_t initializer list below is a narrowing conversion
+    const int64_t classesCount = static_cast<int64_t>(_classes.size());
+    std::vector<const char*> inputNames{"input"};
+    std::vector<const char*> outputNames{"output"};
+    std::vector<int64_t> inputDimensions = {1, 3, _modelHeight, _modelWidth};
+    std::vector<int64_t> outputDimensions = {1, classesCount, _modelHeight, _modelWidth};
+
+    std::vector<float> output(_classes.size() * _modelHeight * _modelWidth);
+    std::vector<float> transformedInput;
+    imageToPlanes(transformedInput, source);
+
+    try
+    {
+        //NOTE: the environment and the session are re-created on every call, i.e. the model is loaded for every
+        //tile, from a hard-coded path. They are built inside the try block so that a loading failure is caught too.
+        Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation");
+        Ort::SessionOptions ortSessionOptions;
+        Ort::Session ortSession = Ort::Session(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions);
+        Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
+
+        Ort::Value outputTensors = Ort::Value::CreateTensor<float>(
+            mem_info,
+            output.data(), output.size(),
+            outputDimensions.data(), outputDimensions.size()
+        );
+
+        Ort::Value inputTensors = Ort::Value::CreateTensor<float>(
+            mem_info,
+            transformedInput.data(), transformedInput.size(),
+            inputDimensions.data(), inputDimensions.size()
+        );
+
+        ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1);
+    }
+    catch (const Ort::Exception& exception)
+    {
+        ALICEVISION_LOG_ERROR(std::string("ERROR running model inference: ") + exception.what());
+        return false;
+    }
+
+    //Turn the raw per-class scores into one scored label per pixel
+    return labelsFromModelOutput(labels, output);
+}
+
+} //segmentation
+} //aliceVision
\ No newline at end of file
diff --git a/src/aliceVision/segmentation/segmentation.hpp b/src/aliceVision/segmentation/segmentation.hpp
new file mode 100644
index 0000000000..553894dbe4
--- /dev/null
+++ b/src/aliceVision/segmentation/segmentation.hpp
@@ -0,0 +1,86 @@
+// This file is part of the AliceVision project.
+// Copyright (c) 2023 AliceVision contributors.
+// This Source Code Form is subject to the terms of the Mozilla Public License,
+// v. 2.0. If a copy of the MPL was not distributed with this file,
+// You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include <vector>
+#include <string>
+#include <aliceVision/types.hpp>
+#include <aliceVision/image/Image.hpp>
+// ONNXRuntime
+#include <onnxruntime_cxx_api.h>
+
+namespace aliceVision {
+namespace segmentation {
+
+struct ScoredLabel
+{
+    IndexT label; // index of the class in the classes list
+    float score;  // model output value obtained for that class
+    // Implicit conversion dropping the score, used when an Image<ScoredLabel> is cast to an Image<IndexT>
+    operator IndexT() const { return label; }
+};
+
+class Segmentation
+{
+public:
+    /// Names of the classes handled by the model; a label value is an index in this list
+    const std::vector<std::string> & getClasses() const
+    {
+        return _classes;
+    }
+
+    /**
+     * Process an input image to estimate segmentation
+     * @param labels the labels image resulting from the process, with the same size as source
+     * @param source is the input image to process
+     */
+    bool processImage(image::Image<IndexT> &labels, const image::Image<image::RGBfColor> & source);
+
+private:
+    /**
+     * Process an image tile by tile; assumes the source image is at least as large as the model input
+     * @param labels the output label image
+     * @param source the input image to process
+     */
+    bool tiledProcess(image::Image<IndexT> &labels, const image::Image<image::RGBfColor> & source);
+
+    /**
+     * Transform model output to a label image
+     * @param labels the output labels image
+     * @param modelOutput the model output vector
+     */
+    bool labelsFromModelOutput(image::Image<ScoredLabel> & labels, const std::vector<float> & modelOutput);
+
+    /**
+     * Process effectively a buffer of the model input size
+     * @param labels the output labels
+     * @param source the source tile
+     */
+    bool processTile(image::Image<ScoredLabel> & labels, const image::Image<image::RGBfColor>::Base & source);
+
+    /**
+     * Merge tile labels with global labels image
+     * @param labels the global labels image
+     * @param tileLabels the local tile labels image
+     * @param tileX the horizontal position of the tile in the global image
+     * @param tileY the vertical position of the tile in the global image
+     */
+    bool mergeLabels(image::Image<ScoredLabel> & labels, image::Image<ScoredLabel> & tileLabels, int tileX, int tileY);
+
+protected:
+    std::vector<std::string> _classes = {"__background__", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
+                                    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
+                                    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
+    image::RGBfColor _center= {0.485, 0.456, 0.406}; // subtracted from every pixel before inference
+    image::RGBfColor _scale= {1.0 / 0.229, 1.0 / 0.224, 1.0 / 0.225}; // then multiplied channel by channel
+    int _modelWidth = 1280; // width of one tile given to the model
+    int _modelHeight = 720; // height of one tile given to the model
+    double _overlapRatio = 0.3; // fraction of a tile by which a tile starts before the regular grid position
+};
+
+} //segmentation
+} //aliceVision
\ No newline at end of file
diff --git a/src/software/pipeline/CMakeLists.txt b/src/software/pipeline/CMakeLists.txt
index cd1dbe9712..e43a4f5073 100644
--- a/src/software/pipeline/CMakeLists.txt
+++ b/src/software/pipeline/CMakeLists.txt
@@ -631,6 +631,6 @@ if(ALICEVISION_HAVE_ONNX)
           aliceVision_system
           aliceVision_sfmData
           aliceVision_sfmDataIO
-          ONNXRuntime::ONNXRuntime
+          aliceVision_segmentation
     )
 endif()
diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp
index 6d3464988d..9ff85630ac 100644
--- a/src/software/pipeline/main_imageSegmentation.cpp
+++ b/src/software/pipeline/main_imageSegmentation.cpp
@@ -24,9 +24,10 @@
 // IO
 #include <fstream>
 #include <algorithm>
+#include <boost/algorithm/string.hpp>    
 
-// ONNXRuntime
-#include <onnxruntime_cxx_api.h>
+
+#include <aliceVision/segmentation/segmentation.hpp>
 
 // These constants define the current software version.
 // They must be updated when the command line is changed.
@@ -37,9 +38,9 @@ using namespace aliceVision;
 
 namespace po = boost::program_options;
 
-void imageToPlanes(std::vector<float> & output, const image::Image<image::RGBfColor> & source)
+void imageToPlanes(std::vector<float> & output, const image::Image<image::RGBfColor>::Base & source)
 {
-    size_t planeSize = source.Width() * source.Height();
+    size_t planeSize = source.rows() * source.cols();
     
     output.resize(planeSize * 3);
 
@@ -48,9 +49,9 @@ void imageToPlanes(std::vector<float> & output, const image::Image<image::RGBfCo
     float * planeB = planeG + planeSize;
 
     size_t pos = 0;
-    for (int i = 0; i < source.Height(); i++)
+    for (int i = 0; i < source.rows(); i++)
     {
-        for (int j = 0; j < source.Width(); j++)
+        for (int j = 0; j < source.cols(); j++)
         {
             const image::RGBfColor & rgb = source(i, j);
             planeR[pos] = rgb.r();
@@ -63,10 +64,12 @@ void imageToPlanes(std::vector<float> & output, const image::Image<image::RGBfCo
 }
 
 
+
 int aliceVision_main(int argc, char** argv)
 {
     std::string sfmDataFilepath;
     std::string outputPath;
+    std::vector<std::string> validClasses = {"person"};
     
     // Description of mandatory parameters
     po::options_description requiredParams("Required parameters");
@@ -74,9 +77,14 @@ int aliceVision_main(int argc, char** argv)
         ("input,i", po::value<std::string>(&sfmDataFilepath)->required(), "Input sfmData.")
         ("output,o", po::value<std::string>(&outputPath)->required(), "output folder.");
 
-    CmdLine cmdline(
-        "AliceVision imageSegmentation");
+    po::options_description optionalParams("Optional parameters");
+    optionalParams.add_options()
+        ("validClasses,c", po::value<std::vector<std::string>>(&validClasses)->multitoken(),
+         "Names of classes which are to be considered");
+
+    CmdLine cmdline("AliceVision imageSegmentation");
     cmdline.add(requiredParams);
+    cmdline.add(optionalParams);
     if (!cmdline.execute(argc, argv))
     {
         return EXIT_FAILURE;
@@ -90,102 +98,47 @@ int aliceVision_main(int argc, char** argv)
         return EXIT_FAILURE;
     }
 
-    
-    Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation");
-    Ort::SessionOptions ortSessionOptions;
-    Ort::Session ortSession = Ort::Session(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions);
-
-    Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
-
-    std::vector<const char*> inputNames{"input"};
-    std::vector<const char*> outputNames{"output"};
+    aliceVision::segmentation::Segmentation seg;
+    const auto & classes = seg.getClasses();
 
-    std::vector<int64_t> inputDimensions = {1, 3, 720, 1280};
-    std::vector<int64_t> outputDimensions = {1, 21, 720, 1280};
-
-    for (const auto & pv : sfmData.getViews())
+    std::set<IndexT> validClassesIndices;
+    for (const auto & s : validClasses)
     {
-        std::string path = pv.second->getImagePath();
-
-        image::Image<image::RGBfColor> image;
-        image::readImage(path, image, image::EImageColorSpace::NO_CONVERSION);
+        std::string classInput = boost::to_lower_copy(s);
+        boost::trim(classInput);
 
-        if (image.Height() > image.Width())
+        for (int idc = 0; idc < classes.size(); idc++)
         {
+            std::string classCompare = boost::to_lower_copy(classes[idc]);
+            boost::trim(classCompare);
 
-        }
-        
-        
-        /*//Normalize
-        for (int i = 0; i < 720; i++)
-        {
-            for (int j = 0; j < 1280;j++)
+            if (classCompare.compare(classInput) == 0)
             {
-                image::RGBfColor value = image(i, j);
-                image(i, j).r() = (value.r() - 0.485) / 0.229;
-                image(i, j).g() = (value.g() - 0.456) / 0.224;
-                image(i, j).b() = (value.b() - 0.406) / 0.225;
+                validClassesIndices.insert(idc);
+                break;
             }
         }
-       
-        std::vector<float> transformedInput;
-        imageToPlanes(transformedInput, image);
-
-        std::vector<float> output(21 * 720 * 1280);
-
-        Ort::Value inputTensors = Ort::Value::CreateTensor<float>(
-            mem_info, 
-            transformedInput.data(), transformedInput.size(), 
-            inputDimensions.data(), inputDimensions.size()
-        );
-
-        Ort::Value outputTensors = Ort::Value::CreateTensor<float>(
-            mem_info, 
-            output.data(), output.size(), 
-            outputDimensions.data(), outputDimensions.size()
-        );
+    }
 
-        try 
-        {
-            std::cout << "Before Running\n";
-            ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1);
-            std::cout << "Done!" << std::endl;
-        } 
-        catch (const Ort::Exception& exception) 
-        {
-            std::cout << "ERROR running model inference: " << exception.what() << std::endl;
-            exit(-1);
-        }
+    for (const auto & pv : sfmData.getViews())
+    {
+        std::string path = pv.second->getImagePath();
 
+        image::Image<image::RGBfColor> image;
+        image::readImage(path, image, image::EImageColorSpace::NO_CONVERSION);
 
-        image::Image<float> dest(1280, 720, true);
-        for (int i = 0; i < 720; i++)
+        image::Image<IndexT> labels;
+        if (!seg.processImage(labels, image))
         {
-            for (int j = 0; j < 1280; j++)
-            {
-                int maxClasse = 0;
-                int maxVal = 0;
-
-                for (int classe = 0; classe < 21; classe++)
-                {
-                    int classPos = classe * 1280 * 720;
-                    int pos = classPos + i * 1280  + j;
-
-                    float val = output[pos];
-                    if (val > maxVal)
-                    {
-                        maxVal = val;
-                        maxClasse = classe;
-                    }
-                }
-
-                dest(i, j) = maxClasse / 21.0;
-            }
+            ALICEVISION_LOG_INFO("Failed to segment image " << path);
         }
 
-        image::writeImage("/s/prods/mvg/_source_global/users/servantf/toto.png", dest, image::ImageWriteOptions());*/
+        //Store image
+        std::stringstream ss;
+        ss << outputPath << "/" << pv.first << ".exr";
+        image::writeImage(ss.str(), labels, image::ImageWriteOptions());
     }
 
-
+   
     return EXIT_SUCCESS;
 }

From 4753e3668a9d905ee81e08fb25fede5e760e3914 Mon Sep 17 00:00:00 2001
From: Fabien SERVANT <fabien.servant@technicolor.com>
Date: Tue, 20 Jun 2023 14:48:04 +0200
Subject: [PATCH 4/9] add model path to parameters

---
 src/aliceVision/segmentation/segmentation.cpp | 46 +++++++++----------
 src/aliceVision/segmentation/segmentation.hpp | 27 +++++++----
 .../pipeline/main_imageSegmentation.cpp       | 38 +++++++++++++--
 3 files changed, 74 insertions(+), 37 deletions(-)

diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp
index d0d87fca02..c43b0e40e0 100644
--- a/src/aliceVision/segmentation/segmentation.cpp
+++ b/src/aliceVision/segmentation/segmentation.cpp
@@ -47,13 +47,13 @@ bool Segmentation::processImage(image::Image<IndexT> &labels, const image::Image
     int resizedWidth = 0;
     if (source.Height() < source.Width())
     {
-        resizedHeight = _modelHeight;
-        resizedWidth = double(source.Width()) * double(_modelHeight) / double(source.Height());
+        resizedHeight = _parameters.modelHeight;
+        resizedWidth = double(source.Width()) * double(_parameters.modelHeight) / double(source.Height());
     }
     else 
     {
-        resizedWidth = _modelWidth;
-        resizedHeight = double(source.Height()) * double(_modelWidth) / double(source.Width());
+        resizedWidth = _parameters.modelWidth;
+        resizedHeight = double(source.Height()) * double(_parameters.modelWidth) / double(source.Width());
     }
 
     //Resize image
@@ -66,7 +66,7 @@ bool Segmentation::processImage(image::Image<IndexT> &labels, const image::Image
         for (int j = 0; j < resizedWidth;j++)
         {
             image::RGBfColor value = resized(i, j);
-            resized(i, j) = (value - _center) * _scale;
+            resized(i, j) = (value - _parameters.center) * _parameters.scale;
         }
     }
 
@@ -84,8 +84,8 @@ bool Segmentation::processImage(image::Image<IndexT> &labels, const image::Image
 bool Segmentation::tiledProcess(image::Image<IndexT> & labels, const image::Image<image::RGBfColor> & source)
 {    
     //Compute the theorical tiles count
-    int cwidth = divideRoundUp(source.Width(), _modelWidth);
-    int cheight = divideRoundUp(source.Height(), _modelHeight);
+    int cwidth = divideRoundUp(source.Width(), _parameters.modelWidth);
+    int cheight = divideRoundUp(source.Height(), _parameters.modelHeight);
 
     image::Image<ScoredLabel> scoredLabels(source.Width(), source.Height(), true, {0, 0.0f});
 
@@ -93,8 +93,8 @@ bool Segmentation::tiledProcess(image::Image<IndexT> & labels, const image::Imag
     for (int i = 0; i < cheight; i++)
     {
         //Compute starting point with overlap on previous
-        int y = std::max(0, int(i * _modelHeight - _overlapRatio * _modelHeight));
-        int ly = y + _modelHeight;
+        int y = std::max(0, int(i * _parameters.modelHeight - _parameters.overlapRatio * _parameters.modelHeight));
+        int ly = y + _parameters.modelHeight;
 
         //If we are on the end border, shift on the other side
         int shifty = source.Height() - ly;
@@ -106,8 +106,8 @@ bool Segmentation::tiledProcess(image::Image<IndexT> & labels, const image::Imag
         for (int j = 0; j < cwidth; j++)
         {
             //Compute starting point with overlap on previous
-            int x = std::max(0, int(j * _modelWidth - _overlapRatio * _modelWidth));
-            int lx = x + _modelWidth;
+            int x = std::max(0, int(j * _parameters.modelWidth - _parameters.overlapRatio * _parameters.modelWidth));
+            int lx = x + _parameters.modelWidth;
 
             //If we are on the end border, shift on the other side
             int shiftx = source.Width() - lx;
@@ -117,10 +117,10 @@ bool Segmentation::tiledProcess(image::Image<IndexT> & labels, const image::Imag
             }
 
             //x and y contains the position of the tile in the input image
-            auto & block = source.block(y, x, _modelHeight, _modelWidth);
+            auto & block = source.block(y, x, _parameters.modelHeight, _parameters.modelWidth);
 
             //Compute tile
-            image::Image<ScoredLabel> tileLabels(_modelWidth, _modelHeight, true, {0, 0.0f});
+            image::Image<ScoredLabel> tileLabels(_parameters.modelWidth, _parameters.modelHeight, true, {0, 0.0f});
             processTile(tileLabels, block);
 
 
@@ -155,17 +155,17 @@ bool Segmentation::mergeLabels(image::Image<ScoredLabel> & labels, image::Image<
 
 bool Segmentation::labelsFromModelOutput(image::Image<ScoredLabel> & labels, const std::vector<float> & modelOutput)
 {
-    for (int outputY = 0; outputY < _modelHeight; outputY++)
+    for (int outputY = 0; outputY < _parameters.modelHeight; outputY++)
     {
-        for (int outputX = 0; outputX < _modelWidth; outputX++)
+        for (int outputX = 0; outputX < _parameters.modelWidth; outputX++)
         {
             int maxClasse = 0;
             int maxVal = 0;
 
-            for (int classe = 0; classe < _classes.size(); classe++)
+            for (int classe = 0; classe < _parameters.classes.size(); classe++)
             {
-                int classPos = classe * _modelWidth * _modelHeight;
-                int pos = classPos + outputY * _modelWidth  + outputX;
+                int classPos = classe * _parameters.modelWidth * _parameters.modelHeight;
+                int pos = classPos + outputY * _parameters.modelWidth  + outputX;
 
                 float val = modelOutput[pos];
                 if (val > maxVal)
@@ -186,16 +186,16 @@ bool Segmentation::processTile(image::Image<ScoredLabel> & labels, const image::
 {
     Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation");
     Ort::SessionOptions ortSessionOptions;
-    Ort::Session ortSession = Ort::Session(ortEnvironment, "/s/apps/users/servantf/MeshroomResearch/mrrs/segmentation/semantic/fcn_resnet50.onnx", ortSessionOptions);
+    Ort::Session ortSession = Ort::Session(ortEnvironment, _parameters.modelWeights.c_str(), ortSessionOptions);
 
     Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
 
     std::vector<const char*> inputNames{"input"};
     std::vector<const char*> outputNames{"output"};
-    std::vector<int64_t> inputDimensions = {1, 3, _modelHeight, _modelWidth};
-    std::vector<int64_t> outputDimensions = {1, _classes.size(), _modelHeight, _modelWidth};
+    std::vector<int64_t> inputDimensions = {1, 3, _parameters.modelHeight, _parameters.modelWidth};
+    std::vector<int64_t> outputDimensions = {1, _parameters.classes.size(), _parameters.modelHeight, _parameters.modelWidth};
 
-    std::vector<float> output(_classes.size() * _modelHeight * _modelWidth);
+    std::vector<float> output(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth);
     Ort::Value outputTensors = Ort::Value::CreateTensor<float>(
         mem_info, 
         output.data(), output.size(), 
@@ -213,9 +213,7 @@ bool Segmentation::processTile(image::Image<ScoredLabel> & labels, const image::
 
     try 
     {
-        ALICEVISION_LOG_INFO("test");
         ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1);
-        ALICEVISION_LOG_INFO("test2");
     } 
     catch (const Ort::Exception& exception) 
     {
diff --git a/src/aliceVision/segmentation/segmentation.hpp b/src/aliceVision/segmentation/segmentation.hpp
index 553894dbe4..15ee50246f 100644
--- a/src/aliceVision/segmentation/segmentation.hpp
+++ b/src/aliceVision/segmentation/segmentation.hpp
@@ -29,10 +29,26 @@ class Segmentation
 public:
     const std::vector<std::string> & getClasses() 
     {
-        return _classes;
+        return _parameters.classes;
     }
 
+    struct Parameters  // Model description; defaults are the values formerly hard-coded in Segmentation
+    {
+        std::string modelWeights;          ///< Path of the ONNX model file handed to Ort::Session
+        std::vector<std::string> classes;  ///< Class names; their count sizes the model output tensor
+        image::RGBfColor center = {0.485, 0.456, 0.406};  ///< Subtracted from each pixel before inference
+        image::RGBfColor scale = {1.0 / 0.229, 1.0 / 0.224, 1.0 / 0.225};  ///< Multiplied after centering
+        int modelWidth = 1280;             ///< Width of the model input and output, in pixels
+        int modelHeight = 720;             ///< Height of the model input and output, in pixels
+        double overlapRatio = 0.3;         ///< Fraction of the tile size by which a tile start is moved back
+    };
+
 public:
+    Segmentation(const Parameters & parameters) : _parameters(parameters)
+    {
+
+    }
+
     /**
      * Process an input image to estimate segmentation
      * @param labels the labels image resulting from the process
@@ -72,14 +88,7 @@ class Segmentation
     bool mergeLabels(image::Image<ScoredLabel> & labels, image::Image<ScoredLabel> & tileLabels, int tileX, int tileY);
 
 protected:
-    std::vector<std::string> _classes = {"__background__", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
-                                    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
-                                    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
-    image::RGBfColor _center= {0.485, 0.456, 0.406};
-    image::RGBfColor _scale= {1.0 / 0.229, 1.0 / 0.224, 1.0 / 0.225};
-    int _modelWidth = 1280;
-    int _modelHeight = 720;
-    double _overlapRatio = 0.3;
+    Parameters _parameters;
 };
 
 } //aliceVision
diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp
index 9ff85630ac..0fdec2bdd9 100644
--- a/src/software/pipeline/main_imageSegmentation.cpp
+++ b/src/software/pipeline/main_imageSegmentation.cpp
@@ -63,18 +63,30 @@ void imageToPlanes(std::vector<float> & output, const image::Image<image::RGBfCo
     }
 }
 
-
+void labelsToMask(image::Image<unsigned char> & mask, const image::Image<IndexT> & labels, const std::set<IndexT> & validClasses)
+{
+    // labels is read over the whole extent of mask; 255 marks a pixel whose label is in validClasses
+    for (int row = 0; row < mask.Height(); row++)
+    {
+        for (int col = 0; col < mask.Width(); col++)
+        {
+            mask(row, col) = (validClasses.count(labels(row, col)) != 0) ? 255 : 0;
+        }
+    }
+}
 
 int aliceVision_main(int argc, char** argv)
 {
     std::string sfmDataFilepath;
     std::string outputPath;
-    std::vector<std::string> validClasses = {"person"};
+    std::string modelWeightsPath;
+    std::vector<std::string> validClasses;
     
     // Description of mandatory parameters
     po::options_description requiredParams("Required parameters");
     requiredParams.add_options()
         ("input,i", po::value<std::string>(&sfmDataFilepath)->required(), "Input sfmData.")
+        ("modelPath,m", po::value<std::string>(&modelWeightsPath)->required(), "Input Model weights file.")
         ("output,o", po::value<std::string>(&outputPath)->required(), "output folder.");
 
     po::options_description optionalParams("Optional parameters");
@@ -98,9 +110,23 @@ int aliceVision_main(int argc, char** argv)
         return EXIT_FAILURE;
     }
 
-    aliceVision::segmentation::Segmentation seg;
+    aliceVision::segmentation::Segmentation::Parameters parameters;
+
+    parameters.modelWeights = modelWeightsPath;
+    parameters.classes = {"__background__", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
+                                    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
+                                    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
+    parameters.center= {0.485, 0.456, 0.406};
+    parameters.scale= {1.0 / 0.229, 1.0 / 0.224, 1.0 / 0.225};
+    parameters.modelWidth = 1280;
+    parameters.modelHeight = 720;
+    parameters.overlapRatio = 0.3;
+
+    aliceVision::segmentation::Segmentation seg(parameters);
     const auto & classes = seg.getClasses();
 
+
+    //Compute the set of valid classes given parameters
     std::set<IndexT> validClassesIndices;
     for (const auto & s : validClasses)
     {
@@ -133,10 +159,14 @@ int aliceVision_main(int argc, char** argv)
             ALICEVISION_LOG_INFO("Failed to segment image " << path);
         }
 
+
+        image::Image<unsigned char> mask(labels.Width(), labels.Height());
+        labelsToMask(mask, labels, validClassesIndices);
+
         //Store image
         std::stringstream ss;
         ss << outputPath << "/" << pv.first << ".exr";
-        image::writeImage(ss.str(), labels, image::ImageWriteOptions());
+        image::writeImage(ss.str(), mask, image::ImageWriteOptions());
     }
 
    

From 6da307614d2a1dfe62f34df77068d97e549d712f Mon Sep 17 00:00:00 2001
From: Fabien SERVANT <fabien.servant@technicolor.com>
Date: Wed, 21 Jun 2023 12:05:23 +0200
Subject: [PATCH 5/9] Adding cuda support to segmentation

---
 src/aliceVision/segmentation/CMakeLists.txt   |  12 +-
 src/aliceVision/segmentation/segmentation.cpp | 115 ++++++++++++++++--
 src/aliceVision/segmentation/segmentation.hpp |  38 ++++++
 .../pipeline/main_imageSegmentation.cpp       |   4 +-
 4 files changed, 158 insertions(+), 11 deletions(-)

diff --git a/src/aliceVision/segmentation/CMakeLists.txt b/src/aliceVision/segmentation/CMakeLists.txt
index 10c39524e6..3c37b41926 100644
--- a/src/aliceVision/segmentation/CMakeLists.txt
+++ b/src/aliceVision/segmentation/CMakeLists.txt
@@ -8,6 +8,13 @@ set(segmentation_files_sources
     segmentation.cpp
 )
 
+set(SEGMENTATION_PRIVATE_LINKS "")
+set(SEGMENTATION_PRIVATE_INCLUDE_DIRS "")
+if(ALICEVISION_HAVE_CUDA)
+  set(SEGMENTATION_PRIVATE_LINKS ${CUDA_LIBRARIES})
+  set(SEGMENTATION_PRIVATE_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
+endif()
+
 alicevision_add_library(aliceVision_segmentation
   SOURCES ${segmentation_files_headers} ${segmentation_files_sources}
   PUBLIC_LINKS
@@ -15,7 +22,10 @@ alicevision_add_library(aliceVision_segmentation
     aliceVision_numeric
     aliceVision_image
     ONNXRuntime::ONNXRuntime
-    #penImageIO::OpenImageIO
+  PRIVATE_LINKS
+    ${SEGMENTATION_PRIVATE_LINKS}
+  PRIVATE_INCLUDE_DIRS
+    ${SEGMENTATION_PRIVATE_INCLUDE_DIRS}
 )
 
 
diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp
index c43b0e40e0..3862cd70ac 100644
--- a/src/aliceVision/segmentation/segmentation.cpp
+++ b/src/aliceVision/segmentation/segmentation.cpp
@@ -6,6 +6,10 @@
 
 #include "segmentation.hpp"
 
+#if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA)
+#include <cuda_runtime.h>
+#endif
+
 #include <aliceVision/image/all.hpp>
 #include <aliceVision/image/imageAlgo.hpp>
 #include <aliceVision/numeric/numeric.hpp>
@@ -13,6 +17,7 @@
 namespace aliceVision {
 namespace segmentation {
 
+
 void imageToPlanes(std::vector<float> & output, const image::Image<image::RGBfColor>::Base & source)
 {
     size_t planeSize = source.rows() * source.cols();
@@ -38,6 +43,47 @@ void imageToPlanes(std::vector<float> & output, const image::Image<image::RGBfCo
     }
 }
 
+// Creates the ONNX Runtime environment and session for _parameters.modelWeights (CUDA builds also
+// register the CUDA provider and allocate the device I/O buffers). Always returns true.
+bool Segmentation::initialize()
+{
+    _ortEnvironment = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation");
+    Ort::SessionOptions ortSessionOptions;
+
+    #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA)
+        const auto& api = Ort::GetApi();  // only the CUDA provider setup needs the C API
+        OrtCUDAProviderOptionsV2* cuda_options = nullptr;
+        api.CreateCUDAProviderOptions(&cuda_options);
+        api.SessionOptionsAppendExecutionProvider_CUDA_V2(static_cast<OrtSessionOptions*>(ortSessionOptions), cuda_options);
+        api.ReleaseCUDAProviderOptions(cuda_options);
+        _ortSession = std::make_unique<Ort::Session>(*_ortEnvironment, _parameters.modelWeights.c_str(), ortSessionOptions);
+
+        Ort::MemoryInfo memInfoCuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault);
+        Ort::Allocator cudaAllocator(*_ortSession, memInfoCuda);
+        // The input tile always holds 3 planes, whatever the class count; both buffers are freed in terminate()
+        const size_t inputCount = size_t(3) * _parameters.modelHeight * _parameters.modelWidth;
+        _output.resize(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth);
+        _cudaInput = cudaAllocator.Alloc(inputCount * sizeof(float));
+        _cudaOutput = cudaAllocator.Alloc(_output.size() * sizeof(float));
+    #else
+        // The environment is the _ortEnvironment member: no local named ortEnvironment exists here
+        _ortSession = std::make_unique<Ort::Session>(*_ortEnvironment, _parameters.modelWeights.c_str(), ortSessionOptions);
+    #endif
+    return true;
+}
+
+// Releases the CUDA buffers obtained in initialize() (CUDA builds only). Always returns true.
+bool Segmentation::terminate()
+{
+    #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA)
+        Ort::MemoryInfo cudaMemInfo("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault);
+        Ort::Allocator deviceAllocator(*_ortSession, cudaMemInfo);
+        deviceAllocator.Free(_cudaInput);
+        deviceAllocator.Free(_cudaOutput);
+    #endif
+    return true;
+}
+
 bool Segmentation::processImage(image::Image<IndexT> &labels, const image::Image<image::RGBfColor> & source)
 {
     //Todo : handle orientation and small images smaller than model input
@@ -121,7 +167,12 @@ bool Segmentation::tiledProcess(image::Image<IndexT> & labels, const image::Imag
 
             //Compute tile
             image::Image<ScoredLabel> tileLabels(_parameters.modelWidth, _parameters.modelHeight, true, {0, 0.0f});
+
+            #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA)
+            processTileGPU(tileLabels, block);
+            #else
             processTile(tileLabels, block);
+            #endif
 
 
             //Update the global labeling
@@ -184,11 +235,7 @@ bool Segmentation::labelsFromModelOutput(image::Image<ScoredLabel> & labels, con
 
 bool Segmentation::processTile(image::Image<ScoredLabel> & labels, const image::Image<image::RGBfColor>::Base & source)
 {
-    Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "aliceVision-imageSegmentation");
-    Ort::SessionOptions ortSessionOptions;
-    Ort::Session ortSession = Ort::Session(ortEnvironment, _parameters.modelWeights.c_str(), ortSessionOptions);
-
-    Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
+    Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
 
     std::vector<const char*> inputNames{"input"};
     std::vector<const char*> outputNames{"output"};
@@ -197,7 +244,7 @@ bool Segmentation::processTile(image::Image<ScoredLabel> & labels, const image::
 
     std::vector<float> output(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth);
     Ort::Value outputTensors = Ort::Value::CreateTensor<float>(
-        mem_info, 
+        memInfo, 
         output.data(), output.size(), 
         outputDimensions.data(), outputDimensions.size()
     );
@@ -206,18 +253,18 @@ bool Segmentation::processTile(image::Image<ScoredLabel> & labels, const image::
     imageToPlanes(transformedInput, source);
 
     Ort::Value inputTensors = Ort::Value::CreateTensor<float>(
-        mem_info, 
+        memInfo, 
         transformedInput.data(), transformedInput.size(), 
         inputDimensions.data(), inputDimensions.size()
     );
 
     try 
     {
-        ortSession.Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1);
+        _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1);
     } 
     catch (const Ort::Exception& exception) 
     {
-        std::cout << "ERROR running model inference: " << exception.what() << std::endl;
+        ALICEVISION_LOG_ERROR("ERROR running model inference: " << exception.what());
         return false;
     }
 
@@ -229,5 +276,55 @@ bool Segmentation::processTile(image::Image<ScoredLabel> & labels, const image::
     return true;
 }
 
+#if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA)
+// Runs the model on one tile using the device buffers _cudaInput/_cudaOutput; false on any copy, inference or labelling failure.
+bool Segmentation::processTileGPU(image::Image<ScoredLabel> & labels, const image::Image<image::RGBfColor>::Base & source)
+{
+    Ort::MemoryInfo mem_info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault);
+    std::vector<const char*> inputNames{"input"};
+    std::vector<const char*> outputNames{"output"};
+    std::vector<int64_t> inputDimensions = {1, 3, _parameters.modelHeight, _parameters.modelWidth};
+    // size_t -> int64_t is a narrowing conversion inside a braced list, hence the explicit cast
+    std::vector<int64_t> outputDimensions = {1, static_cast<int64_t>(_parameters.classes.size()), _parameters.modelHeight, _parameters.modelWidth};
+    Ort::Value outputTensors = Ort::Value::CreateTensor<float>(
+        mem_info_cuda, reinterpret_cast<float*>(_cudaOutput), _output.size(),
+        outputDimensions.data(), outputDimensions.size());
+
+    std::vector<float> transformedInput;
+    imageToPlanes(transformedInput, source);
+
+    // Upload the planar tile; never run the model on a buffer that was not fully written
+    cudaError_t cudaStatus = cudaMemcpy(_cudaInput, transformedInput.data(), sizeof(float) * transformedInput.size(), cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess)
+    {
+        ALICEVISION_LOG_ERROR("ERROR copying model input to device: " << cudaGetErrorString(cudaStatus));
+        return false;
+    }
+
+    Ort::Value inputTensors = Ort::Value::CreateTensor<float>(
+        mem_info_cuda, reinterpret_cast<float*>(_cudaInput), transformedInput.size(),
+        inputDimensions.data(), inputDimensions.size());
+
+    try
+    {
+        _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1);
+    }
+    catch (const Ort::Exception& exception)
+    {
+        ALICEVISION_LOG_ERROR("ERROR running model inference: " << exception.what());
+        return false;
+    }
+
+    // Download the per-class scores; stale host data must not be turned into labels
+    cudaStatus = cudaMemcpy(_output.data(), _cudaOutput, sizeof(float) * _output.size(), cudaMemcpyDeviceToHost);
+    if (cudaStatus != cudaSuccess)
+    {
+        ALICEVISION_LOG_ERROR("ERROR copying model output from device: " << cudaGetErrorString(cudaStatus));
+        return false;
+    }
+    return labelsFromModelOutput(labels, _output);
+}
+#endif
+
 } //aliceVision
 } //segmentation
\ No newline at end of file
diff --git a/src/aliceVision/segmentation/segmentation.hpp b/src/aliceVision/segmentation/segmentation.hpp
index 15ee50246f..9e99955003 100644
--- a/src/aliceVision/segmentation/segmentation.hpp
+++ b/src/aliceVision/segmentation/segmentation.hpp
@@ -7,6 +7,7 @@
 #include <vector>
 #include <string> 
 
+#include <aliceVision/config.hpp>
 #include <aliceVision/types.hpp>
 #include <aliceVision/image/Image.hpp>
 
@@ -46,7 +47,15 @@ class Segmentation
 public:
     Segmentation(const Parameters & parameters) : _parameters(parameters)
     {
+        if (!initialize())
+        {
+            throw std::runtime_error("Error on segmentation initialization");
+        }
+    }
 
+    virtual ~Segmentation()
+    {
+        terminate();
     }
 
     /**
@@ -57,6 +66,17 @@ class Segmentation
     bool processImage(image::Image<IndexT> &labels, const image::Image<image::RGBfColor> & source);
 
 private:
+
+    /**
+     * Onnx creation code
+    */
+    bool initialize();
+    
+    /**
+     * Onnx destruction code
+    */
+    bool terminate();
+
     /**
      * Assume the source image is the correct size
      * @param labels the output label image
@@ -78,6 +98,15 @@ class Segmentation
      */
     bool processTile(image::Image<ScoredLabel> & labels, const image::Image<image::RGBfColor>::Base & source);
 
+    /**
+     * Process effectively a buffer of the model input size
+     * @param labels the output labels
+     * @param source the source tile
+     */
+    #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA)
+    bool processTileGPU(image::Image<ScoredLabel> & labels, const image::Image<image::RGBfColor>::Base & source);
+    #endif
+
     /**
      * Merge tile labels with global labels image
      * @param labels the global labels image
@@ -89,6 +118,15 @@ class Segmentation
 
 protected:
     Parameters _parameters;
+    std::unique_ptr<Ort::Env> _ortEnvironment;
+    std::unique_ptr<Ort::Session> _ortSession;
+    
+    std::vector<float> _output;
+
+    #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA)
+    void * _cudaOutput;
+    void * _cudaInput;
+    #endif
 };
 
 } //aliceVision
diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp
index 0fdec2bdd9..9be1612d7c 100644
--- a/src/software/pipeline/main_imageSegmentation.cpp
+++ b/src/software/pipeline/main_imageSegmentation.cpp
@@ -123,9 +123,10 @@ int aliceVision_main(int argc, char** argv)
     parameters.overlapRatio = 0.3;
 
     aliceVision::segmentation::Segmentation seg(parameters);
-    const auto & classes = seg.getClasses();
 
+    const auto & classes = seg.getClasses();
 
+    
     //Compute the set of valid classes given parameters
     std::set<IndexT> validClassesIndices;
     for (const auto & s : validClasses)
@@ -149,6 +150,7 @@ int aliceVision_main(int argc, char** argv)
     for (const auto & pv : sfmData.getViews())
     {
         std::string path = pv.second->getImagePath();
+        ALICEVISION_LOG_INFO("processing " << path);
 
         image::Image<image::RGBfColor> image;
         image::readImage(path, image, image::EImageColorSpace::NO_CONVERSION);

From f05a70accc0c29383b0bdb45b0d8494f1ade33d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Vital?= <mugulmotion@gmail.com>
Date: Wed, 21 Jun 2023 17:53:54 +0200
Subject: [PATCH 6/9] [image] add missing copyright

---
 src/aliceVision/image/imageAlgo.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/aliceVision/image/imageAlgo.cpp b/src/aliceVision/image/imageAlgo.cpp
index f6ca2eb929..2bdc5ef62a 100644
--- a/src/aliceVision/image/imageAlgo.cpp
+++ b/src/aliceVision/image/imageAlgo.cpp
@@ -1,3 +1,9 @@
+// This file is part of the AliceVision project.
+// Copyright (c) 2019 AliceVision contributors.
+// This Source Code Form is subject to the terms of the Mozilla Public License,
+// v. 2.0. If a copy of the MPL was not distributed with this file,
+// You can obtain one at https://mozilla.org/MPL/2.0/.
+
 #include "imageAlgo.hpp"
 
 #include <aliceVision/image/Image.hpp>

From b919f217a5ae2faca7339437b566118f7aa2d158 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Vital?= <mugulmotion@gmail.com>
Date: Wed, 21 Jun 2023 17:55:43 +0200
Subject: [PATCH 7/9] [software] imageSegmentation: minor cosmetic changes

---
 src/software/pipeline/CMakeLists.txt             |  2 +-
 src/software/pipeline/main_imageSegmentation.cpp | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/software/pipeline/CMakeLists.txt b/src/software/pipeline/CMakeLists.txt
index e43a4f5073..4e8cf0012a 100644
--- a/src/software/pipeline/CMakeLists.txt
+++ b/src/software/pipeline/CMakeLists.txt
@@ -622,7 +622,7 @@ if(ALICEVISION_HAVE_OPENCV AND ALICEVISION_HAVE_ONNX)
 endif()
 
 if(ALICEVISION_HAVE_ONNX)
-    # SphereDetection
+    # Image Segmentation
     alicevision_add_software(aliceVision_imageSegmentation
     SOURCE main_imageSegmentation.cpp
     FOLDER ${FOLDER_SOFTWARE_PIPELINE}
diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp
index 9be1612d7c..85e1e5cf4a 100644
--- a/src/software/pipeline/main_imageSegmentation.cpp
+++ b/src/software/pipeline/main_imageSegmentation.cpp
@@ -70,7 +70,7 @@ void labelsToMask(image::Image<unsigned char> & mask, const image::Image<IndexT>
         for (int j = 0; j < mask.Width(); j++)
         {
             IndexT label = labels(i, j);
-            mask(i, j) = (validClasses.find(label) != validClasses.end())?255:0;
+            mask(i, j) = (validClasses.find(label) != validClasses.end()) ? 255 : 0;
         }
     }
 }
@@ -113,9 +113,17 @@ int aliceVision_main(int argc, char** argv)
     aliceVision::segmentation::Segmentation::Parameters parameters;
 
     parameters.modelWeights = modelWeightsPath;
-    parameters.classes = {"__background__", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
-                                    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
-                                    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
+    parameters.classes = {
+        "__background__",
+        "aeroplane",
+        "bicycle", "bird", "boat", "bottle", "bus",
+        "car", "cat", "chair", "cow",
+        "diningtable", "dog",
+        "horse",
+        "motorbike",
+        "person", "pottedplant",
+        "sheep", "sofa",
+        "train", "tvmonitor"};
     parameters.center= {0.485, 0.456, 0.406};
     parameters.scale= {1.0 / 0.229, 1.0 / 0.224, 1.0 / 0.225};
     parameters.modelWidth = 1280;

From 6059eb506dd1f34be750adcefc5419db81910239 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Vital?= <mugulmotion@gmail.com>
Date: Wed, 21 Jun 2023 18:11:12 +0200
Subject: [PATCH 8/9] [software] featureExtraction: add maskExtension and
 maskInvert params

---
 src/aliceVision/feature/FeatureExtractor.cpp     |  6 +++---
 src/aliceVision/feature/FeatureExtractor.hpp     |  6 +++++-
 src/software/pipeline/main_featureExtraction.cpp | 10 ++++++++--
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/aliceVision/feature/FeatureExtractor.cpp b/src/aliceVision/feature/FeatureExtractor.cpp
index a54288f0a4..6de3607ec9 100644
--- a/src/aliceVision/feature/FeatureExtractor.cpp
+++ b/src/aliceVision/feature/FeatureExtractor.cpp
@@ -182,9 +182,9 @@ void FeatureExtractor::computeViewJob(const FeatureExtractorViewJob& job, bool u
     {
         const auto masksFolder = fs::path(_masksFolder);
         const auto idMaskPath = masksFolder /
-                fs::path(std::to_string(job.view().getViewId())).replace_extension("png");
+                fs::path(std::to_string(job.view().getViewId())).replace_extension(_maskExtension);
         const auto nameMaskPath = masksFolder /
-                fs::path(job.view().getImagePath()).filename().replace_extension("png");
+                fs::path(job.view().getImagePath()).filename().replace_extension(_maskExtension);
 
         if (fs::exists(idMaskPath))
         {
@@ -233,7 +233,7 @@ void FeatureExtractor::computeViewJob(const FeatureExtractorViewJob& job, bool u
                 bool masked = false;
                 if (x < mask.Width() && y < mask.Height())
                 {
-                    if (mask(y, x) == 0)
+                    if ((mask(y, x) == 0 && !_maskInvert) || (mask(y, x) != 0 && _maskInvert))
                     {
                         masked = true;
                     }
diff --git a/src/aliceVision/feature/FeatureExtractor.hpp b/src/aliceVision/feature/FeatureExtractor.hpp
index 182c80e9dc..c124ad8bcc 100644
--- a/src/aliceVision/feature/FeatureExtractor.hpp
+++ b/src/aliceVision/feature/FeatureExtractor.hpp
@@ -79,9 +79,11 @@ class FeatureExtractor
       _rangeSize = rangeSize;
     }
 
-    void setMasksFolder(const std::string& folder)
+    void setMasksFolder(const std::string& folder, const std::string& ext = "png", bool invert = false)
     {
       _masksFolder = folder;
+      _maskExtension = ext;  // extension substituted into the mask file names that are looked up
+      _maskInvert = invert;  // when true, non-zero mask pixels are the masked ones instead of zero pixels
     }
 
     void setOutputFolder(const std::string& folder)
@@ -103,6 +105,8 @@ class FeatureExtractor
     const sfmData::SfMData& _sfmData;
     std::vector<std::shared_ptr<feature::ImageDescriber>> _imageDescribers;
     std::string _masksFolder;
+    std::string _maskExtension = "png"; // same extension that was hard-coded before it became configurable
+    bool _maskInvert = false;           // defined value even if setMasksFolder() is never called
     std::string _outputFolder;
     int _rangeStart = -1;
     int _rangeSize = -1;
diff --git a/src/software/pipeline/main_featureExtraction.cpp b/src/software/pipeline/main_featureExtraction.cpp
index b06cdd7274..662817d7cd 100644
--- a/src/software/pipeline/main_featureExtraction.cpp
+++ b/src/software/pipeline/main_featureExtraction.cpp
@@ -32,7 +32,7 @@
 // These constants define the current software version.
 // They must be updated when the command line is changed.
 #define ALICEVISION_SOFTWARE_VERSION_MAJOR 1
-#define ALICEVISION_SOFTWARE_VERSION_MINOR 1
+#define ALICEVISION_SOFTWARE_VERSION_MINOR 2
 
 using namespace aliceVision;
 
@@ -57,6 +57,8 @@ int aliceVision_main(int argc, char **argv)
     int maxThreads = 0;
     bool forceCpuExtraction = false;
     image::EImageColorSpace workingColorSpace = image::EImageColorSpace::SRGB;
+    std::string maskExtension = "png";
+    bool maskInvert = false;
 
     po::options_description requiredParams("Required parameters");
     requiredParams.add_options()
@@ -88,6 +90,10 @@ int aliceVision_main(int argc, char **argv)
          "Use only CPU feature extraction methods.")
         ("masksFolder", po::value<std::string>(&masksFolder),
          "Masks folder.")
+        ("maskExtension", po::value<std::string>(&maskExtension)->default_value(maskExtension),
+         "File extension for masks.")
+        ("maskInvert", po::value<bool>(&maskInvert)->default_value(maskInvert),
+         "Invert mask values.")
         ("rangeStart", po::value<int>(&rangeStart)->default_value(rangeStart),
          "Range image index start.")
         ("rangeSize", po::value<int>(&rangeSize)->default_value(rangeSize),
@@ -135,7 +141,7 @@ int aliceVision_main(int argc, char **argv)
 
     // create feature extractor
     feature::FeatureExtractor extractor(sfmData);
-    extractor.setMasksFolder(masksFolder);
+    extractor.setMasksFolder(masksFolder, maskExtension, maskInvert);
     extractor.setOutputFolder(outputFolder);
 
     // set maxThreads

From 935f7b23e747ddcff570937bd22e665826f08207 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Vital?= <mugulmotion@gmail.com>
Date: Thu, 22 Jun 2023 00:14:01 -0700
Subject: [PATCH 9/9] [software] imageSegmentation: add range arguments

---
 .../pipeline/main_imageSegmentation.cpp       | 51 +++++++++++++++++--
 1 file changed, 46 insertions(+), 5 deletions(-)

diff --git a/src/software/pipeline/main_imageSegmentation.cpp b/src/software/pipeline/main_imageSegmentation.cpp
index 85e1e5cf4a..884da12a84 100644
--- a/src/software/pipeline/main_imageSegmentation.cpp
+++ b/src/software/pipeline/main_imageSegmentation.cpp
@@ -24,7 +24,8 @@
 // IO
 #include <fstream>
 #include <algorithm>
-#include <boost/algorithm/string.hpp>    
+#include <memory>
+#include <string>
 
 
 #include <aliceVision/segmentation/segmentation.hpp>
@@ -81,6 +82,8 @@ int aliceVision_main(int argc, char** argv)
     std::string outputPath;
     std::string modelWeightsPath;
     std::vector<std::string> validClasses;
+    int rangeStart = -1;
+    int rangeSize = 1;
     
     // Description of mandatory parameters
     po::options_description requiredParams("Required parameters");
@@ -92,7 +95,11 @@ int aliceVision_main(int argc, char** argv)
     po::options_description optionalParams("Optional parameters");
     optionalParams.add_options()
         ("validClasses,c", po::value<std::vector<std::string>>(&validClasses)->multitoken(),
-         "Names of classes which are to be considered");
+         "Names of classes which are to be considered")
+        ("rangeStart", po::value<int>(&rangeStart)->default_value(rangeStart), 
+        "Range start for processing views (ordered by image filepath). Set to -1 to process all images.")
+        ("rangeSize", po::value<int>(&rangeSize)->default_value(rangeSize), 
+        "Range size for processing views (ordered by image filepath).");
 
     CmdLine cmdline("AliceVision imageSegmentation");
     cmdline.add(requiredParams);
@@ -110,6 +117,40 @@ int aliceVision_main(int argc, char** argv)
         return EXIT_FAILURE;
     }
 
+    // Order views by their image names (null views, if any, are placed first)
+    std::vector<std::shared_ptr<sfmData::View>> viewsOrderedByName;
+    for(const auto& viewIt : sfmData.getViews())
+    {
+        viewsOrderedByName.push_back(viewIt.second);
+    }
+    std::sort(viewsOrderedByName.begin(), viewsOrderedByName.end(),
+              [](const std::shared_ptr<sfmData::View>& a, const std::shared_ptr<sfmData::View>& b) -> bool
+              {
+                  if(a == nullptr || b == nullptr) // std::sort needs comp(x, x) == false, so never return true for two nulls
+                      return (a == nullptr) && (b != nullptr);
+                  return (a->getImagePath() < b->getImagePath());
+              });
+
+    // Define range to compute, then keep only the views inside it: the processing loop
+    // below walks the whole vector, so trimming here is what makes the range effective
+    if(rangeStart != -1)
+    {
+        if(rangeStart < 0 || rangeSize < 0 || static_cast<std::size_t>(rangeStart) > viewsOrderedByName.size())
+        {
+            ALICEVISION_LOG_ERROR("Range is incorrect");
+            return EXIT_FAILURE;
+        }
+
+        // Clamp by subtraction: rangeStart + rangeSize could overflow int for a huge rangeSize
+        rangeSize = std::min(rangeSize, static_cast<int>(viewsOrderedByName.size()) - rangeStart);
+        viewsOrderedByName.erase(viewsOrderedByName.begin() + rangeStart + rangeSize, viewsOrderedByName.end());
+        viewsOrderedByName.erase(viewsOrderedByName.begin(), viewsOrderedByName.begin() + rangeStart);
+    }
+
+    // The vector now holds exactly the views to process; make the range describe it
+    rangeStart = 0;
+    rangeSize = static_cast<int>(viewsOrderedByName.size());
+
     aliceVision::segmentation::Segmentation::Parameters parameters;
 
     parameters.modelWeights = modelWeightsPath;
@@ -155,9 +196,9 @@ int aliceVision_main(int argc, char** argv)
         }
     }
 
-    for (const auto & pv : sfmData.getViews())
+    for (const auto & view : viewsOrderedByName)
     {
-        std::string path = pv.second->getImagePath();
+        std::string path = view->getImagePath();
         ALICEVISION_LOG_INFO("processing " << path);
 
         image::Image<image::RGBfColor> image;
@@ -175,7 +216,7 @@ int aliceVision_main(int argc, char** argv)
 
         //Store image
         std::stringstream ss;
-        ss << outputPath << "/" << pv.first << ".exr";
+        ss << outputPath << "/" << view->getViewId() << ".exr";
         image::writeImage(ss.str(), mask, image::ImageWriteOptions());
     }