PaddlePaddle · jiangjiajun · Aug 5, 2022 · Jul 19, 2022 · Jul 19, 2022 · Jul 20, 2022
diff --git a/csrcs/fastdeploy/vision/common/processors/resize.h b/csrcs/fastdeploy/vision/common/processors/resize.h
@@ -41,6 +41,16 @@ class Resize : public Processor {
                   float scale_h = -1.0, int interp = 1, bool use_scale = false,
                   ProcLib lib = ProcLib::OPENCV_CPU);
 
+  bool SetWidthAndHeight(int width, int height) {  // overwrite target size
+    width_ = width;
+    height_ = height;
+    return true;  // unconditionally reports success
+  }
+
+  std::tuple<int, int> GetWidthAndHeight() const {
+    return std::make_tuple(width_, height_);  // order: (width, height)
+  }
+
  private:
   int width_;
   int height_;
@@ -49,5 +59,5 @@ class Resize : public Processor {
   int interp_ = 1;
   bool use_scale_ = false;
 };
-} // namespace vision
-} // namespace fastdeploy
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/result.cc b/csrcs/fastdeploy/vision/common/result.cc
@@ -140,11 +140,24 @@ std::string FaceDetectionResult::Str() {
 }
 
 void SegmentationResult::Clear() {
-  std::vector<std::vector<int64_t>>().swap(masks);
+  std::vector<uint8_t>().swap(label_map);  // swap with empty temp drops buffer
+  std::vector<float>().swap(score_map);
+  std::vector<int64_t>().swap(shape);
+  contain_score_map = false;  // back to the struct's default value
 }
 
-void SegmentationResult::Resize(int64_t height, int64_t width) {
-  masks.resize(height, std::vector<int64_t>(width));
+void SegmentationResult::Reserve(int size) {
+  label_map.reserve(size);  // capacity only; the element count is unchanged
+  if (contain_score_map) {  // same flag test as SegmentationResult::Resize
+    score_map.reserve(size);
+  }
+}
+
+void SegmentationResult::Resize(int size) {
+  // score_map follows label_map's length only while contain_score_map is set.
+  label_map.resize(size);
+  if (!contain_score_map) return;
+  score_map.resize(size);
 }
 
 std::string SegmentationResult::Str() {
@@ -153,11 +166,24 @@ std::string SegmentationResult::Str() {
   for (size_t i = 0; i < 10; ++i) {
     out += "[";
     for (size_t j = 0; j < 10; ++j) {
-      out = out + std::to_string(masks[i][j]) + ", ";
+      out = out + std::to_string(label_map[i * 10 + j]) + ", ";
     }
     out += ".....]\n";
   }
   out += "...........\n";
+  if (contain_score_map) {
+    out += "SegmentationResult Score map 10 rows x 10 cols: \n";
+    for (size_t i = 0; i < 10; ++i) {
+      out += "[";
+      for (size_t j = 0; j < 10; ++j) {
+        out = out + std::to_string(score_map[i * 10 + j]) + ", ";
+      }
+      out += ".....]\n";
+    }
+    out += "...........\n";
+  }
+  out += "result shape is: [" + std::to_string(shape[0]) + " " +
+         std::to_string(shape[1]) + "]";
   return out;
 }
 

diff --git a/csrcs/fastdeploy/vision/common/result.h b/csrcs/fastdeploy/vision/common/result.h
@@ -84,13 +84,18 @@ struct FASTDEPLOY_DECL FaceDetectionResult : public BaseResult {
 
 struct FASTDEPLOY_DECL SegmentationResult : public BaseResult {
   // mask
-  std::vector<std::vector<int64_t>> masks;
+  std::vector<uint8_t> label_map;  // class id per element, one flat buffer
+  std::vector<float> score_map;    // sized only when contain_score_map is set
+  std::vector<int64_t> shape;      // result dimensions; Str() prints [0], [1]
+  bool contain_score_map = false;  // gates score_map in Reserve/Resize/Str
 
   ResultType type = ResultType::SEGMENTATION;
 
   void Clear();
 
-  void Resize(int64_t height, int64_t width);
+  void Reserve(int size);  // reserve capacity for `size` elements
+
+  void Resize(int size);  // set the element count to `size`
 
   std::string Str();
 };

diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc
@@ -11,8 +11,8 @@ Model::Model(const std::string& model_file, const std::string& params_file,
              const std::string& config_file, const RuntimeOption& custom_option,
              const Frontend& model_format) {
   config_file_ = config_file;
-  valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
-  valid_gpu_backends = {Backend::ORT, Backend::PDINFER};
+  valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
+  valid_gpu_backends = {Backend::PDINFER, Backend::ORT};
   runtime_option = custom_option;
   runtime_option.model_format = model_format;
   runtime_option.model_file = model_file;
@@ -65,6 +65,7 @@ bool Model::BuildPreprocessPipelineFromConfig() {
         const auto& target_size = op["target_size"];
         int resize_width = target_size[0].as<int>();
         int resize_height = target_size[1].as<int>();
+        is_resized = true;
         processors_.push_back(
             std::make_shared<Resize>(resize_width, resize_height));
       }
@@ -74,49 +75,140 @@ bool Model::BuildPreprocessPipelineFromConfig() {
   return true;
 }
 
-bool Model::Preprocess(Mat* mat, FDTensor* output) {
+bool Model::Preprocess(Mat* mat, FDTensor* output,
+                       std::map<std::string, std::array<int, 2>>* im_info) {
   for (size_t i = 0; i < processors_.size(); ++i) {
+    if (processors_[i]->Name().compare("Resize") == 0) {
+      // When is_vertical_screen is set, swap a wider-than-tall target size.
+      auto processor = dynamic_cast<Resize*>(processors_[i].get());
+      int resize_width = -1, resize_height = -1;
+      std::tie(resize_width, resize_height) = processor->GetWidthAndHeight();
+      if (is_vertical_screen && (resize_width > resize_height)) {
+        if (!processor->SetWidthAndHeight(resize_height, resize_width)) {
+          FDERROR << "Failed to set Resize processor width and height "
+                  << processors_[i]->Name() << "." << std::endl;
+        }
+      }
+    }
     if (!(*(processors_[i].get()))(mat)) {
       FDERROR << "Failed to process image data in " << processors_[i]->Name()
               << "." << std::endl;
       return false;
     }
   }
+
+  // Record the {height, width} of the image after all processors have run
+  (*im_info)["output_shape"] = {static_cast<int>(mat->Height()),
+                                static_cast<int>(mat->Width())};
+
   mat->ShareWithTensor(output);
   output->shape.insert(output->shape.begin(), 1);
   output->name = InputInfoOfRuntime(0).name;
   return true;
 }
 
-bool Model::Postprocess(const FDTensor& infer_result,
-                        SegmentationResult* result) {
-  FDASSERT(infer_result.dtype == FDDataType::INT64,
-           "Require the data type of output is int64, but now it's " +
-               Str(const_cast<fastdeploy::FDDataType&>(infer_result.dtype)) +
-               ".");
+bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result,
+                        std::map<std::string, std::array<int, 2>>* im_info) {
+  // PaddleSeg has three types of inference output:
+  //     1. output with argmax and without softmax. 3-D matrix CHW, Channel
+  //     always 1, each element is the classified label_id (INT64).
+  //     2. output without argmax and without softmax. 4-D matrix NCHW, N always
+  //     1, Channel is the number of classes. Each element is a class logit
+  //     (FP32).
+  //     3. output without argmax and with softmax. 4-D matrix NCHW, the result
+  //     of 2 passed through a softmax layer.
+  // FastDeploy output:
+  //     1. label_map
+  //     2. score_map (optional)
+  //     3. shape: 2-D HW
+  FDASSERT(infer_result.dtype == FDDataType::INT64 ||
+               infer_result.dtype == FDDataType::FP32,
+           "Require the data type of output is int64 or fp32, but now it's " +
+               Str(infer_result.dtype) + ".");
   result->Clear();
-  std::vector<int64_t> output_shape = infer_result.shape;
-  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
-                                std::multiplies<int>());
-  const int64_t* infer_result_buffer =
-      reinterpret_cast<const int64_t*>(infer_result.data.data());
-  int64_t height = output_shape[1];
-  int64_t width = output_shape[2];
-  result->Resize(height, width);
-  for (int64_t i = 0; i < height; i++) {
-    int64_t begin = i * width;
-    int64_t end = (i + 1) * width - 1;
-    std::copy(infer_result_buffer + begin, infer_result_buffer + end,
-              result->masks[i].begin());
+
+  if (infer_result.shape.size() == 4) {
+    FDASSERT(infer_result.shape[0] == 1, "Only support batch size = 1.");
+    // 4-D output: no argmax was applied, so a score map is produced as well
+    result->contain_score_map = true;
+    utils::NCHW2NHWC<float_t>(infer_result);
   }
 
+  // Holders for the output once it is resized back to the input image size
+  FDTensor new_infer_result;
+  Mat* mat = nullptr;
+  if (is_resized) {
+    cv::Mat temp_mat;
+    utils::FDTensor2FP32CVMat(temp_mat, infer_result,
+                              result->contain_score_map);
+
+    // original image shape, stored as {height, width} under "input_shape"
+    auto iter_ipt = (*im_info).find("input_shape");
+    FDASSERT(iter_ipt != im_info->end(),
+             "Cannot find input_shape from im_info.");
+    int ipt_h = iter_ipt->second[0];
+    int ipt_w = iter_ipt->second[1];
+
+    mat = new Mat(temp_mat);
+    // NOTE(review): trailing 1 looks like linear interp; verify for label maps
+    Resize::Run(mat, ipt_w, ipt_h, -1, -1, 1);
+    mat->ShareWithTensor(&new_infer_result);
+    new_infer_result.shape.insert(new_infer_result.shape.begin(), 1);
+    result->shape = new_infer_result.shape;
+  } else {
+    result->shape = infer_result.shape;
+  }
+  int out_num =
+      std::accumulate(result->shape.begin(), result->shape.begin() + 3, 1,
+                      std::multiplies<int>());
+  // Drop the leading dimension (N of a 4-D result, C of a 3-D CHW result)
+  result->shape.erase(result->shape.begin());
+  result->Resize(out_num);
+  if (result->contain_score_map) {
+    // output with label_map and score_map
+    float_t* infer_result_buffer = nullptr;
+    if (is_resized) {
+      infer_result_buffer = static_cast<float_t*>(new_infer_result.Data());
+    } else {
+      infer_result_buffer = static_cast<float_t*>(infer_result.Data());
+    }
+    // argmax; with_softmax is forwarded to the helper
+    utils::ArgmaxScoreMap(infer_result_buffer, result, with_softmax);
+    result->shape.erase(result->shape.begin() + 2);  // drop the third entry
+  } else {
+    // output only with label_map
+    if (is_resized) {
+      float_t* infer_result_buffer =
+          static_cast<float_t*>(new_infer_result.Data());
+      for (int i = 0; i < out_num; i++) {
+        result->label_map[i] = static_cast<uint8_t>(*(infer_result_buffer + i));
+      }
+    } else {
+      const int64_t* infer_result_buffer =
+          reinterpret_cast<const int64_t*>(infer_result.Data());
+      for (int i = 0; i < out_num; i++) {
+        result->label_map[i] = static_cast<uint8_t>(*(infer_result_buffer + i));
+      }
+    }
+  }
+  delete mat;  // no-op when is_resized is false (mat is still nullptr)
+  mat = nullptr;
   return true;
 }
 
 bool Model::Predict(cv::Mat* im, SegmentationResult* result) {
   Mat mat(*im);
   std::vector<FDTensor> processed_data(1);
-  if (!Preprocess(&mat, &(processed_data[0]))) {
+
+  std::map<std::string, std::array<int, 2>> im_info;
+
+  // Record the shape of image and the shape of preprocessed image
+  im_info["input_shape"] = {static_cast<int>(mat.Height()),
+                            static_cast<int>(mat.Width())};
+  im_info["output_shape"] = {static_cast<int>(mat.Height()),
+                             static_cast<int>(mat.Width())};
+
+  if (!Preprocess(&mat, &(processed_data[0]), &im_info)) {
     FDERROR << "Failed to preprocess input data while using model:"
             << ModelName() << "." << std::endl;
     return false;
@@ -127,7 +219,7 @@ bool Model::Predict(cv::Mat* im, SegmentationResult* result) {
             << std::endl;
     return false;
   }
-  if (!Postprocess(infer_result[0], result)) {
+  if (!Postprocess(infer_result[0], result, &im_info)) {
     FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
             << std::endl;
     return false;

diff --git a/csrcs/fastdeploy/vision/ppseg/model.h b/csrcs/fastdeploy/vision/ppseg/model.h
@@ -18,14 +18,22 @@ class FASTDEPLOY_DECL Model : public FastDeployModel {
 
   virtual bool Predict(cv::Mat* im, SegmentationResult* result);
 
+  bool with_softmax = false;
+
+  bool is_vertical_screen = false;
+
  private:
   bool Initialize();
 
   bool BuildPreprocessPipelineFromConfig();
 
-  bool Preprocess(Mat* mat, FDTensor* outputs);
+  bool Preprocess(Mat* mat, FDTensor* outputs,
+                  std::map<std::string, std::array<int, 2>>* im_info);
+
+  bool Postprocess(FDTensor& infer_result, SegmentationResult* result,
+                   std::map<std::string, std::array<int, 2>>* im_info);
 
-  bool Postprocess(const FDTensor& infer_result, SegmentationResult* result);
+  bool is_resized = false;
 
   std::vector<std::shared_ptr<Processor>> processors_;
   std::string config_file_;

diff --git a/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc b/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc
@@ -20,11 +20,16 @@ void BindPPSeg(pybind11::module& m) {
   pybind11::class_<vision::ppseg::Model, FastDeployModel>(ppseg_module, "Model")
       .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
                           Frontend>())
-      .def("predict", [](vision::ppseg::Model& self, pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        vision::SegmentationResult res;
-        self.Predict(&mat, &res);
-        return res;
-      });
+      .def("predict",
+           [](vision::ppseg::Model& self, pybind11::array& data) {
+             auto mat = PyArrayToCvMat(data);
+             vision::SegmentationResult* res = new vision::SegmentationResult();
+             // self.Predict(&mat, &res);
+             self.Predict(&mat, res);
+             return res;
+           })
+      .def_readwrite("with_softmax", &vision::ppseg::Model::with_softmax)
+      .def_readwrite("is_vertical_screen",
+                     &vision::ppseg::Model::is_vertical_screen);
 }
 }  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc b/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc
@@ -0,0 +1,59 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/utils/utils.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace utils {
+
+void FDTensor2FP32CVMat(cv::Mat& mat, FDTensor& infer_result,
+                        bool contain_score_map) {
+  // output with argmax channel is 1
+  int channel = 1;
+  int height = infer_result.shape[1];
+  int width = infer_result.shape[2];
+
+  if (contain_score_map) {
+    // output without argmax, converted to NHWC by the caller
+    channel = infer_result.shape[3];
+  }
+  // create FP32 cvmat (FP32 input is wrapped in place, INT64 input is copied)
+  if (infer_result.dtype == FDDataType::INT64) {
+    FDWARNING << "The PaddleSeg model is exported with argmax. Inference "
+                 "result type is " +
+                     Str(infer_result.dtype) +
+                     ". If you want the edge of segmentation image more "
+                     "smoother. Please export model with --without_argmax "
+                     "--with_softmax."
+              << std::endl;
+    // Widen every INT64 label into a newly allocated float image; no staging
+    // buffer is needed because the values are written straight into mat.
+    int64_t* infer_result_buffer = static_cast<int64_t*>(infer_result.Data());
+    mat = cv::Mat(height, width, CV_32FC(channel));
+    int index = 0;
+    for (int i = 0; i < height; i++) {
+      for (int j = 0; j < width; j++) {
+        mat.at<float_t>(i, j) =
+            static_cast<float_t>(infer_result_buffer[index++]);
+      }
+    }
+  } else if (infer_result.dtype == FDDataType::FP32) {
+    mat = cv::Mat(height, width, CV_32FC(channel), infer_result.Data());
+  }
+}
+
+}  // namespace utils
+}  // namespace vision
+}  // namespace fastdeploy