From e660031d33fbee298ac719defbe98dc708e7a699 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Tue, 19 Jul 2022 11:58:25 +0000 Subject: [PATCH 01/18] Support new model PaddleSeg --- fastdeploy/vision.h | 5 +- fastdeploy/vision/__init__.py | 1 + fastdeploy/vision/common/result.cc | 20 ++++ fastdeploy/vision/common/result.h | 13 +++ fastdeploy/vision/ppseg/__init__.py | 37 +++++++ fastdeploy/vision/ppseg/model.cc | 133 ++++++++++++++++++++++++ fastdeploy/vision/ppseg/model.h | 35 +++++++ fastdeploy/vision/ppseg/ppseg_pybind.cc | 30 ++++++ fastdeploy/vision/vision_pybind.cc | 13 +++ 9 files changed, 285 insertions(+), 2 deletions(-) create mode 100644 fastdeploy/vision/ppseg/__init__.py create mode 100644 fastdeploy/vision/ppseg/model.cc create mode 100644 fastdeploy/vision/ppseg/model.h create mode 100644 fastdeploy/vision/ppseg/ppseg_pybind.cc diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h index cafe310c70..de5dfd1583 100644 --- a/fastdeploy/vision.h +++ b/fastdeploy/vision.h @@ -15,12 +15,13 @@ #include "fastdeploy/core/config.h" #ifdef ENABLE_VISION +#include "fastdeploy/vision/megvii/yolox.h" +#include "fastdeploy/vision/meituan/yolov6.h" #include "fastdeploy/vision/ppcls/model.h" #include "fastdeploy/vision/ppdet/ppyoloe.h" +#include "fastdeploy/vision/ppseg/model.h" #include "fastdeploy/vision/ultralytics/yolov5.h" #include "fastdeploy/vision/wongkinyiu/yolov7.h" -#include "fastdeploy/vision/meituan/yolov6.h" -#include "fastdeploy/vision/megvii/yolox.h" #endif #include "fastdeploy/vision/visualize/visualize.h" diff --git a/fastdeploy/vision/__init__.py b/fastdeploy/vision/__init__.py index 6acbf0c376..08b0d68124 100644 --- a/fastdeploy/vision/__init__.py +++ b/fastdeploy/vision/__init__.py @@ -16,6 +16,7 @@ from . import evaluation from . import ppcls from . import ppdet +from . import ppseg from . import ultralytics from . import meituan from . 
import megvii diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc index ece0973c0c..2ebd7014ce 100644 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -72,5 +72,25 @@ std::string DetectionResult::Str() { return out; } +void SegmentationResult::Clear() { + std::vector>().swap(masks); +} + +void SegmentationResult::Resize(int64_t height, int64_t width) { + masks.resize(height, std::vector(width)); +} + +std::string SegmentationResult::Str() { + std::string out; + out = "SegmentationResult(\nImage masks: "; + for (size_t i = 0; i < masks.size(); ++i) { + for (size_t j = 0; j < masks[0].size(); ++j) { + out = out + std::to_string(masks[i][j]) + ", "; + } + } + out += "\n)"; + return out; +} + } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/common/result.h b/fastdeploy/vision/common/result.h index 22227a26cb..7ff104250f 100644 --- a/fastdeploy/vision/common/result.h +++ b/fastdeploy/vision/common/result.h @@ -56,5 +56,18 @@ struct FASTDEPLOY_DECL DetectionResult : public BaseResult { std::string Str(); }; +struct FASTDEPLOY_DECL SegmentationResult : public BaseResult { + // mask + std::vector> masks; + + ResultType type = ResultType::SEGMENTATION; + + void Clear(); + + void Resize(int64_t height, int64_t width); + + std::string Str(); +}; + } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/ppseg/__init__.py b/fastdeploy/vision/ppseg/__init__.py new file mode 100644 index 0000000000..b580c01455 --- /dev/null +++ b/fastdeploy/vision/ppseg/__init__.py @@ -0,0 +1,37 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from ... import FastDeployModel, Frontend +from ... import fastdeploy_main as C + + +class Model(FastDeployModel): + def __init__(self, + model_file, + params_file, + config_file, + backend_option=None, + model_format=Frontend.PADDLE): + super(Model, self).__init__(backend_option) + + assert model_format == Frontend.PADDLE, "PaddleSeg only support model format of Frontend.Paddle now." + self._model = C.vision.ppseg.Model(model_file, params_file, + config_file, self._runtime_option, + model_format) + assert self.initialized, "PaddleSeg model initialize failed." 
+ + def predict(self, input_image): + return self._model.predict(input_image) diff --git a/fastdeploy/vision/ppseg/model.cc b/fastdeploy/vision/ppseg/model.cc new file mode 100644 index 0000000000..508f4c36cd --- /dev/null +++ b/fastdeploy/vision/ppseg/model.cc @@ -0,0 +1,133 @@ +#include "fastdeploy/vision/ppseg/model.h" +#include "fastdeploy/vision.h" +#include "fastdeploy/vision/utils/utils.h" +#include "yaml-cpp/yaml.h" + +namespace fastdeploy { +namespace vision { +namespace ppseg { + +Model::Model(const std::string& model_file, const std::string& params_file, + const std::string& config_file, const RuntimeOption& custom_option, + const Frontend& model_format) { + config_file_ = config_file; + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER}; + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool Model::Initialize() { + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + return false; + } + if (!InitRuntime()) { + FDERROR << "Failed to initialize fastdeploy backend." << std::endl; + return false; + } + return true; +} + +bool Model::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + processors_.push_back(std::make_shared()); + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile& e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + if (cfg["Deploy"]["transforms"]) { + auto preprocess_cfg = cfg["Deploy"]["transforms"]; + for (const auto& op : preprocess_cfg) { + FDASSERT(op.IsMap(), + "Require the transform information in yaml be Map type."); + if (op["type"].as() == "Normalize") { + std::vector mean = {0.5, 0.5, 0.5}; + std::vector std = {0.5, 0.5, 0.5}; + processors_.push_back(std::make_shared(mean, std)); + + } else if (op["type"].as() == "Resize") { + const auto& target_size = op["target_size"]; + int resize_width = target_size[0].as(); + int resize_height = target_size[1].as(); + processors_.push_back( + std::make_shared(resize_width, resize_height)); + } + } + processors_.push_back(std::make_shared()); + } + return true; +} + +bool Model::Preprocess(Mat* mat, FDTensor* output) { + for (size_t i = 0; i < processors_.size(); ++i) { + if (!(*(processors_[i].get()))(mat)) { + FDERROR << "Failed to process image data in " << processors_[i]->Name() + << "." 
<< std::endl; + return false; + } + } + int channel = mat->Channels(); + int width = mat->Width(); + int height = mat->Height(); + output->name = InputInfoOfRuntime(0).name; + output->SetExternalData({1, channel, height, width}, FDDataType::FP32, + mat->GetCpuMat()->ptr()); + return true; +} + +bool Model::Postprocess(const FDTensor& infer_result, + SegmentationResult* result) { + result->Clear(); + std::vector output_shape = infer_result.shape; + int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); + const int64_t* infer_result_buffer = + reinterpret_cast(infer_result.data.data()); + int64_t height = output_shape[1]; + int64_t width = output_shape[2]; + result->Resize(height, width); + for (int64_t i = 0; i < height; i++) { + int64_t begin = i * width; + int64_t end = (i + 1) * width - 1; + std::copy(infer_result_buffer + begin, infer_result_buffer + end, + result->masks[i].begin()); + } + + return true; +} + +bool Model::Predict(cv::Mat* im, SegmentationResult* result) { + Mat mat(*im); + std::vector processed_data(1); + if (!Preprocess(&mat, &(processed_data[0]))) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + std::vector infer_result(1); + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + if (!Postprocess(infer_result[0], result)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace ppseg +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/ppseg/model.h b/fastdeploy/vision/ppseg/model.h new file mode 100644 index 0000000000..c0ca5a70d0 --- /dev/null +++ b/fastdeploy/vision/ppseg/model.h @@ -0,0 +1,35 @@ +#pragma once +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/vision/common/processors/transform.h" +#include "fastdeploy/vision/common/result.h" + +namespace fastdeploy { +namespace vision { +namespace ppseg { + +class FASTDEPLOY_DECL Model : public FastDeployModel { + public: + Model(const std::string& model_file, const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option = RuntimeOption(), + const Frontend& model_format = Frontend::PADDLE); + + std::string ModelName() const { return "ppseg"; } + + virtual bool Predict(cv::Mat* im, SegmentationResult* result); + + private: + bool Initialize(); + + bool BuildPreprocessPipelineFromConfig(); + + bool Preprocess(Mat* mat, FDTensor* outputs); + + bool Postprocess(const FDTensor& infer_result, SegmentationResult* result); + + std::vector> processors_; + std::string config_file_; +}; +} // namespace ppseg +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/ppseg/ppseg_pybind.cc b/fastdeploy/vision/ppseg/ppseg_pybind.cc new file mode 100644 index 0000000000..3bc0b1b136 --- /dev/null +++ b/fastdeploy/vision/ppseg/ppseg_pybind.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { +void BindPpSegModel(pybind11::module& m) { + auto ppseg_module = + m.def_submodule("ppseg", "Module to deploy PaddleSegmentation."); + pybind11::class_(ppseg_module, "Model") + .def(pybind11::init()) + .def("predict", [](vision::ppseg::Model& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::SegmentationResult res; + self.Predict(&mat, &res); + return res; + }); +} +} // namespace fastdeploy diff --git a/fastdeploy/vision/vision_pybind.cc b/fastdeploy/vision/vision_pybind.cc index 0334303ce6..1ca373d176 100644 --- a/fastdeploy/vision/vision_pybind.cc +++ b/fastdeploy/vision/vision_pybind.cc @@ -19,6 +19,8 @@ namespace fastdeploy { void BindPPCls(pybind11::module& m); void BindPPDet(pybind11::module& m); void BindWongkinyiu(pybind11::module& m); +void BindPpClsModel(pybind11::module& m); +void BindPpSegModel(pybind11::module& m); void BindUltralytics(pybind11::module& m); void BindMeituan(pybind11::module& m); void BindMegvii(pybind11::module& m); @@ -42,8 +44,19 @@ void BindVision(pybind11::module& m) { .def("__repr__", &vision::DetectionResult::Str) .def("__str__", &vision::DetectionResult::Str); +<<<<<<< Updated upstream BindPPCls(m); BindPPDet(m); +======= + pybind11::class_(m, "SegmentationResult") + .def(pybind11::init()) + .def_readwrite("masks", &vision::SegmentationResult::masks) + .def("__repr__", &vision::SegmentationResult::Str) + .def("__str__", &vision::SegmentationResult::Str); + + BindPpClsModel(m); + BindPpSegModel(m); +>>>>>>> Stashed changes BindUltralytics(m); BindWongkinyiu(m); BindMeituan(m); From 9612e8cff72d0ac27e730f5a0a6d5722a0070a90 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Tue, 19 Jul 2022 12:05:52 +0000 Subject: [PATCH 02/18] Fix conflict --- fastdeploy/vision/vision_pybind.cc | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fastdeploy/vision/vision_pybind.cc b/fastdeploy/vision/vision_pybind.cc index 1ca373d176..14dd2896b3 100644 --- a/fastdeploy/vision/vision_pybind.cc +++ b/fastdeploy/vision/vision_pybind.cc @@ -44,19 +44,15 @@ void BindVision(pybind11::module& m) { .def("__repr__", &vision::DetectionResult::Str) .def("__str__", &vision::DetectionResult::Str); -<<<<<<< Updated upstream - BindPPCls(m); - BindPPDet(m); -======= pybind11::class_(m, "SegmentationResult") .def(pybind11::init()) .def_readwrite("masks", &vision::SegmentationResult::masks) .def("__repr__", &vision::SegmentationResult::Str) .def("__str__", &vision::SegmentationResult::Str); + BindPPDet(m); BindPpClsModel(m); BindPpSegModel(m); ->>>>>>> Stashed changes BindUltralytics(m); BindWongkinyiu(m); BindMeituan(m); From a5a2eed88277a9a4d5a8374d391787442c09091d Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 20 Jul 2022 09:27:33 +0000 Subject: [PATCH 03/18] PaddleSeg add visulization function --- examples/vision/ppseg_unet.cc | 59 +++++++++++++++++++ fastdeploy/vision/common/result.cc | 10 ++-- fastdeploy/vision/ppseg/model.cc | 6 ++ fastdeploy/vision/visualize/__init__.py | 4 ++ fastdeploy/vision/visualize/segmentation.cc | 
46 +++++++++++++++
 fastdeploy/vision/visualize/visualize.h     |  7 ++-
 .../vision/visualize/visualize_pybind.cc    | 20 +++++--
 7 files changed, 140 insertions(+), 12 deletions(-)
 create mode 100644 examples/vision/ppseg_unet.cc
 create mode 100644 fastdeploy/vision/visualize/segmentation.cc

diff --git a/examples/vision/ppseg_unet.cc b/examples/vision/ppseg_unet.cc
new file mode 100644
index 0000000000..cb33611ad4
--- /dev/null
+++ b/examples/vision/ppseg_unet.cc
@@ -0,0 +1,59 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#include "yaml-cpp/yaml.h"
+
+int main() {
+  namespace vis = fastdeploy::vision;
+
+  std::string model_file = "../resources/models/unet_Cityscapes/model.pdmodel";
+  std::string params_file =
+      "../resources/models/unet_Cityscapes/model.pdiparams";
+  std::string config_file = "../resources/models/unet_Cityscapes/deploy.yaml";
+  std::string img_path = "../resources/images/cityscapes_demo.png";
+  std::string vis_path = "../resources/outputs/vis.jpeg";
+
+  auto model = vis::ppseg::Model(model_file, params_file, config_file);
+  if (!model.Initialized()) {
+    std::cerr << "Init Failed." << std::endl;
+    return -1;
+  }
+
+  cv::Mat im = cv::imread(img_path);
+  cv::Mat vis_im;
+
+  vis::SegmentationResult res;
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Prediction Failed." << std::endl;
+    return -1;
+  } else {
+    std::cout << "Prediction Done!" << std::endl;
+  }
+
+  // Print the prediction result
+  std::cout << res.Str() << std::endl;
+
+  YAML::Node cfg = YAML::LoadFile(config_file);
+  int num_classes = 19;
+  if (cfg["Deploy"]["num_classes"]) {
+    num_classes = cfg["Deploy"]["num_classes"].as<int>();
+  }
+
+  // Visualize the prediction result
+  vis::Visualize::VisSegmentation(im, res, &vis_im, num_classes);
+  cv::imwrite(vis_path, vis_im);
+  std::cout << "Inference Done! 
Saved: " << vis_path << std::endl; + return 0; +} diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc index 2ebd7014ce..06a85ea454 100644 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -82,13 +82,15 @@ void SegmentationResult::Resize(int64_t height, int64_t width) { std::string SegmentationResult::Str() { std::string out; - out = "SegmentationResult(\nImage masks: "; - for (size_t i = 0; i < masks.size(); ++i) { - for (size_t j = 0; j < masks[0].size(); ++j) { + out = "SegmentationResult Image masks 10 rows x 10 cols: \n"; + for (size_t i = 0; i < 10; ++i) { + out += "["; + for (size_t j = 0; j < 10; ++j) { out = out + std::to_string(masks[i][j]) + ", "; } + out += ".....]\n"; } - out += "\n)"; + out += "...........\n"; return out; } diff --git a/fastdeploy/vision/ppseg/model.cc b/fastdeploy/vision/ppseg/model.cc index 508f4c36cd..deae939b2c 100644 --- a/fastdeploy/vision/ppseg/model.cc +++ b/fastdeploy/vision/ppseg/model.cc @@ -53,6 +53,12 @@ bool Model::BuildPreprocessPipelineFromConfig() { if (op["type"].as() == "Normalize") { std::vector mean = {0.5, 0.5, 0.5}; std::vector std = {0.5, 0.5, 0.5}; + if (op["mean"]) { + mean = op["mean"].as>(); + } + if (op["std"]) { + std = op["std"].as>(); + } processors_.push_back(std::make_shared(mean, std)); } else if (op["type"].as() == "Resize") { diff --git a/fastdeploy/vision/visualize/__init__.py b/fastdeploy/vision/visualize/__init__.py index 384ec2768f..a32746d165 100644 --- a/fastdeploy/vision/visualize/__init__.py +++ b/fastdeploy/vision/visualize/__init__.py @@ -19,3 +19,7 @@ def vis_detection(im_data, det_result, line_size=1, font_size=0.5): C.vision.Visualize.vis_detection(im_data, det_result, line_size, font_size) + + +def vis_segmentation(im_data, seg_result, vis_img, num_classes=19): + C.vision.Visualize.vis_segmentation(im, seg_result, vis_img, num_classes) diff --git a/fastdeploy/vision/visualize/segmentation.cc b/fastdeploy/vision/visualize/segmentation.cc new file mode 100644 index 0000000000..b1b142fc08 --- /dev/null +++ b/fastdeploy/vision/visualize/segmentation.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifdef ENABLE_VISION_VISUALIZE + +#include "fastdeploy/vision/visualize/visualize.h" +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" + +namespace fastdeploy { +namespace vision { + +void Visualize::VisSegmentation(const cv::Mat& im, + const SegmentationResult& result, + cv::Mat* vis_img, const int& num_classes) { + auto color_map = GetColorMap(num_classes); + int64_t height = result.masks.size(); + int64_t width = result.masks[1].size(); + *vis_img = cv::Mat::zeros(height, width, CV_8UC3); + + int64_t index = 0; + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int category_id = static_cast(result.masks[i][j]); + vis_img->at(i, j)[0] = color_map[3 * category_id + 0]; + vis_img->at(i, j)[1] = color_map[3 * category_id + 1]; + vis_img->at(i, j)[2] = color_map[3 * category_id + 2]; + } + } + cv::addWeighted(im, .5, *vis_img, .5, 0, *vis_img); +} + +} // namespace vision +} // namespace fastdeploy +#endif diff --git a/fastdeploy/vision/visualize/visualize.h b/fastdeploy/vision/visualize/visualize.h index 6fffa521a6..f78d4d0e6d 100644 --- a/fastdeploy/vision/visualize/visualize.h +++ b/fastdeploy/vision/visualize/visualize.h @@ -27,8 +27,11 @@ class FASTDEPLOY_DECL Visualize { static const std::vector& GetColorMap(int num_classes = 1000); static void VisDetection(cv::Mat* im, const DetectionResult& result, int line_size = 2, float font_size = 0.5f); + static void VisSegmentation(const cv::Mat& im, + const SegmentationResult& result, + cv::Mat* vis_img, const int& num_classes = 19); }; -} // namespace vision -} // namespace fastdeploy +} // namespace vision +} // namespace fastdeploy #endif diff --git a/fastdeploy/vision/visualize/visualize_pybind.cc b/fastdeploy/vision/visualize/visualize_pybind.cc index 66ffc74f9f..7540da2a07 100644 --- a/fastdeploy/vision/visualize/visualize_pybind.cc +++ b/fastdeploy/vision/visualize/visualize_pybind.cc @@ -18,11 +18,19 @@ namespace fastdeploy { void BindVisualize(pybind11::module& m) { pybind11::class_(m, "Visualize") .def(pybind11::init<>()) - .def_static("vis_detection", [](pybind11::array& im_data, - vision::DetectionResult& result, - int line_size, float font_size) { - auto im = PyArrayToCvMat(im_data); - vision::Visualize::VisDetection(&im, result, line_size, font_size); + .def_static("vis_detection", + [](pybind11::array& im_data, vision::DetectionResult& result, + int line_size, float font_size) { + auto im = PyArrayToCvMat(im_data); + vision::Visualize::VisDetection(&im, result, line_size, + font_size); + }) + .def_static("vis_segmentation", [](pybind11::array& im_data, + vision::SegmentationResult& result, + cv::Mat vis_img, + const int& num_classes) { + cv::Mat im = PyArrayToCvMat(im_data); + vision::Visualize::VisSegmentation(im, result, &vis_img, num_classes); }); } -} // namespace fastdeploy +} // namespace fastdeploy From 3f44ce75bcfe1aff246147dbbfa590cf70082924 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 20 Jul 2022 11:44:22 +0000 Subject: [PATCH 04/18] fix bug --- fastdeploy/vision/vision_pybind.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fastdeploy/vision/vision_pybind.cc b/fastdeploy/vision/vision_pybind.cc index 14dd2896b3..ea1e920bdc 100644 --- a/fastdeploy/vision/vision_pybind.cc +++ b/fastdeploy/vision/vision_pybind.cc @@ -19,8 +19,7 @@ namespace fastdeploy { void BindPPCls(pybind11::module& m); void BindPPDet(pybind11::module& m); void BindWongkinyiu(pybind11::module& m); -void BindPpClsModel(pybind11::module& m); -void 
BindPpSegModel(pybind11::module& m); +void BindPPSeg(pybind11::module& m); void BindUltralytics(pybind11::module& m); void BindMeituan(pybind11::module& m); void BindMegvii(pybind11::module& m); @@ -51,8 +50,8 @@ void BindVision(pybind11::module& m) { .def("__str__", &vision::SegmentationResult::Str); BindPPDet(m); - BindPpClsModel(m); - BindPpSegModel(m); + BindPPCls(m); + BindPPSeg(m); BindUltralytics(m); BindWongkinyiu(m); BindMeituan(m); From caf961b3d9d3e4ce9f8d032405808824d04c18db Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 20 Jul 2022 12:24:38 +0000 Subject: [PATCH 05/18] Fix BindPPSeg wrong name --- fastdeploy/vision/ppseg/ppseg_pybind.cc | 2 +- fastdeploy/vision/vision_pybind.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/vision/ppseg/ppseg_pybind.cc b/fastdeploy/vision/ppseg/ppseg_pybind.cc index 3bc0b1b136..60022f914b 100644 --- a/fastdeploy/vision/ppseg/ppseg_pybind.cc +++ b/fastdeploy/vision/ppseg/ppseg_pybind.cc @@ -14,7 +14,7 @@ #include "fastdeploy/pybind/main.h" namespace fastdeploy { -void BindPpSegModel(pybind11::module& m) { +void BindPPSeg(pybind11::module& m) { auto ppseg_module = m.def_submodule("ppseg", "Module to deploy PaddleSegmentation."); pybind11::class_(ppseg_module, "Model") diff --git a/fastdeploy/vision/vision_pybind.cc b/fastdeploy/vision/vision_pybind.cc index ea1e920bdc..22c4f0bc2e 100644 --- a/fastdeploy/vision/vision_pybind.cc +++ b/fastdeploy/vision/vision_pybind.cc @@ -49,8 +49,8 @@ void BindVision(pybind11::module& m) { .def("__repr__", &vision::SegmentationResult::Str) .def("__str__", &vision::SegmentationResult::Str); - BindPPDet(m); BindPPCls(m); + BindPPDet(m); BindPPSeg(m); BindUltralytics(m); BindWongkinyiu(m); From 13216d0e24ad1dc628b12e2ace09c2d39afea8f9 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 20 Jul 2022 14:29:32 +0000 Subject: [PATCH 06/18] Fix variable name --- fastdeploy/vision/visualize/__init__.py | 5 +++-- fastdeploy/vision/visualize/visualize_pybind.cc | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fastdeploy/vision/visualize/__init__.py b/fastdeploy/vision/visualize/__init__.py index a32746d165..7d1bcc8926 100644 --- a/fastdeploy/vision/visualize/__init__.py +++ b/fastdeploy/vision/visualize/__init__.py @@ -21,5 +21,6 @@ def vis_detection(im_data, det_result, line_size=1, font_size=0.5): C.vision.Visualize.vis_detection(im_data, det_result, line_size, font_size) -def vis_segmentation(im_data, seg_result, vis_img, num_classes=19): - C.vision.Visualize.vis_segmentation(im, seg_result, vis_img, num_classes) +def vis_segmentation(im_data, seg_result, vis_im_data, num_classes=1000): + C.vision.Visualize.vis_segmentation(im_data, seg_result, vis_im_data, + num_classes) diff --git a/fastdeploy/vision/visualize/visualize_pybind.cc b/fastdeploy/vision/visualize/visualize_pybind.cc index 7540da2a07..5d5eb2388d 100644 --- a/fastdeploy/vision/visualize/visualize_pybind.cc +++ b/fastdeploy/vision/visualize/visualize_pybind.cc @@ -27,10 +27,11 @@ void BindVisualize(pybind11::module& m) { }) .def_static("vis_segmentation", [](pybind11::array& im_data, vision::SegmentationResult& result, - cv::Mat vis_img, + pybind11::array& vis_im_data, const int& num_classes) { cv::Mat im = PyArrayToCvMat(im_data); - vision::Visualize::VisSegmentation(im, result, &vis_img, num_classes); + cv::Mat vis_im = PyArrayToCvMat(vis_im_data); + vision::Visualize::VisSegmentation(im, result, &vis_im, num_classes); }); } } // namespace fastdeploy From 
9810140340765d71d7b7ec25495422bbd265f22f Mon Sep 17 00:00:00 2001
From: felixhjh <852142024@qq.com>
Date: Thu, 21 Jul 2022 05:30:26 +0000
Subject: [PATCH 07/18] Update by comments

---
 fastdeploy/vision/ppseg/model.cc        | 11 ++++++-----
 fastdeploy/vision/visualize/visualize.h |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/fastdeploy/vision/ppseg/model.cc b/fastdeploy/vision/ppseg/model.cc
index deae939b2c..268d85f7d3 100644
--- a/fastdeploy/vision/ppseg/model.cc
+++ b/fastdeploy/vision/ppseg/model.cc
@@ -82,17 +82,18 @@ bool Model::Preprocess(Mat* mat, FDTensor* output) {
       return false;
     }
   }
-  int channel = mat->Channels();
-  int width = mat->Width();
-  int height = mat->Height();
+  mat->ShareWithTensor(output);
+  output->shape.insert(output->shape.begin(), 1);
   output->name = InputInfoOfRuntime(0).name;
-  output->SetExternalData({1, channel, height, width}, FDDataType::FP32,
-                          mat->GetCpuMat()->ptr());
   return true;
 }
 
 bool Model::Postprocess(const FDTensor& infer_result,
                         SegmentationResult* result) {
+  FDASSERT(infer_result.dtype == FDDataType::INT64,
+           "Require the data type of output is int64, but now it's " +
+               Str(const_cast(infer_result.dtype)) +
+               ".");
   result->Clear();
   std::vector<int64_t> output_shape = infer_result.shape;
   int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
diff --git a/fastdeploy/vision/visualize/visualize.h b/fastdeploy/vision/visualize/visualize.h
index f78d4d0e6d..1eb212c2b9 100644
--- a/fastdeploy/vision/visualize/visualize.h
+++ b/fastdeploy/vision/visualize/visualize.h
@@ -29,7 +29,7 @@ class FASTDEPLOY_DECL Visualize {
                            int line_size = 2, float font_size = 0.5f);
   static void VisSegmentation(const cv::Mat& im,
                               const SegmentationResult& result,
-                              cv::Mat* vis_img, const int& num_classes = 19);
+                              cv::Mat* vis_img, const int& num_classes = 1000);
 };
 
 } // namespace vision

From aac6ca2f1ecb238dce0d0623f30579f3c19983a5 Mon Sep 17 00:00:00 2001
From: felixhjh <852142024@qq.com>
Date: Thu, 21 Jul 2022 05:33:46 +0000
Subject: [PATCH 08/18] Add ppseg-unet example python version

---
 model_zoo/vision/ppseg/ppseg_unet.py | 36 ++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 model_zoo/vision/ppseg/ppseg_unet.py

diff --git a/model_zoo/vision/ppseg/ppseg_unet.py b/model_zoo/vision/ppseg/ppseg_unet.py
new file mode 100644
index 0000000000..c279e0a8fd
--- /dev/null
+++ b/model_zoo/vision/ppseg/ppseg_unet.py
@@ -0,0 +1,36 @@
+import fastdeploy as fd
+import cv2
+import tarfile
+
+# Download the model and test image
+model_url = "https://github.com/felixhjh/Fastdeploy-Models/raw/main/unet_Cityscapes.tar.gz"
+test_jpg_url = "https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png"
+fd.download(model_url, ".", show_progress=True)
+fd.download(test_jpg_url, ".", show_progress=True)
+
+try:
+    tar = tarfile.open("unet_Cityscapes.tar.gz", "r:gz")
+    file_names = tar.getnames()
+    for file_name in file_names:
+        tar.extract(file_name, ".")
+    tar.close()
+except Exception as e:
+    raise Exception(e)
+
+# Load the model
+model = fd.vision.ppseg.Model("./unet_Cityscapes/model.pdmodel",
+                              "./unet_Cityscapes/model.pdiparams",
+                              "./unet_Cityscapes/deploy.yaml")
+
+# Predict on the image
+im = cv2.imread("./cityscapes_demo.png")
+result = model.predict(im)
+
+vis_im = im.copy()
+# Visualize the result
+fd.vision.visualize.vis_segmentation(im, result, vis_im)
+cv2.imwrite("vis_img.png", vis_im)
+
+# Print the prediction result
+print(result)
+print(model.runtime_option)

From 1218c1ff6261241e1593fe12ac5d89c520e37a38 Mon Sep 17 00:00:00 2001
From: felixhjh <852142024@qq.com>
Date: Thu, 21 Jul 2022 14:59:36 +0000
Subject: [PATCH 09/18] Change the way to decompress model file

---
 model_zoo/vision/ppseg/ppseg_unet.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/model_zoo/vision/ppseg/ppseg_unet.py b/model_zoo/vision/ppseg/ppseg_unet.py
index c279e0a8fd..c2e158ba8c 100644
--- a/model_zoo/vision/ppseg/ppseg_unet.py
+++ b/model_zoo/vision/ppseg/ppseg_unet.py
@@ -5,18 +5,9 @@
 # Download the model and test image
 model_url = "https://github.com/felixhjh/Fastdeploy-Models/raw/main/unet_Cityscapes.tar.gz"
 test_jpg_url = "https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png"
-fd.download(model_url, ".", show_progress=True)
+fd.download_and_decompress(model_url, ".")
 fd.download(test_jpg_url, ".", show_progress=True)
-try:
-    tar = tarfile.open("unet_Cityscapes.tar.gz", "r:gz")
-    file_names = tar.getnames()
-    for file_name in file_names:
-        tar.extract(file_name, ".")
-    tar.close()
-except Exception as e:
-    raise Exception(e)
-
 # Load the model
 model = fd.vision.ppseg.Model("./unet_Cityscapes/model.pdmodel",
                               "./unet_Cityscapes/model.pdiparams",
                               "./unet_Cityscapes/deploy.yaml")

From d35fd44c88736362635ca77530d4518697975cd4 Mon Sep 17 00:00:00 2001
From: felixhjh <852142024@qq.com>
Date: Mon, 25 Jul 2022 12:56:32 +0000
Subject: [PATCH 10/18] Visualize resize mask back to original image size

---
 fastdeploy/vision/visualize/segmentation.cc | 31 +++++++++++++++++----
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/fastdeploy/vision/visualize/segmentation.cc b/fastdeploy/vision/visualize/segmentation.cc
index b1b142fc08..74a813912d 100644
--- a/fastdeploy/vision/visualize/segmentation.cc
+++ b/fastdeploy/vision/visualize/segmentation.cc
@@ -14,6 +14,7 @@
 
 #ifdef ENABLE_VISION_VISUALIZE
 
+#include "fastdeploy/vision/common/processors/transform.h"
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -24,15 +25,33 @@ namespace vision {
 void Visualize::VisSegmentation(const cv::Mat& im,
                                 const SegmentationResult& result,
                                 cv::Mat* vis_img, const int& num_classes) {
+  int origin_h = im.rows;
+  int origin_w = im.cols;
   auto color_map = GetColorMap(num_classes);
-  int64_t height = result.masks.size();
-  int64_t width = result.masks[1].size();
-  *vis_img = cv::Mat::zeros(height, width, CV_8UC3);
+  int mask_h = result.masks.size();
+  int mask_w = result.masks[0].size();
+  *vis_img = cv::Mat::zeros(origin_h, origin_w, CV_8UC3);
+  cv::Mat mask_mat(mask_h, mask_w, CV_32FC1);
+
+  for (int i = 0; i < mask_h; ++i) {
+    for (int j = 0; j < mask_w; ++j) {
+      mask_mat.at<float>(i, j) = static_cast<float>(result.masks[i][j]);
+    }
+  }
+  Mat mat(mask_mat);
+  if (origin_h != mask_h || origin_w != mask_w) {
+    Resize::Run(&mat, origin_w, origin_h);
+  }
+#ifdef ENABLE_OPENCV_CUDA
+  cv::cuda::GpuMat* im_mask = mat.GetGpuMat();
+#else
+  cv::Mat* im_mask = mat.GetCpuMat();
+#endif
 
   int64_t index = 0;
-  for (int i = 0; i < height; i++) {
-    for (int j = 0; j < width; j++) {
-      int category_id = static_cast<int>(result.masks[i][j]);
+  for (int i = 0; i < origin_h; i++) {
+    for (int j = 0; j < origin_w; j++) {
+      int category_id = static_cast<int>((*im_mask).at<float>(i, j));
       vis_img->at<cv::Vec3b>(i, j)[0] = color_map[3 * category_id + 0];
       vis_img->at<cv::Vec3b>(i, j)[1] = color_map[3 * category_id + 1];
       vis_img->at<cv::Vec3b>(i, j)[2] = color_map[3 * category_id + 2];

From 5a595e9f19f7fa1664253b796cc44f4e85f36f79 Mon Sep 17 00:00:00 2001
From: felixhjh <852142024@qq.com>
Date: Mon, 1 Aug 2022 14:28:21 +0000
Subject: [PATCH 11/18] Update paddleseg support

---
 csrcs/fastdeploy/vision/common/result.cc | 18 ++-
 csrcs/fastdeploy/vision/common/result.h  |  7 +-
csrcs/fastdeploy/vision/ppseg/model.cc | 133 +++++++++++++++--- csrcs/fastdeploy/vision/ppseg/model.h | 10 +- csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc | 15 +- csrcs/fastdeploy/vision/vision_pybind.cc | 5 +- .../vision/visualize/segmentation.cc | 31 +--- 7 files changed, 158 insertions(+), 61 deletions(-) diff --git a/csrcs/fastdeploy/vision/common/result.cc b/csrcs/fastdeploy/vision/common/result.cc index 0ef077f0ce..aa42f0088e 100644 --- a/csrcs/fastdeploy/vision/common/result.cc +++ b/csrcs/fastdeploy/vision/common/result.cc @@ -140,7 +140,10 @@ std::string FaceDetectionResult::Str() { } void SegmentationResult::Clear() { - std::vector>().swap(masks); + std::vector().swap(label_map); + std::vector().swap(score_map); + std::vector().swap(shape); + contain_score_map = false; } void SegmentationResult::Resize(int64_t height, int64_t width) { @@ -153,11 +156,22 @@ std::string SegmentationResult::Str() { for (size_t i = 0; i < 10; ++i) { out += "["; for (size_t j = 0; j < 10; ++j) { - out = out + std::to_string(masks[i][j]) + ", "; + out = out + std::to_string(label_map[i * 10 + j]) + ", "; } out += ".....]\n"; } out += "...........\n"; + if (contain_score_map) { + out += "SegmentationResult Score map 10 rows x 10 cols: \n"; + for (size_t i = 0; i < 10; ++i) { + out += "["; + for (size_t j = 0; j < 10; ++j) { + out = out + std::to_string(score_map[i * 10 + j]) + ", "; + } + out += ".....]\n"; + } + } + out += "...........\n"; return out; } diff --git a/csrcs/fastdeploy/vision/common/result.h b/csrcs/fastdeploy/vision/common/result.h index 4900d394d8..af44ba8cae 100644 --- a/csrcs/fastdeploy/vision/common/result.h +++ b/csrcs/fastdeploy/vision/common/result.h @@ -84,14 +84,15 @@ struct FASTDEPLOY_DECL FaceDetectionResult : public BaseResult { struct FASTDEPLOY_DECL SegmentationResult : public BaseResult { // mask - std::vector> masks; + std::vector label_map; + std::vector score_map; + td::vector shape; + bool contain_score_map = false; ResultType type = ResultType::SEGMENTATION; void Clear(); - void Resize(int64_t height, int64_t width); - std::string Str(); }; diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc index 268d85f7d3..04232239c3 100644 --- a/csrcs/fastdeploy/vision/ppseg/model.cc +++ b/csrcs/fastdeploy/vision/ppseg/model.cc @@ -11,8 +11,8 @@ Model::Model(const std::string& model_file, const std::string& params_file, const std::string& config_file, const RuntimeOption& custom_option, const Frontend& model_format) { config_file_ = config_file; - valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; - valid_gpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT}; runtime_option = custom_option; runtime_option.model_format = model_format; runtime_option.model_file = model_file; @@ -65,6 +65,7 @@ bool Model::BuildPreprocessPipelineFromConfig() { const auto& target_size = op["target_size"]; int resize_width = target_size[0].as(); int resize_height = target_size[1].as(); + is_resized = true; processors_.push_back( std::make_shared(resize_width, resize_height)); } @@ -74,7 +75,8 @@ bool Model::BuildPreprocessPipelineFromConfig() { return true; } -bool Model::Preprocess(Mat* mat, FDTensor* output) { +bool Model::Preprocess(Mat* mat, FDTensor* output, + std::map>* im_info) { for (size_t i = 0; i < processors_.size(); ++i) { if (!(*(processors_[i].get()))(mat)) { FDERROR << "Failed to process image data in " << processors_[i]->Name() @@ 
-82,41 +84,128 @@ bool Model::Preprocess(Mat* mat, FDTensor* output) { return false; } } + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + mat->ShareWithTensor(output); + for (auto& i : output->shape) { + std::cout << "Preprocess before shape: " << i << std::endl; + } output->shape.insert(output->shape.begin(), 1); + for (auto& i : output->shape) { + std::cout << "Preprocess After shape: " << i << std::endl; + } output->name = InputInfoOfRuntime(0).name; return true; } -bool Model::Postprocess(const FDTensor& infer_result, - SegmentationResult* result) { - FDASSERT(infer_result.dtype == FDDataType::INT64, - "Require the data type of output is int64, but now it's " + +bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, + std::map>* im_info) { + FDASSERT(infer_result.dtype == FDDataType::INT64 || + infer_result.dtype == FDDataType::FP32, + "Require the data type of output is int64 or fp32, but now it's " + Str(const_cast(infer_result.dtype)) + "."); result->Clear(); - std::vector output_shape = infer_result.shape; - int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, - std::multiplies()); - const int64_t* infer_result_buffer = - reinterpret_cast(infer_result.data.data()); - int64_t height = output_shape[1]; - int64_t width = output_shape[2]; - result->Resize(height, width); - for (int64_t i = 0; i < height; i++) { - int64_t begin = i * width; - int64_t end = (i + 1) * width - 1; - std::copy(infer_result_buffer + begin, infer_result_buffer + end, - result->masks[i].begin()); + if (infer_result.shape.size() == 4) { + result->contain_score_map = true; + float_t* infer_result_buffer = + reinterpret_cast(infer_result.data.data()); + // NCHW to NHWC + int num = infer_result.shape[0]; + int channel = infer_result.shape[1]; + int height = infer_result.shape[2]; + int width = infer_result.shape[3]; + int chw = channel * height * width; + int wc = width * channel; + int wh = width * height; + std::vector hwc_data(chw); + int index = 0; + for (int n = 0; n < num; n++) { + for (int c = 0; c < channel; c++) { + for (int h = 0; h < height; h++) { + for (int w = 0; w < width; w++) { + hwc_data[n * chw + h * wc + w * channel + c] = + *(infer_result_buffer + index); + index++; + } + } + } + } + uint8_t* hwc_data_buffer = reinterpret_cast(hwc_data.data()); + std::copy(hwc_data_buffer, hwc_data_buffer + infer_result.data.size() - 1, + infer_result.data.begin()); + infer_result.shape = {num, height, width, channel}; + } + result->shape = infer_result.shape; + for (auto& i : result->shape) { + std::cout << "result->shape---" << i << std::endl; } + int out_num = + std::accumulate(result->shape.begin(), result->shape.begin() + 3, 1, + std::multiplies()); + std::cout << "out_num---" << out_num << std::endl; + result->shape.erase(result->shape.begin()); + result->label_map.reserve(out_num); + if (result->contain_score_map) { + float_t* infer_result_buffer = + reinterpret_cast(infer_result.Data()); + int64_t height = result->shape[0]; + int64_t width = result->shape[1]; + int64_t num_classes = result->shape[2]; + result->shape.erase(result->shape.begin() + 2); + + result->score_map.reserve(out_num); + int index = 0; + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + int64_t s = (i * width + j) * num_classes; + float_t* max_class_score = std::max_element( + infer_result_buffer + s, infer_result_buffer + s + num_classes); + + auto label_id = 
std::distance(infer_result_buffer + s, max_class_score); + result->label_map[index] = (static_cast(label_id)); + + if (with_softmax) { + double_t total = 0; + for (int k = 0; k < num_classes; k++) { + total += exp(*(infer_result_buffer + s + k) - *max_class_score); + } + double_t softmax_class_score = 1 / total; + result->score_map[index] = static_cast(softmax_class_score); + + } else { + result->score_map[index] = *max_class_score; + } + index++; + } + } + } else { + for (int i = 0; i < out_num; i++) { + const int64_t* infer_result_buffer = + reinterpret_cast(infer_result.Data()); + result->label_map[i] = static_cast(*(infer_result_buffer + i)); + } + } return true; } bool Model::Predict(cv::Mat* im, SegmentationResult* result) { Mat mat(*im); std::vector processed_data(1); - if (!Preprocess(&mat, &(processed_data[0]))) { + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &(processed_data[0]), &im_info)) { FDERROR << "Failed to preprocess input data while using model:" << ModelName() << "." << std::endl; return false; @@ -127,7 +216,7 @@ bool Model::Predict(cv::Mat* im, SegmentationResult* result) { << std::endl; return false; } - if (!Postprocess(infer_result[0], result)) { + if (!Postprocess(infer_result[0], result, &im_info)) { FDERROR << "Failed to postprocess while using model:" << ModelName() << "." << std::endl; return false; diff --git a/csrcs/fastdeploy/vision/ppseg/model.h b/csrcs/fastdeploy/vision/ppseg/model.h index c0ca5a70d0..bfbb7e52ab 100644 --- a/csrcs/fastdeploy/vision/ppseg/model.h +++ b/csrcs/fastdeploy/vision/ppseg/model.h @@ -18,14 +18,20 @@ class FASTDEPLOY_DECL Model : public FastDeployModel { virtual bool Predict(cv::Mat* im, SegmentationResult* result); + bool with_softmax = false; + private: bool Initialize(); bool BuildPreprocessPipelineFromConfig(); - bool Preprocess(Mat* mat, FDTensor* outputs); + bool Preprocess(Mat* mat, FDTensor* outputs, + std::map>* im_info); + + bool Postprocess(FDTensor& infer_result, SegmentationResult* result, + std::map>* im_info); - bool Postprocess(const FDTensor& infer_result, SegmentationResult* result); + bool is_resized = false; std::vector> processors_; std::string config_file_; diff --git a/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc b/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc index 60022f914b..dd2ec5572d 100644 --- a/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc +++ b/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc @@ -20,11 +20,14 @@ void BindPPSeg(pybind11::module& m) { pybind11::class_(ppseg_module, "Model") .def(pybind11::init()) - .def("predict", [](vision::ppseg::Model& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::SegmentationResult res; - self.Predict(&mat, &res); - return res; - }); + .def("predict", + [](vision::ppseg::Model& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::SegmentationResult* res = new vision::SegmentationResult(); + // self.Predict(&mat, &res); + self.Predict(&mat, res); + return res; + }) + .def_readwrite("with_softmax", &vision::ppseg::Model::with_softmax); } } // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/vision_pybind.cc b/csrcs/fastdeploy/vision/vision_pybind.cc index 79aa876351..3648105a44 100644 --- a/csrcs/fastdeploy/vision/vision_pybind.cc +++ 
b/csrcs/fastdeploy/vision/vision_pybind.cc @@ -59,7 +59,10 @@ void BindVision(pybind11::module& m) { .def("__str__", &vision::FaceDetectionResult::Str); pybind11::class_(m, "SegmentationResult") .def(pybind11::init()) - .def_readwrite("masks", &vision::SegmentationResult::masks) + .def_readwrite("label_map", &vision::SegmentationResult::label_map) + .def_readwrite("score_map", &vision::SegmentationResult::score_map) + .def_readwrite("shape", &vision::SegmentationResult::shape) + .def_readwrite("shape", &vision::SegmentationResult::shape) .def("__repr__", &vision::SegmentationResult::Str) .def("__str__", &vision::SegmentationResult::Str); diff --git a/csrcs/fastdeploy/vision/visualize/segmentation.cc b/csrcs/fastdeploy/vision/visualize/segmentation.cc index 74a813912d..1fba09a131 100644 --- a/csrcs/fastdeploy/vision/visualize/segmentation.cc +++ b/csrcs/fastdeploy/vision/visualize/segmentation.cc @@ -14,7 +14,6 @@ #ifdef ENABLE_VISION_VISUALIZE -#include "fastdeploy/vision/common/processors/transform.h" #include "fastdeploy/vision/visualize/visualize.h" #include "opencv2/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -25,33 +24,15 @@ namespace vision { void Visualize::VisSegmentation(const cv::Mat& im, const SegmentationResult& result, cv::Mat* vis_img, const int& num_classes) { - int origin_h = im.rows; - int origin_w = im.cols; auto color_map = GetColorMap(num_classes); - int mask_h = result.masks.size(); - int mask_w = result.masks[0].size(); - *vis_img = cv::Mat::zeros(origin_h, origin_w, CV_8UC3); - cv::Mat mask_mat(mask_h, mask_w, CV_32FC1); - - for (int i = 0; i < mask_h; ++i) { - for (int j = 0; j < mask_w; ++j) { - mask_mat.at(i, j) = static_cast(result.masks[i][j]); - } - } - Mat mat(mask_mat); - if (origin_h != mask_h || origin_w != mask_w) { - Resize::Run(&mat, origin_w, origin_h); - } -#ifdef ENABLE_OPENCV_CUDA - cv::cuda::GpuMat* im_mask = mat.GetGpuMat(); -#else - cv::Mat* im_mask = mat.GetCpuMat(); -#endif + int64_t height = result.shape[0]; + int64_t width = result.shape[1]; + *vis_img = cv::Mat::zeros(height, width, CV_8UC3); int64_t index = 0; - for (int i = 0; i < origin_h; i++) { - for (int j = 0; j < origin_w; j++) { - int category_id = static_cast((*im_mask).at(i, j)); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int category_id = result.label_map[index++]; vis_img->at(i, j)[0] = color_map[3 * category_id + 0]; vis_img->at(i, j)[1] = color_map[3 * category_id + 1]; vis_img->at(i, j)[2] = color_map[3 * category_id + 2]; From bc7fb068fdc0bee8873e6b2e8db51fe8480819a1 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Mon, 1 Aug 2022 16:27:46 +0000 Subject: [PATCH 12/18] Add essential files to support ppseg --- csrcs/fastdeploy/vision/common/result.cc | 6 +----- csrcs/fastdeploy/vision/common/result.h | 2 +- fastdeploy/vision/ppseg/__init__.py | 11 +++++++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/csrcs/fastdeploy/vision/common/result.cc b/csrcs/fastdeploy/vision/common/result.cc index aa42f0088e..fdc248ec63 100644 --- a/csrcs/fastdeploy/vision/common/result.cc +++ b/csrcs/fastdeploy/vision/common/result.cc @@ -146,10 +146,6 @@ void SegmentationResult::Clear() { contain_score_map = false; } -void SegmentationResult::Resize(int64_t height, int64_t width) { - masks.resize(height, std::vector(width)); -} - std::string SegmentationResult::Str() { std::string out; out = "SegmentationResult Image masks 10 rows x 10 cols: \n"; @@ -170,8 +166,8 @@ std::string SegmentationResult::Str() { } out += 
".....]\n"; } + out += "...........\n"; } - out += "...........\n"; return out; } diff --git a/csrcs/fastdeploy/vision/common/result.h b/csrcs/fastdeploy/vision/common/result.h index af44ba8cae..eada40ac30 100644 --- a/csrcs/fastdeploy/vision/common/result.h +++ b/csrcs/fastdeploy/vision/common/result.h @@ -86,7 +86,7 @@ struct FASTDEPLOY_DECL SegmentationResult : public BaseResult { // mask std::vector label_map; std::vector score_map; - td::vector shape; + std::vector shape; bool contain_score_map = false; ResultType type = ResultType::SEGMENTATION; diff --git a/fastdeploy/vision/ppseg/__init__.py b/fastdeploy/vision/ppseg/__init__.py index b580c01455..77fcdb9379 100644 --- a/fastdeploy/vision/ppseg/__init__.py +++ b/fastdeploy/vision/ppseg/__init__.py @@ -35,3 +35,14 @@ def __init__(self, def predict(self, input_image): return self._model.predict(input_image) + + @property + def with_softmax(self): + return self._model.with_softmax + + @with_softmax.setter + def with_softmax(self, value): + assert isinstance( + value, + bool), "The value to set `with_softmax` must be type of bool." + self._model.with_softmax = value From c7cf0309c98030dffa150894a7a705339f135651 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Tue, 2 Aug 2022 09:52:53 +0000 Subject: [PATCH 13/18] Support logits matrix resize --- csrcs/fastdeploy/vision/ppseg/model.cc | 67 ++++++++++++++++++++------ 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc index 04232239c3..9244c57ac6 100644 --- a/csrcs/fastdeploy/vision/ppseg/model.cc +++ b/csrcs/fastdeploy/vision/ppseg/model.cc @@ -110,9 +110,10 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, "."); result->Clear(); if (infer_result.shape.size() == 4) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch size = 1."); result->contain_score_map = true; float_t* infer_result_buffer = - reinterpret_cast(infer_result.data.data()); + reinterpret_cast(infer_result.MutableData()); // NCHW to NHWC int num = infer_result.shape[0]; int channel = infer_result.shape[1]; @@ -134,24 +135,58 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, } } } - uint8_t* hwc_data_buffer = reinterpret_cast(hwc_data.data()); - std::copy(hwc_data_buffer, hwc_data_buffer + infer_result.data.size() - 1, - infer_result.data.begin()); + std::memcpy(infer_result.MutableData(), hwc_data.data(), + num * chw * sizeof(float_t)); infer_result.shape = {num, height, width, channel}; } - result->shape = infer_result.shape; - for (auto& i : result->shape) { - std::cout << "result->shape---" << i << std::endl; + + FDTensor new_infer_result; + Mat* mat = nullptr; + if (is_resized) { + FDASSERT(infer_result.dtype == FDDataType::FP32, + "Require the data type of output is fp32, but now it's " + + Str(const_cast(infer_result.dtype)) + + ". 
Please export PaddleSeg model without argmax and with " + "softmax refer to " + "https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/" + "docs/model_export_cn.md"); + + int channel = 1; + int height = infer_result.shape[1]; + int width = infer_result.shape[2]; + + if (result->contain_score_map) { + channel = infer_result.shape[3]; + } + cv::Mat temp_mat = + cv::Mat(height, width, CV_32FC(channel), infer_result.Data()); + auto iter_ipt = (*im_info).find("input_shape"); + FDASSERT(iter_ipt != im_info->end(), + "Cannot find input_shape from im_info."); + int ipt_h = iter_ipt->second[0]; + int ipt_w = iter_ipt->second[1]; + + mat = new Mat(temp_mat); + Resize::Run(mat, ipt_w, ipt_h, -1, -1, 1); + mat->ShareWithTensor(&new_infer_result); + new_infer_result.shape.insert(new_infer_result.shape.begin(), 1); + result->shape = new_infer_result.shape; + } else { + result->shape = infer_result.shape; } int out_num = std::accumulate(result->shape.begin(), result->shape.begin() + 3, 1, std::multiplies()); - std::cout << "out_num---" << out_num << std::endl; + result->shape.erase(result->shape.begin()); result->label_map.reserve(out_num); if (result->contain_score_map) { - float_t* infer_result_buffer = - reinterpret_cast(infer_result.Data()); + float_t* infer_result_buffer = nullptr; + if (is_resized) { + infer_result_buffer = static_cast(new_infer_result.Data()); + } else { + infer_result_buffer = static_cast(infer_result.Data()); + } int64_t height = result->shape[0]; int64_t width = result->shape[1]; int64_t num_classes = result->shape[2]; @@ -164,10 +199,13 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, int64_t s = (i * width + j) * num_classes; float_t* max_class_score = std::max_element( infer_result_buffer + s, infer_result_buffer + s + num_classes); - - auto label_id = std::distance(infer_result_buffer + s, max_class_score); - - result->label_map[index] = (static_cast(label_id)); + int label_id = std::distance(infer_result_buffer + s, max_class_score); + if (label_id >= 255) { + FDWARNING << "label_id is stored by uint8_t, now the value is bigger " + "than 255, it's " + << static_cast(label_id) << "." 
<< std::endl; + } + result->label_map[index] = static_cast(label_id); if (with_softmax) { double_t total = 0; @@ -190,6 +228,7 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, result->label_map[i] = static_cast(*(infer_result_buffer + i)); } } + std::free(mat); return true; } From c33666ff2ecd5684e676312636091cfc0fd5ea48 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Tue, 2 Aug 2022 10:20:51 +0000 Subject: [PATCH 14/18] Support mask matrix resize --- csrcs/fastdeploy/vision/ppseg/model.cc | 45 +++++++++++++++----------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc index 9244c57ac6..e5e30cea24 100644 --- a/csrcs/fastdeploy/vision/ppseg/model.cc +++ b/csrcs/fastdeploy/vision/ppseg/model.cc @@ -90,13 +90,7 @@ bool Model::Preprocess(Mat* mat, FDTensor* output, static_cast(mat->Width())}; mat->ShareWithTensor(output); - for (auto& i : output->shape) { - std::cout << "Preprocess before shape: " << i << std::endl; - } output->shape.insert(output->shape.begin(), 1); - for (auto& i : output->shape) { - std::cout << "Preprocess After shape: " << i << std::endl; - } output->name = InputInfoOfRuntime(0).name; return true; } @@ -143,14 +137,6 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, FDTensor new_infer_result; Mat* mat = nullptr; if (is_resized) { - FDASSERT(infer_result.dtype == FDDataType::FP32, - "Require the data type of output is fp32, but now it's " + - Str(const_cast(infer_result.dtype)) + - ". Please export PaddleSeg model without argmax and with " - "softmax refer to " - "https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/" - "docs/model_export_cn.md"); - int channel = 1; int height = infer_result.shape[1]; int width = infer_result.shape[2]; @@ -158,8 +144,23 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, if (result->contain_score_map) { channel = infer_result.shape[3]; } - cv::Mat temp_mat = - cv::Mat(height, width, CV_32FC(channel), infer_result.Data()); + cv::Mat temp_mat; + if (infer_result.dtype == FDDataType::INT64) { + int64_t chw = channel * height * width; + int64_t* infer_result_buffer = static_cast(infer_result.Data()); + std::vector float_result_buffer(chw); + temp_mat = cv::Mat(height, width, CV_32FC(channel)); + int index = 0; + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + temp_mat.at(i, j) = + static_cast(infer_result_buffer[index++]); + } + } + } else if (infer_result.dtype == FDDataType::FP32) { + temp_mat = cv::Mat(height, width, CV_32FC(channel), infer_result.Data()); + } + auto iter_ipt = (*im_info).find("input_shape"); FDASSERT(iter_ipt != im_info->end(), "Cannot find input_shape from im_info."); @@ -222,10 +223,18 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, } } } else { - for (int i = 0; i < out_num; i++) { + if (is_resized) { + float_t* infer_result_buffer = + static_cast(new_infer_result.Data()); + for (int i = 0; i < out_num; i++) { + result->label_map[i] = static_cast(*(infer_result_buffer + i)); + } + } else { const int64_t* infer_result_buffer = reinterpret_cast(infer_result.Data()); - result->label_map[i] = static_cast(*(infer_result_buffer + i)); + for (int i = 0; i < out_num; i++) { + result->label_map[i] = static_cast(*(infer_result_buffer + i)); + } } } std::free(mat); From 3bdad35607043a8cc1b74ba1a790b1cead117704 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: 
Thu, 4 Aug 2022 11:03:10 +0000 Subject: [PATCH 15/18] Fix some bugs --- .../vision/common/processors/resize.h | 14 +++++++-- csrcs/fastdeploy/vision/common/result.cc | 16 ++++++++++ csrcs/fastdeploy/vision/common/result.h | 4 +++ csrcs/fastdeploy/vision/ppseg/model.cc | 31 ++++++++++++++----- csrcs/fastdeploy/vision/ppseg/model.h | 2 ++ csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc | 4 ++- fastdeploy/vision/ppseg/__init__.py | 11 +++++++ 7 files changed, 72 insertions(+), 10 deletions(-) diff --git a/csrcs/fastdeploy/vision/common/processors/resize.h b/csrcs/fastdeploy/vision/common/processors/resize.h index 137007997f..5b6e9c0257 100644 --- a/csrcs/fastdeploy/vision/common/processors/resize.h +++ b/csrcs/fastdeploy/vision/common/processors/resize.h @@ -41,6 +41,16 @@ class Resize : public Processor { float scale_h = -1.0, int interp = 1, bool use_scale = false, ProcLib lib = ProcLib::OPENCV_CPU); + bool SetWidthAndHeight(int width, int height) { + width_ = width; + height_ = height; + return true; + } + + std::tuple GetWidthAndHeight() { + return std::make_tuple(width_, height_); + } + private: int width_; int height_; @@ -49,5 +59,5 @@ class Resize : public Processor { int interp_ = 1; bool use_scale_ = false; }; -} // namespace vision -} // namespace fastdeploy +} // namespace vision +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/result.cc b/csrcs/fastdeploy/vision/common/result.cc index fdc248ec63..1a9a6dbfeb 100644 --- a/csrcs/fastdeploy/vision/common/result.cc +++ b/csrcs/fastdeploy/vision/common/result.cc @@ -146,6 +146,20 @@ void SegmentationResult::Clear() { contain_score_map = false; } +void SegmentationResult::Reserve(int size) { + label_map.reserve(size); + if (contain_score_map > 0) { + score_map.reserve(size); + } +} + +void SegmentationResult::Resize(int size) { + label_map.resize(size); + if (contain_score_map) { + score_map.resize(size); + } +} + std::string SegmentationResult::Str() { std::string out; out = "SegmentationResult Image masks 10 rows x 10 cols: \n"; @@ -168,6 +182,8 @@ std::string SegmentationResult::Str() { } out += "...........\n"; } + out += "result shape is: [" + std::to_string(shape[0]) + " " + + std::to_string(shape[1]) + "]"; return out; } diff --git a/csrcs/fastdeploy/vision/common/result.h b/csrcs/fastdeploy/vision/common/result.h index eada40ac30..f2b20f623b 100644 --- a/csrcs/fastdeploy/vision/common/result.h +++ b/csrcs/fastdeploy/vision/common/result.h @@ -93,6 +93,10 @@ struct FASTDEPLOY_DECL SegmentationResult : public BaseResult { void Clear(); + void Reserve(int size); + + void Resize(int size); + std::string Str(); }; diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc index e5e30cea24..613a7a6b7a 100644 --- a/csrcs/fastdeploy/vision/ppseg/model.cc +++ b/csrcs/fastdeploy/vision/ppseg/model.cc @@ -78,6 +78,18 @@ bool Model::BuildPreprocessPipelineFromConfig() { bool Model::Preprocess(Mat* mat, FDTensor* output, std::map>* im_info) { for (size_t i = 0; i < processors_.size(); ++i) { + if (processors_[i]->Name().compare("Resize") == 0) { + auto processor = dynamic_cast(processors_[i].get()); + int resize_width = -1; + int resize_height = -1; + std::tie(resize_width, resize_height) = processor->GetWidthAndHeight(); + if (is_vertical_screen && (resize_width > resize_height)) { + if (processor->SetWidthAndHeight(resize_height, resize_width)) { + FDERROR << "Failed to set Resize processor width and height " + << processors_[i]->Name() << "." 
<< std::endl; + } + } + } if (!(*(processors_[i].get()))(mat)) { FDERROR << "Failed to process image data in " << processors_[i]->Name() << "." << std::endl; @@ -100,8 +112,7 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, FDASSERT(infer_result.dtype == FDDataType::INT64 || infer_result.dtype == FDDataType::FP32, "Require the data type of output is int64 or fp32, but now it's " + - Str(const_cast(infer_result.dtype)) + - "."); + Str(infer_result.dtype) + "."); result->Clear(); if (infer_result.shape.size() == 4) { FDASSERT(infer_result.shape[0] == 1, "Only support batch size = 1."); @@ -146,6 +157,13 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, } cv::Mat temp_mat; if (infer_result.dtype == FDDataType::INT64) { + FDWARNING << "The PaddleSeg model is exported with argmax. Inference " + "result type is " + + Str(infer_result.dtype) + + ". If you want the edge of segmentation image more " + "smoother. Please export model with --without_argmax " + "--with_softmax." + << std::endl; int64_t chw = channel * height * width; int64_t* infer_result_buffer = static_cast(infer_result.Data()); std::vector float_result_buffer(chw); @@ -180,7 +198,7 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, std::multiplies()); result->shape.erase(result->shape.begin()); - result->label_map.reserve(out_num); + result->Resize(out_num); if (result->contain_score_map) { float_t* infer_result_buffer = nullptr; if (is_resized) { @@ -191,9 +209,6 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, int64_t height = result->shape[0]; int64_t width = result->shape[1]; int64_t num_classes = result->shape[2]; - result->shape.erase(result->shape.begin() + 2); - - result->score_map.reserve(out_num); int index = 0; for (size_t i = 0; i < height; ++i) { for (size_t j = 0; j < width; ++j) { @@ -237,7 +252,9 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, } } } - std::free(mat); + result->shape.erase(result->shape.begin() + 2); + delete mat; + mat = nullptr; return true; } diff --git a/csrcs/fastdeploy/vision/ppseg/model.h b/csrcs/fastdeploy/vision/ppseg/model.h index bfbb7e52ab..72f8dbc645 100644 --- a/csrcs/fastdeploy/vision/ppseg/model.h +++ b/csrcs/fastdeploy/vision/ppseg/model.h @@ -20,6 +20,8 @@ class FASTDEPLOY_DECL Model : public FastDeployModel { bool with_softmax = false; + bool is_vertical_screen = false; + private: bool Initialize(); diff --git a/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc b/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc index dd2ec5572d..949c274875 100644 --- a/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc +++ b/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc @@ -28,6 +28,8 @@ void BindPPSeg(pybind11::module& m) { self.Predict(&mat, res); return res; }) - .def_readwrite("with_softmax", &vision::ppseg::Model::with_softmax); + .def_readwrite("with_softmax", &vision::ppseg::Model::with_softmax) + .def_readwrite("is_vertical_screen", + &vision::ppseg::Model::is_vertical_screen); } } // namespace fastdeploy diff --git a/fastdeploy/vision/ppseg/__init__.py b/fastdeploy/vision/ppseg/__init__.py index 77fcdb9379..dbc826722d 100644 --- a/fastdeploy/vision/ppseg/__init__.py +++ b/fastdeploy/vision/ppseg/__init__.py @@ -46,3 +46,14 @@ def with_softmax(self, value): value, bool), "The value to set `with_softmax` must be type of bool." 
self._model.with_softmax = value + + @property + def is_vertical_screen(self): + return self._model.is_vertical_screen + + @is_vertical_screen.setter + def is_vertical_screen(self, value): + assert isinstance( + value, + bool), "The value to set `is_vertical_screen` must be type of bool." + self._model.is_vertical_screen = value From b2d333402097d1597bffd9cdb6de8118e6bfc0dd Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 4 Aug 2022 12:25:01 +0000 Subject: [PATCH 16/18] Format code --- csrcs/fastdeploy/vision/ppseg/model.cc | 195 ++++++++++++++----------- 1 file changed, 108 insertions(+), 87 deletions(-) diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc index 613a7a6b7a..de028efb8a 100644 --- a/csrcs/fastdeploy/vision/ppseg/model.cc +++ b/csrcs/fastdeploy/vision/ppseg/model.cc @@ -7,6 +7,106 @@ namespace fastdeploy { namespace vision { namespace ppseg { +// NCHW2NHWC +void NCHW2NHWC(FDTensor& infer_result) { + float_t* infer_result_buffer = + reinterpret_cast(infer_result.MutableData()); + // NCHW to NHWC + int num = infer_result.shape[0]; + int channel = infer_result.shape[1]; + int height = infer_result.shape[2]; + int width = infer_result.shape[3]; + int chw = channel * height * width; + int wc = width * channel; + int wh = width * height; + std::vector hwc_data(chw); + int index = 0; + for (int n = 0; n < num; n++) { + for (int c = 0; c < channel; c++) { + for (int h = 0; h < height; h++) { + for (int w = 0; w < width; w++) { + hwc_data[n * chw + h * wc + w * channel + c] = + *(infer_result_buffer + index); + index++; + } + } + } + } + std::memcpy(infer_result.MutableData(), hwc_data.data(), + num * chw * sizeof(float_t)); + infer_result.shape = {num, height, width, channel}; +} + +void Cast2FP32Mat(cv::Mat& mat, FDTensor& infer_result, + bool contain_score_map) { + int channel = 1; + int height = infer_result.shape[1]; + int width = infer_result.shape[2]; + + if (contain_score_map) { + // output without argmax and convent to NHWC + channel = infer_result.shape[3]; + } + if (infer_result.dtype == FDDataType::INT64) { + // output with argmax + FDWARNING << "The PaddleSeg model is exported with argmax. Inference " + "result type is " + + Str(infer_result.dtype) + + ". If you want the edge of segmentation image more " + "smoother. Please export model with --without_argmax " + "--with_softmax." 
+ << std::endl; + int64_t chw = channel * height * width; + int64_t* infer_result_buffer = static_cast(infer_result.Data()); + std::vector float_result_buffer(chw); + mat = cv::Mat(height, width, CV_32FC(channel)); + int index = 0; + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + mat.at(i, j) = + static_cast(infer_result_buffer[index++]); + } + } + } else if (infer_result.dtype == FDDataType::FP32) { + mat = cv::Mat(height, width, CV_32FC(channel), infer_result.Data()); + } +} + +void ArgmaxScoreMap(float_t* infer_result_buffer, SegmentationResult* result, + bool with_softmax) { + int64_t height = result->shape[0]; + int64_t width = result->shape[1]; + int64_t num_classes = result->shape[2]; + int index = 0; + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + int64_t s = (i * width + j) * num_classes; + float_t* max_class_score = std::max_element( + infer_result_buffer + s, infer_result_buffer + s + num_classes); + int label_id = std::distance(infer_result_buffer + s, max_class_score); + if (label_id >= 255) { + FDWARNING << "label_id is stored by uint8_t, now the value is bigger " + "than 255, it's " + << static_cast(label_id) << "." << std::endl; + } + result->label_map[index] = static_cast(label_id); + + if (with_softmax) { + double_t total = 0; + for (int k = 0; k < num_classes; k++) { + total += exp(*(infer_result_buffer + s + k) - *max_class_score); + } + double_t softmax_class_score = 1 / total; + result->score_map[index] = static_cast(softmax_class_score); + + } else { + result->score_map[index] = *max_class_score; + } + index++; + } + } +} + Model::Model(const std::string& model_file, const std::string& params_file, const std::string& config_file, const RuntimeOption& custom_option, const Frontend& model_format) { @@ -114,71 +214,21 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, "Require the data type of output is int64 or fp32, but now it's " + Str(infer_result.dtype) + "."); result->Clear(); + if (infer_result.shape.size() == 4) { + // output without argmax FDASSERT(infer_result.shape[0] == 1, "Only support batch size = 1."); result->contain_score_map = true; - float_t* infer_result_buffer = - reinterpret_cast(infer_result.MutableData()); - // NCHW to NHWC - int num = infer_result.shape[0]; - int channel = infer_result.shape[1]; - int height = infer_result.shape[2]; - int width = infer_result.shape[3]; - int chw = channel * height * width; - int wc = width * channel; - int wh = width * height; - std::vector hwc_data(chw); - int index = 0; - for (int n = 0; n < num; n++) { - for (int c = 0; c < channel; c++) { - for (int h = 0; h < height; h++) { - for (int w = 0; w < width; w++) { - hwc_data[n * chw + h * wc + w * channel + c] = - *(infer_result_buffer + index); - index++; - } - } - } - } - std::memcpy(infer_result.MutableData(), hwc_data.data(), - num * chw * sizeof(float_t)); - infer_result.shape = {num, height, width, channel}; + NCHW2NHWC(infer_result); } FDTensor new_infer_result; Mat* mat = nullptr; if (is_resized) { - int channel = 1; - int height = infer_result.shape[1]; - int width = infer_result.shape[2]; - - if (result->contain_score_map) { - channel = infer_result.shape[3]; - } cv::Mat temp_mat; - if (infer_result.dtype == FDDataType::INT64) { - FDWARNING << "The PaddleSeg model is exported with argmax. Inference " - "result type is " + - Str(infer_result.dtype) + - ". If you want the edge of segmentation image more " - "smoother. 
Please export model with --without_argmax " - "--with_softmax." - << std::endl; - int64_t chw = channel * height * width; - int64_t* infer_result_buffer = static_cast(infer_result.Data()); - std::vector float_result_buffer(chw); - temp_mat = cv::Mat(height, width, CV_32FC(channel)); - int index = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - temp_mat.at(i, j) = - static_cast(infer_result_buffer[index++]); - } - } - } else if (infer_result.dtype == FDDataType::FP32) { - temp_mat = cv::Mat(height, width, CV_32FC(channel), infer_result.Data()); - } + Cast2FP32Mat(temp_mat, infer_result, result->contain_score_map); + // original image shape auto iter_ipt = (*im_info).find("input_shape"); FDASSERT(iter_ipt != im_info->end(), "Cannot find input_shape from im_info."); @@ -206,37 +256,9 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, } else { infer_result_buffer = static_cast(infer_result.Data()); } - int64_t height = result->shape[0]; - int64_t width = result->shape[1]; - int64_t num_classes = result->shape[2]; - int index = 0; - for (size_t i = 0; i < height; ++i) { - for (size_t j = 0; j < width; ++j) { - int64_t s = (i * width + j) * num_classes; - float_t* max_class_score = std::max_element( - infer_result_buffer + s, infer_result_buffer + s + num_classes); - int label_id = std::distance(infer_result_buffer + s, max_class_score); - if (label_id >= 255) { - FDWARNING << "label_id is stored by uint8_t, now the value is bigger " - "than 255, it's " - << static_cast(label_id) << "." << std::endl; - } - result->label_map[index] = static_cast(label_id); - - if (with_softmax) { - double_t total = 0; - for (int k = 0; k < num_classes; k++) { - total += exp(*(infer_result_buffer + s + k) - *max_class_score); - } - double_t softmax_class_score = 1 / total; - result->score_map[index] = static_cast(softmax_class_score); - - } else { - result->score_map[index] = *max_class_score; - } - index++; - } - } + // argmax + ArgmaxScoreMap(infer_result_buffer, result, with_softmax); + result->shape.erase(result->shape.begin() + 2); } else { if (is_resized) { float_t* infer_result_buffer = @@ -252,7 +274,6 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, } } } - result->shape.erase(result->shape.begin() + 2); delete mat; mat = nullptr; return true; From a946eb65130428cef5921ec87cdaa2c2d761120f Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 4 Aug 2022 12:59:23 +0000 Subject: [PATCH 17/18] Add code comment --- csrcs/fastdeploy/vision/ppseg/model.cc | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc index de028efb8a..05c378d963 100644 --- a/csrcs/fastdeploy/vision/ppseg/model.cc +++ b/csrcs/fastdeploy/vision/ppseg/model.cc @@ -7,11 +7,9 @@ namespace fastdeploy { namespace vision { namespace ppseg { -// NCHW2NHWC void NCHW2NHWC(FDTensor& infer_result) { float_t* infer_result_buffer = reinterpret_cast(infer_result.MutableData()); - // NCHW to NHWC int num = infer_result.shape[0]; int channel = infer_result.shape[1]; int height = infer_result.shape[2]; @@ -39,6 +37,7 @@ void NCHW2NHWC(FDTensor& infer_result) { void Cast2FP32Mat(cv::Mat& mat, FDTensor& infer_result, bool contain_score_map) { + // output with argmax channel is 1 int channel = 1; int height = infer_result.shape[1]; int width = infer_result.shape[2]; @@ -47,8 +46,8 @@ void Cast2FP32Mat(cv::Mat& mat, FDTensor& infer_result, 
// output without argmax and convent to NHWC channel = infer_result.shape[3]; } + // create FP32 cvmat if (infer_result.dtype == FDDataType::INT64) { - // output with argmax FDWARNING << "The PaddleSeg model is exported with argmax. Inference " "result type is " + Str(infer_result.dtype) + @@ -209,6 +208,18 @@ bool Model::Preprocess(Mat* mat, FDTensor* output, bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, std::map>* im_info) { + // PaddleSeg has three types of inference output: + // 1. output with argmax and without softmax. 3-D matrix CHW, Channel + // always 1, the element in matrix is classified label_id INT64 Type. + // 2. output without argmax and without softmax. 4-D matrix NCHW, N always + // 1, Channel is the num of classes. The element is the logits of classes + // FP32 + // 3. output without argmax and with softmax. 4-D matrix NCHW, the result + // of 2 with softmax layer + // Fastdeploy output: + // 1. label_map + // 2. score_map(optional) + // 3. shape: 2-D HW FDASSERT(infer_result.dtype == FDDataType::INT64 || infer_result.dtype == FDDataType::FP32, "Require the data type of output is int64 or fp32, but now it's " + @@ -216,12 +227,13 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, result->Clear(); if (infer_result.shape.size() == 4) { - // output without argmax FDASSERT(infer_result.shape[0] == 1, "Only support batch size = 1."); + // output without argmax result->contain_score_map = true; NCHW2NHWC(infer_result); } + // for resize mat below FDTensor new_infer_result; Mat* mat = nullptr; if (is_resized) { @@ -236,6 +248,7 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, int ipt_w = iter_ipt->second[1]; mat = new Mat(temp_mat); + Resize::Run(mat, ipt_w, ipt_h, -1, -1, 1); mat->ShareWithTensor(&new_infer_result); new_infer_result.shape.insert(new_infer_result.shape.begin(), 1); @@ -246,10 +259,11 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, int out_num = std::accumulate(result->shape.begin(), result->shape.begin() + 3, 1, std::multiplies()); - + // NCHW remove N or CHW remove C result->shape.erase(result->shape.begin()); result->Resize(out_num); if (result->contain_score_map) { + // output with label_map and score_map float_t* infer_result_buffer = nullptr; if (is_resized) { infer_result_buffer = static_cast(new_infer_result.Data()); @@ -260,6 +274,7 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, ArgmaxScoreMap(infer_result_buffer, result, with_softmax); result->shape.erase(result->shape.begin() + 2); } else { + // output only with label_map if (is_resized) { float_t* infer_result_buffer = static_cast(new_infer_result.Data()); From 0b07de8ec82149c8d04267d5b56ed655c4e0e1d3 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 4 Aug 2022 14:58:42 +0000 Subject: [PATCH 18/18] Format code --- csrcs/fastdeploy/vision/ppseg/model.cc | 106 +----------------- .../fastdeploy/vision/utils/FDTensor2CVMat.cc | 59 ++++++++++ csrcs/fastdeploy/vision/utils/utils.h | 67 +++++++++++ 3 files changed, 130 insertions(+), 102 deletions(-) create mode 100644 csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc index 05c378d963..7f692c6a71 100644 --- a/csrcs/fastdeploy/vision/ppseg/model.cc +++ b/csrcs/fastdeploy/vision/ppseg/model.cc @@ -7,105 +7,6 @@ namespace fastdeploy { namespace vision { namespace ppseg { -void NCHW2NHWC(FDTensor& 
infer_result) { - float_t* infer_result_buffer = - reinterpret_cast(infer_result.MutableData()); - int num = infer_result.shape[0]; - int channel = infer_result.shape[1]; - int height = infer_result.shape[2]; - int width = infer_result.shape[3]; - int chw = channel * height * width; - int wc = width * channel; - int wh = width * height; - std::vector hwc_data(chw); - int index = 0; - for (int n = 0; n < num; n++) { - for (int c = 0; c < channel; c++) { - for (int h = 0; h < height; h++) { - for (int w = 0; w < width; w++) { - hwc_data[n * chw + h * wc + w * channel + c] = - *(infer_result_buffer + index); - index++; - } - } - } - } - std::memcpy(infer_result.MutableData(), hwc_data.data(), - num * chw * sizeof(float_t)); - infer_result.shape = {num, height, width, channel}; -} - -void Cast2FP32Mat(cv::Mat& mat, FDTensor& infer_result, - bool contain_score_map) { - // output with argmax channel is 1 - int channel = 1; - int height = infer_result.shape[1]; - int width = infer_result.shape[2]; - - if (contain_score_map) { - // output without argmax and convent to NHWC - channel = infer_result.shape[3]; - } - // create FP32 cvmat - if (infer_result.dtype == FDDataType::INT64) { - FDWARNING << "The PaddleSeg model is exported with argmax. Inference " - "result type is " + - Str(infer_result.dtype) + - ". If you want the edge of segmentation image more " - "smoother. Please export model with --without_argmax " - "--with_softmax." - << std::endl; - int64_t chw = channel * height * width; - int64_t* infer_result_buffer = static_cast(infer_result.Data()); - std::vector float_result_buffer(chw); - mat = cv::Mat(height, width, CV_32FC(channel)); - int index = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - mat.at(i, j) = - static_cast(infer_result_buffer[index++]); - } - } - } else if (infer_result.dtype == FDDataType::FP32) { - mat = cv::Mat(height, width, CV_32FC(channel), infer_result.Data()); - } -} - -void ArgmaxScoreMap(float_t* infer_result_buffer, SegmentationResult* result, - bool with_softmax) { - int64_t height = result->shape[0]; - int64_t width = result->shape[1]; - int64_t num_classes = result->shape[2]; - int index = 0; - for (size_t i = 0; i < height; ++i) { - for (size_t j = 0; j < width; ++j) { - int64_t s = (i * width + j) * num_classes; - float_t* max_class_score = std::max_element( - infer_result_buffer + s, infer_result_buffer + s + num_classes); - int label_id = std::distance(infer_result_buffer + s, max_class_score); - if (label_id >= 255) { - FDWARNING << "label_id is stored by uint8_t, now the value is bigger " - "than 255, it's " - << static_cast(label_id) << "." 
<< std::endl; - } - result->label_map[index] = static_cast(label_id); - - if (with_softmax) { - double_t total = 0; - for (int k = 0; k < num_classes; k++) { - total += exp(*(infer_result_buffer + s + k) - *max_class_score); - } - double_t softmax_class_score = 1 / total; - result->score_map[index] = static_cast(softmax_class_score); - - } else { - result->score_map[index] = *max_class_score; - } - index++; - } - } -} - Model::Model(const std::string& model_file, const std::string& params_file, const std::string& config_file, const RuntimeOption& custom_option, const Frontend& model_format) { @@ -230,7 +131,7 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, FDASSERT(infer_result.shape[0] == 1, "Only support batch size = 1."); // output without argmax result->contain_score_map = true; - NCHW2NHWC(infer_result); + utils::NCHW2NHWC(infer_result); } // for resize mat below @@ -238,7 +139,8 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, Mat* mat = nullptr; if (is_resized) { cv::Mat temp_mat; - Cast2FP32Mat(temp_mat, infer_result, result->contain_score_map); + utils::FDTensor2FP32CVMat(temp_mat, infer_result, + result->contain_score_map); // original image shape auto iter_ipt = (*im_info).find("input_shape"); @@ -271,7 +173,7 @@ bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, infer_result_buffer = static_cast(infer_result.Data()); } // argmax - ArgmaxScoreMap(infer_result_buffer, result, with_softmax); + utils::ArgmaxScoreMap(infer_result_buffer, result, with_softmax); result->shape.erase(result->shape.begin() + 2); } else { // output only with label_map diff --git a/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc b/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc new file mode 100644 index 0000000000..fdd110cb8c --- /dev/null +++ b/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/utils/utils.h" + +namespace fastdeploy { +namespace vision { +namespace utils { + +void FDTensor2FP32CVMat(cv::Mat& mat, FDTensor& infer_result, + bool contain_score_map) { + // output with argmax channel is 1 + int channel = 1; + int height = infer_result.shape[1]; + int width = infer_result.shape[2]; + + if (contain_score_map) { + // output without argmax and convent to NHWC + channel = infer_result.shape[3]; + } + // create FP32 cvmat + if (infer_result.dtype == FDDataType::INT64) { + FDWARNING << "The PaddleSeg model is exported with argmax. Inference " + "result type is " + + Str(infer_result.dtype) + + ". If you want the edge of segmentation image more " + "smoother. Please export model with --without_argmax " + "--with_softmax." 
+              << std::endl;
+    int64_t chw = channel * height * width;
+    int64_t* infer_result_buffer = static_cast<int64_t*>(infer_result.Data());
+    std::vector<float_t> float_result_buffer(chw);
+    mat = cv::Mat(height, width, CV_32FC(channel));
+    int index = 0;
+    for (int i = 0; i < height; i++) {
+      for (int j = 0; j < width; j++) {
+        mat.at<float_t>(i, j) =
+            static_cast<float_t>(infer_result_buffer[index++]);
+      }
+    }
+  } else if (infer_result.dtype == FDDataType::FP32) {
+    mat = cv::Mat(height, width, CV_32FC(channel), infer_result.Data());
+  }
+}
+
+}  // namespace utils
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/utils/utils.h b/csrcs/fastdeploy/vision/utils/utils.h
index e95e7e10b5..fb9c874d53 100644
--- a/csrcs/fastdeploy/vision/utils/utils.h
+++ b/csrcs/fastdeploy/vision/utils/utils.h
@@ -51,6 +51,73 @@ std::vector<int32_t> TopKIndices(const T* array, int array_size, int topk) {
   return res;
 }
 
+template <typename T>
+void ArgmaxScoreMap(T infer_result_buffer, SegmentationResult* result,
+                    bool with_softmax) {
+  int64_t height = result->shape[0];
+  int64_t width = result->shape[1];
+  int64_t num_classes = result->shape[2];
+  int index = 0;
+  for (size_t i = 0; i < height; ++i) {
+    for (size_t j = 0; j < width; ++j) {
+      int64_t s = (i * width + j) * num_classes;
+      T max_class_score = std::max_element(
+          infer_result_buffer + s, infer_result_buffer + s + num_classes);
+      int label_id = std::distance(infer_result_buffer + s, max_class_score);
+      if (label_id >= 255) {
+        FDWARNING << "label_id is stored by uint8_t, now the value is bigger "
+                     "than 255, it's "
+                  << static_cast<int>(label_id) << "." << std::endl;
+      }
+      result->label_map[index] = static_cast<uint8_t>(label_id);
+
+      if (with_softmax) {
+        double_t total = 0;
+        for (int k = 0; k < num_classes; k++) {
+          total += exp(*(infer_result_buffer + s + k) - *max_class_score);
+        }
+        double_t softmax_class_score = 1 / total;
+        result->score_map[index] = static_cast<float>(softmax_class_score);
+
+      } else {
+        result->score_map[index] = static_cast<float>(*max_class_score);
+      }
+      index++;
+    }
+  }
+}
+
+template <typename T>
+void NCHW2NHWC(FDTensor& infer_result) {
+  T* infer_result_buffer = reinterpret_cast<T*>(infer_result.MutableData());
+  int num = infer_result.shape[0];
+  int channel = infer_result.shape[1];
+  int height = infer_result.shape[2];
+  int width = infer_result.shape[3];
+  int chw = channel * height * width;
+  int wc = width * channel;
+  int wh = width * height;
+  std::vector<T> hwc_data(chw);
+  int index = 0;
+  for (int n = 0; n < num; n++) {
+    for (int c = 0; c < channel; c++) {
+      for (int h = 0; h < height; h++) {
+        for (int w = 0; w < width; w++) {
+          hwc_data[n * chw + h * wc + w * channel + c] =
+              *(infer_result_buffer + index);
+          index++;
+        }
+      }
+    }
+  }
+  std::memcpy(infer_result.MutableData(), hwc_data.data(),
+              num * chw * sizeof(T));
+  infer_result.shape = {num, height, width, channel};
+}
+
+void FDTensor2FP32CVMat(cv::Mat& mat, FDTensor& infer_result,
+                        bool contain_score_map);
+
 void NMS(DetectionResult* output, float iou_threshold = 0.5);
 
 void NMS(FaceDetectionResult* result, float iou_threshold = 0.5);
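
The patches above are easiest to read with the end-to-end flow in mind: Preprocess (optionally swapping the Resize width and height when is_vertical_screen is set), runtime inference, then Postprocess building a SegmentationResult through the utils::NCHW2NHWC, utils::FDTensor2FP32CVMat and utils::ArgmaxScoreMap helpers. The sketch below shows how a caller might drive that flow from C++. It is a minimal illustration, not part of the patch series: the model and image paths are hypothetical, and the default constructor arguments and the Predict(cv::Mat*, SegmentationResult*) signature are assumed from the Python bindings rather than stated explicitly by these diffs.

#include <iostream>
#include <opencv2/opencv.hpp>
#include "fastdeploy/vision.h"

int main() {
  namespace vis = fastdeploy::vision;
  // Placeholder paths for an exported PaddleSeg model directory.
  vis::ppseg::Model model("./ppseg_model/model.pdmodel",
                          "./ppseg_model/model.pdiparams",
                          "./ppseg_model/deploy.yaml");
  model.is_vertical_screen = true;  // portrait input: swap Resize width/height
  model.with_softmax = false;       // set true to softmax the logits in postprocess

  cv::Mat im = cv::imread("portrait.jpg");
  vis::SegmentationResult result;
  if (!model.Predict(&im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  // result.shape is {height, width}; label_map stores one uint8_t class id per
  // pixel, and score_map (when contain_score_map is true) the matching score.
  std::cout << "mask: " << result.shape[0] << "x" << result.shape[1]
            << ", label of first pixel: "
            << static_cast<int>(result.label_map[0]) << std::endl;
  return 0;
}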