Skip to content

Commit

Permalink
[Left TODO] Support PaddleSeg deployment (#39)
Browse files Browse the repository at this point in the history
* Support new model PaddleSeg

* Fix conflict

* PaddleSeg add visualization function

* fix bug

* Fix BindPPSeg wrong name

* Fix variable name

* Update by comments

* Add ppseg-unet example python version

* Change the way to decompress model file

* Visualize resize mask back to original image size

* Update paddleseg support

* Add essential files to support ppseg

* Support logits matrix resize

* Support mask matrix resize

* Fix some bugs

* Format code

* Add code comment

* Format code

Co-authored-by: Jason <jiangjiajun@baidu.com>
  • Loading branch information
felixhjh and jiangjiajun authored Aug 5, 2022
1 parent bd0482f commit 0e0dfd9
Show file tree
Hide file tree
Showing 12 changed files with 342 additions and 55 deletions.
14 changes: 12 additions & 2 deletions csrcs/fastdeploy/vision/common/processors/resize.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ class Resize : public Processor {
float scale_h = -1.0, int interp = 1, bool use_scale = false,
ProcLib lib = ProcLib::OPENCV_CPU);

// Overwrites the stored target size with the given width and height.
// No validation is performed here, so the call always reports success.
bool SetWidthAndHeight(int width, int height) {
  height_ = height;
  width_ = width;
  return true;
}

// Returns the stored target size as a (width, height) tuple, in that order.
std::tuple<int, int> GetWidthAndHeight() {
  return std::tuple<int, int>(width_, height_);
}

private:
int width_;
int height_;
Expand All @@ -49,5 +59,5 @@ class Resize : public Processor {
int interp_ = 1;
bool use_scale_ = false;
};
} // namespace vision
} // namespace fastdeploy
} // namespace vision
} // namespace fastdeploy
34 changes: 30 additions & 4 deletions csrcs/fastdeploy/vision/common/result.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,24 @@ std::string FaceDetectionResult::Str() {
}

// Resets the result to an empty state.
// Each container is swapped with an empty temporary rather than clear()'d, so
// the old storage is handed to the temporary and freed when it is destroyed.
// contain_score_map is reset to false.
// NOTE(review): this text comes from a rendered diff without +/- markers; the
// `masks` line looks like the pre-change statement — confirm against the
// actual source file.
void SegmentationResult::Clear() {
std::vector<std::vector<int64_t>>().swap(masks);
std::vector<uint8_t>().swap(label_map);
std::vector<float>().swap(score_map);
std::vector<int64_t>().swap(shape);
contain_score_map = false;
}

void SegmentationResult::Resize(int64_t height, int64_t width) {
masks.resize(height, std::vector<int64_t>(width));
// Pre-allocates capacity for `size` labels, and for `size` scores when
// contain_score_map is set. The element count of both buffers is unchanged.
//
// size: number of elements to make room for; values <= 0 are ignored.
void SegmentationResult::Reserve(int size) {
  if (size <= 0) {
    // Nothing to reserve. This also keeps a negative value from being
    // implicitly converted to a huge unsigned capacity request.
    return;
  }
  const size_t capacity = static_cast<size_t>(size);
  label_map.reserve(capacity);
  // contain_score_map is a bool; test it directly, as Resize() does.
  if (contain_score_map) {
    score_map.reserve(capacity);
  }
}

// Sets the element count of label_map to `size`; score_map is resized to the
// same count only when contain_score_map is set.
void SegmentationResult::Resize(int size) {
  label_map.resize(size);
  if (!contain_score_map) {
    return;
  }
  score_map.resize(size);
}

std::string SegmentationResult::Str() {
Expand All @@ -153,11 +166,24 @@ std::string SegmentationResult::Str() {
for (size_t i = 0; i < 10; ++i) {
out += "[";
for (size_t j = 0; j < 10; ++j) {
out = out + std::to_string(masks[i][j]) + ", ";
out = out + std::to_string(label_map[i * 10 + j]) + ", ";
}
out += ".....]\n";
}
out += "...........\n";
if (contain_score_map) {
out += "SegmentationResult Score map 10 rows x 10 cols: \n";
for (size_t i = 0; i < 10; ++i) {
out += "[";
for (size_t j = 0; j < 10; ++j) {
out = out + std::to_string(score_map[i * 10 + j]) + ", ";
}
out += ".....]\n";
}
out += "...........\n";
}
out += "result shape is: [" + std::to_string(shape[0]) + " " +
std::to_string(shape[1]) + "]";
return out;
}

Expand Down
9 changes: 7 additions & 2 deletions csrcs/fastdeploy/vision/common/result.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,18 @@ struct FASTDEPLOY_DECL FaceDetectionResult : public BaseResult {

// Result container for a segmentation model.
// NOTE(review): this text comes from a rendered diff without +/- markers;
// `masks` and the two-argument Resize() look like the pre-change declarations
// that label_map/score_map/shape and Reserve()/Resize(int) replace — confirm
// against the actual header.
struct FASTDEPLOY_DECL SegmentationResult : public BaseResult {
// mask
std::vector<std::vector<int64_t>> masks;
// Flat buffer of class labels, one uint8_t per element.
std::vector<uint8_t> label_map;
// Flat buffer of float scores; only sized/filled when contain_score_map is
// true.
std::vector<float> score_map;
// Dimensions of the result; Str() prints shape[0] and shape[1]
// (presumably height and width — TODO confirm).
std::vector<int64_t> shape;
// When true, Reserve()/Resize(int)/Str() also act on score_map.
bool contain_score_map = false;

ResultType type = ResultType::SEGMENTATION;

// Empties all buffers and resets contain_score_map to false.
void Clear();

void Resize(int64_t height, int64_t width);
// Reserves capacity for `size` elements without changing the element count.
void Reserve(int size);

// Sets the element count of label_map (and score_map when enabled) to `size`.
void Resize(int size);

// Returns a printable summary of the result.
std::string Str();
};
Expand Down
140 changes: 116 additions & 24 deletions csrcs/fastdeploy/vision/ppseg/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ Model::Model(const std::string& model_file, const std::string& params_file,
const std::string& config_file, const RuntimeOption& custom_option,
const Frontend& model_format) {
config_file_ = config_file;
valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER};
valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT};
runtime_option = custom_option;
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;
Expand Down Expand Up @@ -65,6 +65,7 @@ bool Model::BuildPreprocessPipelineFromConfig() {
const auto& target_size = op["target_size"];
int resize_width = target_size[0].as<int>();
int resize_height = target_size[1].as<int>();
is_resized = true;
processors_.push_back(
std::make_shared<Resize>(resize_width, resize_height));
}
Expand All @@ -74,49 +75,140 @@ bool Model::BuildPreprocessPipelineFromConfig() {
return true;
}

bool Model::Preprocess(Mat* mat, FDTensor* output) {
// Runs every configured processor on `mat` in order, records the resulting
// image size, and hands the processed data to `output` as a batch-of-one
// tensor.
//
// mat:     image to preprocess; the processors modify it in place.
// output:  tensor that receives the processed data via Mat::ShareWithTensor;
//          a leading dimension of 1 is prepended to its shape and its name is
//          set to the runtime's first input name.
// im_info: receives "output_shape" = {height, width} of the processed image.
//
// Returns false as soon as a processor fails (or the Resize processor cannot
// be reconfigured); true otherwise.
bool Model::Preprocess(Mat* mat, FDTensor* output,
                       std::map<std::string, std::array<int, 2>>* im_info) {
  for (size_t i = 0; i < processors_.size(); ++i) {
    if (processors_[i]->Name().compare("Resize") == 0) {
      // dynamic_cast yields nullptr if the processor is not actually a Resize;
      // in that case the orientation swap is skipped instead of dereferencing
      // a null pointer.
      auto* processor = dynamic_cast<Resize*>(processors_[i].get());
      if (processor != nullptr) {
        int resize_width = -1;
        int resize_height = -1;
        std::tie(resize_width, resize_height) = processor->GetWidthAndHeight();
        // For portrait input with a landscape target size, swap the target
        // width and height.
        if (is_vertical_screen && (resize_width > resize_height)) {
          // SetWidthAndHeight returns true on success, so only a false return
          // is a failure (the previous check was inverted and logged an error
          // on every successful swap).
          if (!processor->SetWidthAndHeight(resize_height, resize_width)) {
            FDERROR << "Failed to set Resize processor width and height "
                    << processors_[i]->Name() << "." << std::endl;
            return false;
          }
        }
      }
    }
    if (!(*(processors_[i].get()))(mat)) {
      FDERROR << "Failed to process image data in " << processors_[i]->Name()
              << "." << std::endl;
      return false;
    }
  }

  // Record output shape of preprocessed image
  (*im_info)["output_shape"] = {static_cast<int>(mat->Height()),
                                static_cast<int>(mat->Width())};

  mat->ShareWithTensor(output);
  // Prepend the batch dimension (always 1).
  output->shape.insert(output->shape.begin(), 1);
  output->name = InputInfoOfRuntime(0).name;
  return true;
}

bool Model::Postprocess(const FDTensor& infer_result,
SegmentationResult* result) {
FDASSERT(infer_result.dtype == FDDataType::INT64,
"Require the data type of output is int64, but now it's " +
Str(const_cast<fastdeploy::FDDataType&>(infer_result.dtype)) +
".");
bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result,
std::map<std::string, std::array<int, 2>>* im_info) {
// PaddleSeg has three types of inference output:
// 1. output with argmax and without softmax. 3-D matrix CHW, Channel
// always 1, the element in matrix is classified label_id INT64 Type.
// 2. output without argmax and without softmax. 4-D matrix NCHW, N always
// 1, Channel is the num of classes. The element is the logits of classes
// FP32
// 3. output without argmax and with softmax. 4-D matrix NCHW, the result
// of 2 with softmax layer
// Fastdeploy output:
// 1. label_map
// 2. score_map(optional)
// 3. shape: 2-D HW
FDASSERT(infer_result.dtype == FDDataType::INT64 ||
infer_result.dtype == FDDataType::FP32,
"Require the data type of output is int64 or fp32, but now it's " +
Str(infer_result.dtype) + ".");
result->Clear();
std::vector<int64_t> output_shape = infer_result.shape;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());
const int64_t* infer_result_buffer =
reinterpret_cast<const int64_t*>(infer_result.data.data());
int64_t height = output_shape[1];
int64_t width = output_shape[2];
result->Resize(height, width);
for (int64_t i = 0; i < height; i++) {
int64_t begin = i * width;
int64_t end = (i + 1) * width - 1;
std::copy(infer_result_buffer + begin, infer_result_buffer + end,
result->masks[i].begin());

if (infer_result.shape.size() == 4) {
FDASSERT(infer_result.shape[0] == 1, "Only support batch size = 1.");
// output without argmax
result->contain_score_map = true;
utils::NCHW2NHWC<float_t>(infer_result);
}

// for resize mat below
FDTensor new_infer_result;
Mat* mat = nullptr;
if (is_resized) {
cv::Mat temp_mat;
utils::FDTensor2FP32CVMat(temp_mat, infer_result,
result->contain_score_map);

// original image shape
auto iter_ipt = (*im_info).find("input_shape");
FDASSERT(iter_ipt != im_info->end(),
"Cannot find input_shape from im_info.");
int ipt_h = iter_ipt->second[0];
int ipt_w = iter_ipt->second[1];

mat = new Mat(temp_mat);

Resize::Run(mat, ipt_w, ipt_h, -1, -1, 1);
mat->ShareWithTensor(&new_infer_result);
new_infer_result.shape.insert(new_infer_result.shape.begin(), 1);
result->shape = new_infer_result.shape;
} else {
result->shape = infer_result.shape;
}
int out_num =
std::accumulate(result->shape.begin(), result->shape.begin() + 3, 1,
std::multiplies<int>());
// NCHW remove N or CHW remove C
result->shape.erase(result->shape.begin());
result->Resize(out_num);
if (result->contain_score_map) {
// output with label_map and score_map
float_t* infer_result_buffer = nullptr;
if (is_resized) {
infer_result_buffer = static_cast<float_t*>(new_infer_result.Data());
} else {
infer_result_buffer = static_cast<float_t*>(infer_result.Data());
}
// argmax
utils::ArgmaxScoreMap(infer_result_buffer, result, with_softmax);
result->shape.erase(result->shape.begin() + 2);
} else {
// output only with label_map
if (is_resized) {
float_t* infer_result_buffer =
static_cast<float_t*>(new_infer_result.Data());
for (int i = 0; i < out_num; i++) {
result->label_map[i] = static_cast<uint8_t>(*(infer_result_buffer + i));
}
} else {
const int64_t* infer_result_buffer =
reinterpret_cast<const int64_t*>(infer_result.Data());
for (int i = 0; i < out_num; i++) {
result->label_map[i] = static_cast<uint8_t>(*(infer_result_buffer + i));
}
}
}
delete mat;
mat = nullptr;
return true;
}

bool Model::Predict(cv::Mat* im, SegmentationResult* result) {
Mat mat(*im);
std::vector<FDTensor> processed_data(1);
if (!Preprocess(&mat, &(processed_data[0]))) {

std::map<std::string, std::array<int, 2>> im_info;

// Record the shape of image and the shape of preprocessed image
im_info["input_shape"] = {static_cast<int>(mat.Height()),
static_cast<int>(mat.Width())};
im_info["output_shape"] = {static_cast<int>(mat.Height()),
static_cast<int>(mat.Width())};

if (!Preprocess(&mat, &(processed_data[0]), &im_info)) {
FDERROR << "Failed to preprocess input data while using model:"
<< ModelName() << "." << std::endl;
return false;
Expand All @@ -127,7 +219,7 @@ bool Model::Predict(cv::Mat* im, SegmentationResult* result) {
<< std::endl;
return false;
}
if (!Postprocess(infer_result[0], result)) {
if (!Postprocess(infer_result[0], result, &im_info)) {
FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
<< std::endl;
return false;
Expand Down
12 changes: 10 additions & 2 deletions csrcs/fastdeploy/vision/ppseg/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,22 @@ class FASTDEPLOY_DECL Model : public FastDeployModel {

virtual bool Predict(cv::Mat* im, SegmentationResult* result);

bool with_softmax = false;

bool is_vertical_screen = false;

private:
bool Initialize();

bool BuildPreprocessPipelineFromConfig();

bool Preprocess(Mat* mat, FDTensor* outputs);
bool Preprocess(Mat* mat, FDTensor* outputs,
std::map<std::string, std::array<int, 2>>* im_info);

bool Postprocess(FDTensor& infer_result, SegmentationResult* result,
std::map<std::string, std::array<int, 2>>* im_info);

bool Postprocess(const FDTensor& infer_result, SegmentationResult* result);
bool is_resized = false;

std::vector<std::shared_ptr<Processor>> processors_;
std::string config_file_;
Expand Down
17 changes: 11 additions & 6 deletions csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,16 @@ void BindPPSeg(pybind11::module& m) {
pybind11::class_<vision::ppseg::Model, FastDeployModel>(ppseg_module, "Model")
.def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
Frontend>())
.def("predict", [](vision::ppseg::Model& self, pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::SegmentationResult res;
self.Predict(&mat, &res);
return res;
});
.def("predict",
[](vision::ppseg::Model& self, pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::SegmentationResult* res = new vision::SegmentationResult();
// self.Predict(&mat, &res);
self.Predict(&mat, res);
return res;
})
.def_readwrite("with_softmax", &vision::ppseg::Model::with_softmax)
.def_readwrite("is_vertical_screen",
&vision::ppseg::Model::is_vertical_screen);
}
} // namespace fastdeploy
59 changes: 59 additions & 0 deletions csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/vision/utils/utils.h"

namespace fastdeploy {
namespace vision {
namespace utils {

// Builds a 32-bit float cv::Mat view/copy of a segmentation inference result.
//
// mat:               receives the resulting matrix (height x width, with
//                    `channel` float channels).
// infer_result:      inference output. shape[1] and shape[2] are read as
//                    height and width; when contain_score_map is true,
//                    shape[3] is read as the channel count (NHWC layout).
// contain_score_map: true when the output was produced without argmax and has
//                    already been converted to NHWC.
//
// Behavior by dtype:
//  - INT64: a new matrix is allocated and every label is converted to float.
//  - FP32:  the matrix wraps infer_result's buffer WITHOUT copying, so `mat`
//           is only valid while that buffer stays alive and unmodified.
//  - other: `mat` is left untouched.
void FDTensor2FP32CVMat(cv::Mat& mat, FDTensor& infer_result,
                        bool contain_score_map) {
  // output with argmax channel is 1
  int channel = 1;
  const int height = infer_result.shape[1];
  const int width = infer_result.shape[2];

  if (contain_score_map) {
    // output without argmax and convert to NHWC
    channel = infer_result.shape[3];
  }
  // create FP32 cvmat
  if (infer_result.dtype == FDDataType::INT64) {
    FDWARNING << "The PaddleSeg model is exported with argmax. Inference "
                 "result type is " +
                     Str(infer_result.dtype) +
                     ". If you want the edge of segmentation image more "
                     "smoother. Please export model with --without_argmax "
                     "--with_softmax."
              << std::endl;
    const int64_t* labels = static_cast<const int64_t*>(infer_result.Data());
    mat = cv::Mat(height, width, CV_32FC(channel));
    // CV_32F elements are plain `float`; `float_t` is not guaranteed to be
    // `float` on every platform, so it must not be used as the element type.
    int index = 0;
    for (int i = 0; i < height; i++) {
      float* row = mat.ptr<float>(i);
      for (int j = 0; j < width; j++) {
        row[j] = static_cast<float>(labels[index++]);
      }
    }
  } else if (infer_result.dtype == FDDataType::FP32) {
    // User-data constructor: no copy is made, the Mat aliases the tensor data.
    mat = cv::Mat(height, width, CV_32FC(channel), infer_result.Data());
  }
}

} // namespace utils
} // namespace vision
} // namespace fastdeploy
Loading

0 comments on commit 0e0dfd9

Please sign in to comment.