From e61e10ee6a667925926aa3406b8a3ff52c0a73ef Mon Sep 17 00:00:00 2001 From: hiroi-sora <2230247019@qq.com> Date: Mon, 29 Jul 2024 01:21:54 +0800 Subject: [PATCH] =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=8E=A8=E8=BF=9B=EF=BC=9Av1?= =?UTF-8?q?.4.1=20dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 跟进 [PaddleOCR #10441](https://github.com/PaddlePaddle/PaddleOCR/pull/10441) - 使用智能指针代替原始指针 - 修复 detector_ classifier_ recognizer_ 可能造成的内存泄漏 2. 跟进 [PaddleOCR #10512](https://github.com/PaddlePaddle/PaddleOCR/pull/10512) - 修复时间记录的初始化过程 3. 格式化代码,补充部分注释 --- cpp/include/paddleocr.h | 75 ++--- cpp/include/paddlestructure.h | 76 ++--- cpp/include/task.h | 59 ++-- cpp/src/main.cpp | 4 +- cpp/src/ocr_cls.cpp | 268 +++++++++-------- cpp/src/ocr_rec.cpp | 324 ++++++++++---------- cpp/src/paddleocr.cpp | 43 +-- cpp/src/paddlestructure.cpp | 538 ++++++++++++++++++---------------- cpp/src/task.cpp | 206 +++++++------ 9 files changed, 849 insertions(+), 744 deletions(-) diff --git a/cpp/include/paddleocr.h b/cpp/include/paddleocr.h index 16750a1..7400aea 100644 --- a/cpp/include/paddleocr.h +++ b/cpp/include/paddleocr.h @@ -18,38 +18,47 @@ #include #include -namespace PaddleOCR { - -class PPOCR { -public: - explicit PPOCR(); - ~PPOCR(); - - std::vector> ocr(std::vector img_list, - bool det = true, - bool rec = true, - bool cls = true); - std::vector ocr(cv::Mat img, bool det = true, - bool rec = true, bool cls = true); - - void reset_timer(); - void benchmark_log(int img_num); - -protected: - std::vector time_info_det = {0, 0, 0}; - std::vector time_info_rec = {0, 0, 0}; - std::vector time_info_cls = {0, 0, 0}; - - void det(cv::Mat img, std::vector &ocr_results); - void rec(std::vector img_list, - std::vector &ocr_results); - void cls(std::vector img_list, - std::vector &ocr_results); - -private: - DBDetector *detector_ = nullptr; - Classifier *classifier_ = nullptr; - CRNNRecognizer *recognizer_ = nullptr; -}; +namespace PaddleOCR +{ + class PPOCR + { + public: + explicit PPOCR(); + ~PPOCR() = default; // 默认析构函数 + + // OCR方法,处理图像列表,返回每个图像的OCR结果向量 + std::vector> ocr(std::vector img_list, + bool det = true, + bool rec = true, + bool cls = true); + // OCR方法,处理单个图像,返回OCR结果 + std::vector ocr(cv::Mat img, bool det = true, + bool rec = true, bool cls = true); + + void reset_timer(); // 重置计时器 + void benchmark_log(int img_num); // 记录基准测试日志,参数为图像数量 + + protected: + // 时间信息 + std::vector time_info_det = {0, 0, 0}; + std::vector time_info_rec = {0, 0, 0}; + std::vector time_info_cls = {0, 0, 0}; + + // 文本检测:输入单张图片,在ocr_results向量中存放单行文本碎图的检测信息 + void det(cv::Mat img, + std::vector &ocr_results); + // 方向分类:输入单行碎图向量,在ocr_results向量中存放每个碎图的方向标志 + void cls(std::vector img_list, + std::vector &ocr_results); + // 文本识别:输入单行碎图向量,在ocr_results向量中存放每个碎图的文本 + void rec(std::vector img_list, + std::vector &ocr_results); + + private: + // 智能指针 + std::unique_ptr detector_; // 指向 文本检测器实例 + std::unique_ptr classifier_; // 指向 方向分类器实例 + std::unique_ptr recognizer_; // 指向 文本识别器实例 + }; } // namespace PaddleOCR diff --git a/cpp/include/paddlestructure.h b/cpp/include/paddlestructure.h index 8478a85..eb895e8 100644 --- a/cpp/include/paddlestructure.h +++ b/cpp/include/paddlestructure.h @@ -18,49 +18,57 @@ #include #include -namespace PaddleOCR { +namespace PaddleOCR +{ -class PaddleStructure : public PPOCR { -public: - explicit PaddleStructure(); - ~PaddleStructure(); + class PaddleStructure : public PPOCR + { + public: + explicit PaddleStructure(); + ~PaddleStructure() = default; - std::vector 
structure(cv::Mat img, - bool layout = false, - bool table = true, - bool ocr = false); + std::vector structure(cv::Mat img, + bool layout = false, + bool table = true, + bool ocr = false); - void reset_timer(); - void benchmark_log(int img_num); + void reset_timer(); + void benchmark_log(int img_num); -private: - std::vector time_info_table = {0, 0, 0}; - std::vector time_info_layout = {0, 0, 0}; + private: + std::vector time_info_table = {0, 0, 0}; + std::vector time_info_layout = {0, 0, 0}; - StructureTableRecognizer *table_model_ = nullptr; - StructureLayoutRecognizer *layout_model_ = nullptr; + std::unique_ptr table_model_; + std::unique_ptr layout_model_; - void layout(cv::Mat img, - std::vector &structure_result); + void layout(cv::Mat img, + std::vector &structure_result); - void table(cv::Mat img, StructurePredictResult &structure_result); + void table(cv::Mat img, StructurePredictResult &structure_result); - std::string rebuild_table(std::vector rec_html_tags, - std::vector> rec_boxes, - std::vector &ocr_result); + std::string rebuild_table(std::vector rec_html_tags, + std::vector> rec_boxes, + std::vector &ocr_result); - float dis(std::vector &box1, std::vector &box2); + float dis(std::vector &box1, std::vector &box2); - static bool comparison_dis(const std::vector &dis1, - const std::vector &dis2) { - if (dis1[1] < dis2[1]) { - return true; - } else if (dis1[1] == dis2[1]) { - return dis1[0] < dis2[0]; - } else { - return false; - } - } -}; + static bool comparison_dis(const std::vector &dis1, + const std::vector &dis2) + { + if (dis1[1] < dis2[1]) + { + return true; + } + else if (dis1[1] == dis2[1]) + { + return dis1[0] < dis2[0]; + } + else + { + return false; + } + } + }; } // namespace PaddleOCR \ No newline at end of file diff --git a/cpp/include/task.h b/cpp/include/task.h index 25c55e7..5f8f624 100644 --- a/cpp/include/task.h +++ b/cpp/include/task.h @@ -46,18 +46,18 @@ namespace PaddleOCR #endif // base64读图,失败 -#define CODE_ERR_BASE64_DECODE 300 // base64字符串解析为string失败 +#define CODE_ERR_BASE64_DECODE 300 // base64字符串解析为string失败 #define MSG_ERR_BASE64_DECODE "Base64 decode failed." -#define CODE_ERR_BASE64_IM_DECODE 301 // base64字符串解析成功,但读取到的内容无法被opencv解码 +#define CODE_ERR_BASE64_IM_DECODE 301 // base64字符串解析成功,但读取到的内容无法被opencv解码 #define MSG_ERR_BASE64_IM_DECODE "Base64 data imdecode failed." // json相关 -#define CODE_ERR_JSON_DUMP 400 // json对象 转字符串失败 +#define CODE_ERR_JSON_DUMP 400 // json对象 转字符串失败 #define MSG_ERR_JSON_DUMP "Json dump failed." -#define CODE_ERR_JSON_PARSE 401 // json字符串 转对象失败 +#define CODE_ERR_JSON_PARSE 401 // json字符串 转对象失败 #define MSG_ERR_JSON_PARSE "Json parse failed." -#define CODE_ERR_JSON_PARSE_KEY 402 // json对象 解析某个键时失败 +#define CODE_ERR_JSON_PARSE_KEY 402 // json对象 解析某个键时失败 #define MSG_ERR_JSON_PARSE_KEY(k) "Json parse key [" + k + "] failed." -#define CODE_ERR_NO_TASK 403 // 未发现有效任务 +#define CODE_ERR_NO_TASK 403 // 未发现有效任务 #define MSG_ERR_NO_TASK "No valid tasks." 
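
The headers above swap the raw owning pointers (freed by hand in the destructors) for `std::unique_ptr` members with defaulted destructors, which is what closes the detector_/classifier_/recognizer_ leak mentioned in the commit message. A minimal sketch of that ownership pattern, using a hypothetical Engine/Pipeline pair rather than the real PaddleOCR classes:

```cpp
#include <memory>

struct Engine
{
    explicit Engine(int device_id) : device_id_(device_id) {}
    int device_id_;
};

class Pipeline
{
public:
    explicit Pipeline(bool enable_engine)
    {
        if (enable_engine)
            engine_.reset(new Engine(0)); // or std::make_unique<Engine>(0) with C++14
    }
    ~Pipeline() = default; // unique_ptr releases the Engine automatically; no manual delete

    bool has_engine() const { return static_cast<bool>(engine_); } // replaces `!= nullptr` checks

private:
    std::unique_ptr<Engine> engine_; // owns the engine; stays null when the stage is disabled
};
```

Because `std::unique_ptr` is move-only, such a class also loses its implicit copy operations, which rules out accidental double frees of the same engine.
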
// ==================== 任务调用类 ==================== @@ -68,37 +68,36 @@ namespace PaddleOCR int ocr(); // OCR图片 private: - bool is_exit = false; // 为true时退出任务循环 - PPOCR *ppocr; // OCR引擎指针 - int t_code; // 本轮任务状态码 - std::string t_msg; // 本轮任务状态消息 + bool is_exit = false; // 为true时退出任务循环 + std::unique_ptr ppocr; // OCR引擎智能指针 + int t_code; // 本轮任务状态码 + std::string t_msg; // 本轮任务状态消息 - private: // 任务流程 std::string run_ocr(std::string); // 输入用户传入值(字符串),返回结果json字符串 - int single_image_mode(); // 单次识别模式 - int socket_mode(); // 套接字模式 - int anonymous_pipe_mode(); // 匿名管道模式 - - // 输出相关 - void set_state(int code = CODE_INIT, std::string msg = ""); // 设置状态 - std::string get_state_json(int code = CODE_INIT, std::string msg = ""); // 获取状态json字符串 - std::string get_ocr_result_json(const std::vector &); // 传入OCR结果,返回json字符串 - - // 输入相关 - std::string json_dump(nlohmann::json); // json对象转字符串 - cv::Mat imread_json(std::string &); // 输入json字符串,解析json并返回图片Mat - cv::Mat imread_u8(std::string path, int flag = cv::IMREAD_COLOR); // 代替cv imread,输入utf-8字符串,返回Mat。失败时设置错误码,并返回空Mat。 - cv::Mat imread_clipboard(int flag = cv::IMREAD_COLOR); // 从当前剪贴板中读取图片 - cv::Mat imread_base64(std::string&, int flag = cv::IMREAD_COLOR); // 输入base64编码的字符串,返回Mat + int single_image_mode(); // 单次识别模式 + int socket_mode(); // 套接字模式 + int anonymous_pipe_mode(); // 匿名管道模式 + + // 输出相关 + void set_state(int code = CODE_INIT, std::string msg = ""); // 设置状态 + std::string get_state_json(int code = CODE_INIT, std::string msg = ""); // 获取状态json字符串 + std::string get_ocr_result_json(const std::vector &); // 传入OCR结果,返回json字符串 + + // 输入相关 + std::string json_dump(nlohmann::json); // json对象转字符串 + cv::Mat imread_json(std::string &); // 输入json字符串,解析json并返回图片Mat + cv::Mat imread_u8(std::string path, int flag = cv::IMREAD_COLOR); // 代替cv imread,输入utf-8字符串,返回Mat。失败时设置错误码,并返回空Mat。 + cv::Mat imread_clipboard(int flag = cv::IMREAD_COLOR); // 从当前剪贴板中读取图片 + cv::Mat imread_base64(std::string &, int flag = cv::IMREAD_COLOR); // 输入base64编码的字符串,返回Mat #ifdef _WIN32 - cv::Mat imread_wstr(std::wstring pathW, int flags = cv::IMREAD_COLOR); // 输入unicode wstring字符串,返回Mat。 + cv::Mat imread_wstr(std::wstring pathW, int flags = cv::IMREAD_COLOR); // 输入unicode wstring字符串,返回Mat。 #endif - + // 其他 - + // ipv4 地址转 uint32_t - int addr_to_uint32(const std::string& addr, uint32_t& addr_out); + int addr_to_uint32(const std::string &addr, uint32_t &addr_out); }; } // namespace PaddleOCR diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp index ba86109..02008da 100644 --- a/cpp/src/main.cpp +++ b/cpp/src/main.cpp @@ -13,7 +13,7 @@ // limitations under the License. 
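
Task's I/O helpers declared above (json_dump, get_state_json, imread_json) exchange single-line JSON strings with the caller via nlohmann::json. A small round-trip sketch of that exchange; the `{"code": ..., "data": ...}` envelope is an assumption here, since only the `data` key is visible in this diff:

```cpp
#include <iostream>
#include <string>
#include "nlohmann/json.hpp"

int main()
{
    // Build a status object and dump it to a one-line string, as json_dump() would.
    nlohmann::json out;
    out["code"] = 300;                     // assumed key, mirroring CODE_ERR_BASE64_DECODE
    out["data"] = "Base64 decode failed."; // message text from MSG_ERR_BASE64_DECODE
    std::string line = out.dump();
    std::cout << line << std::endl;

    // Parse it back, the way imread_json() uses nlohmann::json::parse().
    try
    {
        nlohmann::json in = nlohmann::json::parse(line);
        std::cout << in["data"].get<std::string>() << std::endl;
    }
    catch (...)
    {
        std::cout << "Json parse failed." << std::endl;
    }
    return 0;
}
```
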
// 版本信息 -#define PROJECT_VER "v1.4.0" +#define PROJECT_VER "v1.4.1 dev" #define PROJECT_NAME "PaddleOCR-json " PROJECT_VER #include "opencv2/core.hpp" @@ -31,7 +31,7 @@ using namespace PaddleOCR; void structure(std::vector &cv_all_img_names) { - PaddleOCR::PaddleStructure engine = PaddleOCR::PaddleStructure(); + PaddleOCR::PaddleStructure engine; if (FLAGS_benchmark) { diff --git a/cpp/src/ocr_cls.cpp b/cpp/src/ocr_cls.cpp index 13a03d6..c4107d7 100644 --- a/cpp/src/ocr_cls.cpp +++ b/cpp/src/ocr_cls.cpp @@ -14,133 +14,147 @@ #include -namespace PaddleOCR { - -void Classifier::Run(std::vector img_list, - std::vector &cls_labels, - std::vector &cls_scores, - std::vector ×) { - std::chrono::duration preprocess_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - std::chrono::duration inference_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - std::chrono::duration postprocess_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - - int img_num = img_list.size(); - std::vector cls_image_shape = {3, 48, 192}; - for (int beg_img_no = 0; beg_img_no < img_num; - beg_img_no += this->cls_batch_num_) { - auto preprocess_start = std::chrono::steady_clock::now(); - int end_img_no = std::min(img_num, beg_img_no + this->cls_batch_num_); - int batch_num = end_img_no - beg_img_no; - // preprocess - std::vector norm_img_batch; - for (int ino = beg_img_no; ino < end_img_no; ino++) { - cv::Mat srcimg; - img_list[ino].copyTo(srcimg); - cv::Mat resize_img; - this->resize_op_.Run(srcimg, resize_img, this->use_tensorrt_, - cls_image_shape); - - this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, - this->is_scale_); - if (resize_img.cols < cls_image_shape[2]) { - cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, - cls_image_shape[2] - resize_img.cols, - cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); - } - norm_img_batch.push_back(resize_img); +namespace PaddleOCR +{ + + void Classifier::Run(std::vector img_list, + std::vector &cls_labels, + std::vector &cls_scores, + std::vector ×) + { + std::chrono::duration preprocess_diff = std::chrono::duration::zero(); + std::chrono::duration inference_diff = std::chrono::duration::zero(); + std::chrono::duration postprocess_diff = std::chrono::duration::zero(); + + int img_num = img_list.size(); + std::vector cls_image_shape = {3, 48, 192}; + for (int beg_img_no = 0; beg_img_no < img_num; + beg_img_no += this->cls_batch_num_) + { + auto preprocess_start = std::chrono::steady_clock::now(); + int end_img_no = std::min(img_num, beg_img_no + this->cls_batch_num_); + int batch_num = end_img_no - beg_img_no; + // preprocess + std::vector norm_img_batch; + for (int ino = beg_img_no; ino < end_img_no; ino++) + { + cv::Mat srcimg; + img_list[ino].copyTo(srcimg); + cv::Mat resize_img; + this->resize_op_.Run(srcimg, resize_img, this->use_tensorrt_, + cls_image_shape); + + this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, + this->is_scale_); + if (resize_img.cols < cls_image_shape[2]) + { + cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, + cls_image_shape[2] - resize_img.cols, + cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); + } + norm_img_batch.push_back(resize_img); + } + std::vector input(batch_num * cls_image_shape[0] * + cls_image_shape[1] * cls_image_shape[2], + 0.0f); + this->permute_op_.Run(norm_img_batch, input.data()); + auto preprocess_end = std::chrono::steady_clock::now(); + preprocess_diff += preprocess_end - preprocess_start; + + // inference. 
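
Classifier::Run above now starts its three stage timers from std::chrono::duration::zero() and accumulates the per-batch elapsed time into them, instead of seeding each accumulator with the difference of two now() calls. A standalone sketch of that accumulation pattern (the sleep is just a stand-in for real work):

```cpp
#include <chrono>
#include <iostream>
#include <thread>

int main()
{
    // The accumulator starts at exactly zero; `now() - now()` is merely "close to zero"
    // and spends two clock reads on saying so.
    std::chrono::duration<float> elapsed = std::chrono::duration<float>::zero();

    for (int batch = 0; batch < 3; ++batch)
    {
        auto start = std::chrono::steady_clock::now();
        std::this_thread::sleep_for(std::chrono::milliseconds(5)); // stand-in for per-batch work
        elapsed += std::chrono::steady_clock::now() - start;       // converts implicitly to float seconds
    }

    std::cout << elapsed.count() * 1000 << " ms" << std::endl; // reported in milliseconds, like times.push_back()
    return 0;
}
```
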
+ auto input_names = this->predictor_->GetInputNames(); + auto input_t = this->predictor_->GetInputHandle(input_names[0]); + input_t->Reshape({batch_num, cls_image_shape[0], cls_image_shape[1], + cls_image_shape[2]}); + auto inference_start = std::chrono::steady_clock::now(); + input_t->CopyFromCpu(input.data()); + this->predictor_->Run(); + + std::vector predict_batch; + auto output_names = this->predictor_->GetOutputNames(); + auto output_t = this->predictor_->GetOutputHandle(output_names[0]); + auto predict_shape = output_t->shape(); + + int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1, + std::multiplies()); + predict_batch.resize(out_num); + + output_t->CopyToCpu(predict_batch.data()); + auto inference_end = std::chrono::steady_clock::now(); + inference_diff += inference_end - inference_start; + + // postprocess + auto postprocess_start = std::chrono::steady_clock::now(); + for (int batch_idx = 0; batch_idx < predict_shape[0]; batch_idx++) + { + int label = int( + Utility::argmax(&predict_batch[batch_idx * predict_shape[1]], + &predict_batch[(batch_idx + 1) * predict_shape[1]])); + float score = float(*std::max_element( + &predict_batch[batch_idx * predict_shape[1]], + &predict_batch[(batch_idx + 1) * predict_shape[1]])); + cls_labels[beg_img_no + batch_idx] = label; + cls_scores[beg_img_no + batch_idx] = score; + } + auto postprocess_end = std::chrono::steady_clock::now(); + postprocess_diff += postprocess_end - postprocess_start; + } + times.push_back(double(preprocess_diff.count() * 1000)); + times.push_back(double(inference_diff.count() * 1000)); + times.push_back(double(postprocess_diff.count() * 1000)); } - std::vector input(batch_num * cls_image_shape[0] * - cls_image_shape[1] * cls_image_shape[2], - 0.0f); - this->permute_op_.Run(norm_img_batch, input.data()); - auto preprocess_end = std::chrono::steady_clock::now(); - preprocess_diff += preprocess_end - preprocess_start; - - // inference. 
- auto input_names = this->predictor_->GetInputNames(); - auto input_t = this->predictor_->GetInputHandle(input_names[0]); - input_t->Reshape({batch_num, cls_image_shape[0], cls_image_shape[1], - cls_image_shape[2]}); - auto inference_start = std::chrono::steady_clock::now(); - input_t->CopyFromCpu(input.data()); - this->predictor_->Run(); - - std::vector predict_batch; - auto output_names = this->predictor_->GetOutputNames(); - auto output_t = this->predictor_->GetOutputHandle(output_names[0]); - auto predict_shape = output_t->shape(); - - int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1, - std::multiplies()); - predict_batch.resize(out_num); - - output_t->CopyToCpu(predict_batch.data()); - auto inference_end = std::chrono::steady_clock::now(); - inference_diff += inference_end - inference_start; - - // postprocess - auto postprocess_start = std::chrono::steady_clock::now(); - for (int batch_idx = 0; batch_idx < predict_shape[0]; batch_idx++) { - int label = int( - Utility::argmax(&predict_batch[batch_idx * predict_shape[1]], - &predict_batch[(batch_idx + 1) * predict_shape[1]])); - float score = float(*std::max_element( - &predict_batch[batch_idx * predict_shape[1]], - &predict_batch[(batch_idx + 1) * predict_shape[1]])); - cls_labels[beg_img_no + batch_idx] = label; - cls_scores[beg_img_no + batch_idx] = score; - } - auto postprocess_end = std::chrono::steady_clock::now(); - postprocess_diff += postprocess_end - postprocess_start; - } - times.push_back(double(preprocess_diff.count() * 1000)); - times.push_back(double(inference_diff.count() * 1000)); - times.push_back(double(postprocess_diff.count() * 1000)); -} - -void Classifier::LoadModel(const std::string &model_dir) { - paddle_infer::Config config; - config.SetModel(model_dir + "/inference.pdmodel", - model_dir + "/inference.pdiparams"); - - if (this->use_gpu_) { - config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); - if (this->use_tensorrt_) { - auto precision = paddle_infer::Config::Precision::kFloat32; - if (this->precision_ == "fp16") { - precision = paddle_infer::Config::Precision::kHalf; - } - if (this->precision_ == "int8") { - precision = paddle_infer::Config::Precision::kInt8; - } - config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false); - if (!Utility::PathExists("./trt_cls_shape.txt")) { - config.CollectShapeRangeInfo("./trt_cls_shape.txt"); - } else { - config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true); - } - } - } else { - config.DisableGpu(); - if (this->use_mkldnn_) { - config.EnableMKLDNN(); - } - config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_); - } - - // false for zero copy tensor - config.SwitchUseFeedFetchOps(false); - // true for multiple input - config.SwitchSpecifyInputNames(true); - config.SwitchIrOptim(true); - - config.EnableMemoryOptim(); - config.DisableGlogInfo(); - - this->predictor_ = paddle_infer::CreatePredictor(config); -} + void Classifier::LoadModel(const std::string &model_dir) + { + paddle_infer::Config config; + config.SetModel(model_dir + "/inference.pdmodel", + model_dir + "/inference.pdiparams"); + + if (this->use_gpu_) + { + config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); + if (this->use_tensorrt_) + { + auto precision = paddle_infer::Config::Precision::kFloat32; + if (this->precision_ == "fp16") + { + precision = paddle_infer::Config::Precision::kHalf; + } + if (this->precision_ == "int8") + { + precision = paddle_infer::Config::Precision::kInt8; + } + config.EnableTensorRtEngine(1 << 20, 10, 3, 
precision, false, false); + if (!Utility::PathExists("./trt_cls_shape.txt")) + { + config.CollectShapeRangeInfo("./trt_cls_shape.txt"); + } + else + { + config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true); + } + } + } + else + { + config.DisableGpu(); + if (this->use_mkldnn_) + { + config.EnableMKLDNN(); + } + config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_); + } + + // false for zero copy tensor + config.SwitchUseFeedFetchOps(false); + // true for multiple input + config.SwitchSpecifyInputNames(true); + + config.SwitchIrOptim(true); + + config.EnableMemoryOptim(); + config.DisableGlogInfo(); + + this->predictor_ = paddle_infer::CreatePredictor(config); + } } // namespace PaddleOCR diff --git a/cpp/src/ocr_rec.cpp b/cpp/src/ocr_rec.cpp index 729dffa..c90ff58 100644 --- a/cpp/src/ocr_rec.cpp +++ b/cpp/src/ocr_rec.cpp @@ -14,165 +14,183 @@ #include -namespace PaddleOCR { - -void CRNNRecognizer::Run(std::vector img_list, - std::vector &rec_texts, - std::vector &rec_text_scores, - std::vector ×) { - std::chrono::duration preprocess_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - std::chrono::duration inference_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - std::chrono::duration postprocess_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - - int img_num = img_list.size(); - std::vector width_list; - for (int i = 0; i < img_num; i++) { - width_list.push_back(float(img_list[i].cols) / img_list[i].rows); - } - std::vector indices = Utility::argsort(width_list); - - for (int beg_img_no = 0; beg_img_no < img_num; - beg_img_no += this->rec_batch_num_) { - auto preprocess_start = std::chrono::steady_clock::now(); - int end_img_no = std::min(img_num, beg_img_no + this->rec_batch_num_); - int batch_num = end_img_no - beg_img_no; - int imgH = this->rec_image_shape_[1]; - int imgW = this->rec_image_shape_[2]; - float max_wh_ratio = imgW * 1.0 / imgH; - for (int ino = beg_img_no; ino < end_img_no; ino++) { - int h = img_list[indices[ino]].rows; - int w = img_list[indices[ino]].cols; - float wh_ratio = w * 1.0 / h; - max_wh_ratio = std::max(max_wh_ratio, wh_ratio); - } - - int batch_width = imgW; - std::vector norm_img_batch; - for (int ino = beg_img_no; ino < end_img_no; ino++) { - cv::Mat srcimg; - img_list[indices[ino]].copyTo(srcimg); - cv::Mat resize_img; - this->resize_op_.Run(srcimg, resize_img, max_wh_ratio, - this->use_tensorrt_, this->rec_image_shape_); - this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, - this->is_scale_); - norm_img_batch.push_back(resize_img); - batch_width = std::max(resize_img.cols, batch_width); +namespace PaddleOCR +{ + + void CRNNRecognizer::Run(std::vector img_list, + std::vector &rec_texts, + std::vector &rec_text_scores, + std::vector ×) + { + std::chrono::duration preprocess_diff = std::chrono::duration::zero(); + std::chrono::duration inference_diff = std::chrono::duration::zero(); + std::chrono::duration postprocess_diff = std::chrono::duration::zero(); + + int img_num = img_list.size(); + std::vector width_list; + for (int i = 0; i < img_num; i++) + { + width_list.push_back(float(img_list[i].cols) / img_list[i].rows); + } + std::vector indices = Utility::argsort(width_list); + + for (int beg_img_no = 0; beg_img_no < img_num; + beg_img_no += this->rec_batch_num_) + { + auto preprocess_start = std::chrono::steady_clock::now(); + int end_img_no = std::min(img_num, beg_img_no + this->rec_batch_num_); + int batch_num = 
end_img_no - beg_img_no; + int imgH = this->rec_image_shape_[1]; + int imgW = this->rec_image_shape_[2]; + float max_wh_ratio = imgW * 1.0 / imgH; + for (int ino = beg_img_no; ino < end_img_no; ino++) + { + int h = img_list[indices[ino]].rows; + int w = img_list[indices[ino]].cols; + float wh_ratio = w * 1.0 / h; + max_wh_ratio = std::max(max_wh_ratio, wh_ratio); + } + + int batch_width = imgW; + std::vector norm_img_batch; + for (int ino = beg_img_no; ino < end_img_no; ino++) + { + cv::Mat srcimg; + img_list[indices[ino]].copyTo(srcimg); + cv::Mat resize_img; + this->resize_op_.Run(srcimg, resize_img, max_wh_ratio, + this->use_tensorrt_, this->rec_image_shape_); + this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, + this->is_scale_); + norm_img_batch.push_back(resize_img); + batch_width = std::max(resize_img.cols, batch_width); + } + + std::vector input(batch_num * 3 * imgH * batch_width, 0.0f); + this->permute_op_.Run(norm_img_batch, input.data()); + auto preprocess_end = std::chrono::steady_clock::now(); + preprocess_diff += preprocess_end - preprocess_start; + // Inference. + auto input_names = this->predictor_->GetInputNames(); + auto input_t = this->predictor_->GetInputHandle(input_names[0]); + input_t->Reshape({batch_num, 3, imgH, batch_width}); + auto inference_start = std::chrono::steady_clock::now(); + input_t->CopyFromCpu(input.data()); + this->predictor_->Run(); + + std::vector predict_batch; + auto output_names = this->predictor_->GetOutputNames(); + auto output_t = this->predictor_->GetOutputHandle(output_names[0]); + auto predict_shape = output_t->shape(); + + int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1, + std::multiplies()); + predict_batch.resize(out_num); + // predict_batch is the result of Last FC with softmax + output_t->CopyToCpu(predict_batch.data()); + auto inference_end = std::chrono::steady_clock::now(); + inference_diff += inference_end - inference_start; + // ctc decode + auto postprocess_start = std::chrono::steady_clock::now(); + for (int m = 0; m < predict_shape[0]; m++) + { + std::string str_res; + int argmax_idx; + int last_index = 0; + float score = 0.f; + int count = 0; + float max_value = 0.0f; + + for (int n = 0; n < predict_shape[1]; n++) + { + // get idx + argmax_idx = int(Utility::argmax( + &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]], + &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]])); + // get score + max_value = float(*std::max_element( + &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]], + &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]])); + + if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) + { + score += max_value; + count += 1; + str_res += label_list_[argmax_idx]; + } + last_index = argmax_idx; + } + score /= count; + if (std::isnan(score)) + { + continue; + } + rec_texts[indices[beg_img_no + m]] = str_res; + rec_text_scores[indices[beg_img_no + m]] = score; + } + auto postprocess_end = std::chrono::steady_clock::now(); + postprocess_diff += postprocess_end - postprocess_start; + } + times.push_back(double(preprocess_diff.count() * 1000)); + times.push_back(double(inference_diff.count() * 1000)); + times.push_back(double(postprocess_diff.count() * 1000)); } - std::vector input(batch_num * 3 * imgH * batch_width, 0.0f); - this->permute_op_.Run(norm_img_batch, input.data()); - auto preprocess_end = std::chrono::steady_clock::now(); - preprocess_diff += preprocess_end - preprocess_start; - // Inference. 
- auto input_names = this->predictor_->GetInputNames(); - auto input_t = this->predictor_->GetInputHandle(input_names[0]); - input_t->Reshape({batch_num, 3, imgH, batch_width}); - auto inference_start = std::chrono::steady_clock::now(); - input_t->CopyFromCpu(input.data()); - this->predictor_->Run(); - - std::vector predict_batch; - auto output_names = this->predictor_->GetOutputNames(); - auto output_t = this->predictor_->GetOutputHandle(output_names[0]); - auto predict_shape = output_t->shape(); - - int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1, - std::multiplies()); - predict_batch.resize(out_num); - // predict_batch is the result of Last FC with softmax - output_t->CopyToCpu(predict_batch.data()); - auto inference_end = std::chrono::steady_clock::now(); - inference_diff += inference_end - inference_start; - // ctc decode - auto postprocess_start = std::chrono::steady_clock::now(); - for (int m = 0; m < predict_shape[0]; m++) { - std::string str_res; - int argmax_idx; - int last_index = 0; - float score = 0.f; - int count = 0; - float max_value = 0.0f; - - for (int n = 0; n < predict_shape[1]; n++) { - // get idx - argmax_idx = int(Utility::argmax( - &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]], - &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]])); - // get score - max_value = float(*std::max_element( - &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]], - &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]])); - - if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) { - score += max_value; - count += 1; - str_res += label_list_[argmax_idx]; + void CRNNRecognizer::LoadModel(const std::string &model_dir) + { + paddle_infer::Config config; + config.SetModel(model_dir + "/inference.pdmodel", + model_dir + "/inference.pdiparams"); + if (this->use_gpu_) + { + config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); + if (this->use_tensorrt_) + { + auto precision = paddle_infer::Config::Precision::kFloat32; + if (this->precision_ == "fp16") + { + precision = paddle_infer::Config::Precision::kHalf; + } + if (this->precision_ == "int8") + { + precision = paddle_infer::Config::Precision::kInt8; + } + if (!Utility::PathExists("./trt_rec_shape.txt")) + { + config.CollectShapeRangeInfo("./trt_rec_shape.txt"); + } + else + { + config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true); + } + } + } + else + { + config.DisableGpu(); + if (this->use_mkldnn_) + { + config.EnableMKLDNN(); + // cache 10 different shapes for mkldnn to avoid memory leak + config.SetMkldnnCacheCapacity(10); + } + config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_); } - last_index = argmax_idx; - } - score /= count; - if (std::isnan(score)) { - continue; - } - rec_texts[indices[beg_img_no + m]] = str_res; - rec_text_scores[indices[beg_img_no + m]] = score; - } - auto postprocess_end = std::chrono::steady_clock::now(); - postprocess_diff += postprocess_end - postprocess_start; - } - times.push_back(double(preprocess_diff.count() * 1000)); - times.push_back(double(inference_diff.count() * 1000)); - times.push_back(double(postprocess_diff.count() * 1000)); -} - -void CRNNRecognizer::LoadModel(const std::string &model_dir) { - paddle_infer::Config config; - config.SetModel(model_dir + "/inference.pdmodel", - model_dir + "/inference.pdiparams"); - if (this->use_gpu_) { - config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); - if (this->use_tensorrt_) { - auto precision = paddle_infer::Config::Precision::kFloat32; 
- if (this->precision_ == "fp16") { - precision = paddle_infer::Config::Precision::kHalf; - } - if (this->precision_ == "int8") { - precision = paddle_infer::Config::Precision::kInt8; - } - if (!Utility::PathExists("./trt_rec_shape.txt")) { - config.CollectShapeRangeInfo("./trt_rec_shape.txt"); - } else { - config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true); - } - } - } else { - config.DisableGpu(); - if (this->use_mkldnn_) { - config.EnableMKLDNN(); - // cache 10 different shapes for mkldnn to avoid memory leak - config.SetMkldnnCacheCapacity(10); - } - config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_); - } - // get pass_builder object - auto pass_builder = config.pass_builder(); - // delete "matmul_transpose_reshape_fuse_pass" - pass_builder->DeletePass("matmul_transpose_reshape_fuse_pass"); - config.SwitchUseFeedFetchOps(false); - // true for multiple input - config.SwitchSpecifyInputNames(true); + // get pass_builder object + auto pass_builder = config.pass_builder(); + // delete "matmul_transpose_reshape_fuse_pass" + pass_builder->DeletePass("matmul_transpose_reshape_fuse_pass"); + config.SwitchUseFeedFetchOps(false); + // true for multiple input + config.SwitchSpecifyInputNames(true); - config.SwitchIrOptim(true); + config.SwitchIrOptim(true); - config.EnableMemoryOptim(); - config.DisableGlogInfo(); + config.EnableMemoryOptim(); + config.DisableGlogInfo(); - this->predictor_ = paddle_infer::CreatePredictor(config); -} + this->predictor_ = paddle_infer::CreatePredictor(config); + } } // namespace PaddleOCR diff --git a/cpp/src/paddleocr.cpp b/cpp/src/paddleocr.cpp index f0afb0f..59bf205 100644 --- a/cpp/src/paddleocr.cpp +++ b/cpp/src/paddleocr.cpp @@ -24,30 +24,31 @@ namespace PaddleOCR { if (FLAGS_det) { - this->detector_ = new DBDetector( + // 使用智能指针,创建一个新的 DBDetector 对象,并将其管理权转移给 detector_ + this->detector_.reset(new DBDetector( FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_limit_type, FLAGS_limit_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, FLAGS_det_db_score_mode, FLAGS_use_dilation, - FLAGS_use_tensorrt, FLAGS_precision); + FLAGS_use_tensorrt, FLAGS_precision)); } if (FLAGS_cls && FLAGS_use_angle_cls) { - this->classifier_ = new Classifier( + this->classifier_.reset(new Classifier( FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_cls_thresh, - FLAGS_use_tensorrt, FLAGS_precision, FLAGS_cls_batch_num); + FLAGS_use_tensorrt, FLAGS_precision, FLAGS_cls_batch_num)); } if (FLAGS_rec) { - this->recognizer_ = new CRNNRecognizer( + this->recognizer_.reset(new CRNNRecognizer( FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_rec_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num, - FLAGS_rec_img_h, FLAGS_rec_img_w); + FLAGS_rec_img_h, FLAGS_rec_img_w)); } - }; + } std::vector> // 对一批Mat列表进行OCR PPOCR::ocr(std::vector img_list, bool det, bool rec, bool cls) @@ -58,7 +59,7 @@ namespace PaddleOCR { // 不需要det的流程 std::vector ocr_result; ocr_result.resize(img_list.size()); - if (cls && this->classifier_ != nullptr) + if (cls && this->classifier_) { this->cls(img_list, ocr_result); for (int i = 0; i < img_list.size(); i++) @@ -103,8 +104,8 @@ namespace PaddleOCR // det if (det) { - this->det(img, ocr_result); // 取det结果 - // 按det结果,裁切图片 + this->det(img, ocr_result); // 取det结果 + // 按det结果,裁切图片 for (int j = 0; j < 
ocr_result.size(); j++) { cv::Mat crop_img; @@ -114,15 +115,15 @@ namespace PaddleOCR } else { - // 创建一个box,大小与整张图片相同 - std::vector> box = { {0, 0}, {img.cols - 1, 0}, {img.cols - 1, img.rows - 1}, {0, img.rows - 1} }; + // 创建一个box,大小与整张图片相同 + std::vector> box = {{0, 0}, {img.cols - 1, 0}, {img.cols - 1, img.rows - 1}, {0, img.rows - 1}}; OCRPredictResult res; res.box = box; ocr_result.push_back(res); img_list.push_back(img); } // cls - if (cls && this->classifier_ != nullptr) + if (cls && this->classifier_) { this->cls(img_list, ocr_result); for (int i = 0; i < img_list.size(); i++) @@ -235,20 +236,4 @@ namespace PaddleOCR } } - PPOCR::~PPOCR() - { - if (this->detector_ != nullptr) - { - delete this->detector_; - } - if (this->classifier_ != nullptr) - { - delete this->classifier_; - } - if (this->recognizer_ != nullptr) - { - delete this->recognizer_; - } - }; - } // namespace PaddleOCR diff --git a/cpp/src/paddlestructure.cpp b/cpp/src/paddlestructure.cpp index b2e35f8..9adc3d7 100644 --- a/cpp/src/paddlestructure.cpp +++ b/cpp/src/paddlestructure.cpp @@ -17,279 +17,321 @@ #include "auto_log/autolog.h" -namespace PaddleOCR { +namespace PaddleOCR +{ -PaddleStructure::PaddleStructure() { - if (FLAGS_layout) { - this->layout_model_ = new StructureLayoutRecognizer( - FLAGS_layout_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, - FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_layout_dict_path, - FLAGS_use_tensorrt, FLAGS_precision, FLAGS_layout_score_threshold, - FLAGS_layout_nms_threshold); - } - if (FLAGS_table) { - this->table_model_ = new StructureTableRecognizer( - FLAGS_table_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, - FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_table_char_dict_path, - FLAGS_use_tensorrt, FLAGS_precision, FLAGS_table_batch_num, - FLAGS_table_max_len, FLAGS_merge_no_span_structure); - } -}; + PaddleStructure::PaddleStructure() + { + if (FLAGS_layout) + { + this->layout_model_.reset(new StructureLayoutRecognizer( + FLAGS_layout_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, + FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_layout_dict_path, + FLAGS_use_tensorrt, FLAGS_precision, FLAGS_layout_score_threshold, + FLAGS_layout_nms_threshold)); + } + if (FLAGS_table) + { + this->table_model_.reset(new StructureTableRecognizer( + FLAGS_table_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, + FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_table_char_dict_path, + FLAGS_use_tensorrt, FLAGS_precision, FLAGS_table_batch_num, + FLAGS_table_max_len, FLAGS_merge_no_span_structure)); + } + } -std::vector -PaddleStructure::structure(cv::Mat srcimg, bool layout, bool table, bool ocr) { - cv::Mat img; - srcimg.copyTo(img); + std::vector + PaddleStructure::structure(cv::Mat srcimg, bool layout, bool table, bool ocr) + { + cv::Mat img; + srcimg.copyTo(img); - std::vector structure_results; + std::vector structure_results; - if (layout) { - this->layout(img, structure_results); - } else { - StructurePredictResult res; - res.type = "table"; - res.box = std::vector(4, 0.0); - res.box[2] = img.cols; - res.box[3] = img.rows; - structure_results.push_back(res); - } - cv::Mat roi_img; - for (int i = 0; i < structure_results.size(); i++) { - // crop image - roi_img = Utility::crop_image(img, structure_results[i].box); - if (structure_results[i].type == "table" && table) { - this->table(roi_img, structure_results[i]); - } else if (ocr) { - structure_results[i].text_res = this->ocr(roi_img, true, true, false); - } - } + if (layout) + { + this->layout(img, 
structure_results); + } + else + { + StructurePredictResult res; + res.type = "table"; + res.box = std::vector(4, 0.0); + res.box[2] = img.cols; + res.box[3] = img.rows; + structure_results.push_back(res); + } + cv::Mat roi_img; + for (int i = 0; i < structure_results.size(); i++) + { + // crop image + roi_img = Utility::crop_image(img, structure_results[i].box); + if (structure_results[i].type == "table" && table) + { + this->table(roi_img, structure_results[i]); + } + else if (ocr) + { + structure_results[i].text_res = this->ocr(roi_img, true, true, false); + } + } - return structure_results; -}; + return structure_results; + } -void PaddleStructure::layout( - cv::Mat img, std::vector &structure_result) { - std::vector layout_times; - this->layout_model_->Run(img, structure_result, layout_times); + void PaddleStructure::layout( + cv::Mat img, std::vector &structure_result) + { + std::vector layout_times; + this->layout_model_->Run(img, structure_result, layout_times); - this->time_info_layout[0] += layout_times[0]; - this->time_info_layout[1] += layout_times[1]; - this->time_info_layout[2] += layout_times[2]; -} + this->time_info_layout[0] += layout_times[0]; + this->time_info_layout[1] += layout_times[1]; + this->time_info_layout[2] += layout_times[2]; + } -void PaddleStructure::table(cv::Mat img, - StructurePredictResult &structure_result) { - // predict structure - std::vector> structure_html_tags; - std::vector structure_scores(1, 0); - std::vector>> structure_boxes; - std::vector structure_times; - std::vector img_list; - img_list.push_back(img); + void PaddleStructure::table(cv::Mat img, + StructurePredictResult &structure_result) + { + // predict structure + std::vector> structure_html_tags; + std::vector structure_scores(1, 0); + std::vector>> structure_boxes; + std::vector structure_times; + std::vector img_list; + img_list.push_back(img); - this->table_model_->Run(img_list, structure_html_tags, structure_scores, - structure_boxes, structure_times); + this->table_model_->Run(img_list, structure_html_tags, structure_scores, + structure_boxes, structure_times); - this->time_info_table[0] += structure_times[0]; - this->time_info_table[1] += structure_times[1]; - this->time_info_table[2] += structure_times[2]; + this->time_info_table[0] += structure_times[0]; + this->time_info_table[1] += structure_times[1]; + this->time_info_table[2] += structure_times[2]; - std::vector ocr_result; - std::string html; - int expand_pixel = 3; + std::vector ocr_result; + std::string html; + int expand_pixel = 3; - for (int i = 0; i < img_list.size(); i++) { - // det - this->det(img_list[i], ocr_result); - // crop image - std::vector rec_img_list; - std::vector ocr_box; - for (int j = 0; j < ocr_result.size(); j++) { - ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[j].box); - ocr_box[0] = std::max(0, ocr_box[0] - expand_pixel); - ocr_box[1] = std::max(0, ocr_box[1] - expand_pixel), - ocr_box[2] = std::min(img_list[i].cols, ocr_box[2] + expand_pixel); - ocr_box[3] = std::min(img_list[i].rows, ocr_box[3] + expand_pixel); + for (int i = 0; i < img_list.size(); i++) + { + // det + this->det(img_list[i], ocr_result); + // crop image + std::vector rec_img_list; + std::vector ocr_box; + for (int j = 0; j < ocr_result.size(); j++) + { + ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[j].box); + ocr_box[0] = std::max(0, ocr_box[0] - expand_pixel); + ocr_box[1] = std::max(0, ocr_box[1] - expand_pixel), + ocr_box[2] = std::min(img_list[i].cols, ocr_box[2] + expand_pixel); + ocr_box[3] = std::min(img_list[i].rows, 
ocr_box[3] + expand_pixel); - cv::Mat crop_img = Utility::crop_image(img_list[i], ocr_box); - rec_img_list.push_back(crop_img); + cv::Mat crop_img = Utility::crop_image(img_list[i], ocr_box); + rec_img_list.push_back(crop_img); + } + // rec + this->rec(rec_img_list, ocr_result); + // rebuild table + html = this->rebuild_table(structure_html_tags[i], structure_boxes[i], + ocr_result); + structure_result.html = html; + structure_result.cell_box = structure_boxes[i]; + structure_result.html_score = structure_scores[i]; + } } - // rec - this->rec(rec_img_list, ocr_result); - // rebuild table - html = this->rebuild_table(structure_html_tags[i], structure_boxes[i], - ocr_result); - structure_result.html = html; - structure_result.cell_box = structure_boxes[i]; - structure_result.html_score = structure_scores[i]; - } -}; - -std::string -PaddleStructure::rebuild_table(std::vector structure_html_tags, - std::vector> structure_boxes, - std::vector &ocr_result) { - // match text in same cell - std::vector> matched(structure_boxes.size(), - std::vector()); - std::vector ocr_box; - std::vector structure_box; - for (int i = 0; i < ocr_result.size(); i++) { - ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[i].box); - ocr_box[0] -= 1; - ocr_box[1] -= 1; - ocr_box[2] += 1; - ocr_box[3] += 1; - std::vector> dis_list(structure_boxes.size(), - std::vector(3, 100000.0)); - for (int j = 0; j < structure_boxes.size(); j++) { - if (structure_boxes[i].size() == 8) { - structure_box = Utility::xyxyxyxy2xyxy(structure_boxes[j]); - } else { - structure_box = structure_boxes[j]; - } - dis_list[j][0] = this->dis(ocr_box, structure_box); - dis_list[j][1] = 1 - Utility::iou(ocr_box, structure_box); - dis_list[j][2] = j; - } - // find min dis idx - std::sort(dis_list.begin(), dis_list.end(), - PaddleStructure::comparison_dis); - matched[dis_list[0][2]].push_back(ocr_result[i].text); - } + std::string + PaddleStructure::rebuild_table(std::vector structure_html_tags, + std::vector> structure_boxes, + std::vector &ocr_result) + { + // match text in same cell + std::vector> matched(structure_boxes.size(), + std::vector()); - // get pred html - std::string html_str = ""; - int td_tag_idx = 0; - for (int i = 0; i < structure_html_tags.size(); i++) { - if (structure_html_tags[i].find("") != std::string::npos) { - if (structure_html_tags[i].find("") != std::string::npos) { - html_str += ""; - } - if (matched[td_tag_idx].size() > 0) { - bool b_with = false; - if (matched[td_tag_idx][0].find("") != std::string::npos && - matched[td_tag_idx].size() > 1) { - b_with = true; - html_str += ""; - } - for (int j = 0; j < matched[td_tag_idx].size(); j++) { - std::string content = matched[td_tag_idx][j]; - if (matched[td_tag_idx].size() > 1) { - // remove blank, and - if (content.length() > 0 && content.at(0) == ' ') { - content = content.substr(0); - } - if (content.length() > 2 && content.substr(0, 3) == "") { - content = content.substr(3); + std::vector ocr_box; + std::vector structure_box; + for (int i = 0; i < ocr_result.size(); i++) + { + ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[i].box); + ocr_box[0] -= 1; + ocr_box[1] -= 1; + ocr_box[2] += 1; + ocr_box[3] += 1; + std::vector> dis_list(structure_boxes.size(), + std::vector(3, 100000.0)); + for (int j = 0; j < structure_boxes.size(); j++) + { + if (structure_boxes[i].size() == 8) + { + structure_box = Utility::xyxyxyxy2xyxy(structure_boxes[j]); + } + else + { + structure_box = structure_boxes[j]; + } + dis_list[j][0] = this->dis(ocr_box, structure_box); + dis_list[j][1] = 1 - 
Utility::iou(ocr_box, structure_box); + dis_list[j][2] = j; } - if (content.length() > 4 && - content.substr(content.length() - 4) == "") { - content = content.substr(0, content.length() - 4); - } - if (content.empty()) { - continue; + // find min dis idx + std::sort(dis_list.begin(), dis_list.end(), + PaddleStructure::comparison_dis); + matched[dis_list[0][2]].push_back(ocr_result[i].text); + } + + // get pred html + std::string html_str = ""; + int td_tag_idx = 0; + for (int i = 0; i < structure_html_tags.size(); i++) + { + if (structure_html_tags[i].find("") != std::string::npos) + { + if (structure_html_tags[i].find("") != std::string::npos) + { + html_str += ""; + } + if (matched[td_tag_idx].size() > 0) + { + bool b_with = false; + if (matched[td_tag_idx][0].find("") != std::string::npos && + matched[td_tag_idx].size() > 1) + { + b_with = true; + html_str += ""; + } + for (int j = 0; j < matched[td_tag_idx].size(); j++) + { + std::string content = matched[td_tag_idx][j]; + if (matched[td_tag_idx].size() > 1) + { + // remove blank, and + if (content.length() > 0 && content.at(0) == ' ') + { + content = content.substr(0); + } + if (content.length() > 2 && content.substr(0, 3) == "") + { + content = content.substr(3); + } + if (content.length() > 4 && + content.substr(content.length() - 4) == "") + { + content = content.substr(0, content.length() - 4); + } + if (content.empty()) + { + continue; + } + // add blank + if (j != matched[td_tag_idx].size() - 1 && + content.at(content.length() - 1) != ' ') + { + content += ' '; + } + } + html_str += content; + } + if (b_with) + { + html_str += ""; + } + } + if (structure_html_tags[i].find("") != std::string::npos) + { + html_str += ""; + } + else + { + html_str += structure_html_tags[i]; + } + td_tag_idx += 1; } - // add blank - if (j != matched[td_tag_idx].size() - 1 && - content.at(content.length() - 1) != ' ') { - content += ' '; + else + { + html_str += structure_html_tags[i]; } - } - html_str += content; - } - if (b_with) { - html_str += ""; } - } - if (structure_html_tags[i].find("") != std::string::npos) { - html_str += ""; - } else { - html_str += structure_html_tags[i]; - } - td_tag_idx += 1; - } else { - html_str += structure_html_tags[i]; + return html_str; } - } - return html_str; -} -float PaddleStructure::dis(std::vector &box1, std::vector &box2) { - int x1_1 = box1[0]; - int y1_1 = box1[1]; - int x2_1 = box1[2]; - int y2_1 = box1[3]; + float PaddleStructure::dis(std::vector &box1, std::vector &box2) + { + int x1_1 = box1[0]; + int y1_1 = box1[1]; + int x2_1 = box1[2]; + int y2_1 = box1[3]; - int x1_2 = box2[0]; - int y1_2 = box2[1]; - int x2_2 = box2[2]; - int y2_2 = box2[3]; + int x1_2 = box2[0]; + int y1_2 = box2[1]; + int x2_2 = box2[2]; + int y2_2 = box2[3]; - float dis = - abs(x1_2 - x1_1) + abs(y1_2 - y1_1) + abs(x2_2 - x2_1) + abs(y2_2 - y2_1); - float dis_2 = abs(x1_2 - x1_1) + abs(y1_2 - y1_1); - float dis_3 = abs(x2_2 - x2_1) + abs(y2_2 - y2_1); - return dis + std::min(dis_2, dis_3); -} - -void PaddleStructure::reset_timer() { - this->time_info_det = {0, 0, 0}; - this->time_info_rec = {0, 0, 0}; - this->time_info_cls = {0, 0, 0}; - this->time_info_table = {0, 0, 0}; - this->time_info_layout = {0, 0, 0}; -} + float dis = + abs(x1_2 - x1_1) + abs(y1_2 - y1_1) + abs(x2_2 - x2_1) + abs(y2_2 - y2_1); + float dis_2 = abs(x1_2 - x1_1) + abs(y1_2 - y1_1); + float dis_3 = abs(x2_2 - x2_1) + abs(y2_2 - y2_1); + return dis + std::min(dis_2, dis_3); + } -void PaddleStructure::benchmark_log(int img_num) { - if 
(this->time_info_det[0] + this->time_info_det[1] + this->time_info_det[2] > - 0) { - AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt, - FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic", - FLAGS_precision, this->time_info_det, img_num); - autolog_det.report(); - } - if (this->time_info_rec[0] + this->time_info_rec[1] + this->time_info_rec[2] > - 0) { - AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt, - FLAGS_enable_mkldnn, FLAGS_cpu_threads, - FLAGS_rec_batch_num, "dynamic", FLAGS_precision, - this->time_info_rec, img_num); - autolog_rec.report(); - } - if (this->time_info_cls[0] + this->time_info_cls[1] + this->time_info_cls[2] > - 0) { - AutoLogger autolog_cls("ocr_cls", FLAGS_use_gpu, FLAGS_use_tensorrt, - FLAGS_enable_mkldnn, FLAGS_cpu_threads, - FLAGS_cls_batch_num, "dynamic", FLAGS_precision, - this->time_info_cls, img_num); - autolog_cls.report(); - } - if (this->time_info_table[0] + this->time_info_table[1] + - this->time_info_table[2] > - 0) { - AutoLogger autolog_table("table", FLAGS_use_gpu, FLAGS_use_tensorrt, - FLAGS_enable_mkldnn, FLAGS_cpu_threads, - FLAGS_cls_batch_num, "dynamic", FLAGS_precision, - this->time_info_table, img_num); - autolog_table.report(); - } - if (this->time_info_layout[0] + this->time_info_layout[1] + - this->time_info_layout[2] > - 0) { - AutoLogger autolog_layout("layout", FLAGS_use_gpu, FLAGS_use_tensorrt, - FLAGS_enable_mkldnn, FLAGS_cpu_threads, - FLAGS_cls_batch_num, "dynamic", FLAGS_precision, - this->time_info_layout, img_num); - autolog_layout.report(); - } -} + void PaddleStructure::reset_timer() + { + this->time_info_det = {0, 0, 0}; + this->time_info_rec = {0, 0, 0}; + this->time_info_cls = {0, 0, 0}; + this->time_info_table = {0, 0, 0}; + this->time_info_layout = {0, 0, 0}; + } -PaddleStructure::~PaddleStructure() { - if (this->table_model_ != nullptr) { - delete this->table_model_; - } -}; + void PaddleStructure::benchmark_log(int img_num) + { + if (this->time_info_det[0] + this->time_info_det[1] + this->time_info_det[2] > + 0) + { + AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt, + FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic", + FLAGS_precision, this->time_info_det, img_num); + autolog_det.report(); + } + if (this->time_info_rec[0] + this->time_info_rec[1] + this->time_info_rec[2] > + 0) + { + AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt, + FLAGS_enable_mkldnn, FLAGS_cpu_threads, + FLAGS_rec_batch_num, "dynamic", FLAGS_precision, + this->time_info_rec, img_num); + autolog_rec.report(); + } + if (this->time_info_cls[0] + this->time_info_cls[1] + this->time_info_cls[2] > + 0) + { + AutoLogger autolog_cls("ocr_cls", FLAGS_use_gpu, FLAGS_use_tensorrt, + FLAGS_enable_mkldnn, FLAGS_cpu_threads, + FLAGS_cls_batch_num, "dynamic", FLAGS_precision, + this->time_info_cls, img_num); + autolog_cls.report(); + } + if (this->time_info_table[0] + this->time_info_table[1] + + this->time_info_table[2] > + 0) + { + AutoLogger autolog_table("table", FLAGS_use_gpu, FLAGS_use_tensorrt, + FLAGS_enable_mkldnn, FLAGS_cpu_threads, + FLAGS_cls_batch_num, "dynamic", FLAGS_precision, + this->time_info_table, img_num); + autolog_table.report(); + } + if (this->time_info_layout[0] + this->time_info_layout[1] + + this->time_info_layout[2] > + 0) + { + AutoLogger autolog_layout("layout", FLAGS_use_gpu, FLAGS_use_tensorrt, + FLAGS_enable_mkldnn, FLAGS_cpu_threads, + FLAGS_cls_batch_num, "dynamic", FLAGS_precision, + this->time_info_layout, img_num); + autolog_layout.report(); + 
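
rebuild_table above scores every (OCR box, table cell) pair with a [L1 distance, 1 - IoU, cell index] triple and picks the cell whose triple sorts first under comparison_dis, i.e. best overlap first with distance as the tie-breaker. A toy run of that selection with made-up scores (the Utility::iou / dis calls are replaced by constants):

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

static bool comparison_dis(const std::vector<float> &dis1, const std::vector<float> &dis2)
{
    if (dis1[1] < dis2[1])
        return true;
    if (dis1[1] == dis2[1])
        return dis1[0] < dis2[0];
    return false;
}

int main()
{
    // One triple per candidate cell: [L1 distance, 1 - IoU, cell index]
    std::vector<std::vector<float>> dis_list = {
        {120.0f, 1.0f, 0.0f}, // far away, no overlap
        {30.0f, 0.4f, 1.0f},  // close and well overlapped -> should win
        {25.0f, 0.9f, 2.0f},  // slightly closer but barely overlapped
    };
    std::sort(dis_list.begin(), dis_list.end(), comparison_dis);
    std::cout << "matched cell: " << int(dis_list[0][2]) << std::endl; // prints "matched cell: 1"
    return 0;
}
```
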
} + } } // namespace PaddleOCR \ No newline at end of file diff --git a/cpp/src/task.cpp b/cpp/src/task.cpp index ecc23ad..a53a0d4 100644 --- a/cpp/src/task.cpp +++ b/cpp/src/task.cpp @@ -56,8 +56,8 @@ namespace PaddleOCR j["data"] = msg; return json_dump(j); } - - // 将OCR结果转换为json字符串 + + // 将OCR结果转换为json字符串 std::string Task::get_ocr_result_json(const std::vector &ocr_result) { nlohmann::json outJ; @@ -73,13 +73,13 @@ namespace PaddleOCR // 无包围盒 if (b.empty()) { - if (FLAGS_det) // 开了det仍无包围盒,跳过本组 + if (FLAGS_det) // 开了det仍无包围盒,跳过本组 continue; - else // 未开det,填充空包围盒 + else // 未开det,填充空包围盒 for (int bi = 0; bi < 4; bi++) b.push_back(std::vector{-1, -1}); } - // 启用了rec仍没有文字,跳过本组 + // 启用了rec仍没有文字,跳过本组 if (FLAGS_rec && (j["score"] <= 0 || j["text"] == "")) { continue; @@ -88,131 +88,154 @@ namespace PaddleOCR { j["box"] = {{b[0][0], b[0][1]}, {b[1][0], b[1][1]}, {b[2][0], b[2][1]}, {b[3][0], b[3][1]}}; } - // 如果启用了cls,则cls_label有实际值,那么写入方向分类相关参数 + // 如果启用了cls,则cls_label有实际值,那么写入方向分类相关参数 if (ocr_result[i].cls_label != -1) { - j["cls_label"] = ocr_result[i].cls_label; // 方向标签,0表示顺时针0°或90°,1表示180°或270° - j["cls_score"] = ocr_result[i].cls_score; // 方向标签置信度,越接近1越可信 + j["cls_label"] = ocr_result[i].cls_label; // 方向标签,0表示顺时针0°或90°,1表示180°或270° + j["cls_score"] = ocr_result[i].cls_score; // 方向标签置信度,越接近1越可信 } outJ["data"].push_back(j); isEmpty = false; } - // 结果1:识别成功,无文字(rec未检出) + // 结果1:识别成功,无文字(rec未检出) if (isEmpty) { return ""; } - // 结果2:识别成功,有文字 + // 结果2:识别成功,有文字 return json_dump(outJ); } // 输入base64编码的字符串,返回Mat - cv::Mat Task::imread_base64(std::string& b64str, int flag) { + cv::Mat Task::imread_base64(std::string &b64str, int flag) + { std::string decoded_string; - try { + try + { decoded_string = base64_decode(b64str); } - catch (...) { - set_state(CODE_ERR_BASE64_DECODE, MSG_ERR_BASE64_DECODE); // 报告状态:解析失败 + catch (...) + { + set_state(CODE_ERR_BASE64_DECODE, MSG_ERR_BASE64_DECODE); // 报告状态:解析失败 return cv::Mat(); } - try { + try + { std::vector data(decoded_string.begin(), decoded_string.end()); cv::Mat img = cv::imdecode(data, flag); - if (img.empty()) { - set_state(CODE_ERR_BASE64_IM_DECODE, MSG_ERR_BASE64_IM_DECODE); // 报告状态:转Mat失败 + if (img.empty()) + { + set_state(CODE_ERR_BASE64_IM_DECODE, MSG_ERR_BASE64_IM_DECODE); // 报告状态:转Mat失败 } return img; } - catch (...) { - set_state(CODE_ERR_BASE64_IM_DECODE, MSG_ERR_BASE64_IM_DECODE); // 报告状态:转Mat失败 + catch (...) + { + set_state(CODE_ERR_BASE64_IM_DECODE, MSG_ERR_BASE64_IM_DECODE); // 报告状态:转Mat失败 return cv::Mat(); } } - // 输入json字符串,解析并读取Mat - cv::Mat Task::imread_json(std::string& str_in) { + // 输入json字符串,解析并读取Mat + cv::Mat Task::imread_json(std::string &str_in) + { #ifdef ENABLE_REMOTE_EXIT - if (str_in == "exit") { // 退出指令 + if (str_in == "exit") + { // 退出指令 is_exit = true; return cv::Mat(); } #endif cv::Mat img; - bool is_image_found = false; // 当前是否已找到图片 + bool is_image_found = false; // 当前是否已找到图片 std::string logstr = ""; - // 解析为json对象 + // 解析为json对象 auto j = nlohmann::json(); - try { - j = nlohmann::json::parse(str_in); // 转json对象 + try + { + j = nlohmann::json::parse(str_in); // 转json对象 } - catch (...) { - set_state(CODE_ERR_JSON_PARSE, MSG_ERR_JSON_PARSE); // 报告状态:解析失败 + catch (...) 
+ { + set_state(CODE_ERR_JSON_PARSE, MSG_ERR_JSON_PARSE); // 报告状态:解析失败 return cv::Mat(); } - for (auto& el : j.items()) { // 遍历键值 + for (auto &el : j.items()) + { // 遍历键值 #ifdef ENABLE_REMOTE_EXIT - if (el.key() == "exit") { // 退出指令 + if (el.key() == "exit") + { // 退出指令 is_exit = true; return cv::Mat(); } #endif - try { + try + { std::string value = to_string(el.value()); int vallen = value.length(); - if (vallen > 2 && value[0] == '\"' && value[vallen - 1] == '\"') { + if (vallen > 2 && value[0] == '\"' && value[vallen - 1] == '\"') + { value = value.substr(1, vallen - 2); // 删去nlohmann字符串的两端引号 } - // 提取图片 - if (!is_image_found) { - if (el.key() == "image_base64") { // base64字符串 - FLAGS_image_path = "base64"; // 设置图片路径标记,以便于无文字时的信息输出 - img = imread_base64(value); // 读取图片 + // 提取图片 + if (!is_image_found) + { + if (el.key() == "image_base64") + { // base64字符串 + FLAGS_image_path = "base64"; // 设置图片路径标记,以便于无文字时的信息输出 + img = imread_base64(value); // 读取图片 is_image_found = true; } #ifdef ENABLE_JSON_IMAGE_PATH - else if (el.key() == "image_path") { // 图片路径 + else if (el.key() == "image_path") + { // 图片路径 FLAGS_image_path = value; - img = imread_u8(value); // 读取图片 + img = imread_u8(value); // 读取图片 is_image_found = true; } #endif } - //else {} // TODO: 其它参数热更新 + // else {} // TODO: 其它参数热更新 } - catch (...) { // 安全起见,出现未知异常时结束本轮任务 - set_state(CODE_ERR_JSON_PARSE_KEY, MSG_ERR_JSON_PARSE_KEY(el.key())); // 报告状态:解析键失败 + catch (...) + { // 安全起见,出现未知异常时结束本轮任务 + set_state(CODE_ERR_JSON_PARSE_KEY, MSG_ERR_JSON_PARSE_KEY(el.key())); // 报告状态:解析键失败 return cv::Mat(); } } - if (!is_image_found) { - set_state(CODE_ERR_NO_TASK, MSG_ERR_NO_TASK); // 报告状态:未发现有效任务 + if (!is_image_found) + { + set_state(CODE_ERR_NO_TASK, MSG_ERR_NO_TASK); // 报告状态:未发现有效任务 } return img; } - // ==================== 任务流程 ==================== - std::string Task::run_ocr(std::string str_in) { + std::string Task::run_ocr(std::string str_in) + { cv::Mat img = imread_json(str_in); - if (is_exit) { // 退出 + if (is_exit) + { // 退出 return ""; } - if (img.empty()) { // 读图失败 + if (img.empty()) + { // 读图失败 return get_state_json(); } - // 执行OCR + // 执行OCR std::vector res_ocr = ppocr->ocr(img, FLAGS_det, FLAGS_rec, FLAGS_cls); - // 获取结果 + // 获取结果 std::string res_json = get_ocr_result_json(res_ocr); // 结果1:识别成功,无文字(rec未检出) - if (res_json.empty()) { + if (res_json.empty()) + { return get_state_json(CODE_OK_NONE, MSG_OK_NONE(FLAGS_image_path)); } - // 结果2:识别成功,有文字 - else { + // 结果2:识别成功,有文字 + else + { return res_json; } } @@ -221,30 +244,33 @@ namespace PaddleOCR int Task::ocr() { // 初始化引擎 - ppocr = new PPOCR(); // 创建引擎对象 + ppocr.reset(new PPOCR()); // 创建引擎实例,管理权移交给智能指针 ppocr int flag; - + #if defined(_WIN32) && defined(ENABLE_CLIPBOARD) std::cout << "OCR clipboard enbaled." << std::endl; #endif - + // 单张图片识别模式 - if (!FLAGS_image_path.empty()){ + if (!FLAGS_image_path.empty()) + { std::cout << "OCR single image mode. Path: " << FLAGS_image_path << std::endl; flag = 1; } // 套接字服务器模式 - else if (FLAGS_port >= 0 && !FLAGS_addr.empty()) { - std::cout << "OCR socket mode. Addr: " << FLAGS_addr << ", Port: " << FLAGS_port << std::endl; + else if (FLAGS_port >= 0 && !FLAGS_addr.empty()) + { + std::cout << "OCR socket mode. Addr: " << FLAGS_addr << ", Port: " << FLAGS_port << std::endl; flag = 2; } // 匿名管道模式 - else { + else + { std::cout << "OCR anonymous pipe mode." << std::endl; flag = 3; } std::cout << "OCR init completed." 
<< std::endl; - + switch (flag) { case 1: @@ -256,56 +282,60 @@ namespace PaddleOCR } return 0; } - - // 单张图片识别模式 + + // 单张图片识别模式 int Task::single_image_mode() { set_state(); cv::Mat img = imread_u8(FLAGS_image_path); if (img.empty()) - { // 读图失败 + { // 读图失败 std::cout << get_state_json() << std::endl; return 0; } - // 执行OCR + // 执行OCR std::vector res_ocr = ppocr->ocr(img, FLAGS_det, FLAGS_rec, FLAGS_cls); - // 获取结果 + // 获取结果 std::string res_json = get_ocr_result_json(res_ocr); // 结果1:识别成功,无文字(rec未检出) - if (res_json.empty()) { + if (res_json.empty()) + { std::cout << get_state_json(CODE_OK_NONE, MSG_OK_NONE(FLAGS_image_path)) << std::endl; } - // 结果2:识别成功,有文字 - else { + // 结果2:识别成功,有文字 + else + { std::cout << res_json << std::endl; } return 0; } - - // 匿名管道模式 - int Task::anonymous_pipe_mode() { - while (1) { - set_state(); // 初始化状态 - // 读取一行输入 + + // 匿名管道模式 + int Task::anonymous_pipe_mode() + { + while (1) + { + set_state(); // 初始化状态 + // 读取一行输入 std::string str_in; getline(std::cin, str_in); - // 获取ocr结果并输出 + // 获取ocr结果并输出 std::string str_out = run_ocr(str_in); - if (is_exit) { // 退出 + if (is_exit) + { // 退出 return 0; } std::cout << str_out << std::endl; } return 0; } - - // 套接字服务器模式,在平台内定义 - - + + // 套接字服务器模式,在平台内定义 + // 其他函数 - + // ipv4 地址转 uint32_t - int Task::addr_to_uint32(const std::string& addr, uint32_t& addr_out) + int Task::addr_to_uint32(const std::string &addr, uint32_t &addr_out) { // 处理特殊情况 if (addr == "loopback" || addr == "localhost") @@ -318,20 +348,20 @@ namespace PaddleOCR addr_out = htonl(INADDR_ANY); return 0; } - + // 使用正则表达式来处理IPv4地址 std::regex rgx(R"((\d+)\.(\d+)\.(\d+)\.(\d+))"); std::smatch matches; uint32_t output = 0; - + // 如果验证为IPv4地址,将其转成 uint32_t 主机字节序 - if(std::regex_search(addr, matches, rgx)) + if (std::regex_search(addr, matches, rgx)) { uint8_t octet; for (size_t i = 1; i < matches.size(); ++i) { octet = static_cast(std::stoi(matches[i].str())); - output |= octet << (8 * (4-i)); + output |= octet << (8 * (4 - i)); } } // 反之则报错 @@ -339,7 +369,7 @@ namespace PaddleOCR { return -1; } - + // 最后把 uint32_t 主机字节序 转成 网络字节序 addr_out = htonl(output); return 0;
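
addr_to_uint32 above validates the dotted quad with a regex, packs the four octets into a host-order value, and only then converts it to network byte order with htonl for the socket API. A standalone sketch of the packing step, using a hypothetical ipv4_to_host_u32 helper and leaving out the loopback/any shortcuts and the htonl call so the snippet stays header-portable:

```cpp
#include <cstdint>
#include <iostream>
#include <regex>
#include <string>

static bool ipv4_to_host_u32(const std::string &addr, uint32_t &out)
{
    std::regex rgx(R"((\d+)\.(\d+)\.(\d+)\.(\d+))");
    std::smatch matches;
    if (!std::regex_search(addr, matches, rgx))
        return false;
    out = 0;
    for (size_t i = 1; i < matches.size(); ++i)
    {
        uint8_t octet = static_cast<uint8_t>(std::stoi(matches[i].str()));
        out |= static_cast<uint32_t>(octet) << (8 * (4 - i)); // first octet lands in the top byte
    }
    return true;
}

int main()
{
    uint32_t host_order = 0;
    if (ipv4_to_host_u32("127.0.0.1", host_order))
        std::cout << std::hex << host_order << std::endl; // prints 7f000001; pass it through htonl() before bind()
    return 0;
}
```
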