From cc8c54958f0a2e6f9b9eb74964b940c7b28f46fb Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Tue, 21 Feb 2023 19:37:46 +0800 Subject: [PATCH 1/9] update ppocrv3 for rknpu2 --- FastDeploy.cmake.in | 4 +- cmake/rknpu2.cmake | 11 +- .../paddledetection/jetson/README.md | 18 + examples/vision/ocr/PP-OCRv3/rknpu2/README.md | 77 ++++ .../ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt | 14 + .../vision/ocr/PP-OCRv3/rknpu2/cpp/README.md | 55 +++ .../ocr/PP-OCRv3/rknpu2/cpp/README_CN.md | 63 +++ .../PP-OCRv3/rknpu2/cpp/infer_static_shape.cc | 126 ++++++ .../ocr/PP-OCRv3/rknpu2/python/README.md | 49 +++ .../ocr/PP-OCRv3/rknpu2/python/README_CN.md | 62 +++ .../rknpu2/python/infer_static_shape.py | 144 +++++++ fastdeploy/vision/ocr/ppocr/classifier.cc | 35 +- .../vision/ocr/ppocr/cls_preprocessor.cc | 30 +- .../vision/ocr/ppocr/cls_preprocessor.h | 9 + fastdeploy/vision/ocr/ppocr/dbdetector.cc | 29 +- .../vision/ocr/ppocr/det_preprocessor.cc | 63 ++- .../vision/ocr/ppocr/det_preprocessor.h | 28 ++ .../vision/ocr/ppocr/ocrmodel_pybind.cc | 382 +++++++++++------- .../vision/ocr/ppocr/rec_preprocessor.cc | 50 ++- .../vision/ocr/ppocr/rec_preprocessor.h | 9 + fastdeploy/vision/ocr/ppocr/recognizer.cc | 41 +- .../fastdeploy/vision/ocr/ppocr/__init__.py | 56 ++- 22 files changed, 1117 insertions(+), 238 deletions(-) create mode 100644 examples/vision/detection/paddledetection/jetson/README.md create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/README.md create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt create mode 100755 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc create mode 100755 examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md create mode 100755 examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py mode change 
100755 => 100644 fastdeploy/vision/ocr/ppocr/classifier.cc mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/dbdetector.cc mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/recognizer.cc diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 5c2c5b7338..f5c2dbe832 100644 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -131,9 +131,9 @@ endif() if(ENABLE_RKNPU2_BACKEND) if(RKNN2_TARGET_SOC STREQUAL "RK356X") - set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so) + set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so) elseif (RKNN2_TARGET_SOC STREQUAL "RK3588") - set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so) + set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so) else () message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588") endif() diff --git a/cmake/rknpu2.cmake b/cmake/rknpu2.cmake index 7f11b0bfbb..3d93a364f8 100644 --- a/cmake/rknpu2.cmake +++ b/cmake/rknpu2.cmake @@ -1,7 +1,7 @@ # get RKNPU2_URL set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") -set(RKNPU2_VERSION "1.4.0") -set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz") +set(RKNPU2_VERSION "1.4.2b0") +set(RKNPU2_FILE "rknpu2_runtime-linux-aarch64-${RKNPU2_VERSION}-${RKNN2_TARGET_SOC}.tgz") set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}") # download_and_decompress @@ -10,11 +10,12 @@ download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE} # set path set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime) +# include lib if (EXISTS ${RKNPU_RUNTIME_PATH}) - set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so) - 
include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include) + set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/lib/librknnrt.so) + include_directories(${RKNPU_RUNTIME_PATH}/include) else () - message(FATAL_ERROR "[rknpu2.cmake] download_and_decompress rknpu2_runtime error") + message(FATAL_ERROR "[rknpu2.cmake] RKNPU_RUNTIME_PATH does not exist.") endif () diff --git a/examples/vision/detection/paddledetection/jetson/README.md b/examples/vision/detection/paddledetection/jetson/README.md new file mode 100644 index 0000000000..0c6e0dd3d5 --- /dev/null +++ b/examples/vision/detection/paddledetection/jetson/README.md @@ -0,0 +1,18 @@ +## Paddle Inference模型导出 + +```bash +git clone https://github.com/PaddlePaddle/PaddleDetection.git +python tools/export_model.py -c configs/solov2/solov2_r50_fpn_1x_coco.yml --output_dir=./solov2_r50_fpn_1x_coco \ + -o weights=https://paddledet.bj.bcebos.com/models/solov2_r50_fpn_1x_coco.pdparams + +``` + +## ONNX模型导出 + +```bash +paddle2onnx --model_dir solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco \ + --model_filename model.pdmodel \ + --params_filename model.pdiparams \ + --save_file solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco.onnx \ + --enable_dev_version True +``` diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/README.md new file mode 100644 index 0000000000..06ba9fc6eb --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/rknpu2/README.md @@ -0,0 +1,77 @@ +# PaddleOCR 模型部署 + +## PaddleOCR为多个模型组合串联任务,包含如下几个模型构成 + +* 文本检测 `DBDetector` +* [可选]方向分类 `Classifer` 用于调整进入文字识别前的图像方向 +* 文字识别 `Recognizer` 用于从图像中识别出文字 + +根据不同场景, FastDeploy汇总提供如下OCR任务部署, 用户需同时下载3个模型与字典文件(或2个,分类器可选), 完成OCR整个预测流程 + +## PP-OCR 中英文系列模型 + +下表中的模型下载链接由PaddleOCR模型库提供, 详见[PP-OCR系列模型列表](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/doc/doc_ch/models_list.md) + +| OCR版本 | 文本框检测 | 方向分类模型 | 文字识别 | 字典文件 | 说明 | 
+|:-------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:--------------------------------------------------------| +| ch_PP-OCRv3[推荐] | [ch_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv3系列原始超轻量模型,支持中英文、多语种文本检测 | +| en_PP-OCRv3[推荐] | [en_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [en_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) | [en_dict.txt](https://bj.bcebos.com/paddlehub/fastdeploy/en_dict.txt) | OCRv3系列原始超轻量模型,支持英文与数字识别,除检测模型和识别模型的训练数据与中文模型不同以外,无其他区别 | +| ch_PP-OCRv2 | [ch_PP-OCRv2_det](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv2_rec](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2系列原始超轻量模型,支持中英文、多语种文本检测 | +| ch_PP-OCRv2_mobile | [ch_ppocr_mobile_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) | 
[ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_mobile_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2系列原始超轻量模型,支持中英文、多语种文本检测,比PPOCRv2更加轻量 | +| ch_PP-OCRv2_server | [ch_ppocr_server_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_server_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2服务器系列模型, 支持中英文、多语种文本检测,比超轻量模型更大,但效果更好 | + +## 模型转换 + +在RKNPU2上使用PPOCR时,我们需要把Paddle静态图模型转为RKNN模型。 + +### 静态图模型转RKNN格式模型 + +rknn_toolkit2工具暂不支持直接从Paddle静态图模型直接转换为RKNN模型,因此我们需要先将Paddle静态图模型转为RKNN模型。 + +```bash +# 下载模型和字典文件 +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar +tar -xvf ch_PP-OCRv3_det_infer.tar + +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar +tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar + +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar +tar -xvf ch_PP-OCRv3_rec_infer.tar + +# 转换模型到ONNX格式的模型 +paddle2onnx --model_dir ch_PP-OCRv3_det_infer \ + --model_filename inference.pdmodel \ + --params_filename inference.pdiparams \ + --save_file ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \ + --enable_dev_version True +paddle2onnx --model_dir ch_ppocr_mobile_v2.0_cls_infer \ + --model_filename inference.pdmodel \ + --params_filename inference.pdiparams \ + --save_file ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \ + --enable_dev_version True +paddle2onnx --model_dir ch_PP-OCRv3_rec_infer \ + --model_filename inference.pdmodel \ + --params_filename 
inference.pdiparams \ + --save_file ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \ + --enable_dev_version True + +# 固定模型的输入shape +python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \ + --output_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \ + --input_shape_dict "{'x':[1,3,960,960]}" +python -m paddle2onnx.optimize --input_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \ + --output_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \ + --input_shape_dict "{'x':[1,3,48,192]}" +python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \ + --output_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \ + --input_shape_dict "{'x':[1,3,48,320]}" + +# 转换ONNX模型到RKNN模型 +python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_det.yaml \ + --target_platform rk3588 +python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_rec.yaml \ + --target_platform rk3588 +python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_cls.yaml \ + --target_platform rk3588 +``` diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt new file mode 100644 index 0000000000..9538fea6be --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt @@ -0,0 +1,14 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_static_shape_demo ${PROJECT_SOURCE_DIR}/infer_static_shape.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_static_shape_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md new file mode 
100755 index 0000000000..af5be5360a --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md @@ -0,0 +1,55 @@ +English | [简体中文](README_CN.md) +# PPOCRv3 C++ Deployment Example + +This directory provides examples that `infer.cc` fast finishes the deployment of PPOCRv3 on CPU/GPU and GPU accelerated by TensorRT. + +Two steps before deployment + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) +- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) + +Taking the CPU inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required to support this model. + +``` +mkdir build +cd build +# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above +cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + + +# Download model, image, and dictionary files +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg + +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt + +# CPU推理 +./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \ + ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \ + ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \ + ./ppocr_keys_v1.txt \ + ./12.jpg \ + 0 +# RKNPU推理 +./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \ + ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \ + ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \ + ./ppocr_keys_v1.txt \ + ./12.jpg \ + 1 +``` + +The above command works for Linux or MacOS. For SDK in Windows, refer to: +- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +The visualized result after running is as follows + + + +## Other Documents + +- [C++ API Reference](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/) +- [PPOCR Model Description](../../) +- [PPOCRv3 Python Deployment](../python) +- [Model Prediction Results](../../../../../../docs/en/faq/how_to_change_backend.md) +- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md new file mode 100644 index 0000000000..82860ddc53 --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md @@ -0,0 +1,63 @@ +[English](README_CN.md) | 简体中文 +# PPOCRv3 C++部署示例 + +本目录下提供`infer.cc`快速完成PPOCRv3在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。 + +在部署前,需确认你已经成功完成以下两个操作: + +* [正确编译FastDeploy SDK](../../../../../../docs/cn/faq/rknpu2/build.md). +* [成功转换模型](../README.md). 
+ +在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.3以上(x.x.x>1.0.3), RKNN版本在1.4.1b22以上。 + +``` +mkdir build +cd build +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载图片和字典文件 +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt + + +# 拷贝RKNN模型到build目录 + +# CPU推理 +./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \ + ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \ + ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \ + ./ppocr_keys_v1.txt \ + ./12.jpg \ + 0 +# RKNPU推理 +./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \ + ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \ + ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \ + ./ppocr_keys_v1.txt \ + ./12.jpg \ + 1 +``` + +运行完成可视化结果如下图所示: + + + +结果输出如下: + +```text +det boxes: [[276,174],[285,173],[285,178],[276,179]]rec text: rec score:0.000000 cls label: 1 cls score: 0.766602 +det boxes: [[43,408],[483,390],[483,431],[44,449]]rec text: 上海斯格威铂尔曼大酒店 rec score:0.888450 cls label: 0 cls score: 1.000000 +det boxes: [[186,456],[399,448],[399,480],[186,488]]rec text: 打浦路15号 rec score:0.988769 cls label: 0 cls score: 1.000000 +det boxes: [[18,501],[513,485],[514,537],[18,554]]rec text: 绿洲仕格维花园公寓 rec score:0.992730 cls label: 0 cls score: 1.000000 +det boxes: [[78,553],[404,541],[404,573],[78,585]]rec text: 打浦路252935号 rec score:0.983545 cls label: 0 cls score: 1.000000 +Visualized result saved in ./vis_result.jpg +``` + + +## 其它文档 + +- [C++ API查阅](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/) +- [PPOCR 系列模型介绍](../../../README_CN.md) +- [PPOCRv3 Python部署](../python) +- [模型预测结果说明](../../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc 
b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc new file mode 100644 index 0000000000..7add35688a --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc @@ -0,0 +1,126 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +void InitAndInfer(const std::string &det_model_file, + const std::string &cls_model_file, + const std::string &rec_model_file, + const std::string &rec_label_file, + const std::string &image_file, + const fastdeploy::RuntimeOption &option, + const fastdeploy::ModelFormat &format) { + auto det_params_file = ""; + auto cls_params_file = ""; + auto rec_params_file = ""; + + auto det_option = option; + auto cls_option = option; + auto rec_option = option; + + if (format == fastdeploy::ONNX) { + std::cout << "ONNX Model" << std::endl; + } + + auto det_model = fastdeploy::vision::ocr::DBDetector( + det_model_file, det_params_file, det_option, format); + auto cls_model = fastdeploy::vision::ocr::Classifier( + cls_model_file, cls_params_file, cls_option, format); + auto rec_model = fastdeploy::vision::ocr::Recognizer( + rec_model_file, rec_params_file, rec_label_file, rec_option, format); + + if (format == fastdeploy::RKNN) { + cls_model.GetPreprocessor().DisableNormalize(); + cls_model.GetPreprocessor().DisablePermute(); + + det_model.GetPreprocessor().DisableNormalize(); + 
det_model.GetPreprocessor().DisablePermute(); + + rec_model.GetPreprocessor().DisableNormalize(); + rec_model.GetPreprocessor().DisablePermute(); + } + det_model.GetPreprocessor().SetStaticShapeInfer(true); + rec_model.GetPreprocessor().SetStaticShapeInfer(true); + + assert(det_model.Initialized()); + assert(cls_model.Initialized()); + assert(rec_model.Initialized()); + + // The classification model is optional, so the PP-OCR can also be connected + // in series as follows auto ppocr_v3 = + // fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model); + auto ppocr_v3 = + fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model); + + // When users enable static shape infer for rec model, the batch size of cls + // and rec model must to be set to 1. + ppocr_v3.SetClsBatchSize(1); + ppocr_v3.SetRecBatchSize(1); + + if (!ppocr_v3.Initialized()) { + std::cerr << "Failed to initialize PP-OCR." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::OCRResult result; + if (!ppocr_v3.Predict(im, &result)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << result.Str() << std::endl; + + auto vis_im = fastdeploy::vision::VisOcr(im, result); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char *argv[]) { + if (argc < 7) { + std::cout << "Usage: infer_demo path/to/det_model path/to/cls_model " + "path/to/rec_model path/to/rec_label_file path/to/image " + "run_option, " + "e.g ./infer_demo ./ch_PP-OCRv3_det_infer " + "./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer " + "./ppocr_keys_v1.txt ./12.jpg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with ascend." 
+ << std::endl; + return -1; + } + + fastdeploy::RuntimeOption option; + fastdeploy::ModelFormat format; + int flag = std::atoi(argv[6]); + + if (flag == 0) { + option.UseCpu(); + format = fastdeploy::ONNX; + } else if (flag == 1) { + option.UseRKNPU2(); + format = fastdeploy::RKNN; + } + + std::string det_model_dir = argv[1]; + std::string cls_model_dir = argv[2]; + std::string rec_model_dir = argv[3]; + std::string rec_label_file = argv[4]; + std::string test_image = argv[5]; + InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file, + test_image, option, format); + return 0; +} diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md new file mode 100755 index 0000000000..d281daf833 --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md @@ -0,0 +1,49 @@ +English | [简体中文](README_CN.md) +# PPOCRv3 Python Deployment Example + +Two steps before deployment + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) +- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) + +This directory provides examples that `infer.py` fast finishes the deployment of PPOCRv3 on CPU/GPU and GPU accelerated by TensorRT. 
The script is as follows + +``` +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt + +# Download the example code for deployment +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd examples/vision/ocr/PP-OCRv3/python/ + +python3 infer_static_shape.py \ + --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \ + --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \ + --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \ + --rec_label_file ./ppocr_keys_v1.txt \ + --image 12.jpg \ + --device cpu + +# NPU推理 +python3 infer_static_shape.py \ + --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \ + --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \ + --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \ + --rec_label_file ppocr_keys_v1.txt \ + --image 12.jpg \ + --device npu +``` + +The visualized result after running is as follows + + + + + +## Other Documents + +- [Python API reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/) +- [PPOCR Model Description](../../) +- [PPOCRv3 C++ Deployment](../cpp) +- [Model Prediction Results](../../../../../../docs/api/vision_results/) +- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md new file mode 100644 index 0000000000..663a6b62d0 --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md @@ -0,0 +1,62 @@ +[English](README.md) | 简体中文 +# PPOCRv3 Python部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 
FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +本目录下提供`infer.py`快速完成PPOCRv3在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成 + +``` + +# 下载模型,图片和字典文件 +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar +tar xvf ch_PP-OCRv3_det_infer.tar + +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar +tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar + +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar +tar xvf ch_PP-OCRv3_rec_infer.tar + +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg + +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt + +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd examples/vision/ocr/PP-OCRv3/python/ + +# CPU推理 +python3 infer_static_shape.py \ + --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \ + --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \ + --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \ + --rec_label_file ./ppocr_keys_v1.txt \ + --image 12.jpg \ + --device cpu + +# NPU推理 +python3 infer_static_shape.py \ + --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \ + --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \ + --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \ + --rec_label_file ppocr_keys_v1.txt \ + --image 12.jpg \ + --device npu +``` + +运行完成可视化结果如下图所示 + + + + + +## 其它文档 + +- [Python API文档查阅](https://baidu-paddle.github.io/fastdeploy-api/python/html/) +- [PPOCR 系列模型介绍](../../) +- [PPOCRv3 C++部署](../cpp) +- [模型预测结果说明](../../../../../../docs/api/vision_results/) +- [如何切换模型推理后端引擎](../../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py 
b/examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py new file mode 100755 index 0000000000..7aa1382179 --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py @@ -0,0 +1,144 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--det_model", required=True, help="Path of Detection model of PPOCR.") + parser.add_argument( + "--cls_model", + required=True, + help="Path of Classification model of PPOCR.") + parser.add_argument( + "--rec_model", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--rec_label_file", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.") + parser.add_argument( + "--cpu_thread_num", + type=int, + default=9, + help="Number of threads while inference on CPU.") + return parser.parse_args() + + +def build_option(args): + + det_option = fd.RuntimeOption() + cls_option = fd.RuntimeOption() + rec_option = fd.RuntimeOption() + if args.device == "npu": + det_option.use_rknpu2() + cls_option.use_rknpu2() 
+ rec_option.use_rknpu2() + + return det_option, cls_option, rec_option + + +def build_format(args): + det_format = fd.ModelFormat.ONNX + cls_format = fd.ModelFormat.ONNX + rec_format = fd.ModelFormat.ONNX + if args.device == "npu": + det_format = fd.ModelFormat.RKNN + cls_format = fd.ModelFormat.RKNN + rec_format = fd.ModelFormat.RKNN + + return det_format, cls_format, rec_format + + +args = parse_arguments() + +# Detection模型, 检测文字框 +det_model_file = args.det_model +det_params_file = "" +# Classification模型,方向分类,可选 +cls_model_file = args.cls_model +cls_params_file = "" +# Recognition模型,文字识别模型 +rec_model_file = args.rec_model +rec_params_file = "" +rec_label_file = args.rec_label_file + +det_option, cls_option, rec_option = build_option(args) +det_format, cls_format, rec_format = build_format(args) + +det_model = fd.vision.ocr.DBDetector( + det_model_file, + det_params_file, + runtime_option=det_option, + model_format=det_format) + +cls_model = fd.vision.ocr.Classifier( + cls_model_file, + cls_params_file, + runtime_option=cls_option, + model_format=cls_format) + +rec_model = fd.vision.ocr.Recognizer( + rec_model_file, + rec_params_file, + rec_label_file, + runtime_option=rec_option, + model_format=rec_format) + +# Det,Rec模型启用静态shape推理 +det_model.preprocessor.static_shape_infer = True +rec_model.preprocessor.static_shape_infer = True + +if args.device == "npu": + det_model.preprocessor.disable_normalize() + det_model.preprocessor.disable_permute() + cls_model.preprocessor.disable_normalize() + cls_model.preprocessor.disable_permute() + rec_model.preprocessor.disable_normalize() + rec_model.preprocessor.disable_permute() + +# 创建PP-OCR,串联3个模型,其中cls_model可选,如无需求,可设置为None +ppocr_v3 = fd.vision.ocr.PPOCRv3( + det_model=det_model, cls_model=cls_model, rec_model=rec_model) + +# Cls模型和Rec模型的batch size 必须设置为1, 开启静态shape推理 +ppocr_v3.cls_batch_size = 1 +ppocr_v3.rec_batch_size = 1 + +# 预测图片准备 +im = cv2.imread(args.image) + +#预测并打印结果 +result = ppocr_v3.predict(im) + 
+print(result) + +# 可视化结果 +vis_im = fd.vision.vis_ppocr(im, result) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/fastdeploy/vision/ocr/ppocr/classifier.cc b/fastdeploy/vision/ocr/ppocr/classifier.cc old mode 100755 new mode 100644 index 55f355db26..b7dcc502b1 --- a/fastdeploy/vision/ocr/ppocr/classifier.cc +++ b/fastdeploy/vision/ocr/ppocr/classifier.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "fastdeploy/vision/ocr/ppocr/classifier.h" + #include "fastdeploy/utils/perf.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" @@ -26,15 +27,16 @@ Classifier::Classifier(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT, - Backend::OPENVINO}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE}; + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_kunlunxin_backends = {Backend::LITE}; valid_ascend_backends = {Backend::LITE}; valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; } runtime_option = custom_option; runtime_option.model_format = model_format; @@ -54,16 +56,18 @@ bool Classifier::Initialize() { } std::unique_ptr Classifier::Clone() const { - std::unique_ptr clone_model = utils::make_unique(Classifier(*this)); + std::unique_ptr clone_model = + utils::make_unique(Classifier(*this)); clone_model->SetRuntime(clone_model->CloneRuntime()); return clone_model; } -bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_score) { +bool Classifier::Predict(const 
cv::Mat& img, int32_t* cls_label, + float* cls_score) { std::vector cls_labels(1); std::vector cls_scores(1); bool success = BatchPredict({img}, &cls_labels, &cls_scores); - if(!success){ + if (!success) { return success; } *cls_label = cls_labels[0]; @@ -72,16 +76,19 @@ bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_scor } bool Classifier::BatchPredict(const std::vector& images, - std::vector* cls_labels, std::vector* cls_scores) { + std::vector* cls_labels, + std::vector* cls_scores) { return BatchPredict(images, cls_labels, cls_scores, 0, images.size()); } bool Classifier::BatchPredict(const std::vector& images, - std::vector* cls_labels, std::vector* cls_scores, + std::vector* cls_labels, + std::vector* cls_scores, size_t start_index, size_t end_index) { size_t total_size = images.size(); std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index)) { + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, + end_index)) { FDERROR << "Failed to preprocess the input image." << std::endl; return false; } @@ -91,13 +98,15 @@ bool Classifier::BatchPredict(const std::vector& images, return false; } - if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, start_index, total_size)) { - FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl; + if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, + start_index, total_size)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." 
+ << std::endl; return false; } return true; } -} // namesapce ocr +} // namespace ocr } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc old mode 100755 new mode 100644 index dcd76c168a..35f98acc98 --- a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc +++ b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc @@ -13,9 +13,10 @@ // limitations under the License. #include "fastdeploy/vision/ocr/ppocr/cls_preprocessor.h" + +#include "fastdeploy/function/concat.h" #include "fastdeploy/utils/perf.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" -#include "fastdeploy/function/concat.h" namespace fastdeploy { namespace vision { @@ -38,34 +39,43 @@ void OcrClassifierResizeImage(FDMat* mat, Resize::Run(mat, resize_w, img_h); } -bool ClassifierPreprocessor::Run(std::vector* images, std::vector* outputs) { +bool ClassifierPreprocessor::Run(std::vector* images, + std::vector* outputs) { return Run(images, outputs, 0, images->size()); } -bool ClassifierPreprocessor::Run(std::vector* images, std::vector* outputs, +bool ClassifierPreprocessor::Run(std::vector* images, + std::vector* outputs, size_t start_index, size_t end_index) { - - if (images->size() == 0 || start_index <0 || end_index <= start_index || end_index > images->size()) { - FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl; + if (images->size() == 0 || start_index < 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. 
Correct is: 0 <= start_index < "
+               "end_index <= images->size()"
+            << std::endl;
     return false;
   }
   for (size_t i = start_index; i < end_index; ++i) {
     FDMat* mat = &(images->at(i));
     OcrClassifierResizeImage(mat, cls_image_shape_);
-    Normalize::Run(mat, mean_, scale_, is_scale_);
+    if (!disable_normalize_) {
+      Normalize::Run(mat, mean_, scale_, is_scale_);
+    }
     std::vector value = {0, 0, 0};
     if (mat->Width() < cls_image_shape_[2]) {
       Pad::Run(mat, 0, 0, 0, cls_image_shape_[2] - mat->Width(), value);
     }
-    HWC2CHW::Run(mat);
-    Cast::Run(mat, "float");
+
+    if (!disable_permute_) {
+      HWC2CHW::Run(mat);
+      Cast::Run(mat, "float");
+    }
   }
   // Only have 1 output Tensor.
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   size_t tensor_size = end_index - start_index;
-  std::vector tensors(tensor_size);
+  std::vector tensors(tensor_size);
   for (size_t i = 0; i < tensor_size; ++i) {
     (*images)[i + start_index].ShareWithTensor(&(tensors[i]));
     tensors[i].ExpandDim(0);
diff --git a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
index 52b2bb7379..921f3f8267 100644
--- a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
@@ -56,7 +56,16 @@ class FASTDEPLOY_DECL ClassifierPreprocessor {
   /// Get cls_image_shape for the classification preprocess
   std::vector GetClsImageShape() const { return cls_image_shape_; }

+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
 private:
+  // for recording the switch of hwc2chw
+  bool disable_permute_ = false;
+  // for recording the switch of normalize
+  bool disable_normalize_ = false;
   std::vector mean_ = {0.5f, 0.5f, 0.5f};
   std::vector scale_ = {0.5f, 0.5f, 0.5f};
   bool is_scale_ = true;
diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.cc b/fastdeploy/vision/ocr/ppocr/dbdetector.cc
old mode 100755
new mode 100644
index cd07cc262c..c365f971eb 100644
--- a/fastdeploy/vision/ocr/ppocr/dbdetector.cc
+++ b/fastdeploy/vision/ocr/ppocr/dbdetector.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
+
 #include "fastdeploy/utils/perf.h"
 #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"

@@ -26,15 +27,16 @@ DBDetector::DBDetector(const std::string& model_file,
                        const RuntimeOption& custom_option,
                        const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
-    valid_cpu_backends = {Backend::ORT,
-                          Backend::OPENVINO};
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};
+    valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
+    valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
+    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
+                          Backend::LITE};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
     valid_kunlunxin_backends = {Backend::LITE};
     valid_ascend_backends = {Backend::LITE};
     valid_sophgonpu_backends = {Backend::SOPHGOTPU};
+    valid_rknpu_backends = {Backend::RKNPU2};
   }

   runtime_option = custom_option;
@@ -54,7 +56,8 @@ bool DBDetector::Initialize() {
 }

 std::unique_ptr DBDetector::Clone() const {
-  std::unique_ptr clone_model = utils::make_unique(DBDetector(*this));
+  std::unique_ptr clone_model =
+      utils::make_unique(DBDetector(*this));
   clone_model->SetRuntime(clone_model->CloneRuntime());
   return clone_model;
 }

@@ -69,11 +72,13 @@
bool DBDetector::Predict(const cv::Mat& img, return true; } -bool DBDetector::BatchPredict(const std::vector& images, - std::vector>>* det_results) { +bool DBDetector::BatchPredict( + const std::vector& images, + std::vector>>* det_results) { std::vector fd_images = WrapMat(images); std::vector> batch_det_img_info; - if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &batch_det_img_info)) { + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, + &batch_det_img_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } @@ -84,13 +89,15 @@ bool DBDetector::BatchPredict(const std::vector& images, return false; } - if (!postprocessor_.Run(reused_output_tensors_, det_results, batch_det_img_info)) { - FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl; + if (!postprocessor_.Run(reused_output_tensors_, det_results, + batch_det_img_info)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; return false; } return true; } -} // namesapce ocr +} // namespace ocr } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc index 28b7e47afc..8079d65198 100644 --- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc +++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc @@ -13,17 +13,22 @@ // limitations under the License. 
#include "fastdeploy/vision/ocr/ppocr/det_preprocessor.h" + +#include "fastdeploy/function/concat.h" #include "fastdeploy/utils/perf.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" -#include "fastdeploy/function/concat.h" namespace fastdeploy { namespace vision { namespace ocr { -std::array OcrDetectorGetInfo(FDMat* img, int max_size_len) { +std::array DBDetectorPreprocessor::OcrDetectorGetInfo( + FDMat* img, int max_size_len) { int w = img->Width(); int h = img->Height(); + if (static_shape_infer_) { + return {w, h, det_image_shape_[2], det_image_shape_[1]}; + } float ratio = 1.f; int max_wh = w >= h ? w : h; @@ -39,30 +44,30 @@ std::array OcrDetectorGetInfo(FDMat* img, int max_size_len) { resize_h = std::max(int(std::round(float(resize_h) / 32) * 32), 32); resize_w = std::max(int(std::round(float(resize_w) / 32) * 32), 32); - return {w,h,resize_w,resize_h}; + return {w, h, resize_w, resize_h}; /* - *ratio_h = float(resize_h) / float(h); - *ratio_w = float(resize_w) / float(w); - */ + *ratio_h = float(resize_h) / float(h); + *ratio_w = float(resize_w) / float(w); + */ } -bool OcrDetectorResizeImage(FDMat* img, - int resize_w, - int resize_h, - int max_resize_w, - int max_resize_h) { + +bool OcrDetectorResizeImage(FDMat* img, int resize_w, int resize_h, + int max_resize_w, int max_resize_h) { Resize::Run(img, resize_w, resize_h); std::vector value = {0, 0, 0}; - Pad::Run(img, 0, max_resize_h-resize_h, 0, max_resize_w - resize_w, value); + Pad::Run(img, 0, max_resize_h - resize_h, 0, max_resize_w - resize_w, value); return true; } -bool DBDetectorPreprocessor::Run(std::vector* images, - std::vector* outputs, - std::vector>* batch_det_img_info_ptr) { +bool DBDetectorPreprocessor::Run( + std::vector* images, std::vector* outputs, + std::vector>* batch_det_img_info_ptr) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." 
+ << std::endl; return false; } + int max_resize_w = 0; int max_resize_h = 0; std::vector>& batch_det_img_info = *batch_det_img_info_ptr; @@ -70,14 +75,28 @@ bool DBDetectorPreprocessor::Run(std::vector* images, batch_det_img_info.resize(images->size()); for (size_t i = 0; i < images->size(); ++i) { FDMat* mat = &(images->at(i)); - batch_det_img_info[i] = OcrDetectorGetInfo(mat,max_side_len_); - max_resize_w = std::max(max_resize_w,batch_det_img_info[i][2]); - max_resize_h = std::max(max_resize_h,batch_det_img_info[i][3]); + batch_det_img_info[i] = OcrDetectorGetInfo(mat, max_side_len_); + max_resize_w = std::max(max_resize_w, batch_det_img_info[i][2]); + max_resize_h = std::max(max_resize_h, batch_det_img_info[i][3]); } for (size_t i = 0; i < images->size(); ++i) { FDMat* mat = &(images->at(i)); - OcrDetectorResizeImage(mat, batch_det_img_info[i][2],batch_det_img_info[i][3],max_resize_w,max_resize_h); - NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_); + OcrDetectorResizeImage(mat, batch_det_img_info[i][2], + batch_det_img_info[i][3], max_resize_w, + max_resize_h); + + if (!disable_normalize_ && !disable_permute_) { + NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_); + } else { + if (!disable_normalize_) { + Normalize::Run(mat, mean_, scale_, is_scale_); + } + if (!disable_permute_) { + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + } + } + /* Normalize::Run(mat, mean_, scale_, is_scale_); HWC2CHW::Run(mat); @@ -87,7 +106,7 @@ bool DBDetectorPreprocessor::Run(std::vector* images, // Only have 1 output Tensor. 
outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
-  std::vector tensors(images->size());
+  std::vector tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     (*images)[i].ShareWithTensor(&(tensors[i]));
     tensors[i].ExpandDim(0);
diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
index 552d0628a3..d694dfdd00 100644
--- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
@@ -54,11 +54,39 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor {
   /// Get is_scale of the image normalization in detection preprocess
   bool GetIsScale() const { return is_scale_; }

+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
+  /// Set det_image_shape for the detection preprocess
+  void SetDetImageShape(const std::vector& det_image_shape) {
+    det_image_shape_ = det_image_shape;
+  }
+  /// Get det_image_shape for the detection preprocess
+  std::vector GetDetImageShape() const { return det_image_shape_; }
+
+  /// Set static_shape_infer is true or not. When deploy PP-OCR
+  /// on hardware which can not support dynamic input shape very well,
+  /// like Huawei Ascend, static_shape_infer needs to be true.
+ void SetStaticShapeInfer(bool static_shape_infer) { + static_shape_infer_ = static_shape_infer; + } + /// Get static_shape_infer of the recognition preprocess + bool GetStaticShapeInfer() const { return static_shape_infer_; } + private: + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; int max_side_len_ = 960; std::vector mean_ = {0.485f, 0.456f, 0.406f}; std::vector scale_ = {0.229f, 0.224f, 0.225f}; bool is_scale_ = true; + std::vector det_image_shape_ = {3, 960, 960}; + bool static_shape_infer_ = false; + std::array OcrDetectorGetInfo(FDMat* img, int max_size_len); }; } // namespace ocr diff --git a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc old mode 100755 new mode 100644 index 2bcb697a80..1276001a57 --- a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc +++ b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc @@ -12,80 +12,120 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include + #include "fastdeploy/pybind/main.h" namespace fastdeploy { void BindPPOCRModel(pybind11::module& m) { m.def("sort_boxes", [](std::vector>& boxes) { - vision::ocr::SortBoxes(&boxes); - return boxes; + vision::ocr::SortBoxes(&boxes); + return boxes; }); - + // DBDetector - pybind11::class_(m, "DBDetectorPreprocessor") + pybind11::class_( + m, "DBDetectorPreprocessor") .def(pybind11::init<>()) - .def_property("max_side_len", &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen) - .def_property("mean", &vision::ocr::DBDetectorPreprocessor::GetMean, &vision::ocr::DBDetectorPreprocessor::SetMean) - .def_property("scale", &vision::ocr::DBDetectorPreprocessor::GetScale, &vision::ocr::DBDetectorPreprocessor::SetScale) - .def_property("is_scale", &vision::ocr::DBDetectorPreprocessor::GetIsScale, &vision::ocr::DBDetectorPreprocessor::SetIsScale) - .def("run", [](vision::ocr::DBDetectorPreprocessor& self, std::vector& im_list) { - std::vector images; - for (size_t i = 0; i < im_list.size(); ++i) { - images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); - } - std::vector outputs; - std::vector> batch_det_img_info; - self.Run(&images, &outputs, &batch_det_img_info); - for(size_t i = 0; i< outputs.size(); ++i){ - outputs[i].StopSharing(); - } - return std::make_pair(outputs, batch_det_img_info); + .def_property("static_shape_infer", + &vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer, + &vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer) + .def_property("max_side_len", + &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, + &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen) + .def_property("mean", &vision::ocr::DBDetectorPreprocessor::GetMean, + &vision::ocr::DBDetectorPreprocessor::SetMean) + .def_property("scale", &vision::ocr::DBDetectorPreprocessor::GetScale, + &vision::ocr::DBDetectorPreprocessor::SetScale) + .def_property("is_scale", + &vision::ocr::DBDetectorPreprocessor::GetIsScale, 
+ &vision::ocr::DBDetectorPreprocessor::SetIsScale) + .def("run", + [](vision::ocr::DBDetectorPreprocessor& self, + std::vector& im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector> batch_det_img_info; + self.Run(&images, &outputs, &batch_det_img_info); + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return std::make_pair(outputs, batch_det_img_info); + }) + .def("disable_normalize", + [](vision::ocr::DBDetectorPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::DBDetectorPreprocessor& self) { + self.DisablePermute(); }); - pybind11::class_(m, "DBDetectorPostprocessor") + pybind11::class_( + m, "DBDetectorPostprocessor") .def(pybind11::init<>()) - .def_property("det_db_thresh", &vision::ocr::DBDetectorPostprocessor::GetDetDBThresh, &vision::ocr::DBDetectorPostprocessor::SetDetDBThresh) - .def_property("det_db_box_thresh", &vision::ocr::DBDetectorPostprocessor::GetDetDBBoxThresh, &vision::ocr::DBDetectorPostprocessor::SetDetDBBoxThresh) - .def_property("det_db_unclip_ratio", &vision::ocr::DBDetectorPostprocessor::GetDetDBUnclipRatio, &vision::ocr::DBDetectorPostprocessor::SetDetDBUnclipRatio) - .def_property("det_db_score_mode", &vision::ocr::DBDetectorPostprocessor::GetDetDBScoreMode, &vision::ocr::DBDetectorPostprocessor::SetDetDBScoreMode) - .def_property("use_dilation", &vision::ocr::DBDetectorPostprocessor::GetUseDilation, &vision::ocr::DBDetectorPostprocessor::SetUseDilation) + .def_property("det_db_thresh", + &vision::ocr::DBDetectorPostprocessor::GetDetDBThresh, + &vision::ocr::DBDetectorPostprocessor::SetDetDBThresh) + .def_property("det_db_box_thresh", + &vision::ocr::DBDetectorPostprocessor::GetDetDBBoxThresh, + &vision::ocr::DBDetectorPostprocessor::SetDetDBBoxThresh) + .def_property("det_db_unclip_ratio", + 
&vision::ocr::DBDetectorPostprocessor::GetDetDBUnclipRatio, + &vision::ocr::DBDetectorPostprocessor::SetDetDBUnclipRatio) + .def_property("det_db_score_mode", + &vision::ocr::DBDetectorPostprocessor::GetDetDBScoreMode, + &vision::ocr::DBDetectorPostprocessor::SetDetDBScoreMode) + .def_property("use_dilation", + &vision::ocr::DBDetectorPostprocessor::GetUseDilation, + &vision::ocr::DBDetectorPostprocessor::SetUseDilation) - .def("run", [](vision::ocr::DBDetectorPostprocessor& self, - std::vector& inputs, - const std::vector>& batch_det_img_info) { - std::vector>> results; + .def("run", + [](vision::ocr::DBDetectorPostprocessor& self, + std::vector& inputs, + const std::vector>& batch_det_img_info) { + std::vector>> results; - if (!self.Run(inputs, &results, batch_det_img_info)) { - throw std::runtime_error("Failed to preprocess the input data in DBDetectorPostprocessor."); - } - return results; - }) - .def("run", [](vision::ocr::DBDetectorPostprocessor& self, - std::vector& input_array, - const std::vector>& batch_det_img_info) { - std::vector>> results; - std::vector inputs; - PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); - if (!self.Run(inputs, &results, batch_det_img_info)) { - throw std::runtime_error("Failed to preprocess the input data in DBDetectorPostprocessor."); - } - return results; - }); + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "DBDetectorPostprocessor."); + } + return results; + }) + .def("run", + [](vision::ocr::DBDetectorPostprocessor& self, + std::vector& input_array, + const std::vector>& batch_det_img_info) { + std::vector>> results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "DBDetectorPostprocessor."); + } + return results; + }); pybind11::class_(m, "DBDetector") 
.def(pybind11::init()) .def(pybind11::init<>()) - .def_property_readonly("preprocessor", &vision::ocr::DBDetector::GetPreprocessor) - .def_property_readonly("postprocessor", &vision::ocr::DBDetector::GetPostprocessor) - .def("predict", [](vision::ocr::DBDetector& self, - pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - std::vector> boxes_result; - self.Predict(mat, &boxes_result); - return boxes_result; - }) - .def("batch_predict", [](vision::ocr::DBDetector& self, std::vector& data) { + .def_property_readonly("preprocessor", + &vision::ocr::DBDetector::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::DBDetector::GetPostprocessor) + .def("predict", + [](vision::ocr::DBDetector& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + std::vector> boxes_result; + self.Predict(mat, &boxes_result); + return boxes_result; + }) + .def("batch_predict", [](vision::ocr::DBDetector& self, + std::vector& data) { std::vector images; std::vector>> det_results; for (size_t i = 0; i < data.size(); ++i) { @@ -96,39 +136,63 @@ void BindPPOCRModel(pybind11::module& m) { }); // Classifier - pybind11::class_(m, "ClassifierPreprocessor") + pybind11::class_( + m, "ClassifierPreprocessor") .def(pybind11::init<>()) - .def_property("cls_image_shape", &vision::ocr::ClassifierPreprocessor::GetClsImageShape, &vision::ocr::ClassifierPreprocessor::SetClsImageShape) - .def_property("mean", &vision::ocr::ClassifierPreprocessor::GetMean, &vision::ocr::ClassifierPreprocessor::SetMean) - .def_property("scale", &vision::ocr::ClassifierPreprocessor::GetScale, &vision::ocr::ClassifierPreprocessor::SetScale) - .def_property("is_scale", &vision::ocr::ClassifierPreprocessor::GetIsScale, &vision::ocr::ClassifierPreprocessor::SetIsScale) - .def("run", [](vision::ocr::ClassifierPreprocessor& self, std::vector& im_list) { - std::vector images; - for (size_t i = 0; i < im_list.size(); ++i) { - images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); - } 
- std::vector outputs; - if (!self.Run(&images, &outputs)) { - throw std::runtime_error("Failed to preprocess the input data in ClassifierPreprocessor."); - } - for(size_t i = 0; i< outputs.size(); ++i){ - outputs[i].StopSharing(); - } - return outputs; + .def_property("cls_image_shape", + &vision::ocr::ClassifierPreprocessor::GetClsImageShape, + &vision::ocr::ClassifierPreprocessor::SetClsImageShape) + .def_property("mean", &vision::ocr::ClassifierPreprocessor::GetMean, + &vision::ocr::ClassifierPreprocessor::SetMean) + .def_property("scale", &vision::ocr::ClassifierPreprocessor::GetScale, + &vision::ocr::ClassifierPreprocessor::SetScale) + .def_property("is_scale", + &vision::ocr::ClassifierPreprocessor::GetIsScale, + &vision::ocr::ClassifierPreprocessor::SetIsScale) + .def("run", + [](vision::ocr::ClassifierPreprocessor& self, + std::vector& im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "ClassifierPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::ocr::ClassifierPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::ClassifierPreprocessor& self) { + self.DisablePermute(); }); - pybind11::class_(m, "ClassifierPostprocessor") + pybind11::class_( + m, "ClassifierPostprocessor") .def(pybind11::init<>()) - .def_property("cls_thresh", &vision::ocr::ClassifierPostprocessor::GetClsThresh, &vision::ocr::ClassifierPostprocessor::SetClsThresh) - .def("run", [](vision::ocr::ClassifierPostprocessor& self, - std::vector& inputs) { - std::vector cls_labels; - std::vector cls_scores; - if (!self.Run(inputs, &cls_labels, &cls_scores)) { - throw std::runtime_error("Failed to preprocess the 
input data in ClassifierPostprocessor."); - } - return std::make_pair(cls_labels,cls_scores); - }) + .def_property("cls_thresh", + &vision::ocr::ClassifierPostprocessor::GetClsThresh, + &vision::ocr::ClassifierPostprocessor::SetClsThresh) + .def("run", + [](vision::ocr::ClassifierPostprocessor& self, + std::vector& inputs) { + std::vector cls_labels; + std::vector cls_scores; + if (!self.Run(inputs, &cls_labels, &cls_scores)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "ClassifierPostprocessor."); + } + return std::make_pair(cls_labels, cls_scores); + }) .def("run", [](vision::ocr::ClassifierPostprocessor& self, std::vector& input_array) { std::vector inputs; @@ -136,26 +200,31 @@ void BindPPOCRModel(pybind11::module& m) { std::vector cls_labels; std::vector cls_scores; if (!self.Run(inputs, &cls_labels, &cls_scores)) { - throw std::runtime_error("Failed to preprocess the input data in ClassifierPostprocessor."); + throw std::runtime_error( + "Failed to preprocess the input data in " + "ClassifierPostprocessor."); } - return std::make_pair(cls_labels,cls_scores); + return std::make_pair(cls_labels, cls_scores); }); - + pybind11::class_(m, "Classifier") .def(pybind11::init()) .def(pybind11::init<>()) - .def_property_readonly("preprocessor", &vision::ocr::Classifier::GetPreprocessor) - .def_property_readonly("postprocessor", &vision::ocr::Classifier::GetPostprocessor) - .def("predict", [](vision::ocr::Classifier& self, - pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - int32_t cls_label; - float cls_score; - self.Predict(mat, &cls_label, &cls_score); - return std::make_pair(cls_label, cls_score); - }) - .def("batch_predict", [](vision::ocr::Classifier& self, std::vector& data) { + .def_property_readonly("preprocessor", + &vision::ocr::Classifier::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::Classifier::GetPostprocessor) + .def("predict", + [](vision::ocr::Classifier& self, pybind11::array& 
data) { + auto mat = PyArrayToCvMat(data); + int32_t cls_label; + float cls_score; + self.Predict(mat, &cls_label, &cls_score); + return std::make_pair(cls_label, cls_score); + }) + .def("batch_predict", [](vision::ocr::Classifier& self, + std::vector& data) { std::vector images; std::vector cls_labels; std::vector cls_scores; @@ -167,39 +236,63 @@ void BindPPOCRModel(pybind11::module& m) { }); // Recognizer - pybind11::class_(m, "RecognizerPreprocessor") - .def(pybind11::init<>()) - .def_property("static_shape_infer", &vision::ocr::RecognizerPreprocessor::GetStaticShapeInfer, &vision::ocr::RecognizerPreprocessor::SetStaticShapeInfer) - .def_property("rec_image_shape", &vision::ocr::RecognizerPreprocessor::GetRecImageShape, &vision::ocr::RecognizerPreprocessor::SetRecImageShape) - .def_property("mean", &vision::ocr::RecognizerPreprocessor::GetMean, &vision::ocr::RecognizerPreprocessor::SetMean) - .def_property("scale", &vision::ocr::RecognizerPreprocessor::GetScale, &vision::ocr::RecognizerPreprocessor::SetScale) - .def_property("is_scale", &vision::ocr::RecognizerPreprocessor::GetIsScale, &vision::ocr::RecognizerPreprocessor::SetIsScale) - .def("run", [](vision::ocr::RecognizerPreprocessor& self, std::vector& im_list) { - std::vector images; - for (size_t i = 0; i < im_list.size(); ++i) { - images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); - } - std::vector outputs; - if (!self.Run(&images, &outputs)) { - throw std::runtime_error("Failed to preprocess the input data in RecognizerPreprocessor."); - } - for(size_t i = 0; i< outputs.size(); ++i){ - outputs[i].StopSharing(); - } - return outputs; - }); + pybind11::class_( + m, "RecognizerPreprocessor") + .def(pybind11::init<>()) + .def_property("static_shape_infer", + &vision::ocr::RecognizerPreprocessor::GetStaticShapeInfer, + &vision::ocr::RecognizerPreprocessor::SetStaticShapeInfer) + .def_property("rec_image_shape", + &vision::ocr::RecognizerPreprocessor::GetRecImageShape, + 
&vision::ocr::RecognizerPreprocessor::SetRecImageShape) + .def_property("mean", &vision::ocr::RecognizerPreprocessor::GetMean, + &vision::ocr::RecognizerPreprocessor::SetMean) + .def_property("scale", &vision::ocr::RecognizerPreprocessor::GetScale, + &vision::ocr::RecognizerPreprocessor::SetScale) + .def_property("is_scale", + &vision::ocr::RecognizerPreprocessor::GetIsScale, + &vision::ocr::RecognizerPreprocessor::SetIsScale) + .def("run", + [](vision::ocr::RecognizerPreprocessor& self, + std::vector& im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "RecognizerPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::ocr::RecognizerPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::RecognizerPreprocessor& self) { + self.DisablePermute(); + }); - pybind11::class_(m, "RecognizerPostprocessor") + pybind11::class_( + m, "RecognizerPostprocessor") .def(pybind11::init()) - .def("run", [](vision::ocr::RecognizerPostprocessor& self, - std::vector& inputs) { - std::vector texts; - std::vector rec_scores; - if (!self.Run(inputs, &texts, &rec_scores)) { - throw std::runtime_error("Failed to preprocess the input data in RecognizerPostprocessor."); - } - return std::make_pair(texts, rec_scores); - }) + .def("run", + [](vision::ocr::RecognizerPostprocessor& self, + std::vector& inputs) { + std::vector texts; + std::vector rec_scores; + if (!self.Run(inputs, &texts, &rec_scores)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "RecognizerPostprocessor."); + } + return std::make_pair(texts, rec_scores); + }) .def("run", [](vision::ocr::RecognizerPostprocessor& self, 
std::vector& input_array) { std::vector inputs; @@ -207,7 +300,9 @@ void BindPPOCRModel(pybind11::module& m) { std::vector texts; std::vector rec_scores; if (!self.Run(inputs, &texts, &rec_scores)) { - throw std::runtime_error("Failed to preprocess the input data in RecognizerPostprocessor."); + throw std::runtime_error( + "Failed to preprocess the input data in " + "RecognizerPostprocessor."); } return std::make_pair(texts, rec_scores); }); @@ -216,17 +311,20 @@ void BindPPOCRModel(pybind11::module& m) { .def(pybind11::init()) .def(pybind11::init<>()) - .def_property_readonly("preprocessor", &vision::ocr::Recognizer::GetPreprocessor) - .def_property_readonly("postprocessor", &vision::ocr::Recognizer::GetPostprocessor) - .def("predict", [](vision::ocr::Recognizer& self, - pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - std::string text; - float rec_score; - self.Predict(mat, &text, &rec_score); - return std::make_pair(text, rec_score); - }) - .def("batch_predict", [](vision::ocr::Recognizer& self, std::vector& data) { + .def_property_readonly("preprocessor", + &vision::ocr::Recognizer::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::Recognizer::GetPostprocessor) + .def("predict", + [](vision::ocr::Recognizer& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + std::string text; + float rec_score; + self.Predict(mat, &text, &rec_score); + return std::make_pair(text, rec_score); + }) + .def("batch_predict", [](vision::ocr::Recognizer& self, + std::vector& data) { std::vector images; std::vector texts; std::vector rec_scores; diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc index ad049fdcec..59c7de2796 100644 --- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc +++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc @@ -13,22 +13,23 @@ // limitations under the License. 
#include "fastdeploy/vision/ocr/ppocr/rec_preprocessor.h" + +#include "fastdeploy/function/concat.h" #include "fastdeploy/utils/perf.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" -#include "fastdeploy/function/concat.h" namespace fastdeploy { namespace vision { namespace ocr { void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio, - const std::vector& rec_image_shape, bool static_shape_infer) { + const std::vector& rec_image_shape, + bool static_shape_infer) { int img_h, img_w; img_h = rec_image_shape[1]; img_w = rec_image_shape[2]; if (!static_shape_infer) { - img_w = int(img_h * max_wh_ratio); float ratio = float(mat->Width()) / float(mat->Height()); @@ -43,23 +44,29 @@ void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio, } else { if (mat->Width() >= img_w) { - Resize::Run(mat, img_w, img_h); // Reszie W to 320 + Resize::Run(mat, img_w, img_h); // Reszie W to 320 } else { Resize::Run(mat, mat->Width(), img_h); Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127}); // Pad to 320 - } + } } } -bool RecognizerPreprocessor::Run(std::vector* images, std::vector* outputs) { +bool RecognizerPreprocessor::Run(std::vector* images, + std::vector* outputs) { return Run(images, outputs, 0, images->size(), {}); } -bool RecognizerPreprocessor::Run(std::vector* images, std::vector* outputs, - size_t start_index, size_t end_index, const std::vector& indices) { - if (images->size() == 0 || end_index <= start_index || end_index > images->size()) { - FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl; +bool RecognizerPreprocessor::Run(std::vector* images, + std::vector* outputs, + size_t start_index, size_t end_index, + const std::vector& indices) { + if (images->size() == 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. 
Correct is: 0 <= start_index < " + "end_index <= images->size()" + << std::endl; return false; } @@ -67,7 +74,7 @@ bool RecognizerPreprocessor::Run(std::vector* images, std::vector* images, std::vectorat(real_index)); - OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, static_shape_infer_); - NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_); + OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, + static_shape_infer_); + if (!disable_normalize_ && !disable_permute_) { + NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_); + } else { + if (!disable_normalize_) { + Normalize::Run(mat, mean_, scale_, is_scale_); + } + if (!disable_permute_) { + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + } + } } // Only have 1 output Tensor. outputs->resize(1); - size_t tensor_size = end_index-start_index; + size_t tensor_size = end_index - start_index; // Concat all the preprocessed data to a batch tensor - std::vector tensors(tensor_size); + std::vector tensors(tensor_size); for (size_t i = 0; i < tensor_size; ++i) { size_t real_index = i + start_index; if (indices.size() != 0) { real_index = indices[i + start_index]; } - + (*images)[real_index].ShareWithTensor(&(tensors[i])); tensors[i].ExpandDim(0); } diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h index f7d741b5d4..c5edb2a802 100644 --- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h @@ -66,7 +66,16 @@ class FASTDEPLOY_DECL RecognizerPreprocessor { /// Get rec_image_shape for the recognition preprocess std::vector GetRecImageShape() { return rec_image_shape_; } + /// This function will disable normalize in preprocessing step. + void DisableNormalize() { disable_normalize_ = true; } + /// This function will disable hwc2chw in preprocessing step.
+ void DisablePermute() { disable_permute_ = true; } + private: + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; std::vector rec_image_shape_ = {3, 48, 320}; std::vector mean_ = {0.5f, 0.5f, 0.5f}; std::vector scale_ = {0.5f, 0.5f, 0.5f}; diff --git a/fastdeploy/vision/ocr/ppocr/recognizer.cc b/fastdeploy/vision/ocr/ppocr/recognizer.cc old mode 100755 new mode 100644 index 69e75b281e..74a8a26a16 --- a/fastdeploy/vision/ocr/ppocr/recognizer.cc +++ b/fastdeploy/vision/ocr/ppocr/recognizer.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "fastdeploy/vision/ocr/ppocr/recognizer.h" + #include "fastdeploy/utils/perf.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" @@ -26,17 +27,19 @@ Recognizer::Recognizer(const std::string& model_file, const std::string& params_file, const std::string& label_path, const RuntimeOption& custom_option, - const ModelFormat& model_format):postprocessor_(label_path) { + const ModelFormat& model_format) + : postprocessor_(label_path) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT, - Backend::OPENVINO}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE}; + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_kunlunxin_backends = {Backend::LITE}; - valid_ascend_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; } runtime_option = custom_option; @@ -57,12 +60,14 @@ bool Recognizer::Initialize() { } std::unique_ptr Recognizer::Clone() const { -
std::unique_ptr clone_model = utils::make_unique(Recognizer(*this)); + std::unique_ptr clone_model = + utils::make_unique(Recognizer(*this)); clone_model->SetRuntime(clone_model->CloneRuntime()); return clone_model; } -bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score) { +bool Recognizer::Predict(const cv::Mat& img, std::string* text, + float* rec_score) { std::vector texts(1); std::vector rec_scores(1); bool success = BatchPredict({img}, &texts, &rec_scores); @@ -75,20 +80,24 @@ bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score } bool Recognizer::BatchPredict(const std::vector& images, - std::vector* texts, std::vector* rec_scores) { + std::vector* texts, + std::vector* rec_scores) { return BatchPredict(images, texts, rec_scores, 0, images.size(), {}); } bool Recognizer::BatchPredict(const std::vector& images, - std::vector* texts, std::vector* rec_scores, - size_t start_index, size_t end_index, const std::vector& indices) { + std::vector* texts, + std::vector* rec_scores, + size_t start_index, size_t end_index, + const std::vector& indices) { size_t total_size = images.size(); if (indices.size() != 0 && indices.size() != total_size) { FDERROR << "indices.size() should be 0 or images.size()." << std::endl; return false; } std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index, indices)) { + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, + end_index, indices)) { FDERROR << "Failed to preprocess the input image." << std::endl; return false; } @@ -99,13 +108,15 @@ bool Recognizer::BatchPredict(const std::vector& images, return false; } - if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores, start_index, total_size, indices)) { - FDERROR << "Failed to postprocess the inference cls_results by runtime." 
<< std::endl; + if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores, + start_index, total_size, indices)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; return false; } return true; } -} // namesapce ocr +} // namespace ocr } // namespace vision } // namespace fastdeploy \ No newline at end of file diff --git a/python/fastdeploy/vision/ocr/ppocr/__init__.py b/python/fastdeploy/vision/ocr/ppocr/__init__.py index 8425323014..ba087497c2 100755 --- a/python/fastdeploy/vision/ocr/ppocr/__init__.py +++ b/python/fastdeploy/vision/ocr/ppocr/__init__.py @@ -75,6 +75,29 @@ def mean(self, value): value, list), "The value to set `mean` must be type of list." self._preprocessor.mean = value + @property + def static_shape_infer(self): + return self._preprocessor.static_shape_infer + + @static_shape_infer.setter + def static_shape_infer(self, value): + assert isinstance( + value, + bool), "The value to set `static_shape_infer` must be type of bool." + self._preprocessor.static_shape_infer = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._preprocessor.disable_permute() + class DBDetectorPostprocessor: def __init__(self): @@ -174,6 +197,7 @@ def clone(self): """Clone OCR detection model object :return: a new OCR detection model object """ + class DBDetectorClone(DBDetector): def __init__(self, model): self._model = model @@ -367,6 +391,18 @@ def cls_image_shape(self, value): list), "The value to set `cls_image_shape` must be type of list." self._preprocessor.cls_image_shape = value + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. 
+ """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._preprocessor.disable_permute() + class ClassifierPostprocessor: def __init__(self): @@ -421,6 +457,7 @@ def clone(self): """Clone OCR classification model object :return: a new OCR classification model object """ + class ClassifierClone(Classifier): def __init__(self, model): self._model = model @@ -582,6 +619,18 @@ def rec_image_shape(self, value): list), "The value to set `rec_image_shape` must be type of list." self._preprocessor.rec_image_shape = value + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._preprocessor.disable_permute() + class RecognizerPostprocessor: def __init__(self, label_path): @@ -629,6 +678,7 @@ def clone(self): """Clone OCR recognition model object :return: a new OCR recognition model object """ + class RecognizerClone(Recognizer): def __init__(self, model): self._model = model @@ -734,7 +784,7 @@ def __init__(self, det_model=None, cls_model=None, rec_model=None): assert det_model is not None and rec_model is not None, "The det_model and rec_model cannot be None." if cls_model is None: self.system_ = C.vision.ocr.PPOCRv3(det_model._model, - rec_model._model) + rec_model._model) else: self.system_ = C.vision.ocr.PPOCRv3( det_model._model, cls_model._model, rec_model._model) @@ -743,6 +793,7 @@ def clone(self): """Clone PPOCRv3 pipeline object :return: a new PPOCRv3 pipeline object """ + class PPOCRv3Clone(PPOCRv3): def __init__(self, system): self.system_ = system @@ -809,7 +860,7 @@ def __init__(self, det_model=None, cls_model=None, rec_model=None): assert det_model is not None and rec_model is not None, "The det_model and rec_model cannot be None." 
if cls_model is None: self.system_ = C.vision.ocr.PPOCRv2(det_model._model, - rec_model._model) + rec_model._model) else: self.system_ = C.vision.ocr.PPOCRv2( det_model._model, cls_model._model, rec_model._model) @@ -818,6 +869,7 @@ def clone(self): """Clone PPOCRv3 pipeline object :return: a new PPOCRv3 pipeline object """ + class PPOCRv2Clone(PPOCRv2): def __init__(self, system): self.system_ = system From 9eceb6091a2f7a274ee2e6c5990d06b4279b63aa Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Tue, 21 Feb 2023 19:42:02 +0800 Subject: [PATCH 2/9] add config --- tools/rknpu2/config/ppocrv3_cls.yaml | 15 +++++++++++++++ tools/rknpu2/config/ppocrv3_det.yaml | 15 +++++++++++++++ tools/rknpu2/config/ppocrv3_rec.yaml | 15 +++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 tools/rknpu2/config/ppocrv3_cls.yaml create mode 100644 tools/rknpu2/config/ppocrv3_det.yaml create mode 100644 tools/rknpu2/config/ppocrv3_rec.yaml diff --git a/tools/rknpu2/config/ppocrv3_cls.yaml b/tools/rknpu2/config/ppocrv3_cls.yaml new file mode 100644 index 0000000000..197becc2f2 --- /dev/null +++ b/tools/rknpu2/config/ppocrv3_cls.yaml @@ -0,0 +1,15 @@ +mean: + - + - 127.5 + - 127.5 + - 127.5 +std: + - + - 127.5 + - 127.5 + - 127.5 +model_path: ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx +outputs_nodes: +do_quantization: False +dataset: +output_folder: "./ch_ppocr_mobile_v2.0_cls_infer" diff --git a/tools/rknpu2/config/ppocrv3_det.yaml b/tools/rknpu2/config/ppocrv3_det.yaml new file mode 100644 index 0000000000..2897c5f74b --- /dev/null +++ b/tools/rknpu2/config/ppocrv3_det.yaml @@ -0,0 +1,15 @@ +mean: + - + - 123.675 + - 116.28 + - 103.53 +std: + - + - 58.395 + - 57.12 + - 57.375 +model_path: ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx +outputs_nodes: +do_quantization: False +dataset: +output_folder: "./ch_PP-OCRv3_det_infer" diff --git a/tools/rknpu2/config/ppocrv3_rec.yaml b/tools/rknpu2/config/ppocrv3_rec.yaml new file mode 100644 index 
0000000000..8a22a39a2e --- /dev/null +++ b/tools/rknpu2/config/ppocrv3_rec.yaml @@ -0,0 +1,15 @@ +mean: + - + - 127.5 + - 127.5 + - 127.5 +std: + - + - 127.5 + - 127.5 + - 127.5 +model_path: ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx +outputs_nodes: +do_quantization: False +dataset: +output_folder: "./ch_PP-OCRv3_rec_infer" From 855d36761750597f61d9a22424f874be7a516aef Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Tue, 21 Feb 2023 19:42:26 +0800 Subject: [PATCH 3/9] add config --- tools/rknpu2/export.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/rknpu2/export.py b/tools/rknpu2/export.py index c42a1eade7..a94b348859 100644 --- a/tools/rknpu2/export.py +++ b/tools/rknpu2/export.py @@ -65,7 +65,10 @@ def get_config(): if not os.path.exists(yaml_config["output_folder"]): os.mkdir(yaml_config["output_folder"]) - model_base_name = os.path.basename(yaml_config["model_path"]).split(".")[0] + name_list = os.path.basename(yaml_config["model_path"]).split(".") + model_base_name = "" + for name in name_list[0:-1]: + model_base_name += name model_device_name = config.target_platform.lower() if yaml_config["do_quantization"]: model_save_name = model_base_name + "_" + model_device_name + "_quantized" + ".rknn" From 74bc6453e118ac01cde82f772380a1281faeb9a2 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Tue, 21 Feb 2023 20:09:50 +0800 Subject: [PATCH 4/9] detele unuseful --- .../detection/paddledetection/jetson/README.md | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 examples/vision/detection/paddledetection/jetson/README.md diff --git a/examples/vision/detection/paddledetection/jetson/README.md b/examples/vision/detection/paddledetection/jetson/README.md deleted file mode 100644 index 0c6e0dd3d5..0000000000 --- a/examples/vision/detection/paddledetection/jetson/README.md +++ /dev/null @@ -1,18 +0,0 @@ -## Paddle Inference模型导出 - -```bash -git clone https://github.com/PaddlePaddle/PaddleDetection.git 
-python tools/export_model.py -c configs/solov2/solov2_r50_fpn_1x_coco.yml --output_dir=./solov2_r50_fpn_1x_coco \ - -o weights=https://paddledet.bj.bcebos.com/models/solov2_r50_fpn_1x_coco.pdparams - -``` - -## ONNX模型导出 - -```bash -paddle2onnx --model_dir solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco \ - --model_filename model.pdmodel \ - --params_filename model.pdiparams \ - --save_file solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco.onnx \ - --enable_dev_version True -``` From 878276028be998c0e67aa8532eb55d29c82bd388 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Tue, 21 Feb 2023 20:11:42 +0800 Subject: [PATCH 5/9] update useful results --- fastdeploy/vision/visualize/ocr.cc | 12 ++++--- fastdeploy/vision/visualize/visualize.h | 45 +++++++++++++------------ 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/fastdeploy/vision/visualize/ocr.cc b/fastdeploy/vision/visualize/ocr.cc index 4946e08e72..5d0bb9e1bf 100644 --- a/fastdeploy/vision/visualize/ocr.cc +++ b/fastdeploy/vision/visualize/ocr.cc @@ -17,10 +17,14 @@ namespace fastdeploy { namespace vision { -cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { +cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result, + const float score_threshold) { auto vis_im = im.clone(); for (int n = 0; n < ocr_result.boxes.size(); n++) { + if (ocr_result.rec_scores[n] < score_threshold) { + continue; + } cv::Point rook_points[4]; for (int m = 0; m < 4; m++) { @@ -28,7 +32,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { int(ocr_result.boxes[n][m * 2 + 1])); } - const cv::Point *ppt[1] = {rook_points}; + const cv::Point* ppt[1] = {rook_points}; int npt[] = {4}; cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); } @@ -36,7 +40,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { return vis_im; } -cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { +cv::Mat Visualize::VisOcr(const cv::Mat& im, 
const OCRResult& ocr_result) { FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisOcr is deprecated, " "please use fastdeploy::vision:VisOcr function instead." @@ -51,7 +55,7 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { int(ocr_result.boxes[n][m * 2 + 1])); } - const cv::Point *ppt[1] = {rook_points}; + const cv::Point* ppt[1] = {rook_points}; int npt[] = {4}; cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); } diff --git a/fastdeploy/vision/visualize/visualize.h b/fastdeploy/vision/visualize/visualize.h index 4614dc4e1e..f382818c6a 100755 --- a/fastdeploy/vision/visualize/visualize.h +++ b/fastdeploy/vision/visualize/visualize.h @@ -15,8 +15,8 @@ #pragma once #include "fastdeploy/vision/common/result.h" -#include "opencv2/imgproc/imgproc.hpp" #include "fastdeploy/vision/tracking/pptracking/model.h" +#include "opencv2/imgproc/imgproc.hpp" namespace fastdeploy { /** \brief All C++ FastDeploy Vision Models APIs are defined inside this namespace @@ -41,9 +41,10 @@ class FASTDEPLOY_DECL Visualize { bool remove_small_connected_area = false); static cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred, float threshold); - static cv::Mat SwapBackgroundMatting( - const cv::Mat& im, const cv::Mat& background, const MattingResult& result, - bool remove_small_connected_area = false); + static cv::Mat + SwapBackgroundMatting(const cv::Mat& im, const cv::Mat& background, + const MattingResult& result, + bool remove_small_connected_area = false); static cv::Mat SwapBackgroundSegmentation(const cv::Mat& im, const cv::Mat& background, int background_label, @@ -90,9 +91,11 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im, * \param[in] font_size font size * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat VisClassification( - const cv::Mat& im, const ClassifyResult& result, int top_k = 5, - float score_threshold = 0.0f, float font_size = 0.5f); +FASTDEPLOY_DECL cv::Mat 
VisClassification(const cv::Mat& im, + const ClassifyResult& result, + int top_k = 5, + float score_threshold = 0.0f, + float font_size = 0.5f); /** \brief Show the visualized results with custom labels for classification models * * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format @@ -103,10 +106,10 @@ FASTDEPLOY_DECL cv::Mat VisClassification( * \param[in] font_size font size * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat VisClassification( - const cv::Mat& im, const ClassifyResult& result, - const std::vector& labels, int top_k = 5, - float score_threshold = 0.0f, float font_size = 0.5f); +FASTDEPLOY_DECL cv::Mat +VisClassification(const cv::Mat& im, const ClassifyResult& result, + const std::vector& labels, int top_k = 5, + float score_threshold = 0.0f, float font_size = 0.5f); /** \brief Show the visualized results for face detection models * * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format @@ -159,7 +162,8 @@ FASTDEPLOY_DECL cv::Mat VisMatting(const cv::Mat& im, * \param[in] result the result produced by model * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result); +FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result, + const float score_threshold = 0); FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results, float score_threshold = 0.0f, @@ -172,10 +176,10 @@ FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results, * \param[in] remove_small_connected_area if remove_small_connected_area==true, the visualized result will not include the small connected areas * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im, - const cv::Mat& background, - const MattingResult& result, - bool remove_small_connected_area = false); 
+FASTDEPLOY_DECL cv::Mat +SwapBackground(const cv::Mat& im, const cv::Mat& background, + const MattingResult& result, + bool remove_small_connected_area = false); /** \brief Swap the image background with SegmentationResult * * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format @@ -196,12 +200,11 @@ FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im, * \param[in] conf_threshold threshold for result scores, the result will not be shown if the score is less than conf_threshold * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat VisKeypointDetection(const cv::Mat& im, - const KeyPointDetectionResult& results, - float conf_threshold = 0.5f); +FASTDEPLOY_DECL cv::Mat +VisKeypointDetection(const cv::Mat& im, const KeyPointDetectionResult& results, + float conf_threshold = 0.5f); FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im, - const HeadPoseResult& result, - int size = 50, + const HeadPoseResult& result, int size = 50, int line_size = 1); } // namespace vision From 8e1be2312d5dddd6b3b568d8f9b7e0902413d38a Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 22 Feb 2023 10:58:37 +0800 Subject: [PATCH 6/9] Repair note --- fastdeploy/vision/ocr/ppocr/det_preprocessor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h index d694dfdd00..9ad09581e3 100644 --- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h @@ -59,7 +59,7 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor { /// This function will disable hwc2chw in preprocessing step. 
void DisablePermute() { disable_normalize_ = true; } - /// Set cls_image_shape for the classification preprocess + /// Set det_image_shape for the classification preprocess void SetDetImageShape(const std::vector& det_image_shape) { det_image_shape_ = det_image_shape; } From 67f24be21d98bed90d41889b9ebccf8aa6475768 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 22 Feb 2023 11:06:14 +0800 Subject: [PATCH 7/9] Repair note --- fastdeploy/vision/ocr/ppocr/det_preprocessor.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h index 9ad09581e3..c24c3d1603 100644 --- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h @@ -59,7 +59,9 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor { /// This function will disable hwc2chw in preprocessing step. void DisablePermute() { disable_normalize_ = true; } - /// Set det_image_shape for the classification preprocess + /// Set det_image_shape for the detection preprocess. + /// This api is usually used when you retrain the model. + /// Generally, you do not need to use it. 
void SetDetImageShape(const std::vector& det_image_shape) { det_image_shape_ = det_image_shape; } From c42f9b93867d0e236faeeedf9c639bb814c92d87 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Sat, 25 Feb 2023 20:57:51 +0800 Subject: [PATCH 8/9] fixed bugs --- fastdeploy/vision/ocr/ppocr/det_preprocessor.cc | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc index 39917c51ff..4937008198 100644 --- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc +++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc @@ -20,7 +20,8 @@ namespace fastdeploy { namespace vision { namespace ocr { -std::array OcrDetectorGetInfo(FDMat* img, int max_size_len) { +std::array DBDetectorPreprocessor::OcrDetectorGetInfo( + FDMat* img, int max_size_len) { int w = img->Width(); int h = img->Height(); if (static_shape_infer_) { @@ -57,17 +58,7 @@ DBDetectorPreprocessor::DBDetectorPreprocessor() { std::vector mean = {0.485f, 0.456f, 0.406f}; std::vector std = {0.229f, 0.224f, 0.225f}; bool is_scale = true; - if (!disable_normalize_ && !disable_permute_) { - NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_); - } else { - if (!disable_normalize_) { - Normalize::Run(mat, mean_, scale_, is_scale_); - } - if (!disable_permute_) { - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - } - } + normalize_permute_op_ = std::make_shared(mean, std, is_scale); } @@ -100,6 +91,7 @@ bool DBDetectorPreprocessor::Apply(FDMatBatch* image_batch, ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3], max_resize_w, max_resize_h); } + if (!disable_normalize_ && !disable_permute_) { (*normalize_permute_op_)(image_batch); } From 23aed21c608f0e7b5e85e76fbf800f7084594193 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Mon, 27 Feb 2023 10:55:53 +0800 Subject: [PATCH 9/9] update --- fastdeploy/vision/ocr/ppocr/det_preprocessor.cc | 1 - 
fastdeploy/vision/ocr/ppocr/det_preprocessor.h | 3 --- 2 files changed, 4 deletions(-) diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc index 4937008198..06f47b6ef9 100644 --- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc +++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc @@ -58,7 +58,6 @@ DBDetectorPreprocessor::DBDetectorPreprocessor() { std::vector mean = {0.485f, 0.456f, 0.406f}; std::vector std = {0.229f, 0.224f, 0.225f}; bool is_scale = true; - normalize_permute_op_ = std::make_shared(mean, std, is_scale); } diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h index 819b123fa2..32ef80011d 100644 --- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h @@ -94,9 +94,6 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor : public ProcessorManager { std::shared_ptr resize_op_; std::shared_ptr pad_op_; std::shared_ptr normalize_permute_op_; - std::vector mean_ = {0.485f, 0.456f, 0.406f}; - std::vector scale_ = {0.229f, 0.224f, 0.225f}; - bool is_scale_ = true; std::vector det_image_shape_ = {3, 960, 960}; bool static_shape_infer_ = false; std::array OcrDetectorGetInfo(FDMat* img, int max_size_len);