diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e45e0c9520..f98d172a63 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -30,7 +30,7 @@ jobs: - name: Check docstring coverage run: | pip install interrogate - interrogate -v --ignore-init-method --ignore-module --ignore-private --ignore-nested-functions --ignore-nested-classes --fail-under 95 mmdeploy + interrogate -v --ignore-init-method --ignore-module --ignore-private --ignore-nested-functions --ignore-nested-classes --fail-under 80 mmdeploy - name: Check pylint score run: | pip install pylint diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1540e469de..432a9fc627 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,11 +8,11 @@ repos: hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-yapf - rev: v0.30.0 + rev: v0.32.0 hooks: - id: yapf - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.1.0 + rev: v4.1.0 hooks: - id: trailing-whitespace - id: check-yaml @@ -42,7 +42,7 @@ repos: args: ["--skip=third_party/*,*.proto"] - repo: https://github.com/myint/docformatter - rev: v1.3.1 + rev: v1.4 hooks: - id: docformatter args: ["--in-place", "--wrap-descriptions", "79"] diff --git a/CMakeLists.txt b/CMakeLists.txt index c5b93d4a30..004b94d609 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,13 @@ if (MMDEPLOY_BUILD_SDK) FILE MMDeployTargets.cmake DESTINATION lib/cmake/MMDeploy) + # append backend deps + mmdeploy_add_deps(trt BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS TENSORRT CUDNN) + mmdeploy_add_deps(ort BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS ONNXRUNTIME) + mmdeploy_add_deps(ncnn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS ncnn) + mmdeploy_add_deps(openvino BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS InferenceEngine) + mmdeploy_add_deps(pplnn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS pplnn) + include(CMakePackageConfigHelpers) # generate the config file that is includes the exports configure_package_config_file(${CMAKE_SOURCE_DIR}/cmake/MMDeployConfig.cmake.in @@ -104,6 +111,10 @@ if (MMDEPLOY_BUILD_SDK) ${CMAKE_CURRENT_SOURCE_DIR}/cmake/loader.cpp.in DESTINATION lib/cmake/MMDeploy ) + install(DIRECTORY + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules + DESTINATION lib/cmake/MMDeploy + ) install(DIRECTORY ${CMAKE_SOURCE_DIR}/demo/csrc/ DESTINATION example) endif () diff --git a/cmake/MMDeploy.cmake b/cmake/MMDeploy.cmake index 086b45681f..4c67e8f5ac 100644 --- a/cmake/MMDeploy.cmake +++ b/cmake/MMDeploy.cmake @@ -149,3 +149,14 @@ function (mmdeploy_load_dynamic NAME) -Wl,--as-needed) endif () endfunction () + +macro(mmdeploy_add_deps backend) + set(multiValueArgs BACKENDS DEPS) + cmake_parse_arguments(INFO "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(has_backend OFF) + if (${backend} IN_LIST INFO_BACKENDS) + foreach(pkg IN LISTS INFO_DEPS) + set(${pkg}_DEPENDENCY "find_package(${pkg} REQUIRED)") + endforeach() + endif() +endmacro() diff --git a/cmake/MMDeployConfig.cmake.in b/cmake/MMDeployConfig.cmake.in index 4bd05489e4..3bf75f7e65 100644 --- a/cmake/MMDeployConfig.cmake.in +++ b/cmake/MMDeployConfig.cmake.in @@ -12,12 +12,28 @@ set(MMDEPLOY_BUILD_SHARED @BUILD_SHARED_LIBS@) if (NOT MMDEPLOY_BUILD_SHARED) if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES) + find_package(CUDA REQUIRED) + if(MSVC) + set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc.exe) + else() + set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc) + endif() set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) enable_language(CUDA) 
find_package(pplcv REQUIRED) endif () endif () +set(MMDEPLOY_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/modules") +list(APPEND CMAKE_MODULE_PATH ${MMDEPLOY_MODULE_PATH}) +@TENSORRT_DEPENDENCY@ +@CUDNN_DEPENDENCY@ +@ONNXRUNTIME_DEPENDENCY@ +@ncnn_DEPENDENCY@ +@InferenceEngine_DEPENDENCY@ +@pplnn_DEPENDENCY@ +list(REMOVE_ITEM CMAKE_MODULE_PATH ${MMDEPLOY_MODULE_PATH}) + find_package(spdlog REQUIRED) find_package(OpenCV REQUIRED) diff --git a/cmake/modules/FindCUDNN.cmake b/cmake/modules/FindCUDNN.cmake new file mode 100644 index 0000000000..3f3f9b893a --- /dev/null +++ b/cmake/modules/FindCUDNN.cmake @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +if (NOT DEFINED CUDNN_DIR) + set(CUDNN_DIR $ENV{CUDNN_DIR}) +endif () + +find_path( + CUDNN_INCLUDE_DIR cudnn.h + HINTS ${CUDNN_DIR} ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES include) + +find_library( + CUDNN_LIBRARY_CUDNN_PATH cudnn + HINTS ${CUDNN_DIR} ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES lib lib64 lib/x64) + +if (NOT (CUDNN_INCLUDE_DIR AND CUDNN_LIBRARY_CUDNN_PATH)) + message(FATAL_ERROR "Couldn't find cuDNN in CUDNN_DIR: ${CUDNN_DIR}, " + "or in CUDA_TOOLKIT_ROOT_DIR: ${CUDA_TOOLKIT_ROOT_DIR}, " + "please check if the path is correct.") +endif() + +add_library(cudnn SHARED IMPORTED) +set_property(TARGET cudnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +if (MSVC) + set_target_properties(cudnn PROPERTIES + IMPORTED_IMPLIB_RELEASE ${CUDNN_LIBRARY_CUDNN_PATH} + INTERFACE_INCLUDE_DIRECTORIES ${CUDNN_INCLUDE_DIR} + ) + +else() + set_target_properties(cudnn PROPERTIES + IMPORTED_LOCATION_RELEASE ${CUDNN_LIBRARY_CUDNN_PATH} + INTERFACE_INCLUDE_DIRECTORIES ${CUDNN_INCLUDE_DIR} + ) +endif() diff --git a/cmake/modules/FindONNXRUNTIME.cmake b/cmake/modules/FindONNXRUNTIME.cmake new file mode 100644 index 0000000000..63ea176595 --- /dev/null +++ b/cmake/modules/FindONNXRUNTIME.cmake @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +if (NOT DEFINED ONNXRUNTIME_DIR) + set(ONNXRUNTIME_DIR $ENV{ONNXRUNTIME_DIR}) +endif () +if (NOT ONNXRUNTIME_DIR) + message(FATAL_ERROR "Please set ONNXRUNTIME_DIR with cmake -D option.") +endif() + +find_path( + ONNXRUNTIME_INCLUDE_DIR onnxruntime_cxx_api.h + HINTS ${ONNXRUNTIME_DIR} + PATH_SUFFIXES include) +find_library( + ONNXRUNTIME_LIBRARY_ONNXRUNTIME_PATH onnxruntime + HINTS ${ONNXRUNTIME_DIR} + PATH_SUFFIXES lib lib64 lib/x64) +if (NOT (ONNXRUNTIME_INCLUDE_DIR AND ONNXRUNTIME_LIBRARY_ONNXRUNTIME_PATH)) + message(FATAL_ERROR "Couldn't find onnxruntime in ONNXRUNTIME_DIR: " + "${ONNXRUNTIME_DIR}, please check if the path is correct.") +endif() + +add_library(onnxruntime SHARED IMPORTED) +set_property(TARGET onnxruntime APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +if (MSVC) + set_target_properties(onnxruntime PROPERTIES + IMPORTED_IMPLIB_RELEASE ${ONNXRUNTIME_LIBRARY_ONNXRUNTIME_PATH} + INTERFACE_INCLUDE_DIRECTORIES ${ONNXRUNTIME_INCLUDE_DIR} + ) + +else() + set_target_properties(onnxruntime PROPERTIES + IMPORTED_LOCATION_RELEASE ${ONNXRUNTIME_LIBRARY_ONNXRUNTIME_PATH} + INTERFACE_INCLUDE_DIRECTORIES ${ONNXRUNTIME_INCLUDE_DIR} + ) +endif() diff --git a/cmake/modules/FindTENSORRT.cmake b/cmake/modules/FindTENSORRT.cmake new file mode 100644 index 0000000000..0786413e79 --- /dev/null +++ b/cmake/modules/FindTENSORRT.cmake @@ -0,0 +1,49 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
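With the dependency stubs baked into MMDeployConfig.cmake and the Find modules installed under lib/cmake/MMDeploy/modules, a downstream project can pick up the SDK and its backend dependencies with a single find_package() call. A minimal consumer sketch follows; the exported target name "mmdeploy" and the MMDeploy_DIR hint are assumptions for illustration, not part of this patch:

# Hypothetical downstream CMakeLists.txt (sketch only).
cmake_minimum_required(VERSION 3.14)
project(mmdeploy_consumer)

# Configure with -DMMDeploy_DIR=<install-prefix>/lib/cmake/MMDeploy.
# MMDeployConfig.cmake re-runs the backend find_package() calls recorded by
# mmdeploy_add_deps(), temporarily adding the installed modules/ directory to
# CMAKE_MODULE_PATH so that FindTENSORRT/FindCUDNN/FindONNXRUNTIME are found.
find_package(MMDeploy REQUIRED)

add_executable(demo main.cpp)
target_link_libraries(demo PRIVATE mmdeploy)  # exported target name is an assumption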
+ +if (NOT DEFINED TENSORRT_DIR) + set(TENSORRT_DIR $ENV{TENSORRT_DIR}) +endif () +if (NOT TENSORRT_DIR) + message(FATAL_ERROR "Please set TENSORRT_DIR with cmake -D option.") +endif() + +find_path( + TENSORRT_INCLUDE_DIR NvInfer.h + HINTS ${TENSORRT_DIR} + PATH_SUFFIXES include) + +if (NOT TENSORRT_INCLUDE_DIR) + message(FATAL_ERROR "Cannot find TensorRT header NvInfer.h, " + "please check if the path is correct") +endif () + +set(__TENSORRT_LIB_COMPONENTS nvinfer;nvinfer_plugin) +foreach(__component ${__TENSORRT_LIB_COMPONENTS}) + find_library( + __component_path ${__component} + HINTS ${TENSORRT_DIR} + PATH_SUFFIXES lib lib64 lib/x64) + if (NOT __component_path) + message(FATAL_ERROR "Cannot find TensorRT lib ${__component}, " + "please check if the path is correct") + endif() + + add_library(${__component} SHARED IMPORTED) + set_property(TARGET ${__component} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) + if (MSVC) + set_target_properties( + ${__component} PROPERTIES + IMPORTED_IMPLIB_RELEASE ${__component_path} + INTERFACE_INCLUDE_DIRECTORIES ${TENSORRT_INCLUDE_DIR} + ) + else() + set_target_properties( + ${__component} PROPERTIES + IMPORTED_LOCATION_RELEASE ${__component_path} + INTERFACE_INCLUDE_DIRECTORIES ${TENSORRT_INCLUDE_DIR} + ) + endif() + unset(__component_path CACHE) +endforeach() + +set(TENSORRT_LIBS ${__TENSORRT_LIB_COMPONENTS}) diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake index 6bfd99e969..af9f6e226b 100644 --- a/cmake/tensorrt.cmake +++ b/cmake/tensorrt.cmake @@ -1,5 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. include(${CMAKE_SOURCE_DIR}/cmake/cuda.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/modules/FindTENSORRT.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/modules/FindCUDNN.cmake) find_path( TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_DIR} ${CUDA_TOOLKIT_ROOT_DIR} diff --git a/configs/_base_/backends/torchscript.py b/configs/_base_/backends/torchscript.py new file mode 100644 index 0000000000..754fe488ac --- /dev/null +++ b/configs/_base_/backends/torchscript.py @@ -0,0 +1 @@ +backend_config = dict(type='torchscript') diff --git a/configs/_base_/torchscript_config.py b/configs/_base_/torchscript_config.py new file mode 100644 index 0000000000..b16a2e871d --- /dev/null +++ b/configs/_base_/torchscript_config.py @@ -0,0 +1,6 @@ +ir_config = dict( + type='torchscript', + save_file='end2end.pt', + input_names=['input'], + output_names=['output'], + input_shape=None) diff --git a/configs/mmcls/classification_torchscript.py b/configs/mmcls/classification_torchscript.py new file mode 100644 index 0000000000..559fd25c38 --- /dev/null +++ b/configs/mmcls/classification_torchscript.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/torchscript_config.py', '../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmcls', task='Classification') diff --git a/configs/mmdet/_base_/base_instance-seg_torchscript.py b/configs/mmdet/_base_/base_instance-seg_torchscript.py new file mode 100644 index 0000000000..68eee07e72 --- /dev/null +++ b/configs/mmdet/_base_/base_instance-seg_torchscript.py @@ -0,0 +1,4 @@ +_base_ = ['./base_torchscript.py'] + +ir_config = dict(output_names=['dets', 'labels', 'masks']) +codebase_config = dict(post_processing=dict(export_postprocess_mask=False)) diff --git a/configs/mmdet/_base_/base_torchscript.py b/configs/mmdet/_base_/base_torchscript.py new file mode 100644 index 0000000000..7e0ecc8ae5 --- /dev/null +++ b/configs/mmdet/_base_/base_torchscript.py @@ -0,0 +1,16 @@ +_base_ = 
['../../_base_/torchscript_config.py'] + +ir_config = dict(output_names=['dets', 'labels']) +codebase_config = dict( + type='mmdet', + task='ObjectDetection', + model_type='end2end', + post_processing=dict( + score_threshold=0.05, + confidence_threshold=0.005, # for YOLOv3 + iou_threshold=0.5, + max_output_boxes_per_class=200, + pre_top_k=5000, + keep_top_k=100, + background_label_id=-1, + )) diff --git a/configs/mmdet/detection/detection_torchscript.py b/configs/mmdet/detection/detection_torchscript.py new file mode 100644 index 0000000000..69bfbd9c7f --- /dev/null +++ b/configs/mmdet/detection/detection_torchscript.py @@ -0,0 +1,3 @@ +_base_ = [ + '../_base_/base_torchscript.py', '../../_base_/backends/torchscript.py' +] diff --git a/configs/mmdet/instance-seg/instance-seg_torchscript.py b/configs/mmdet/instance-seg/instance-seg_torchscript.py new file mode 100644 index 0000000000..ba8ad7e041 --- /dev/null +++ b/configs/mmdet/instance-seg/instance-seg_torchscript.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/base_instance-seg_torchscript.py', + '../../_base_/backends/torchscript.py' +] diff --git a/configs/mmdet3d/voxel-detection/voxel-detection_dynamic.py b/configs/mmdet3d/voxel-detection/voxel-detection_dynamic.py new file mode 100644 index 0000000000..1a2402e03a --- /dev/null +++ b/configs/mmdet3d/voxel-detection/voxel-detection_dynamic.py @@ -0,0 +1,15 @@ +_base_ = ['./voxel-detection_static.py'] + +onnx_config = dict( + dynamic_axes={ + 'voxels': { + 0: 'voxels_num', + }, + 'num_points': { + 0: 'voxels_num', + }, + 'coors': { + 0: 'voxels_num', + } + }, + input_shape=None) diff --git a/configs/mmdet3d/voxel-detection/voxel-detection_onnxruntime_dynamic.py b/configs/mmdet3d/voxel-detection/voxel-detection_onnxruntime_dynamic.py new file mode 100644 index 0000000000..705d2c32e7 --- /dev/null +++ b/configs/mmdet3d/voxel-detection/voxel-detection_onnxruntime_dynamic.py @@ -0,0 +1,3 @@ +_base_ = [ + './voxel-detection_dynamic.py', '../../_base_/backends/onnxruntime.py' +] diff --git a/configs/mmdet3d/voxel-detection/voxel-detection_openvino_dynamic_kitti.py b/configs/mmdet3d/voxel-detection/voxel-detection_openvino_dynamic_kitti.py new file mode 100644 index 0000000000..2cfc965763 --- /dev/null +++ b/configs/mmdet3d/voxel-detection/voxel-detection_openvino_dynamic_kitti.py @@ -0,0 +1,9 @@ +_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/openvino.py'] + +onnx_config = dict(input_shape=None) + +backend_config = dict(model_inputs=[ + dict( + opt_shapes=dict( + voxels=[5000, 32, 4], num_points=[5000], coors=[5000, 4])) +]) diff --git a/configs/mmdet3d/voxel-detection/voxel-detection_openvino_dynamic_nus.py b/configs/mmdet3d/voxel-detection/voxel-detection_openvino_dynamic_nus.py new file mode 100644 index 0000000000..70ef925b6e --- /dev/null +++ b/configs/mmdet3d/voxel-detection/voxel-detection_openvino_dynamic_nus.py @@ -0,0 +1,9 @@ +_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/openvino.py'] + +onnx_config = dict(input_shape=None) + +backend_config = dict(model_inputs=[ + dict( + opt_shapes=dict( + voxels=[20000, 20, 5], num_points=[20000], coors=[20000, 4])) +]) diff --git a/configs/mmdet3d/voxel-detection/voxel-detection_static.py b/configs/mmdet3d/voxel-detection/voxel-detection_static.py new file mode 100644 index 0000000000..406c16513d --- /dev/null +++ b/configs/mmdet3d/voxel-detection/voxel-detection_static.py @@ -0,0 +1,6 @@ +_base_ = ['../../_base_/onnx_config.py'] +codebase_config = dict( + type='mmdet3d', task='VoxelDetection', 
model_type='end2end') +onnx_config = dict( + input_names=['voxels', 'num_points', 'coors'], + output_names=['scores', 'bbox_preds', 'dir_scores']) diff --git a/configs/mmdet3d/voxel-detection/voxel-detection_tensorrt_dynamic-kitti.py b/configs/mmdet3d/voxel-detection/voxel-detection_tensorrt_dynamic-kitti.py new file mode 100644 index 0000000000..4286e12c40 --- /dev/null +++ b/configs/mmdet3d/voxel-detection/voxel-detection_tensorrt_dynamic-kitti.py @@ -0,0 +1,18 @@ +_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/tensorrt.py'] +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + voxels=dict( + min_shape=[2000, 32, 4], + opt_shape=[5000, 32, 4], + max_shape=[9000, 32, 4]), + num_points=dict( + min_shape=[2000], opt_shape=[5000], max_shape=[9000]), + coors=dict( + min_shape=[2000, 4], + opt_shape=[5000, 4], + max_shape=[9000, 4]), + )) + ]) diff --git a/configs/mmdet3d/voxel-detection/voxel-detection_tensorrt_dynamic-nus.py b/configs/mmdet3d/voxel-detection/voxel-detection_tensorrt_dynamic-nus.py new file mode 100644 index 0000000000..7ab7ba8245 --- /dev/null +++ b/configs/mmdet3d/voxel-detection/voxel-detection_tensorrt_dynamic-nus.py @@ -0,0 +1,18 @@ +_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/tensorrt.py'] +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + voxels=dict( + min_shape=[5000, 20, 5], + opt_shape=[20000, 20, 5], + max_shape=[30000, 20, 5]), + num_points=dict( + min_shape=[5000], opt_shape=[20000], max_shape=[30000]), + coors=dict( + min_shape=[5000, 4], + opt_shape=[20000, 4], + max_shape=[30000, 4]), + )) + ]) diff --git a/configs/mmedit/super-resolution/super-resolution_torchscript.py b/configs/mmedit/super-resolution/super-resolution_torchscript.py new file mode 100644 index 0000000000..8ebef20e34 --- /dev/null +++ b/configs/mmedit/super-resolution/super-resolution_torchscript.py @@ -0,0 +1,7 @@ +_base_ = [ + '../../_base_/torchscript_config.py', + '../../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmedit', task='SuperResolution') diff --git a/configs/mmocr/text-detection/text-detection_torchscript.py b/configs/mmocr/text-detection/text-detection_torchscript.py new file mode 100644 index 0000000000..48a27d44eb --- /dev/null +++ b/configs/mmocr/text-detection/text-detection_torchscript.py @@ -0,0 +1,7 @@ +_base_ = [ + '../../_base_/torchscript_config.py', + '../../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmocr', task='TextDetection') diff --git a/configs/mmocr/text-recognition/text-recognition_torchscript.py b/configs/mmocr/text-recognition/text-recognition_torchscript.py new file mode 100644 index 0000000000..14e9112e49 --- /dev/null +++ b/configs/mmocr/text-recognition/text-recognition_torchscript.py @@ -0,0 +1,7 @@ +_base_ = [ + '../../_base_/torchscript_config.py', + '../../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmocr', task='TextRecognition') diff --git a/configs/mmpose/pose-detection_sdk_static.py b/configs/mmpose/pose-detection_sdk_static.py new file mode 100644 index 0000000000..b93c858044 --- /dev/null +++ b/configs/mmpose/pose-detection_sdk_static.py @@ -0,0 +1,14 @@ +_base_ = ['./pose-detection_static.py', '../_base_/backends/sdk.py'] + +codebase_config = dict(model_type='sdk') + +backend_config = 
dict(pipeline=[ + dict(type='LoadImageFromFile', channel_order='bgr'), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]) +]) diff --git a/configs/mmseg/segmentation_ncnn_static.py b/configs/mmseg/segmentation_ncnn_static.py deleted file mode 100644 index 985542b5bf..0000000000 --- a/configs/mmseg/segmentation_ncnn_static.py +++ /dev/null @@ -1,3 +0,0 @@ -_base_ = ['./segmentation_static.py', '../_base_/backends/ncnn.py'] - -onnx_config = dict(input_shape=None) diff --git a/configs/mmseg/segmentation_onnxruntime_static-1024x2048.py b/configs/mmseg/segmentation_onnxruntime_static-1024x2048.py new file mode 100644 index 0000000000..2587a015f4 --- /dev/null +++ b/configs/mmseg/segmentation_onnxruntime_static-1024x2048.py @@ -0,0 +1,3 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/onnxruntime.py'] + +onnx_config = dict(input_shape=[2048, 1024]) diff --git a/configs/mmseg/segmentation_onnxruntime_static.py b/configs/mmseg/segmentation_onnxruntime_static-512x512.py similarity index 63% rename from configs/mmseg/segmentation_onnxruntime_static.py rename to configs/mmseg/segmentation_onnxruntime_static-512x512.py index 802eb08a4d..03d919d7b0 100644 --- a/configs/mmseg/segmentation_onnxruntime_static.py +++ b/configs/mmseg/segmentation_onnxruntime_static-512x512.py @@ -1,3 +1,3 @@ _base_ = ['./segmentation_static.py', '../_base_/backends/onnxruntime.py'] -onnx_config = dict(input_shape=None) +onnx_config = dict(input_shape=[512, 512]) diff --git a/configs/mmseg/segmentation_openvino_static-1024x2048.py b/configs/mmseg/segmentation_openvino_static-1024x2048.py new file mode 100644 index 0000000000..472e923e61 --- /dev/null +++ b/configs/mmseg/segmentation_openvino_static-1024x2048.py @@ -0,0 +1,4 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/openvino.py'] +onnx_config = dict(input_shape=[2048, 1024]) +backend_config = dict( + model_inputs=[dict(opt_shapes=dict(input=[1, 3, 1024, 2048]))]) diff --git a/configs/mmseg/segmentation_openvino_static-512x512.py b/configs/mmseg/segmentation_openvino_static-512x512.py new file mode 100644 index 0000000000..ef974335e4 --- /dev/null +++ b/configs/mmseg/segmentation_openvino_static-512x512.py @@ -0,0 +1,4 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/openvino.py'] +onnx_config = dict(input_shape=[512, 512]) +backend_config = dict( + model_inputs=[dict(opt_shapes=dict(input=[1, 3, 512, 512]))]) diff --git a/configs/mmseg/segmentation_tensorrt-fp16_static-1024x1024.py b/configs/mmseg/segmentation_tensorrt-fp16_static-1024x1024.py new file mode 100644 index 0000000000..dc887a10d5 --- /dev/null +++ b/configs/mmseg/segmentation_tensorrt-fp16_static-1024x1024.py @@ -0,0 +1,13 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/tensorrt-fp16.py'] + +onnx_config = dict(input_shape=[1024, 1024]) +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 1024, 1024], + opt_shape=[1, 3, 1024, 1024], + max_shape=[1, 3, 1024, 1024]))) + ]) diff --git a/configs/mmseg/segmentation_tensorrt-fp16_static-512x512.py b/configs/mmseg/segmentation_tensorrt-fp16_static-512x512.py new file mode 100644 index 0000000000..f6d0d8bb5f --- /dev/null +++ b/configs/mmseg/segmentation_tensorrt-fp16_static-512x512.py @@ -0,0 +1,13 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/tensorrt-fp16.py'] + +onnx_config = dict(input_shape=[512, 512]) 
+backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 512, 512], + opt_shape=[1, 3, 512, 512], + max_shape=[1, 3, 512, 512]))) + ]) diff --git a/configs/mmseg/segmentation_tensorrt-int8_static-1024x1024.py b/configs/mmseg/segmentation_tensorrt-int8_static-1024x1024.py new file mode 100644 index 0000000000..b68ac61872 --- /dev/null +++ b/configs/mmseg/segmentation_tensorrt-int8_static-1024x1024.py @@ -0,0 +1,13 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/tensorrt-int8.py'] + +onnx_config = dict(input_shape=[1024, 1024]) +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 1024, 1024], + opt_shape=[1, 3, 1024, 1024], + max_shape=[1, 3, 1024, 1024]))) + ]) diff --git a/configs/mmseg/segmentation_tensorrt-int8_static-512x512.py b/configs/mmseg/segmentation_tensorrt-int8_static-512x512.py new file mode 100644 index 0000000000..125c9c1196 --- /dev/null +++ b/configs/mmseg/segmentation_tensorrt-int8_static-512x512.py @@ -0,0 +1,13 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/tensorrt-int8.py'] + +onnx_config = dict(input_shape=[512, 512]) +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 512, 512], + opt_shape=[1, 3, 512, 512], + max_shape=[1, 3, 512, 512]))) + ]) diff --git a/configs/mmseg/segmentation_tensorrt_static-1024x1024.py b/configs/mmseg/segmentation_tensorrt_static-1024x1024.py new file mode 100644 index 0000000000..949eec7579 --- /dev/null +++ b/configs/mmseg/segmentation_tensorrt_static-1024x1024.py @@ -0,0 +1,13 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/tensorrt.py'] + +onnx_config = dict(input_shape=[1024, 1024]) +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 1024, 1024], + opt_shape=[1, 3, 1024, 1024], + max_shape=[1, 3, 1024, 1024]))) + ]) diff --git a/configs/mmseg/segmentation_tensorrt_static-512x512.py b/configs/mmseg/segmentation_tensorrt_static-512x512.py new file mode 100644 index 0000000000..1fa5ef6695 --- /dev/null +++ b/configs/mmseg/segmentation_tensorrt_static-512x512.py @@ -0,0 +1,13 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/tensorrt.py'] + +onnx_config = dict(input_shape=[512, 512]) +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 512, 512], + opt_shape=[1, 3, 512, 512], + max_shape=[1, 3, 512, 512]))) + ]) diff --git a/configs/mmseg/segmentation_torchscript.py b/configs/mmseg/segmentation_torchscript.py new file mode 100644 index 0000000000..665f308ecc --- /dev/null +++ b/configs/mmseg/segmentation_torchscript.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/torchscript_config.py', '../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmseg', task='Segmentation') diff --git a/csrc/apis/c/CMakeLists.txt b/csrc/apis/c/CMakeLists.txt index f1809995bb..5709e0c57a 100644 --- a/csrc/apis/c/CMakeLists.txt +++ b/csrc/apis/c/CMakeLists.txt @@ -5,7 +5,7 @@ project(capis) include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) if ("all" IN_LIST MMDEPLOY_CODEBASES) - set(TASK_LIST "classifier;detector;segmentor;text_detector;text_recognizer;restorer;model") + 
set(TASK_LIST "classifier;detector;segmentor;text_detector;text_recognizer;pose_detector;restorer;model") else () set(TASK_LIST "model") if ("mmcls" IN_LIST MMDEPLOY_CODEBASES) @@ -24,6 +24,9 @@ else () list(APPEND TASK_LIST "text_detector") list(APPEND TASK_LIST "text_recognizer") endif () + if ("mmpose" IN_LIST MMDEPLOY_CODEBASES) + list(APPEND TASK_LIST "pose_detector") + endif () endif () foreach (TASK ${TASK_LIST}) diff --git a/csrc/apis/c/pose_detector.cpp b/csrc/apis/c/pose_detector.cpp new file mode 100644 index 0000000000..6c5ef426ef --- /dev/null +++ b/csrc/apis/c/pose_detector.cpp @@ -0,0 +1,190 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "pose_detector.h" + +#include + +#include "codebase/mmpose/mmpose.h" +#include "core/device.h" +#include "core/graph.h" +#include "core/mat.h" +#include "core/tensor.h" +#include "core/utils/formatter.h" +#include "handle.h" + +using namespace std; +using namespace mmdeploy; + +namespace { + +const Value& config_template() { + // clang-format off + static Value v { + { + "pipeline", { + {"input", {"img_with_boxes"}}, + {"output", {"key_points_unflat"}}, + { + "tasks", { + { + {"name", "flatten"}, + {"type", "Flatten"}, + {"input", {"img_with_boxes"}}, + {"output", {"patch_flat", "patch_index"}}, + }, + { + {"name", "pose-detector"}, + {"type", "Inference"}, + {"params", {{"model", "TBD"},{"batch_size", 1}}}, + {"input", {"patch_flat"}}, + {"output", {"key_points"}} + }, + { + {"name", "unflatten"}, + {"type", "Unflatten"}, + {"input", {"key_points", "patch_index"}}, + {"output", {"key_points_unflat"}}, + } + } + } + } + } + }; + // clang-format on + return v; +} + +template +int mmdeploy_pose_detector_create_impl(ModelType&& m, const char* device_name, int device_id, + mm_handle_t* handle) { + try { + auto value = config_template(); + value["pipeline"]["tasks"][1]["params"]["model"] = std::forward(m); + + auto pose_estimator = std::make_unique(device_name, device_id, std::move(value)); + + *handle = pose_estimator.release(); + return MM_SUCCESS; + + } catch (const std::exception& e) { + MMDEPLOY_ERROR("exception caught: {}", e.what()); + } catch (...) 
{ + MMDEPLOY_ERROR("unknown exception caught"); + } + return MM_E_FAIL; +} + +} // namespace + +int mmdeploy_pose_detector_create(mm_model_t model, const char* device_name, int device_id, + mm_handle_t* handle) { + return mmdeploy_pose_detector_create_impl(*static_cast(model), device_name, device_id, + handle); +} + +int mmdeploy_pose_detector_create_by_path(const char* model_path, const char* device_name, + int device_id, mm_handle_t* handle) { + return mmdeploy_pose_detector_create_impl(model_path, device_name, device_id, handle); +} + +int mmdeploy_pose_detector_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, + mm_pose_detect_t** results) { + return mmdeploy_pose_detector_apply_bbox(handle, mats, mat_count, nullptr, nullptr, results); +} + +int mmdeploy_pose_detector_apply_bbox(mm_handle_t handle, const mm_mat_t* mats, int mat_count, + const mm_rect_t* bboxes, const int* bbox_count, + mm_pose_detect_t** results) { + if (handle == nullptr || mats == nullptr || mat_count == 0 || results == nullptr) { + return MM_E_INVALID_ARG; + } + + try { + auto pose_detector = static_cast(handle); + Value input{Value::kArray}; + auto result_count = 0; + for (int i = 0; i < mat_count; ++i) { + mmdeploy::Mat _mat{mats[i].height, mats[i].width, PixelFormat(mats[i].format), + DataType(mats->type), mats[i].data, Device{"cpu"}}; + + Value img_with_boxes; + if (bboxes && bbox_count) { + for (int j = 0; j < bbox_count[i]; ++j) { + Value obj; + obj["ori_img"] = _mat; + float width = bboxes[j].right - bboxes[j].left + 1; + float height = bboxes[j].bottom - bboxes[j].top + 1; + obj["box"] = {bboxes[j].left, bboxes[j].top, width, height, 1.0}; + obj["rotation"] = 0.f; + img_with_boxes.push_back(obj); + } + bboxes += bbox_count[i]; + result_count += bbox_count[i]; + } else { + // inference whole image + Value obj; + obj["ori_img"] = _mat; + obj["box"] = {0, 0, _mat.width(), _mat.height(), 1.0}; + obj["rotation"] = 0.f; + img_with_boxes.push_back(obj); + result_count += 1; + } + input.front().push_back(img_with_boxes); + } + + auto output = pose_detector->Run(std::move(input)).value().front(); + + auto pose_outputs = from_value>>(output); + + std::vector counts; + if (bboxes && bbox_count) { + counts = std::vector(bbox_count, bbox_count + mat_count); + } else { + counts.resize(mat_count, 1); + } + std::vector offsets{0}; + std::partial_sum(begin(counts), end(counts), back_inserter(offsets)); + + auto deleter = [&](mm_pose_detect_t* p) { + mmdeploy_pose_detector_release_result(p, offsets.back()); + }; + + std::unique_ptr _results( + new mm_pose_detect_t[result_count]{}, deleter); + + for (int i = 0; i < mat_count; ++i) { + auto& pose_output = pose_outputs[i]; + for (int j = 0; j < pose_output.size(); ++j) { + auto& res = _results[offsets[i] + j]; + auto& box_result = pose_output[j]; + int sz = box_result.key_points.size(); + + res.point = new mm_pointf_t[sz]; + res.score = new float[sz]; + res.length = sz; + for (int k = 0; k < sz; k++) { + res.point[k].x = box_result.key_points[k].bbox[0]; + res.point[k].y = box_result.key_points[k].bbox[1]; + res.score[k] = box_result.key_points[k].score; + } + } + } + *results = _results.release(); + return MM_SUCCESS; + + } catch (const std::exception& e) { + MMDEPLOY_ERROR("exception caught: {}", e.what()); + } catch (...) 
{ + MMDEPLOY_ERROR("unknown exception caught"); + } + return MM_E_FAIL; +} + +void mmdeploy_pose_detector_release_result(mm_pose_detect_t* results, int count) { + for (int i = 0; i < count; ++i) { + delete[] results[i].point; + delete[] results[i].score; + } + delete[] results; +} +void mmdeploy_pose_detector_destroy(mm_handle_t handle) { delete static_cast(handle); } diff --git a/csrc/apis/c/pose_detector.h b/csrc/apis/c/pose_detector.h new file mode 100644 index 0000000000..16e3e23d26 --- /dev/null +++ b/csrc/apis/c/pose_detector.h @@ -0,0 +1,97 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +/** + * @file pose_detector.h + * @brief Interface to MMPose task + */ + +#ifndef MMDEPLOY_SRC_APIS_C_POSE_DETECTOR_H_ +#define MMDEPLOY_SRC_APIS_C_POSE_DETECTOR_H_ + +#include "common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct mm_pose_detect_t { + mm_pointf_t* point; ///< keypoint + float* score; ///< keypoint score + int length; ///< number of keypoint +} mm_pose_detect_t; + +/** + * @brief Create a pose detector instance + * @param[in] model an instance of mmpose model created by + * \ref mmdeploy_model_create_by_path or \ref mmdeploy_model_create in \ref model.h + * @param[in] device_name name of device, such as "cpu", "cuda", etc. + * @param[in] device_id id of device. + * @param[out] handle handle of the created pose detector, which must be destroyed + * by \ref mmdeploy_pose_detector_destroy + * @return status code of the operation + */ +MMDEPLOY_API int mmdeploy_pose_detector_create(mm_model_t model, const char* device_name, + int device_id, mm_handle_t* handle); + +/** + * @brief Create a pose detector instance + * @param[in] model_path path to pose detection model + * @param[in] device_name name of device, such as "cpu", "cuda", etc. + * @param[in] device_id id of device. 
+ * @param[out] handle handle of the created pose detector, which must be destroyed + * by \ref mmdeploy_pose_detector_destroy + * @return status code of the operation + */ +MMDEPLOY_API int mmdeploy_pose_detector_create_by_path(const char* model_path, + const char* device_name, int device_id, + mm_handle_t* handle); + +/** + * @brief Apply pose detector to a batch of images with full image roi + * @param[in] handle pose detector's handle created by \ref + * mmdeploy_pose_detector_create_by_path + * @param[in] images a batch of images + * @param[in] count number of images in the batch + * @param[out] results a linear buffer contains the pose result, must be release + * by \ref mmdeploy_pose_detector_release_result + * @return status code of the operation + */ +MMDEPLOY_API int mmdeploy_pose_detector_apply(mm_handle_t handle, const mm_mat_t* mats, + int mat_count, mm_pose_detect_t** results); + +/** + * @brief Apply pose detector to a batch of images supplied with bboxes(roi) + * @param[in] handle pose detector's handle created by \ref + * mmdeploy_pose_detector_create_by_path + * @param[in] images a batch of images + * @param[in] image_count number of images in the batch + * @param[in] bboxes bounding boxes(roi) detected by mmdet + * @param[in] bbox_count number of bboxes of each \p images, must be same length as \p images + * @param[out] results a linear buffer contains the pose result, which has the same length as \p + * bboxes, must be release by \ref mmdeploy_pose_detector_release_result + * @return status code of the operation + */ +MMDEPLOY_API int mmdeploy_pose_detector_apply_bbox(mm_handle_t handle, const mm_mat_t* mats, + int mat_count, const mm_rect_t* bboxes, + const int* bbox_count, + mm_pose_detect_t** results); + +/** @brief Release result buffer returned by \ref mmdeploy_pose_detector_apply or \ref + * mmdeploy_pose_detector_apply_bbox + * @param[in] results result buffer by pose detector + * @param[in] count length of \p result + */ +MMDEPLOY_API void mmdeploy_pose_detector_release_result(mm_pose_detect_t* results, int count); + +/** + * @brief destroy pose_detector + * @param[in] handle handle of pose_detector created by \ref + * mmdeploy_pose_detector_create_by_path or \ref mmdeploy_pose_detector_create + */ +MMDEPLOY_API void mmdeploy_pose_detector_destroy(mm_handle_t handle); + +#ifdef __cplusplus +} +#endif + +#endif // MMDEPLOY_SRC_APIS_C_POSE_DETECTOR_H_ diff --git a/csrc/apis/python/CMakeLists.txt b/csrc/apis/python/CMakeLists.txt index 0730268f07..ce86ed2796 100644 --- a/csrc/apis/python/CMakeLists.txt +++ b/csrc/apis/python/CMakeLists.txt @@ -2,6 +2,10 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_python) +if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES) + include(${CMAKE_SOURCE_DIR}/cmake/cuda.cmake) +endif() + if (NOT TARGET pybind11) add_subdirectory(${CMAKE_SOURCE_DIR}/third_party/pybind11 pybind11) endif () @@ -20,6 +24,7 @@ mmdeploy_python_add_module(segmentor) mmdeploy_python_add_module(text_detector) mmdeploy_python_add_module(text_recognizer) mmdeploy_python_add_module(restorer) +mmdeploy_python_add_module(pose_detector) pybind11_add_module(${PROJECT_NAME} ${MMDEPLOY_PYTHON_SRCS}) diff --git a/csrc/apis/python/pose_detector.cpp b/csrc/apis/python/pose_detector.cpp new file mode 100644 index 0000000000..36e024f1a1 --- /dev/null +++ b/csrc/apis/python/pose_detector.cpp @@ -0,0 +1,83 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
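Taken together, pose_detector.h above defines the whole C workflow: create a handle, wrap images, optionally pass per-image boxes, read back keypoints, release. A minimal usage sketch for one image and one person box; the mm_mat_t field order and the MM_BGR/MM_INT8 enums are assumed from the existing common.h and demo code, not from this diff:

// Usage sketch (for illustration only; error handling trimmed).
#include <opencv2/imgcodecs.hpp>
#include "pose_detector.h"

int main() {
  mm_handle_t pose_detector{};
  if (mmdeploy_pose_detector_create_by_path("pose_model_dir", "cpu", 0, &pose_detector) !=
      MM_SUCCESS) {
    return -1;
  }
  cv::Mat img = cv::imread("person.jpg");
  // Field order and enum names assumed from common.h / existing demos.
  mm_mat_t mat{img.data, img.rows, img.cols, 3, MM_BGR, MM_INT8};
  mm_rect_t box{50.f, 30.f, 220.f, 380.f};  // left, top, right, bottom of a detected person
  int box_count = 1;
  mm_pose_detect_t *res{};
  if (mmdeploy_pose_detector_apply_bbox(pose_detector, &mat, 1, &box, &box_count, &res) ==
      MM_SUCCESS) {
    for (int k = 0; k < res[0].length; ++k) {
      // res[0].point[k].x / .y and res[0].score[k] hold the k-th keypoint.
    }
    mmdeploy_pose_detector_release_result(res, 1);  // one box -> one result
  }
  mmdeploy_pose_detector_destroy(pose_detector);
  return 0;
}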
+ +#include "pose_detector.h" + +#include "common.h" +#include "core/logger.h" + +namespace mmdeploy { + +class PyPoseDedector { + public: + PyPoseDedector(const char *model_path, const char *device_name, int device_id) { + MMDEPLOY_INFO("{}, {}, {}", model_path, device_name, device_id); + auto status = + mmdeploy_pose_detector_create_by_path(model_path, device_name, device_id, &handle_); + if (status != MM_SUCCESS) { + throw std::runtime_error("failed to create pose_detedtor"); + } + } + py::list Apply(const std::vector &imgs, const std::vector> &_boxes) { + std::vector mats; + std::vector boxes; + mats.reserve(imgs.size()); + for (const auto &img : imgs) { + auto mat = GetMat(img); + mats.push_back(mat); + } + for (const auto &_box : _boxes) { + mm_rect_t box = {_box[0], _box[1], _box[2], _box[3]}; + boxes.push_back(box); + } + mm_pose_detect_t *detection{}; + int num_box = boxes.size(); + auto status = mmdeploy_pose_detector_apply_bbox(handle_, mats.data(), (int)mats.size(), + boxes.data(), &num_box, &detection); + if (status != MM_SUCCESS) { + throw std::runtime_error("failed to apply pose_detector, code: " + std::to_string(status)); + } + auto output = py::list{}; + auto result = detection; + for (int i = 0; i < mats.size(); i++) { + int n_point = result->length; + auto pred = py::array_t({1, n_point, 3}); + auto dst = pred.mutable_data(); + for (int j = 0; j < n_point; j++) { + dst[0] = result->point[j].x; + dst[1] = result->point[j].y; + dst[2] = result->score[j]; + dst += 3; + } + output.append(std::move(pred)); + result++; + } + mmdeploy_pose_detector_release_result(detection, (int)mats.size()); + return output; + } + ~PyPoseDedector() { + mmdeploy_pose_detector_destroy(handle_); + handle_ = {}; + } + + private: + mm_handle_t handle_{}; +}; + +static void register_python_pose_detector(py::module &m) { + py::class_(m, "PoseDetector") + .def(py::init([](const char *model_path, const char *device_name, int device_id) { + return std::make_unique(model_path, device_name, device_id); + })) + .def("__call__", &PyPoseDedector::Apply); +} + +class PythonPoseDetectorRegisterer { + public: + PythonPoseDetectorRegisterer() { + gPythonBindings().emplace("pose_detector", register_python_pose_detector); + } +}; + +static PythonPoseDetectorRegisterer python_pose_detector_registerer; + +} // namespace mmdeploy diff --git a/csrc/backend_ops/CMakeLists.txt b/csrc/backend_ops/CMakeLists.txt index a9eac86ae4..1537bd97fd 100644 --- a/csrc/backend_ops/CMakeLists.txt +++ b/csrc/backend_ops/CMakeLists.txt @@ -30,3 +30,9 @@ if ("ncnn" IN_LIST MMDEPLOY_TARGET_BACKENDS) message(STATUS "Build NCNN custom ops") add_subdirectory(ncnn) endif () + +# build TorchScript ops +if ("torchscript" IN_LIST MMDEPLOY_TARGET_BACKENDS) + message(STATUS "Build torchsciprt custom ops") + add_subdirectory(torchscript) +endif () diff --git a/csrc/backend_ops/common/modulated_deform_conv/common_cuda_helper.cuh b/csrc/backend_ops/common/modulated_deform_conv/common_cuda_helper.cuh new file mode 100644 index 0000000000..02c57c62e6 --- /dev/null +++ b/csrc/backend_ops/common/modulated_deform_conv/common_cuda_helper.cuh @@ -0,0 +1,94 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
+#ifndef COMMON_CUDA_HELPER +#define COMMON_CUDA_HELPER + +#include +#include + +#include + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x) + +#define THREADS_PER_BLOCK 512 + +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +inline int GET_BLOCKS(const int N) { + int optimal_block_num = DIVUP(N, THREADS_PER_BLOCK); + int max_block_num = 4096; + return std::min(optimal_block_num, max_block_num); +} + +#define cudaCheckError() \ + { \ + cudaError_t e = cudaGetLastError(); \ + if (e != cudaSuccess) { \ + printf("Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(e)); \ + exit(0); \ + } \ + } + +/** + * Returns a view of the original tensor with its dimensions permuted. + * + * @param[out] dst pointer to the destination tensor + * @param[in] src pointer to the source tensor + * @param[in] src_size shape of the src tensor + * @param[in] permute The desired ordering of dimensions + * @param[in] src_dim dim of src tensor + * @param[in] stream cuda stream handle + */ +template +void memcpyPermute(scalar_t* dst, const scalar_t* src, int* src_size, int* permute, int src_dim, + cudaStream_t stream = 0); + +template +cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n, int k, const scalar_t* alpha, + const scalar_t* A, int lda, const scalar_t* B, int ldb, + const scalar_t* beta, scalar_t* C, int ldc); + +template +__device__ scalar_t bilinear_interpolate(const scalar_t* input, const int height, const int width, + scalar_t y, scalar_t x) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) return 0; + + if (y <= 0) y = 0; + if (x <= 0) x = 0; + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (scalar_t)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (scalar_t)x_low; + } else { + x_high = x_low + 1; + } + + scalar_t ly = y - y_low; + scalar_t lx = x - x_low; + scalar_t hy = 1. - ly, hx = 1. 
- lx; + // do bilinear interpolation + scalar_t v1 = input[y_low * width + x_low]; + scalar_t v2 = input[y_low * width + x_high]; + scalar_t v3 = input[y_high * width + x_low]; + scalar_t v4 = input[y_high * width + x_high]; + scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + return val; +} + +#endif // COMMON_CUDA_HELPER diff --git a/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cpu.h b/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cpu.h new file mode 100644 index 0000000000..a37e243109 --- /dev/null +++ b/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cpu.h @@ -0,0 +1,82 @@ +#include +#include + +template +T bilinear_interpolate_2d(const T *src, const int64_t src_h, const int64_t src_w, const T h, + const T w) { + if (h <= -1 || src_h <= h || w <= -1 || src_w <= w) { + return 0; + } + + int64_t h_low = floor(h); + int64_t w_low = floor(w); + int64_t h_high = h_low + 1; + int64_t w_high = w_low + 1; + + T lh = h - h_low; + T lw = w - w_low; + T hh = 1 - lh; + T hw = 1 - lw; + + T v1 = 0; + if (h_low >= 0 && w_low >= 0) v1 = src[h_low * src_w + w_low]; + T v2 = 0; + if (h_low >= 0 && w_high <= src_w - 1) v2 = src[h_low * src_w + w_high]; + T v3 = 0; + if (h_high <= src_h - 1 && w_low >= 0) v3 = src[h_high * src_w + w_low]; + T v4 = 0; + if (h_high <= src_h - 1 && w_high <= src_w - 1) v4 = src[h_high * src_w + w_high]; + + T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +// output: (channels * kernel_h * kernel_w, dst_h * dst_w) +template +void deformable_im2col_2d(const T *input, const T *offset, const T *mask, const int64_t src_h, + const int64_t src_w, const int64_t kernel_h, const int64_t kernel_w, + const int64_t pad_h, const int64_t pad_w, const int64_t stride_h, + const int64_t stride_w, const int64_t dilation_h, + const int64_t dilation_w, const int64_t channels, + const int64_t offset_groups, const int64_t dst_h, const int64_t dst_w, + const bool use_mask, T *columns) { + const int64_t workload = channels * dst_h * dst_w; + for (int64_t index = 0; index != workload; ++index) { + const int64_t ow = index % dst_w; + const int64_t oh = (index / dst_w) % dst_h; + const int64_t ic = index / (dst_w * dst_h); + const int64_t oc = ic * kernel_h * kernel_w; + + int64_t c_per_offset_grp = channels / offset_groups; + const int64_t grp_idx = ic / c_per_offset_grp; + + auto columns_ptr = columns + (oc * (dst_h * dst_w) + oh * dst_w + ow); + auto input_ptr = input + ic * (src_h * src_w); + auto offset_ptr = offset + grp_idx * 2 * kernel_h * kernel_w * dst_h * dst_w; + auto mask_ptr = mask; + if (use_mask) { + mask_ptr += grp_idx * kernel_h * kernel_w * dst_h * dst_w; + } + + for (int64_t kh = 0; kh < kernel_h; ++kh) { + for (int64_t kw = 0; kw < kernel_w; ++kw) { + const int64_t mask_idx = kh * kernel_w + kw; + const int64_t offset_idx = 2 * mask_idx; + + T mask_value = 1; + if (use_mask) { + mask_value = mask_ptr[mask_idx * (dst_h * dst_w) + oh * dst_w + ow]; + } + + const T offset_h = offset_ptr[offset_idx * (dst_h * dst_w) + oh * dst_w + ow]; + const T offset_w = offset_ptr[(offset_idx + 1) * (dst_h * dst_w) + oh * dst_w + ow]; + const T ih = (oh * stride_h - pad_h) + kh * dilation_h + offset_h; + const T iw = (ow * stride_w - pad_w) + kw * dilation_w + offset_w; + *columns_ptr = mask_value * bilinear_interpolate_2d(input_ptr, src_h, src_w, ih, iw); + columns_ptr += 
dst_h * dst_w; + } + } + } +} diff --git a/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.hpp b/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cuda.cuh similarity index 99% rename from csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.hpp rename to csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cuda.cuh index 2d78998a4d..3f4b2a55ce 100644 --- a/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.hpp +++ b/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cuda.cuh @@ -68,7 +68,7 @@ #include -#include "common_cuda_helper.hpp" +#include "common_cuda_helper.cuh" template __device__ T dmcn_im2col_bilinear(const T *input, const int data_width, const int height, diff --git a/csrc/backend_ops/onnxruntime/CMakeLists.txt b/csrc/backend_ops/onnxruntime/CMakeLists.txt index 613a60881e..5dfa8176b0 100644 --- a/csrc/backend_ops/onnxruntime/CMakeLists.txt +++ b/csrc/backend_ops/onnxruntime/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_onnxruntime_ops) include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/modules/FindONNXRUNTIME.cmake) # add plugin source file(GLOB_RECURSE ORT_OPS_SRCS *.cpp) @@ -14,9 +15,8 @@ mmdeploy_export(${PROJECT_NAME}_obj) target_include_directories(${PROJECT_NAME}_obj PUBLIC $ $ + $ $) -target_link_directories(${PROJECT_NAME}_obj PUBLIC - ${ONNXRUNTIME_DIR}/lib) target_link_libraries(${PROJECT_NAME}_obj PUBLIC onnxruntime) mmdeploy_add_library(${PROJECT_NAME} SHARED EXCLUDE "") diff --git a/csrc/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp b/csrc/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp index 5561752cd6..3df1217a37 100644 --- a/csrc/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp +++ b/csrc/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp @@ -4,88 +4,11 @@ #include #include +#include "modulated_deform_conv/modulated_deform_conv_cpu.h" #include "ort_utils.h" namespace mmdeploy { -float bilinear_interpolate_2d(const float *src, const int64_t src_h, const int64_t src_w, - const float h, const float w) { - if (h <= -1 || src_h <= h || w <= -1 || src_w <= w) { - return 0; - } - - int64_t h_low = floor(h); - int64_t w_low = floor(w); - int64_t h_high = h_low + 1; - int64_t w_high = w_low + 1; - - float lh = h - h_low; - float lw = w - w_low; - float hh = 1 - lh; - float hw = 1 - lw; - - float v1 = 0; - if (h_low >= 0 && w_low >= 0) v1 = src[h_low * src_w + w_low]; - float v2 = 0; - if (h_low >= 0 && w_high <= src_w - 1) v2 = src[h_low * src_w + w_high]; - float v3 = 0; - if (h_high <= src_h - 1 && w_low >= 0) v3 = src[h_high * src_w + w_low]; - float v4 = 0; - if (h_high <= src_h - 1 && w_high <= src_w - 1) v4 = src[h_high * src_w + w_high]; - - float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -// output: (channels * kernel_h * kernel_w, dst_h * dst_w) -void deformable_im2col_2d(const float *input, const float *offset, const float *mask, - const int64_t src_h, const int64_t src_w, const int64_t kernel_h, - const int64_t kernel_w, const int64_t pad_h, const int64_t pad_w, - const int64_t stride_h, const int64_t stride_w, const int64_t dilation_h, - const int64_t dilation_w, const int64_t channels, - const int64_t offset_groups, const int64_t dst_h, const int64_t dst_w, - const 
bool use_mask, float *columns) { - const int64_t workload = channels * dst_h * dst_w; - for (int64_t index = 0; index != workload; ++index) { - const int64_t ow = index % dst_w; - const int64_t oh = (index / dst_w) % dst_h; - const int64_t ic = index / (dst_w * dst_h); - const int64_t oc = ic * kernel_h * kernel_w; - - int64_t c_per_offset_grp = channels / offset_groups; - const int64_t grp_idx = ic / c_per_offset_grp; - - auto columns_ptr = columns + (oc * (dst_h * dst_w) + oh * dst_w + ow); - auto input_ptr = input + ic * (src_h * src_w); - auto offset_ptr = offset + grp_idx * 2 * kernel_h * kernel_w * dst_h * dst_w; - auto mask_ptr = mask; - if (use_mask) { - mask_ptr += grp_idx * kernel_h * kernel_w * dst_h * dst_w; - } - - for (int64_t kh = 0; kh < kernel_h; ++kh) { - for (int64_t kw = 0; kw < kernel_w; ++kw) { - const int64_t mask_idx = kh * kernel_w + kw; - const int64_t offset_idx = 2 * mask_idx; - - float mask_value = 1; - if (use_mask) { - mask_value = mask_ptr[mask_idx * (dst_h * dst_w) + oh * dst_w + ow]; - } - - const float offset_h = offset_ptr[offset_idx * (dst_h * dst_w) + oh * dst_w + ow]; - const float offset_w = offset_ptr[(offset_idx + 1) * (dst_h * dst_w) + oh * dst_w + ow]; - const float ih = (oh * stride_h - pad_h) + kh * dilation_h + offset_h; - const float iw = (ow * stride_w - pad_w) + kw * dilation_w + offset_w; - *columns_ptr = mask_value * bilinear_interpolate_2d(input_ptr, src_h, src_w, ih, iw); - columns_ptr += dst_h * dst_w; - } - } - } -} - void gemm_ref_fp32(const float *A, const float *B, const float *V, const float *H, const int32_t trans_A, const int32_t trans_B, const int32_t M, const int32_t N, const int32_t K, const float alpha, const float beta, float *Y) { @@ -162,12 +85,12 @@ void deformable_conv2d_ref_fp32(const float *src, const float *offset, const flo for (int64_t b = 0; b < batch; ++b) { for (int64_t g = 0; g < group; ++g) { - deformable_im2col_2d(src + b * src_c * src_h * src_w + g * ic_per_gp * src_h * src_w, - offset + b * offset_group * 2 * kernel_h * kernel_w * dst_h * dst_w, - mask + b * offset_group * kernel_h * kernel_w * dst_h * dst_w, src_h, - src_w, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, - dilation_w, ic_per_gp, offset_group, dst_h, dst_w, mask != nullptr, - columns); + deformable_im2col_2d( + src + b * src_c * src_h * src_w + g * ic_per_gp * src_h * src_w, + offset + b * offset_group * 2 * kernel_h * kernel_w * dst_h * dst_w, + mask + b * offset_group * kernel_h * kernel_w * dst_h * dst_w, src_h, src_w, kernel_h, + kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, ic_per_gp, + offset_group, dst_h, dst_w, mask != nullptr, columns); float *dst_ptr = dst + b * dst_c * dst_h * dst_w + g * oc_per_gp * dst_h * dst_w; if (bias != nullptr) { const float *bias_ptr = bias + g * oc_per_gp; diff --git a/csrc/backend_ops/onnxruntime/roi_align/roi_align.cpp b/csrc/backend_ops/onnxruntime/roi_align/roi_align.cpp deleted file mode 100644 index 78cd13c922..0000000000 --- a/csrc/backend_ops/onnxruntime/roi_align/roi_align.cpp +++ /dev/null @@ -1,255 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
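Both the new shared CPU helper and the CUDA kernel it mirrors compute standard 2-D bilinear interpolation. With the four neighbouring pixels $v_1, v_2, v_3, v_4$ of the sample point and $l_y = y - \lfloor y \rfloor$, $l_x = x - \lfloor x \rfloor$, $h_y = 1 - l_y$, $h_x = 1 - l_x$, the interpolated value is

$v = w_1 v_1 + w_2 v_2 + w_3 v_3 + w_4 v_4, \quad w_1 = h_y h_x,\; w_2 = h_y l_x,\; w_3 = l_y h_x,\; w_4 = l_y l_x,$

which is the same weighting the deleted ONNX Runtime RoiAlign kernel below pre-computes per sampling point.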
-// modified from -// https://github.com/facebookresearch/maskrcnn-benchmark/blob/main/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp -#include "roi_align.h" - -#include "ort_utils.h" - -namespace mmdeploy { -// implementation taken from Caffe2 -struct PreCalc { - int pos1; - int pos2; - int pos3; - int pos4; - float w1; - float w2; - float w3; - float w4; -}; - -void pre_calc_for_bilinear_interpolate(const int height, const int width, const int pooled_height, - const int pooled_width, const int iy_upper, - const int ix_upper, float roi_start_h, float roi_start_w, - float bin_size_h, float bin_size_w, int roi_bin_grid_h, - int roi_bin_grid_w, std::vector &pre_calc) { - int pre_calc_index = 0; - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - for (int iy = 0; iy < iy_upper; iy++) { - const float yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < ix_upper; ix++) { - const float xx = - roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); - - float x = xx; - float y = yy; - // deal with: inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - PreCalc pc; - pc.pos1 = 0; - pc.pos2 = 0; - pc.pos3 = 0; - pc.pos4 = 0; - pc.w1 = 0; - pc.w2 = 0; - pc.w3 = 0; - pc.w4 = 0; - pre_calc[pre_calc_index] = pc; - pre_calc_index += 1; - continue; - } - - if (y <= 0) { - y = 0; - } - if (x <= 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (float)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (float)x_low; - } else { - x_high = x_low + 1; - } - - float ly = y - y_low; - float lx = x - x_low; - float hy = 1. - ly, hx = 1. - lx; - float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - // save weights and indices - PreCalc pc; - pc.pos1 = y_low * width + x_low; - pc.pos2 = y_low * width + x_high; - pc.pos3 = y_high * width + x_low; - pc.pos4 = y_high * width + x_high; - pc.w1 = w1; - pc.w2 = w2; - pc.w3 = w3; - pc.w4 = w4; - pre_calc[pre_calc_index] = pc; - - pre_calc_index += 1; - } - } - } - } -} - -void ROIAlignForwardCPU(const int nthreads, const float *input, const float *rois, float *output, - float *argmax_y, float *argmax_x, const int pooled_height, - const int pooled_width, const float spatial_scale, const int sampling_ratio, - const int pool_mode, // 0 - max pool, 1 - avg pool - const bool aligned, const int channels, const int height, const int width) { - int n_rois = nthreads / channels / pooled_width / pooled_height; - // (n, c, ph, pw) is an element in the pooled output - // can be parallelized using omp - // #pragma omp parallel for num_threads(32) - for (int n = 0; n < n_rois; n++) { - int index_n = n * channels * pooled_width * pooled_height; - - const float *offset_rois = rois + n * 5; - int roi_batch_ind = offset_rois[0]; - - // Do not use rounding; this implementation detail is critical - float offset = aligned ? 
(float)0.5 : (float)0.0; - float roi_start_w = offset_rois[1] * spatial_scale - offset; - float roi_start_h = offset_rois[2] * spatial_scale - offset; - float roi_end_w = offset_rois[3] * spatial_scale - offset; - float roi_end_h = offset_rois[4] * spatial_scale - offset; - - float roi_width = roi_end_w - roi_start_w; - float roi_height = roi_end_h - roi_start_h; - if (aligned) { - /*AT_ASSERTM(roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlign cannot have non-negative size!");*/ - assert(roi_width >= 0 && roi_height >= 0); - } else { // for backward-compatibility only - roi_width = std::max(roi_width, (float)1.); - roi_height = std::max(roi_height, (float)1.); - } - float bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - float bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // When the grid is empty, output zeros == 0/1, instead of NaN. - const float count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 - - // we want to precalculate indices and weights shared by all channels, - // this is the key point of optimization - std::vector pre_calc(roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); - pre_calc_for_bilinear_interpolate(height, width, pooled_height, pooled_width, roi_bin_grid_h, - roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, - bin_size_w, roi_bin_grid_h, roi_bin_grid_w, pre_calc); - - for (int c = 0; c < channels; c++) { - int index_n_c = index_n + c * pooled_width * pooled_height; - const float *offset_input = input + (roi_batch_ind * channels + c) * height * width; - int pre_calc_index = 0; - - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - int index = index_n_c + ph * pooled_width + pw; - - float output_val = 0.; - float maxval = -10000; - float maxidx_y = -1.f, maxidx_x = -1.f; - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - const float y = - roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const float x = - roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); - PreCalc pc = pre_calc[pre_calc_index]; - float val = pc.w1 * offset_input[pc.pos1] + pc.w2 * offset_input[pc.pos2] + - pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; - if (val > maxval) { - maxval = val; - maxidx_y = y; - maxidx_x = x; - } - output_val += val; - pre_calc_index += 1; - } - } - if (pool_mode == 0) { - // We do max pooling inside a bin - output[index] = maxval; - argmax_y[index] = maxidx_y; - argmax_x[index] = maxidx_x; - } else if (pool_mode == 1) { - // We do average (integral) pooling inside a bin - output[index] = output_val / count; - } // if - } // for pw - } // for ph - } // for c - } // for n -} - -void MMCVRoiAlignKernel::Compute(OrtKernelContext *context) { - // Setup inputs - const OrtValue *input_X = ort_.KernelContext_GetInput(context, 0); - const float *X_data = reinterpret_cast(ort_.GetTensorData(input_X)); - const OrtValue *input_rois = ort_.KernelContext_GetInput(context, 1); - const float *rois = - reinterpret_cast(ort_.GetTensorData(input_rois)); - - // Setup output - OrtTensorDimensions out_dimensions(ort_, input_X); - 
OrtTensorDimensions roi_dimensions(ort_, input_rois); - - int batch_size = out_dimensions.data()[0]; - int input_channels = out_dimensions.data()[1]; - int input_height = out_dimensions.data()[2]; - int input_width = out_dimensions.data()[3]; - - out_dimensions.data()[0] = roi_dimensions.data()[0]; - out_dimensions.data()[2] = aligned_height_; - out_dimensions.data()[3] = aligned_width_; - - OrtValue *output = - ort_.KernelContext_GetOutput(context, 0, out_dimensions.data(), out_dimensions.size()); - float *out = ort_.GetTensorMutableData(output); - OrtTensorTypeAndShapeInfo *output_info = ort_.GetTensorTypeAndShape(output); - ort_.ReleaseTensorTypeAndShapeInfo(output_info); - - // TODO: forward here - int output_size = out_dimensions.data()[0]; - for (auto i = 1; i < out_dimensions.size(); ++i) { - output_size *= out_dimensions.data()[i]; - } - - int poolMod = 1; - if (pool_mode_ == "max") poolMod = 0; - - float *argmax_x = nullptr, *argmax_y = nullptr; - if (poolMod == 0) { - argmax_y = new float[output_size]; - argmax_x = new float[output_size]; - } - - ROIAlignForwardCPU(output_size, X_data, rois, out, argmax_y, argmax_x, aligned_height_, - aligned_width_, spatial_scale_, sampling_ratio_, poolMod, aligned_, - input_channels, input_height, input_width); - - if (argmax_x) delete argmax_x; - if (argmax_y) delete argmax_y; -} - -REGISTER_ONNXRUNTIME_OPS(mmdeploy, MMCVRoiAlignCustomOp); -} // namespace mmdeploy diff --git a/csrc/backend_ops/onnxruntime/roi_align/roi_align.h b/csrc/backend_ops/onnxruntime/roi_align/roi_align.h deleted file mode 100644 index 0c7afa67da..0000000000 --- a/csrc/backend_ops/onnxruntime/roi_align/roi_align.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. -#ifndef ONNXRUNTIME_ROI_ALIGN_H -#define ONNXRUNTIME_ROI_ALIGN_H - -#include -#include - -#include -#include -#include -#include - -namespace mmdeploy { -struct MMCVRoiAlignKernel { - public: - MMCVRoiAlignKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info) : ort_(ort) { - aligned_ = ort_.KernelInfoGetAttribute(info, "aligned"); - aligned_height_ = ort_.KernelInfoGetAttribute(info, "output_height"); - aligned_width_ = ort_.KernelInfoGetAttribute(info, "output_width"); - pool_mode_ = ort_.KernelInfoGetAttribute(info, "mode"); - sampling_ratio_ = ort_.KernelInfoGetAttribute(info, "sampling_ratio"); - spatial_scale_ = ort_.KernelInfoGetAttribute(info, "spatial_scale"); - } - - void Compute(OrtKernelContext* context); - - private: - Ort::CustomOpApi ort_; - - int aligned_height_; - int aligned_width_; - float spatial_scale_; - int sampling_ratio_; - std::string pool_mode_; - int aligned_; -}; - -struct MMCVRoiAlignCustomOp : Ort::CustomOpBase { - void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const { - return new MMCVRoiAlignKernel(api, info); - } - const char* GetName() const { return "MMCVRoiAlign"; } - - size_t GetInputTypeCount() const { return 2; } - ONNXTensorElementDataType GetInputType(size_t) const { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; - } - - size_t GetOutputTypeCount() const { return 1; } - ONNXTensorElementDataType GetOutputType(size_t) const { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; - } - - // force cpu - const char* GetExecutionProviderType() const { return "CPUExecutionProvider"; } -}; -} // namespace mmdeploy - -#endif // ONNXRUNTIME_ROI_ALIGN_H diff --git a/csrc/backend_ops/tensorrt/CMakeLists.txt b/csrc/backend_ops/tensorrt/CMakeLists.txt index d4e9c41757..14db917dd3 100644 --- a/csrc/backend_ops/tensorrt/CMakeLists.txt +++ 
b/csrc/backend_ops/tensorrt/CMakeLists.txt @@ -18,6 +18,8 @@ add_library(${PROJECT_NAME}_obj OBJECT "${BACKEND_OPS_SRCS}") set_target_properties(${PROJECT_NAME}_obj PROPERTIES POSITION_INDEPENDENT_CODE 1) target_compile_definitions(${PROJECT_NAME}_obj PRIVATE -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT=1) +target_include_directories(${PROJECT_NAME}_obj + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) target_include_directories(${PROJECT_NAME}_obj PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/common) target_include_directories(${PROJECT_NAME}_obj @@ -25,9 +27,8 @@ target_include_directories(${PROJECT_NAME}_obj target_include_directories(${PROJECT_NAME}_obj PRIVATE ${TENSORRT_INCLUDE_DIR}) target_include_directories(${PROJECT_NAME}_obj PRIVATE ${CUDNN_DIR}/include) target_include_directories(${PROJECT_NAME}_obj PRIVATE ${CUB_ROOT_DIR}) -target_link_directories(${PROJECT_NAME}_obj PUBLIC ${CUDNN_DIR}/lib64 ${CUDNN_DIR}/lib/x64) target_link_libraries(${PROJECT_NAME}_obj - PUBLIC ${TENSORRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} cudnn) + PUBLIC ${TENSORRT_LIBS} cublas cudnn) mmdeploy_export(${PROJECT_NAME}_obj) # Build module library. It is used to convert onnx model to tensorrt engine diff --git a/csrc/backend_ops/tensorrt/common/common_cuda_helper.hpp b/csrc/backend_ops/tensorrt/common/common_cuda_helper.hpp index 920a636fda..c76cac8a32 100644 --- a/csrc/backend_ops/tensorrt/common/common_cuda_helper.hpp +++ b/csrc/backend_ops/tensorrt/common/common_cuda_helper.hpp @@ -4,6 +4,7 @@ #include #include +#include #include diff --git a/csrc/backend_ops/tensorrt/common_impl/trt_cuda_helper.cu b/csrc/backend_ops/tensorrt/common_impl/trt_cuda_helper.cu index 065218958f..092e712def 100644 --- a/csrc/backend_ops/tensorrt/common_impl/trt_cuda_helper.cu +++ b/csrc/backend_ops/tensorrt/common_impl/trt_cuda_helper.cu @@ -5,13 +5,13 @@ using mmdeploy::TensorDesc; template -__global__ void copy_permute_kernel(scalar_t *dst, const scalar_t *src, int n, - TensorDesc ts_src_stride, TensorDesc ts_dst_stride, +__global__ void copy_permute_kernel(scalar_t *__restrict__ dst, const scalar_t *__restrict__ src, + int n, TensorDesc ts_src_stride, TensorDesc ts_dst_stride, TensorDesc ts_permute) { const int src_dim = ts_src_stride.dim; - int *src_stride = &(ts_src_stride.stride[0]); - int *dst_stride = &(ts_dst_stride.stride[0]); - int *permute = &(ts_permute.shape[0]); + const auto src_stride = ts_src_stride.stride; + const auto dst_stride = ts_dst_stride.stride; + const auto permute = ts_permute.shape; CUDA_1D_KERNEL_LOOP(index, n) { size_t dst_index = index; size_t src_index = 0; diff --git a/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv.cpp b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv.cpp new file mode 100644 index 0000000000..95dd27ba83 --- /dev/null +++ b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv.cpp @@ -0,0 +1,257 @@ +// Copyright (c) OpenMMLab. 
All rights reserved +#include "trt_deform_conv.hpp" + +#include + +#include + +#include "trt_deform_conv_kernel.hpp" +#include "trt_serialize.hpp" + +using namespace nvinfer1; + +namespace mmdeploy { +namespace { +static const char *PLUGIN_VERSION{"1"}; +static const char *PLUGIN_NAME{"MMCVDeformConv2d"}; +} // namespace + +DeformableConvPluginDynamic::DeformableConvPluginDynamic(const std::string &name, + const nvinfer1::Dims stride, + const nvinfer1::Dims padding, + const nvinfer1::Dims dilation, + const int deformableGroup, const int group) + : TRTPluginBase(name), + mStride(stride), + mPadding(padding), + mDilation(dilation), + mDeformableGroup(deformableGroup), + mGroup(group) {} + +DeformableConvPluginDynamic::DeformableConvPluginDynamic(const std::string name, const void *data, + size_t length) + : TRTPluginBase(name) { + deserialize_value(&data, &length, &mStride); + deserialize_value(&data, &length, &mPadding); + deserialize_value(&data, &length, &mDilation); + deserialize_value(&data, &length, &mDeformableGroup); + deserialize_value(&data, &length, &mGroup); +} +DeformableConvPluginDynamic::~DeformableConvPluginDynamic() {} + +nvinfer1::IPluginV2DynamicExt *DeformableConvPluginDynamic::clone() const TRT_NOEXCEPT { + DeformableConvPluginDynamic *plugin = new DeformableConvPluginDynamic( + mLayerName, mStride, mPadding, mDilation, mDeformableGroup, mGroup); + plugin->setPluginNamespace(getPluginNamespace()); + + return plugin; +} + +nvinfer1::DimsExprs DeformableConvPluginDynamic::getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, + nvinfer1::IExprBuilder &exprBuilder) TRT_NOEXCEPT { + // input[0] == input + // input[1] == offset + // input[2] == weight + nvinfer1::DimsExprs ret; + ret.nbDims = 4; + ret.d[0] = inputs[0].d[0]; + ret.d[1] = inputs[2].d[0]; + + ret.d[2] = inputs[1].d[2]; + ret.d[3] = inputs[1].d[3]; + + return ret; +} + +bool DeformableConvPluginDynamic::supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs, int nbOutputs) TRT_NOEXCEPT { + if (pos == 0) { + return (ioDesc[pos].type == nvinfer1::DataType::kFLOAT && + ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR); + } else { + return ioDesc[pos].type == ioDesc[0].type && ioDesc[pos].format == ioDesc[0].format; + } +} + +void DeformableConvPluginDynamic::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *inputs, + int nbInputs, + const nvinfer1::DynamicPluginTensorDesc *outputs, + int nbOutputs) TRT_NOEXCEPT {} + +size_t DeformableConvPluginDynamic::getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, + int nbInputs, + const nvinfer1::PluginTensorDesc *outputs, + int nbOutputs) const TRT_NOEXCEPT { + int sizeof_dtype = mmdeploy::getElementSize(outputs[0].type); + + int batch_size = inputs[0].dims.d[0]; + int nInputPlane = inputs[0].dims.d[1]; + int inputHeight = inputs[0].dims.d[2]; + int inputWidth = inputs[0].dims.d[3]; + + int nOutputPlane = outputs[0].dims.d[1]; + int outputHeight = outputs[0].dims.d[2]; + int outputWidth = outputs[0].dims.d[3]; + + int kW = inputs[2].dims.d[2]; + int kH = inputs[2].dims.d[3]; + int im2col_step = std::min(32, batch_size); + + size_t col_size = mmdeploy::getAlignedSize(nInputPlane * kW * kH * im2col_step * outputHeight * + outputWidth * sizeof_dtype); + + size_t out_size = 0; + if (im2col_step != 1) + out_size = mmdeploy::getAlignedSize(batch_size * nOutputPlane * outputHeight * outputWidth * + sizeof_dtype); + + return col_size + out_size; +} + +int 
DeformableConvPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, + const nvinfer1::PluginTensorDesc *outputDesc, + const void *const *inputs, void *const *outputs, + void *workSpace, cudaStream_t stream) TRT_NOEXCEPT { + int batch = inputDesc[0].dims.d[0]; + int channels = inputDesc[0].dims.d[1]; + int height = inputDesc[0].dims.d[2]; + int width = inputDesc[0].dims.d[3]; + int channels_out = outputDesc[0].dims.d[1]; + int kernel_h = inputDesc[2].dims.d[2]; + int kernel_w = inputDesc[2].dims.d[3]; + + const void *x = inputs[0]; + const void *offset = inputs[1]; + const void *weight = inputs[2]; + void *output = outputs[0]; + int im2col_step = std::min(batch, 32); + + auto data_type = inputDesc[0].type; + switch (data_type) { + case nvinfer1::DataType::kFLOAT: + deform_conv((float *)x, (float *)weight, (float *)offset, (float *)output, workSpace, + batch, channels, height, width, channels_out, kernel_w, kernel_h, + mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], + mDilation.d[1], mGroup, mDeformableGroup, im2col_step, m_cublas_handle, + stream); + break; + default: + return 1; + break; + } + + return 0; +} + +nvinfer1::DataType DeformableConvPluginDynamic::getOutputDataType( + int index, const nvinfer1::DataType *inputTypes, int nbInputs) const TRT_NOEXCEPT { + return inputTypes[0]; +} + +// IPluginV2 Methods +const char *DeformableConvPluginDynamic::getPluginType() const TRT_NOEXCEPT { return PLUGIN_NAME; } + +const char *DeformableConvPluginDynamic::getPluginVersion() const TRT_NOEXCEPT { + return PLUGIN_VERSION; +} + +int DeformableConvPluginDynamic::getNbOutputs() const TRT_NOEXCEPT { return 1; } + +size_t DeformableConvPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { + return serialized_size(mStride) + serialized_size(mPadding) + serialized_size(mDilation) + + serialized_size(mDeformableGroup) + serialized_size(mGroup); +} + +void DeformableConvPluginDynamic::serialize(void *buffer) const TRT_NOEXCEPT { + serialize_value(&buffer, mStride); + serialize_value(&buffer, mPadding); + serialize_value(&buffer, mDilation); + serialize_value(&buffer, mDeformableGroup); + serialize_value(&buffer, mGroup); +} + +void DeformableConvPluginDynamic::attachToContext( + cudnnContext *cudnnContext, cublasContext *cublasContext, + nvinfer1::IGpuAllocator *gpuAllocator) TRT_NOEXCEPT { + m_cublas_handle = cublasContext; +} + +void DeformableConvPluginDynamic::detachFromContext() TRT_NOEXCEPT {} + +////////////////////// creator ///////////////////////////// + +DeformableConvPluginDynamicCreator::DeformableConvPluginDynamicCreator() { + mPluginAttributes.clear(); + mPluginAttributes.emplace_back(nvinfer1::PluginField("stride")); + mPluginAttributes.emplace_back(nvinfer1::PluginField("padding")); + mPluginAttributes.emplace_back(nvinfer1::PluginField("dilation")); + mPluginAttributes.emplace_back(nvinfer1::PluginField("groups")); + mPluginAttributes.emplace_back(nvinfer1::PluginField("deform_groups")); + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +const char *DeformableConvPluginDynamicCreator::getPluginName() const TRT_NOEXCEPT { + return PLUGIN_NAME; +} + +const char *DeformableConvPluginDynamicCreator::getPluginVersion() const TRT_NOEXCEPT { + return PLUGIN_VERSION; +} + +nvinfer1::IPluginV2 *DeformableConvPluginDynamicCreator::createPlugin( + const char *name, const nvinfer1::PluginFieldCollection *fc) TRT_NOEXCEPT { + nvinfer1::Dims stride{2, {1, 1}}; + nvinfer1::Dims padding{2, {0, 0}}; + nvinfer1::Dims 
dilation{2, {1, 1}}; + int deformableGroup = 1; + int group = 1; + + for (int i = 0; i < fc->nbFields; i++) { + if (fc->fields[i].data == nullptr) { + continue; + } + std::string field_name(fc->fields[i].name); + + if (field_name.compare("deform_groups") == 0) { + deformableGroup = static_cast(fc->fields[i].data)[0]; + } + + if (field_name.compare("groups") == 0) { + group = static_cast(fc->fields[i].data)[0]; + } + + if (field_name.compare("stride") == 0) { + stride.nbDims = 2; + stride.d[0] = static_cast(fc->fields[i].data)[0]; + stride.d[1] = static_cast(fc->fields[i].data)[1]; + } + + if (field_name.compare("padding") == 0) { + padding.nbDims = 2; + padding.d[0] = static_cast(fc->fields[i].data)[0]; + padding.d[1] = static_cast(fc->fields[i].data)[1]; + } + + if (field_name.compare("dilation") == 0) { + dilation.nbDims = 2; + dilation.d[0] = static_cast(fc->fields[i].data)[0]; + dilation.d[1] = static_cast(fc->fields[i].data)[1]; + } + } + + DeformableConvPluginDynamic *plugin = + new DeformableConvPluginDynamic(name, stride, padding, dilation, deformableGroup, group); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +nvinfer1::IPluginV2 *DeformableConvPluginDynamicCreator::deserializePlugin( + const char *name, const void *serialData, size_t serialLength) TRT_NOEXCEPT { + auto plugin = new DeformableConvPluginDynamic(name, serialData, serialLength); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} +REGISTER_TENSORRT_PLUGIN(DeformableConvPluginDynamicCreator); +} // namespace mmdeploy diff --git a/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv.hpp b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv.hpp new file mode 100644 index 0000000000..3ea0ccbefe --- /dev/null +++ b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv.hpp @@ -0,0 +1,81 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
+#ifndef TRT_DEFORM_CONV_HPP +#define TRT_DEFORM_CONV_HPP +#include + +#include +#include +#include + +#include "trt_plugin_base.hpp" + +namespace mmdeploy { +class DeformableConvPluginDynamic : public TRTPluginBase { + public: + DeformableConvPluginDynamic(const std::string &name, const nvinfer1::Dims stride, + const nvinfer1::Dims padding, const nvinfer1::Dims dilation, + const int deformableGroup, const int group); + + DeformableConvPluginDynamic(const std::string name, const void *data, size_t length); + + DeformableConvPluginDynamic() = delete; + + ~DeformableConvPluginDynamic() TRT_NOEXCEPT override; + + // IPluginV2DynamicExt Methods + nvinfer1::IPluginV2DynamicExt *clone() const TRT_NOEXCEPT override; + nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, + int nbInputs, nvinfer1::IExprBuilder &exprBuilder) + TRT_NOEXCEPT override; + bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; + void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc *out, + int nbOutputs) TRT_NOEXCEPT override; + size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs, + const nvinfer1::PluginTensorDesc *outputs, + int nbOutputs) const TRT_NOEXCEPT override; + int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, + const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, + void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT override; + void attachToContext(cudnnContext *cudnnContext, cublasContext *cublasContext, + nvinfer1::IGpuAllocator *gpuAllocator) TRT_NOEXCEPT override; + void detachFromContext() TRT_NOEXCEPT override; + + // IPluginV2Ext Methods + nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, + int nbInputs) const TRT_NOEXCEPT override; + + // IPluginV2 Methods + const char *getPluginType() const TRT_NOEXCEPT override; + const char *getPluginVersion() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void *buffer) const TRT_NOEXCEPT override; + + private: + nvinfer1::Dims mStride; + nvinfer1::Dims mPadding; + nvinfer1::Dims mDilation; + int mDeformableGroup; + int mGroup; + + cublasHandle_t m_cublas_handle; +}; + +class DeformableConvPluginDynamicCreator : public TRTPluginCreatorBase { + public: + DeformableConvPluginDynamicCreator(); + + const char *getPluginName() const TRT_NOEXCEPT override; + + const char *getPluginVersion() const TRT_NOEXCEPT override; + + nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc) + TRT_NOEXCEPT override; + + nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData, + size_t serialLength) TRT_NOEXCEPT override; +}; +} // namespace mmdeploy +#endif // TRT_DEFORM_CONV_HPP diff --git a/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv_kernel.cu b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv_kernel.cu new file mode 100644 index 0000000000..5ddb905a42 --- /dev/null +++ b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv_kernel.cu @@ -0,0 +1,165 @@ +/*! 
+ ******************* BEGIN Caffe Copyright Notice and Disclaimer + ***************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + *this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer + ********************* + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. 
+ * \ref: https://arxiv.org/abs/1703.06211 + * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng + */ + +// modified from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu + +#include "common_cuda_helper.hpp" +#include "trt_deform_conv_kernel.cuh" +#include "trt_deform_conv_kernel.hpp" +#include "trt_plugin_helper.hpp" + +template +void deform_conv_im2col(const scalar_t* input, const scalar_t* offset, scalar_t* column, + const int channels, const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, cudaStream_t stream) { + int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + deformable_im2col_gpu_kernel<<>>( + num_kernels, input, offset, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, channels, + deformable_group, height_col, width_col, column); + + cudaCheckError(); +} + +template +void deform_conv(const scalar_t* input, const scalar_t* weight, const scalar_t* offset, + scalar_t* output, void* workspace, int batchSize, int nInputPlane, int inputHeight, + int inputWidth, int nOutputPlane, int kW, int kH, int dW, int dH, int padW, + int padH, int dilationW, int dilationH, int group, int deformable_group, + int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream) { + size_t word_size = sizeof(scalar_t); + + im2col_step = std::min(int(batchSize), im2col_step); + long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + long outputHW = outputHeight * outputWidth; + long kHW = kH * kW; + long columns_size = + mmdeploy::getAlignedSize(nInputPlane * kHW * im2col_step * outputHW * word_size); + + // column buffer for img2col + char* workspace_ptr = reinterpret_cast(workspace); + scalar_t* columns = reinterpret_cast(workspace_ptr); + workspace_ptr = workspace_ptr + columns_size; + + scalar_t* output_buffer; + if (im2col_step == 1) { + output_buffer = output; + } else { + // output need permute when im2col_step!=1 + output_buffer = reinterpret_cast(workspace_ptr); + } + + long input_elt_step = im2col_step * nInputPlane * inputHeight * inputWidth; + long offset_elt_step = im2col_step * deformable_group * 2 * kHW * outputHW; + long out_buffer_step = nOutputPlane * im2col_step * outputHW; + long col_g_step = nInputPlane * kHW * im2col_step * outputHW / group; + long weight_g_step = nOutputPlane * nInputPlane * kHW / (group * group); + long out_buffer_g_step = out_buffer_step / group; + int m = nOutputPlane / group; + int n = im2col_step * outputHW; + int k = nInputPlane * kHW / group; + scalar_t alpha = 1.f; + scalar_t beta = 0.f; + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + const scalar_t* input_start = input + elt * input_elt_step; + const scalar_t* offset_start = offset + elt * offset_elt_step; + + deform_conv_im2col(input_start, offset_start, columns, nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, 
dW, dilationH, dilationW, + im2col_step, deformable_group, stream); + + for (int g = 0; g < group; ++g) { + const scalar_t* weight_start = weight + g * weight_g_step; + scalar_t* col_start = columns + g * col_g_step; + scalar_t* out_buffer_start = output_buffer + elt * out_buffer_step + g * out_buffer_g_step; + + cublasGemmWrap(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, &alpha, col_start, + n, weight_start, k, &beta, out_buffer_start, n); + cudaCheckError(); + } + } + + if (im2col_step != 1) { + int output_buffer_shape[5] = {batchSize / im2col_step, nOutputPlane, im2col_step, + static_cast(outputHeight), static_cast(outputWidth)}; + int output_buffer_permute[5] = {0, 2, 1, 3, 4}; + memcpyPermute(output, output_buffer, &output_buffer_shape[0], + &output_buffer_permute[0], 5, stream); + } +} + +template void deform_conv(const float* input, const float* weight, const float* offset, + float* output, void* workspace, int batchSize, int nInputPlane, + int inputHeight, int inputWidth, int nOutputPlane, int kW, int kH, + int dW, int dH, int padW, int padH, int dilationW, int dilationH, + int group, int deformable_group, int im2col_step, + cublasHandle_t cublas_handle, cudaStream_t stream); diff --git a/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv_kernel.cuh b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv_kernel.cuh new file mode 100644 index 0000000000..6514efa82c --- /dev/null +++ b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv_kernel.cuh @@ -0,0 +1,145 @@ +/*! + ******************* BEGIN Caffe Copyright Notice and Disclaimer + ***************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + *this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer + ********************* + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. + * \ref: https://arxiv.org/abs/1703.06211 + * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng + */ + +// modified from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu + +#include "common_cuda_helper.hpp" + +template +__device__ __forceinline__ scalar_t deformable_im2col_bilinear(const scalar_t* __restrict__ input, + const int height, const int width, + scalar_t h, scalar_t w) { + if (h <= -1.f || height <= h || w <= -1.f || width <= w) { + return 0; + } + + const int h_low = floorf(h); + const int w_low = floorf(w); + + input += h_low * width; + const scalar_t v1 = (h_low >= 0 && w_low >= 0) ? input[w_low] : static_cast(0.0f); + const int w_high = w_low + 1; + const scalar_t v2 = + (h_low >= 0 && w_high <= width - 1) ? input[w_high] : static_cast(0.0f); + const scalar_t lw = w - w_low; + const scalar_t v_low = fmaf(v2 - v1, lw, v1); + input += width; + const scalar_t v3 = + (h_low <= height - 2 && w_low >= 0) ? input[w_low] : static_cast(0.0f); + const scalar_t v4 = + (h_low <= height - 2 && w_high <= width - 1) ? 
input[w_high] : static_cast(0.0f); + const scalar_t v_high = fmaf(v4 - v3, lw, v3); + const scalar_t lh = h - h_low; + const scalar_t val = fmaf(v_high - v_low, lh, v_low); + return val; +} + +template +__global__ void deformable_im2col_gpu_kernel( + const int n, const scalar_t* __restrict__ data_im, const scalar_t* __restrict__ data_offset, + const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, + const int pad_w, const int stride_h, const int stride_w, const int dilation_h, + const int dilation_w, const int channel_per_deformable_group, const int batch_size, + const int num_channels, const int deformable_group, const int height_col, const int width_col, + scalar_t* __restrict__ data_col) { + const int hw_col = height_col * width_col; + const int data_col_step = batch_size * hw_col; + + CUDA_1D_KERNEL_LOOP(index, n) { + // index index of output matrix + int tmp_index = index; + const int w_col = tmp_index % width_col; + tmp_index /= width_col; + const int h_col = tmp_index % height_col; + tmp_index /= height_col; + const int b_col = tmp_index % batch_size; + const int c_im = tmp_index / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + scalar_t* __restrict__ data_col_ptr = data_col + c_col * data_col_step + index % data_col_step; + const scalar_t* __restrict__ data_im_ptr = + data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t* __restrict__ data_offset_ptr = + data_offset + + ((b_col * deformable_group + deformable_group_index) << 1) * kernel_h * kernel_w * hw_col + + h_col * width_col + w_col; + for (int i = 0; i < kernel_h; ++i) { + for (int j = 0; j < kernel_w; ++j) { + const int data_offset_h = (i * kernel_w + j) * hw_col << 1; + const scalar_t offset_h = data_offset_ptr[data_offset_h]; + const int data_offset_w = data_offset_h + hw_col; + const scalar_t offset_w = data_offset_ptr[data_offset_w]; + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + const scalar_t val = deformable_im2col_bilinear(data_im_ptr, height, width, h_im, w_im); + *data_col_ptr = val; + data_col_ptr += data_col_step; + } + } + } +} diff --git a/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv_kernel.hpp b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv_kernel.hpp new file mode 100644 index 0000000000..3d8f6dfc45 --- /dev/null +++ b/csrc/backend_ops/tensorrt/deform_conv/trt_deform_conv_kernel.hpp @@ -0,0 +1,20 @@ +// Copyright (c) OpenMMLab. 
All rights reserved
+#ifndef TRT_DEFORM_CONV_KERNEL_HPP
+#define TRT_DEFORM_CONV_KERNEL_HPP
+#include
+#include
+
+template <typename scalar_t>
+void deform_conv_im2col(const scalar_t* input, const scalar_t* offset, scalar_t* column,
+                        const int channels, const int height, const int width, const int ksize_h,
+                        const int ksize_w, const int pad_h, const int pad_w, const int stride_h,
+                        const int stride_w, const int dilation_h, const int dilation_w,
+                        const int parallel_imgs, const int deformable_group, cudaStream_t stream);
+
+template <typename scalar_t>
+void deform_conv(const scalar_t* input, const scalar_t* weight, const scalar_t* offset,
+                 scalar_t* output, void* workspace, int batchSize, int nInputPlane, int inputHeight,
+                 int inputWidth, int nOutputPlane, int kW, int kH, int dW, int dH, int padW,
+                 int padH, int dilationW, int dilationH, int group, int deformable_group,
+                 int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream);
+#endif  // TRT_DEFORM_CONV_KERNEL_HPP
diff --git a/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.cu b/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.cu
index a5940b5e40..ed284e7809 100644
--- a/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.cu
+++ b/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.cu
@@ -3,7 +3,7 @@
 #include
 
 #include "common_cuda_helper.hpp"
-#include "trt_modulated_deform_conv_kernel.hpp"
+#include "modulated_deform_conv/modulated_deform_conv_cuda.cuh"
 #include "trt_plugin_helper.hpp"
 
 template
diff --git a/csrc/backend_ops/torchscript/CMakeLists.txt b/csrc/backend_ops/torchscript/CMakeLists.txt
new file mode 100644
index 0000000000..e383129992
--- /dev/null
+++ b/csrc/backend_ops/torchscript/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+cmake_minimum_required(VERSION 3.14)
+
+add_subdirectory(ops)
+add_subdirectory(optimizer)
diff --git a/csrc/backend_ops/torchscript/bind.cpp b/csrc/backend_ops/torchscript/bind.cpp
new file mode 100644
index 0000000000..cfc08d1489
--- /dev/null
+++ b/csrc/backend_ops/torchscript/bind.cpp
@@ -0,0 +1,10 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+#include "torch/script.h"
+
+TORCH_LIBRARY(mmdeploy, m) {
+  m.def(
+      "modulated_deform_conv(Tensor input, Tensor weight, Tensor bias, Tensor offset, Tensor "
+      "mask, "
+      "int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int "
+      "dilation_h,int dilation_w, int groups, int deform_groups, bool with_bias) -> Tensor");
+}
diff --git a/csrc/backend_ops/torchscript/ops/CMakeLists.txt b/csrc/backend_ops/torchscript/ops/CMakeLists.txt
new file mode 100644
index 0000000000..71c7256cd4
--- /dev/null
+++ b/csrc/backend_ops/torchscript/ops/CMakeLists.txt
@@ -0,0 +1,36 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+cmake_minimum_required(VERSION 3.14)
+
+if("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
+  project(mmdeploy_torchscript_ops CUDA CXX)
+  include(${CMAKE_SOURCE_DIR}/cmake/cuda.cmake NO_POLICY_SCOPE)
+  file(GLOB_RECURSE BACKEND_OPS_SRCS *.cpp *.cu)
+else()
+  project(mmdeploy_torchscript_ops CXX)
+  file(GLOB_RECURSE BACKEND_OPS_SRCS *.cpp)
+endif()
+
+include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake)
+find_package(Torch REQUIRED)
+
+
+add_library(${PROJECT_NAME}_obj OBJECT "${BACKEND_OPS_SRCS}")
+set_target_properties(${PROJECT_NAME}_obj PROPERTIES POSITION_INDEPENDENT_CODE 1)
+target_compile_definitions(${PROJECT_NAME}_obj
+    PRIVATE -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT=1)
+target_include_directories(${PROJECT_NAME}_obj
+    PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../common)
+target_include_directories(${PROJECT_NAME}_obj
+    PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/common)
+
+if("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
+  target_include_directories(${PROJECT_NAME}_obj
+      PRIVATE ${CUDA_TOOLKIT_ROOT_DIR}/include)
+endif()
+target_link_libraries(${PROJECT_NAME}_obj PRIVATE ${TORCH_LIBRARIES})
+mmdeploy_export(${PROJECT_NAME}_obj)
+
+# Build module library. It is used to inference with torchscript
+mmdeploy_add_module(${PROJECT_NAME} MODULE EXCLUDE "")
+target_link_libraries(${PROJECT_NAME} PUBLIC ${PROJECT_NAME}_obj)
+add_library(mmdeploy::torchscript_ops ALIAS ${PROJECT_NAME})
diff --git a/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cpu.cpp b/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cpu.cpp
new file mode 100644
index 0000000000..c6d980919f
--- /dev/null
+++ b/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cpu.cpp
@@ -0,0 +1,94 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+#include "modulated_deform_conv/modulated_deform_conv_cpu.h" + +#include "torch/script.h" + +namespace mmdeploy { + +void modulated_deformable_im2col_cpu( + const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, + const int64_t batch_size, const int64_t channels, const int64_t height_im, + const int64_t width_im, const int64_t height_col, const int64_t width_col, + const int64_t kernel_h, const int64_t kernel_w, const int64_t pad_h, const int64_t pad_w, + const int64_t stride_h, const int64_t stride_w, const int64_t dilation_h, + const int64_t dilation_w, int64_t deformable_group, at::Tensor data_col) { + // num_axes should be smaller than block size + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_cpu", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + + deformable_im2col_2d(data_im_, data_offset_, data_mask_, height_im, width_im, + kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channels, deformable_group, + height_col, width_col, data_mask_ != nullptr, data_col_); + })); +} + +at::Tensor modulated_deform_conv_forward_cpu(at::Tensor input, at::Tensor weight, at::Tensor bias, + at::Tensor offset, at::Tensor mask, int64_t kernel_h, + int64_t kernel_w, int64_t stride_h, int64_t stride_w, + int64_t pad_h, int64_t pad_w, int64_t dilation_h, + int64_t dilation_w, int64_t group, + int64_t deformable_group, bool with_bias) { + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, + kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", channels, + channels_kernel * group); + + const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + // resize output + at::Tensor output = + at::zeros({batch, group, channels_out / group, height_out, width_out}, input.options()); + // resize temporary columns + at::Tensor columns = at::zeros( + {group, channels * kernel_h * kernel_w / group, 1 * height_out * width_out}, input.options()); + + // divide into group + weight = + weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cpu(input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, + stride_h, stride_w, dilation_h, dilation_w, deformable_group, + columns); + + for (int g = 0; g < group; g++) { + output[b][g] = + output[b][g].flatten(1).addmm_(weight[g].flatten(1), columns[g]).view_as(output[b][g]); + } + } + + output = output.view( + {output.size(0), output.size(1) * output.size(2), output.size(3), output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } + + return output; +} + 
+TORCH_LIBRARY_IMPL(mmdeploy, CPU, m) { + m.impl("modulated_deform_conv", modulated_deform_conv_forward_cpu); +} +} // namespace mmdeploy diff --git a/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cuda.cu b/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cuda.cu new file mode 100644 index 0000000000..3f9b6aef08 --- /dev/null +++ b/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cuda.cu @@ -0,0 +1,97 @@ +// Copyright (c) OpenMMLab. All rights reserved. +#include "c10/cuda/CUDAStream.h" +#include "modulated_deform_conv/modulated_deform_conv_cuda.cuh" +#include "torch/script.h" + +namespace mmdeploy { + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, + const int64_t batch_size, const int64_t channels, const int64_t height_im, + const int64_t width_im, const int64_t height_col, const int64_t width_col, + const int64_t kernel_h, const int64_t kernel_w, const int64_t pad_h, const int64_t pad_w, + const int64_t stride_h, const int64_t stride_w, const int64_t dilation_h, + const int64_t dilation_w, const int64_t deformable_group, at::Tensor data_col) { + // num_axes should be smaller than block size + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * batch_size * height_col * width_col; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_cuda", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + modulated_deformable_im2col_gpu_kernel + <<>>( + num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, + kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, batch_size, channels, deformable_group, height_col, + width_col, data_col_); + })); +} + +at::Tensor modulated_deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, at::Tensor bias, + at::Tensor offset, at::Tensor mask, int64_t kernel_h, + int64_t kernel_w, int64_t stride_h, int64_t stride_w, + int64_t pad_h, int64_t pad_w, int64_t dilation_h, + int64_t dilation_w, int64_t group, + int64_t deformable_group, bool with_bias) { + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, + kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", channels, + channels_kernel * group); + + const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + // resize output + at::Tensor output = + at::zeros({batch, group, channels_out / group, height_out, width_out}, input.options()); + // resize temporary columns + at::Tensor columns = at::zeros( + {group, channels * kernel_h * kernel_w / group, 1 * height_out 
* width_out}, input.options()); + + // divide into group + weight = + weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda(input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, + stride_h, stride_w, dilation_h, dilation_w, deformable_group, + columns); + + for (int g = 0; g < group; g++) { + output[b][g] = + output[b][g].flatten(1).addmm_(weight[g].flatten(1), columns[g]).view_as(output[b][g]); + } + } + + output = output.view( + {output.size(0), output.size(1) * output.size(2), output.size(3), output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } + + return output; +} + +TORCH_LIBRARY_IMPL(mmdeploy, CUDA, m) { + m.impl("modulated_deform_conv", modulated_deform_conv_forward_cuda); +} +} // namespace mmdeploy diff --git a/csrc/backend_ops/torchscript/optimizer/CMakeLists.txt b/csrc/backend_ops/torchscript/optimizer/CMakeLists.txt new file mode 100644 index 0000000000..8f3cb46d71 --- /dev/null +++ b/csrc/backend_ops/torchscript/optimizer/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright (c) OpenMMLab. All rights reserved. +cmake_minimum_required(VERSION 3.14) +project(ts_optimizer) + +find_package(Torch REQUIRED) +if (NOT TARGET pybind11) + add_subdirectory(${CMAKE_SOURCE_DIR}/third_party/pybind11 pybind11) +endif () + +file(GLOB_RECURSE OPTIMIZER_SRCS *.cpp) + +pybind11_add_module(${PROJECT_NAME} ${OPTIMIZER_SRCS}) +target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES}) +target_link_directories(${PROJECT_NAME} PRIVATE mmdeploy::torchscript_ops) +set_target_properties( + ${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY + ${CMAKE_SOURCE_DIR}/mmdeploy/backend/torchscript) diff --git a/csrc/backend_ops/torchscript/optimizer/bind.cpp b/csrc/backend_ops/torchscript/optimizer/bind.cpp new file mode 100644 index 0000000000..73594776a3 --- /dev/null +++ b/csrc/backend_ops/torchscript/optimizer/bind.cpp @@ -0,0 +1,26 @@ +// Copyright (c) OpenMMLab. All rights reserved. +#include + +#include + +#include "optimizer.h" + +void optimize_for_backend(torch::jit::Module& model, const std::string& ir = "torchscript", + const std::string& backend = "torchscript") { + if (ir == "torchscript") { + model = mmdeploy::optimize_for_torchscript(model); + } else if (ir == "onnx") { + model = mmdeploy::optimize_for_onnx(model); + } else { + fprintf(stderr, "No optimize for combination ir: %s backend: %s\n", ir.c_str(), + backend.c_str()); + exit(-1); + } +} + +PYBIND11_MODULE(ts_optimizer, m) { + namespace py = pybind11; + m.def("optimize_for_backend", optimize_for_backend, py::arg("module"), + py::arg("ir") = std::string("torchscript"), + py::arg("backend") = std::string("torchscript")); +} diff --git a/csrc/backend_ops/torchscript/optimizer/optimizer.cpp b/csrc/backend_ops/torchscript/optimizer/optimizer.cpp new file mode 100644 index 0000000000..05ef9d54cd --- /dev/null +++ b/csrc/backend_ops/torchscript/optimizer/optimizer.cpp @@ -0,0 +1,70 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
+#include "optimizer.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if TORCH_VERSION_MINOR >= 9 +#include +#include +#include +#endif + +namespace mmdeploy { + +using torch::jit::Graph; +const std::shared_ptr& required_passes(const std::shared_ptr& graph) { + RemoveExpands(graph); + CanonicalizeOps(graph); + EliminateDeadCode(graph); + return graph; +} + +Module optimize_for_torchscript(const Module& model) { + auto frozen_model = freeze_module(model); + auto graph = frozen_model.get_method("forward").graph(); + OptimizeFrozenGraph(graph, true); + +#if TORCH_VERSION_MINOR >= 9 + FuseFrozenConvAddRelu(graph); + ConvertFrozenOpsToMKLDNN(graph); + FrozenLinearTranspose(graph); +#endif + + graph = required_passes(graph); + EliminateCommonSubexpression(graph); + PeepholeOptimize(graph); + ConstantPropagation(graph); + ConstantPooling(graph); + + // TODO: add more custom passes + + return frozen_model; +} + +Module optimize_for_onnx(const Module& model) { + auto frozen_model = freeze_module(model, {"training"}); + auto graph = frozen_model.get_method("forward").graph(); + OptimizeFrozenGraph(graph, true); + +#if TORCH_VERSION_MINOR >= 9 + FuseFrozenConvAddRelu(graph); + ConvertFrozenOpsToMKLDNN(graph); + FrozenLinearTranspose(graph); +#endif + + // TODO: add more custom passes + + return frozen_model; +} + +// TODO: add optimizer for other backend/onnx + +} // namespace mmdeploy diff --git a/csrc/backend_ops/torchscript/optimizer/optimizer.h b/csrc/backend_ops/torchscript/optimizer/optimizer.h new file mode 100644 index 0000000000..d0d91c627d --- /dev/null +++ b/csrc/backend_ops/torchscript/optimizer/optimizer.h @@ -0,0 +1,10 @@ +// Copyright (c) OpenMMLab. All rights reserved. +#include + +namespace mmdeploy { +using torch::jit::script::Module; + +Module optimize_for_torchscript(const Module &model); + +Module optimize_for_onnx(const Module &model); +} // namespace mmdeploy diff --git a/csrc/codebase/CMakeLists.txt b/csrc/codebase/CMakeLists.txt index 9ef6490a8c..a0b98594aa 100644 --- a/csrc/codebase/CMakeLists.txt +++ b/csrc/codebase/CMakeLists.txt @@ -9,6 +9,7 @@ if ("all" IN_LIST MMDEPLOY_CODEBASES) list(APPEND CODEBASES "mmseg") list(APPEND CODEBASES "mmocr") list(APPEND CODEBASES "mmedit") + list(APPEND CODEBASES "mmpose") else () set(CODEBASES ${MMDEPLOY_CODEBASES}) endif () diff --git a/csrc/codebase/mmpose/CMakeLists.txt b/csrc/codebase/mmpose/CMakeLists.txt new file mode 100644 index 0000000000..6d4c7dd562 --- /dev/null +++ b/csrc/codebase/mmpose/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +cmake_minimum_required(VERSION 3.14) +project(mmdeploy_mmpose) + +include(${CMAKE_SOURCE_DIR}/cmake/opencv.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) + +file(GLOB_RECURSE SRCS ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp") +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") +target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_opencv_utils) +add_library(mmdeploy::mmpose ALIAS ${PROJECT_NAME}) diff --git a/csrc/codebase/mmpose/keypoints_from_heatmap.cpp b/csrc/codebase/mmpose/keypoints_from_heatmap.cpp new file mode 100644 index 0000000000..72c6a3cf07 --- /dev/null +++ b/csrc/codebase/mmpose/keypoints_from_heatmap.cpp @@ -0,0 +1,390 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
+ +#include +#include +#include + +#include "core/device.h" +#include "core/registry.h" +#include "core/serialization.h" +#include "core/tensor.h" +#include "core/utils/device_utils.h" +#include "core/utils/formatter.h" +#include "core/value.h" +#include "experimental/module_adapter.h" +#include "mmpose.h" +#include "opencv_utils.h" + +namespace mmdeploy::mmpose { + +using std::string; +using std::vector; + +template +struct _LoopBody : public cv::ParallelLoopBody { + F f_; + _LoopBody(F f) : f_(std::move(f)) {} + void operator()(const cv::Range& range) const override { f_(range); } +}; + +std::string to_lower(const std::string& s) { + std::string t = s; + std::transform(t.begin(), t.end(), t.begin(), [](unsigned char c) { return std::tolower(c); }); + return t; +} + +class TopdownHeatmapBaseHeadDecode : public MMPose { + public: + explicit TopdownHeatmapBaseHeadDecode(const Value& config) : MMPose(config) { + if (config.contains("params")) { + auto& params = config["params"]; + flip_test_ = params.value("flip_test", flip_test_); + use_udp_ = params.value("use_udp", use_udp_); + target_type_ = params.value("target_type", target_type_); + valid_radius_factor_ = params.value("valid_radius_factor", valid_radius_factor_); + unbiased_decoding_ = params.value("unbiased_decoding", unbiased_decoding_); + post_process_ = params.value("post_process", post_process_); + shift_heatmap_ = params.value("shift_heatmap", shift_heatmap_); + modulate_kernel_ = params.value("modulate_kernel", modulate_kernel_); + } + } + + Result operator()(const Value& _data, const Value& _prob) { + MMDEPLOY_DEBUG("preprocess_result: {}", _data); + MMDEPLOY_DEBUG("inference_result: {}", _prob); + + Device cpu_device{"cpu"}; + OUTCOME_TRY(auto heatmap, + MakeAvailableOnDevice(_prob["output"].get(), cpu_device, stream())); + OUTCOME_TRY(stream().Wait()); + if (!(heatmap.shape().size() == 4 && heatmap.data_type() == DataType::kFLOAT)) { + MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", heatmap.shape(), + (int)heatmap.data_type()); + return Status(eNotSupported); + } + + auto& img_metas = _data["img_metas"]; + + vector center; + vector scale; + from_value(img_metas["center"], center); + from_value(img_metas["scale"], scale); + Tensor pred = + keypoints_from_heatmap(heatmap, center, scale, unbiased_decoding_, post_process_, + modulate_kernel_, valid_radius_factor_, use_udp_, target_type_); + + return GetOutput(pred); + } + + Value GetOutput(Tensor& pred) { + PoseDetectorOutput output; + int K = pred.shape(1); + float* data = pred.data(); + for (int i = 0; i < K; i++) { + float x = *(data + 0); + float y = *(data + 1); + float s = *(data + 2); + output.key_points.push_back({{x, y}, s}); + data += 3; + } + return to_value(std::move(output)); + } + + Tensor keypoints_from_heatmap(const Tensor& _heatmap, const vector& center, + const vector& scale, bool unbiased_decoding, + const string& post_process, int modulate_kernel, + float valid_radius_factor, bool use_udp, + const string& target_type) { + Tensor heatmap(_heatmap.desc()); + heatmap.CopyFrom(_heatmap, stream()).value(); + stream().Wait().value(); + + int K = heatmap.shape(1); + int H = heatmap.shape(2); + int W = heatmap.shape(3); + + if (post_process == "megvii") { + heatmap = gaussian_blur(heatmap, modulate_kernel); + } + + Tensor pred; + + if (use_udp) { + if (to_lower(target_type) == to_lower(string("GaussianHeatMap"))) { + pred = get_max_pred(heatmap); + post_dark_udp(pred, heatmap, modulate_kernel); + } else if (to_lower(target_type) == 
to_lower(string("CombinedTarget"))) { + // output channel = 3 * channel_cfg['num_output_channels'] + assert(K % 3 == 0); + cv::parallel_for_(cv::Range(0, K), _LoopBody{[&](const cv::Range& r) { + for (int i = r.start; i < r.end; i++) { + int kt = (i % 3 == 0) ? 2 * modulate_kernel + 1 : modulate_kernel; + float* data = heatmap.data() + i * H * W; + cv::Mat work = cv::Mat(H, W, CV_32FC(1), data); + cv::GaussianBlur(work, work, {kt, kt}, 0); // inplace + } + }}); + float valid_radius = valid_radius_factor_ * H; + TensorDesc desc = {Device{"cpu"}, DataType::kFLOAT, {1, K / 3, H, W}}; + Tensor offset_x(desc); + Tensor offset_y(desc); + Tensor heatmap_(desc); + { + // split heatmap + float* src = heatmap.data(); + float* dst0 = heatmap_.data(); + float* dst1 = offset_x.data(); + float* dst2 = offset_y.data(); + for (int i = 0; i < K / 3; i++) { + std::copy_n(src, H * W, dst0); + std::transform(src + H * W, src + 2 * H * W, dst1, + [=](float& x) { return x * valid_radius; }); + std::transform(src + 2 * H * W, src + 3 * H * W, dst2, + [=](float& x) { return x * valid_radius; }); + src += 3 * H * W; + dst0 += H * W; + dst1 += H * W; + dst2 += H * W; + } + } + pred = get_max_pred(heatmap_); + for (int i = 0; i < K / 3; i++) { + float* data = pred.data() + i * 3; + int index = *(data + 0) + *(data + 1) * W + H * W * i; + float* offx = offset_x.data() + index; + float* offy = offset_y.data() + index; + *(data + 0) += *offx; + *(data + 1) += *offy; + } + } + } else { + pred = get_max_pred(heatmap); + if (post_process == "unbiased") { + heatmap = gaussian_blur(heatmap, modulate_kernel); + float* data = heatmap.data(); + std::for_each(data, data + K * H * W, [](float& v) { + double _v = std::max((double)v, 1e-10); + v = std::log(_v); + }); + for (int i = 0; i < K; i++) { + taylor(heatmap, pred, i); + } + + } else if (post_process != "null") { + for (int i = 0; i < K; i++) { + float* data = heatmap.data() + i * W * H; + auto _data = [&](int y, int x) { return *(data + y * W + x); }; + int px = *(pred.data() + i * 3 + 0); + int py = *(pred.data() + i * 3 + 1); + if (1 < px && px < W - 1 && 1 < py && py < H - 1) { + float v1 = _data(py, px + 1) - _data(py, px - 1); + float v2 = _data(py + 1, px) - _data(py - 1, px); + *(pred.data() + i * 3 + 0) += (v1 > 0) ? 0.25 : ((v1 < 0) ? -0.25 : 0); + *(pred.data() + i * 3 + 1) += (v2 > 0) ? 0.25 : ((v2 < 0) ? 
-0.25 : 0); + if (post_process_ == "megvii") { + *(pred.data() + i * 3 + 0) += 0.5; + *(pred.data() + i * 3 + 1) += 0.5; + } + } + } + } + } + + K = pred.shape(1); // changed if target_type is CombinedTarget + + // Transform back to the image + for (int i = 0; i < K; i++) { + transform_pred(pred, i, center, scale, {W, H}, use_udp); + } + + if (post_process_ == "megvii") { + for (int i = 0; i < K; i++) { + float* data = pred.data() + i * 3 + 2; + *data = *data / 255.0 + 0.5; + } + } + + return pred; + } + + void post_dark_udp(Tensor& pred, Tensor& heatmap, int kernel) { + int K = heatmap.shape(1); + int H = heatmap.shape(2); + int W = heatmap.shape(3); + cv::parallel_for_(cv::Range(0, K), _LoopBody{[&](const cv::Range& r) { + for (int i = r.start; i < r.end; i++) { + float* data = heatmap.data() + i * H * W; + cv::Mat work = cv::Mat(H, W, CV_32FC(1), data); + cv::GaussianBlur(work, work, {kernel, kernel}, 0); // inplace + } + }}); + std::for_each(heatmap.data(), heatmap.data() + K * H * W, [](float& x) { + x = std::max(0.001f, std::min(50.f, x)); + x = std::log(x); + }); + auto _heatmap_data = [&](int index, int c) -> float { + int y = index / (W + 2); + int x = index % (W + 2); + y = std::max(0, y - 1); + x = std::max(0, x - 1); + return *(heatmap.data() + c * H * W + y * W + x); + }; + for (int i = 0; i < K; i++) { + float* data = pred.data() + i * 3; + int index = *(data + 0) + 1 + (*(data + 1) + 1) * (W + 2); + float i_ = _heatmap_data(index, i); + float ix1 = _heatmap_data(index + 1, i); + float iy1 = _heatmap_data(index + W + 2, i); + float ix1y1 = _heatmap_data(index + W + 3, i); + float ix1_y1_ = _heatmap_data(index - W - 3, i); + float ix1_ = _heatmap_data(index - 1, i); + float iy1_ = _heatmap_data(index - 2 - W, i); + float dx = 0.5 * (ix1 - ix1_); + float dy = 0.5 * (iy1 - iy1_); + float dxx = ix1 - 2 * i_ + ix1_; + float dyy = iy1 - 2 * i_ + iy1_; + float dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_); + vector _data0 = {dx, dy}; + vector _data1 = {dxx, dxy, dxy, dyy}; + cv::Mat derivative = cv::Mat(2, 1, CV_32FC1, _data0.data()); + cv::Mat hessian = cv::Mat(2, 2, CV_32FC1, _data1.data()); + cv::Mat hessianinv = hessian.inv(); + cv::Mat offset = -hessianinv * derivative; + *(data + 0) += offset.at(0, 0); + *(data + 1) += offset.at(1, 0); + } + } + + void transform_pred(Tensor& pred, int k, const vector& center, const vector& _scale, + const vector& output_size, bool use_udp = false) { + auto scale = _scale; + scale[0] *= 200; + scale[1] *= 200; + + float scale_x, scale_y; + if (use_udp) { + scale_x = scale[0] / (output_size[0] - 1.0); + scale_y = scale[1] / (output_size[1] - 1.0); + } else { + scale_x = scale[0] / output_size[0]; + scale_y = scale[1] / output_size[1]; + } + + float* data = pred.data() + k * 3; + *(data + 0) = *(data + 0) * scale_x + center[0] - scale[0] * 0.5; + *(data + 1) = *(data + 1) * scale_y + center[1] - scale[1] * 0.5; + } + + void taylor(const Tensor& heatmap, Tensor& pred, int k) { + int K = heatmap.shape(1); + int H = heatmap.shape(2); + int W = heatmap.shape(3); + int px = *(pred.data() + k * 3 + 0); + int py = *(pred.data() + k * 3 + 1); + if (1 < px && px < W - 2 && 1 < py && py < H - 2) { + float* data = const_cast(heatmap.data() + k * H * W); + auto get_data = [&](int r, int c) { return *(data + r * W + c); }; + float dx = 0.5 * (get_data(py, px + 1) - get_data(py, px - 1)); + float dy = 0.5 * (get_data(py + 1, px) - get_data(py - 1, px)); + float dxx = 0.25 * (get_data(py, px + 2) - 2 * get_data(py, px) + get_data(py, px - 
2)); + float dxy = 0.25 * (get_data(py + 1, px + 1) - get_data(py - 1, px + 1) - + get_data(py + 1, px - 1) + get_data(py - 1, px - 1)); + float dyy = 0.25 * (get_data(py + 2, px) - 2 * get_data(py, px) + get_data(py - 2, px)); + + vector _data0 = {dx, dy}; + vector _data1 = {dxx, dxy, dxy, dyy}; + cv::Mat derivative = cv::Mat(2, 1, CV_32FC1, _data0.data()); + cv::Mat hessian = cv::Mat(2, 2, CV_32FC1, _data1.data()); + if (std::fabs(dxx * dyy - dxy * dxy) > 1e-6) { + cv::Mat hessianinv = hessian.inv(); + cv::Mat offset = -hessianinv * derivative; + *(pred.data() + k * 3 + 0) += offset.at(0, 0); + *(pred.data() + k * 3 + 1) += offset.at(1, 0); + } + } + } + + Tensor gaussian_blur(const Tensor& _heatmap, int kernel) { + assert(kernel % 2 == 1); + + auto desc = _heatmap.desc(); + Tensor heatmap(desc); + + int K = _heatmap.shape(1); + int H = _heatmap.shape(2); + int W = _heatmap.shape(3); + int num_points = H * W; + + int border = (kernel - 1) / 2; + + for (int i = 0; i < K; i++) { + int offset = i * H * W; + float* data = const_cast(_heatmap.data()) + offset; + float origin_max = *std::max_element(data, data + num_points); + cv::Mat work = cv::Mat(H + 2 * border, W + 2 * border, CV_32FC1, cv::Scalar{}); + cv::Mat curr = cv::Mat(H, W, CV_32FC1, data); + cv::Rect roi = {border, border, W, H}; + curr.copyTo(work(roi)); + cv::GaussianBlur(work, work, {kernel, kernel}, 0); + cv::Mat valid = work(roi).clone(); + float cur_max = *std::max_element((float*)valid.data, (float*)valid.data + num_points); + float* dst = heatmap.data() + offset; + std::transform((float*)valid.data, (float*)valid.data + num_points, dst, + [&](float v) { return v * origin_max / cur_max; }); + } + return heatmap; + } + + Tensor get_max_pred(const Tensor& heatmap) { + int K = heatmap.shape(1); + int H = heatmap.shape(2); + int W = heatmap.shape(3); + int num_points = H * W; + TensorDesc pred_desc = {Device{"cpu"}, DataType::kFLOAT, {1, K, 3}}; + Tensor pred(pred_desc); + + cv::parallel_for_(cv::Range(0, K), _LoopBody{[&](const cv::Range& r) { + for (int i = r.start; i < r.end; i++) { + float* src_data = const_cast(heatmap.data()) + i * H * W; + cv::Mat mat = cv::Mat(H, W, CV_32FC1, src_data); + double min_val, max_val; + cv::Point min_loc, max_loc; + cv::minMaxLoc(mat, &min_val, &max_val, &min_loc, &max_loc); + float* dst_data = pred.data() + i * 3; + *(dst_data + 0) = -1; + *(dst_data + 1) = -1; + *(dst_data + 2) = max_val; + if (max_val > 0.0) { + *(dst_data + 0) = max_loc.x; + *(dst_data + 1) = max_loc.y; + } + } + }}); + + return pred; + } + + private: + bool flip_test_{true}; + bool shift_heatmap_{true}; + string post_process_ = {"default"}; + int modulate_kernel_{11}; + bool unbiased_decoding_{false}; + float valid_radius_factor_{0.0546875f}; + bool use_udp_{false}; + string target_type_{"GaussianHeatmap"}; +}; + +REGISTER_CODEBASE_COMPONENT(MMPose, TopdownHeatmapBaseHeadDecode); + +// decode process is same +using TopdownHeatmapSimpleHeadDecode = TopdownHeatmapBaseHeadDecode; +REGISTER_CODEBASE_COMPONENT(MMPose, TopdownHeatmapSimpleHeadDecode); +using TopdownHeatmapMultiStageHeadDecode = TopdownHeatmapBaseHeadDecode; +REGISTER_CODEBASE_COMPONENT(MMPose, TopdownHeatmapMultiStageHeadDecode); +using ViPNASHeatmapSimpleHeadDecode = TopdownHeatmapBaseHeadDecode; +REGISTER_CODEBASE_COMPONENT(MMPose, ViPNASHeatmapSimpleHeadDecode); +using TopdownHeatmapMSMUHeadDecode = TopdownHeatmapBaseHeadDecode; +REGISTER_CODEBASE_COMPONENT(MMPose, TopdownHeatmapMSMUHeadDecode); + +} // namespace mmdeploy::mmpose diff --git 
a/csrc/codebase/mmpose/keypoints_from_regression.cpp b/csrc/codebase/mmpose/keypoints_from_regression.cpp new file mode 100644 index 0000000000..a484b670e8 --- /dev/null +++ b/csrc/codebase/mmpose/keypoints_from_regression.cpp @@ -0,0 +1,115 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include +#include + +#include "core/device.h" +#include "core/registry.h" +#include "core/serialization.h" +#include "core/tensor.h" +#include "core/utils/device_utils.h" +#include "core/utils/formatter.h" +#include "core/value.h" +#include "experimental/module_adapter.h" +#include "mmpose.h" +#include "opencv_utils.h" + +namespace mmdeploy::mmpose { + +using std::string; +using std::vector; + +class DeepposeRegressionHeadDecode : public MMPose { + public: + explicit DeepposeRegressionHeadDecode(const Value& config) : MMPose(config) {} + + Result operator()(const Value& _data, const Value& _prob) { + MMDEPLOY_DEBUG("preprocess_result: {}", _data); + MMDEPLOY_DEBUG("inference_result: {}", _prob); + + Device cpu_device{"cpu"}; + OUTCOME_TRY(auto output, + MakeAvailableOnDevice(_prob["output"].get(), cpu_device, stream())); + OUTCOME_TRY(stream().Wait()); + if (!(output.shape().size() == 3 && output.data_type() == DataType::kFLOAT)) { + MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(), + (int)output.data_type()); + return Status(eNotSupported); + } + + auto& img_metas = _data["img_metas"]; + + vector center; + vector scale; + from_value(img_metas["center"], center); + from_value(img_metas["scale"], scale); + vector img_size = {img_metas["img_shape"][2].get(), + img_metas["img_shape"][1].get()}; + + Tensor pred = keypoints_from_regression(output, center, scale, img_size); + + return GetOutput(pred); + } + + Value GetOutput(Tensor& pred) { + PoseDetectorOutput output; + int K = pred.shape(1); + float* data = pred.data(); + for (int i = 0; i < K; i++) { + float x = *(data + 0); + float y = *(data + 1); + float s = *(data + 2); + output.key_points.push_back({{x, y}, s}); + data += 3; + } + return to_value(std::move(output)); + } + + Tensor keypoints_from_regression(const Tensor& output, const vector& center, + const vector& scale, const vector& img_size) { + int K = output.shape(1); + TensorDesc pred_desc = {Device{"cpu"}, DataType::kFLOAT, {1, K, 3}}; + Tensor pred(pred_desc); + + float* src = const_cast(output.data()); + float* dst = pred.data(); + for (int i = 0; i < K; i++) { + *(dst + 0) = *(src + 0) * img_size[0]; + *(dst + 1) = *(src + 1) * img_size[1]; + *(dst + 2) = 1.f; + src += 2; + dst += 3; + } + + // Transform back to the image + for (int i = 0; i < K; i++) { + transform_pred(pred, i, center, scale, img_size, false); + } + + return pred; + } + + void transform_pred(Tensor& pred, int k, const vector& center, const vector& _scale, + const vector& output_size, bool use_udp = false) { + auto scale = _scale; + scale[0] *= 200; + scale[1] *= 200; + + float scale_x, scale_y; + if (use_udp) { + scale_x = scale[0] / (output_size[0] - 1.0); + scale_y = scale[1] / (output_size[1] - 1.0); + } else { + scale_x = scale[0] / output_size[0]; + scale_y = scale[1] / output_size[1]; + } + + float* data = pred.data() + k * 3; + *(data + 0) = *(data + 0) * scale_x + center[0] - scale[0] * 0.5; + *(data + 1) = *(data + 1) * scale_y + center[1] - scale[1] * 0.5; + } +}; + +REGISTER_CODEBASE_COMPONENT(MMPose, DeepposeRegressionHeadDecode); + +} // namespace mmdeploy::mmpose diff --git a/csrc/codebase/mmpose/mmpose.cpp b/csrc/codebase/mmpose/mmpose.cpp new file mode 100644 index 
0000000000..7d5e048b11 --- /dev/null +++ b/csrc/codebase/mmpose/mmpose.cpp @@ -0,0 +1,15 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "codebase/mmpose/mmpose.h" + +using namespace std; + +namespace mmdeploy { +namespace mmpose { + +REGISTER_CODEBASE(MMPose); + +} + +MMDEPLOY_DEFINE_REGISTRY(mmpose::MMPose); +} // namespace mmdeploy diff --git a/csrc/codebase/mmpose/mmpose.h b/csrc/codebase/mmpose/mmpose.h new file mode 100644 index 0000000000..ed66f53a8e --- /dev/null +++ b/csrc/codebase/mmpose/mmpose.h @@ -0,0 +1,30 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#ifndef MMDEPLOY_MMPOSE_H +#define MMDEPLOY_MMPOSE_H + +#include "codebase/common.h" +#include "core/device.h" +#include "core/module.h" + +namespace mmdeploy { +namespace mmpose { + +struct PoseDetectorOutput { + struct KeyPoint { + std::array bbox; // x, y + float score; + MMDEPLOY_ARCHIVE_MEMBERS(bbox, score); + }; + std::vector key_points; + MMDEPLOY_ARCHIVE_MEMBERS(key_points); +}; + +DECLARE_CODEBASE(MMPose, mmpose); + +} // namespace mmpose + +MMDEPLOY_DECLARE_REGISTRY(mmpose::MMPose); +} // namespace mmdeploy + +#endif // MMDEPLOY_MMPOSE_H diff --git a/csrc/codebase/mmpose/topdown_affine.cpp b/csrc/codebase/mmpose/topdown_affine.cpp new file mode 100644 index 0000000000..e3effd0e21 --- /dev/null +++ b/csrc/codebase/mmpose/topdown_affine.cpp @@ -0,0 +1,191 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include + +#include "archive/json_archive.h" +#include "archive/value_archive.h" +#include "core/registry.h" +#include "core/tensor.h" +#include "core/utils/device_utils.h" +#include "core/utils/formatter.h" +#include "opencv2/imgproc.hpp" +#include "opencv_utils.h" +#include "preprocess/transform/resize.h" +#include "preprocess/transform/transform.h" + +using namespace std; + +namespace mmdeploy { + +cv::Point2f operator*(cv::Point2f a, cv::Point2f b) { + cv::Point2f c; + c.x = a.x * b.x; + c.y = a.y * b.y; + return c; +} + +class TopDownAffineImpl : public Module { + public: + explicit TopDownAffineImpl(const Value& args) noexcept { + use_udp_ = args.value("use_udp", use_udp_); + backend_ = args.contains("backend") && args["backend"].is_string() + ? 
args["backend"].get() + : backend_; + stream_ = args["context"]["stream"].get(); + assert(args.contains("image_size")); + from_value(args["image_size"], image_size_); + } + + ~TopDownAffineImpl() override = default; + + Result Process(const Value& input) override { + MMDEPLOY_DEBUG("top_down_affine input: {}", input); + + Device host{"cpu"}; + auto _img = input["img"].get(); + OUTCOME_TRY(auto img, MakeAvailableOnDevice(_img, host, stream_)); + stream_.Wait().value(); + auto src = cpu::Tensor2CVMat(img); + + // prepare data + vector box; + from_value(input["box"], box); + vector c; // center + vector s; // scale + Box2cs(box, c, s); + auto r = input["rotation"].get(); + + cv::Mat dst; + if (use_udp_) { + cv::Mat trans = + GetWarpMatrix(r, {c[0] * 2.f, c[1] * 2.f}, {image_size_[0] - 1.f, image_size_[1] - 1.f}, + {s[0] * 200.f, s[1] * 200.f}); + + cv::warpAffine(src, dst, trans, {image_size_[0], image_size_[1]}, cv::INTER_LINEAR); + } else { + cv::Mat trans = + GetAffineTransform({c[0], c[1]}, {s[0], s[1]}, r, {image_size_[0], image_size_[1]}); + cv::warpAffine(src, dst, trans, {image_size_[0], image_size_[1]}, cv::INTER_LINEAR); + } + + Value output = input; + output["img"] = cpu::CVMat2Tensor(dst); + output["img_shape"] = {1, image_size_[1], image_size_[0], dst.channels()}; + output["center"] = to_value(c); + output["scale"] = to_value(s); + MMDEPLOY_DEBUG("output: {}", to_json(output).dump(2)); + return output; + } + + void Box2cs(vector& box, vector& center, vector& scale) { + float x = box[0]; + float y = box[1]; + float w = box[2]; + float h = box[3]; + float aspect_ratio = image_size_[0] * 1.0 / image_size_[1]; + center.push_back(x + w * 0.5); + center.push_back(y + h * 0.5); + if (w > aspect_ratio * h) { + h = w * 1.0 / aspect_ratio; + } else if (w < aspect_ratio * h) { + w = h * aspect_ratio; + } + scale.push_back(w / 200 * 1.25); + scale.push_back(h / 200 * 1.25); + } + + cv::Mat GetWarpMatrix(float theta, cv::Size2f size_input, cv::Size2f size_dst, + cv::Size2f size_target) { + theta = theta * 3.1415926 / 180; + float scale_x = size_dst.width / size_target.width; + float scale_y = size_dst.height / size_target.height; + cv::Mat matrix = cv::Mat(2, 3, CV_32FC1); + matrix.at(0, 0) = std::cos(theta) * scale_x; + matrix.at(0, 1) = -std::sin(theta) * scale_x; + matrix.at(0, 2) = + scale_x * (-0.5f * size_input.width * std::cos(theta) + + 0.5f * size_input.height * std::sin(theta) + 0.5f * size_target.width); + matrix.at(1, 0) = std::sin(theta) * scale_y; + matrix.at(1, 1) = std::cos(theta) * scale_y; + matrix.at(1, 2) = + scale_y * (-0.5f * size_input.width * std::sin(theta) - + 0.5f * size_input.height * std::cos(theta) + 0.5f * size_target.height); + return matrix; + } + + cv::Mat GetAffineTransform(cv::Point2f center, cv::Point2f scale, float rot, cv::Size output_size, + cv::Point2f shift = {0.f, 0.f}, bool inv = false) { + cv::Point2f scale_tmp = scale * 200; + float src_w = scale_tmp.x; + int dst_w = output_size.width; + int dst_h = output_size.height; + float rot_rad = 3.1415926 * rot / 180; + cv::Point2f src_dir = rotate_point({0.f, src_w * -0.5f}, rot_rad); + cv::Point2f dst_dir = {0.f, dst_w * -0.5f}; + + cv::Point2f src_points[3]; + src_points[0] = center + scale_tmp * shift; + src_points[1] = center + src_dir + scale_tmp * shift; + src_points[2] = Get3rdPoint(src_points[0], src_points[1]); + + cv::Point2f dst_points[3]; + dst_points[0] = {dst_w * 0.5f, dst_h * 0.5f}; + dst_points[1] = dst_dir + cv::Point2f(dst_w * 0.5f, dst_h * 0.5f); + dst_points[2] = 
Get3rdPoint(dst_points[0], dst_points[1]); + + cv::Mat trans = inv ? cv::getAffineTransform(dst_points, src_points) + : cv::getAffineTransform(src_points, dst_points); + return trans; + } + + cv::Point2f rotate_point(cv::Point2f pt, float angle_rad) { + float sn = std::sin(angle_rad); + float cs = std::cos(angle_rad); + float new_x = pt.x * cs - pt.y * sn; + float new_y = pt.x * sn + pt.y * cs; + return {new_x, new_y}; + } + + cv::Point2f Get3rdPoint(cv::Point2f a, cv::Point2f b) { + cv::Point2f direction = a - b; + cv::Point2f third_pt = b + cv::Point2f(-direction.y, direction.x); + return third_pt; + } + + protected: + bool use_udp_{false}; + vector image_size_; + std::string backend_; + Stream stream_; +}; + +class TopDownAffineImplCreator : public Creator { + public: + const char* GetName() const override { return "cpu"; } + int GetVersion() const override { return 1; } + ReturnType Create(const Value& args) override { + return std::make_unique(args); + } +}; + +MMDEPLOY_DEFINE_REGISTRY(TopDownAffineImpl); + +REGISTER_MODULE(TopDownAffineImpl, TopDownAffineImplCreator); + +class TopDownAffine : public Transform { + public: + explicit TopDownAffine(const Value& args) : Transform(args) { + impl_ = Instantiate("TopDownAffine", args); + } + ~TopDownAffine() override = default; + + Result Process(const Value& input) override { return impl_->Process(input); } + + private: + std::unique_ptr impl_; + static const std::string name_; +}; + +DECLARE_AND_REGISTER_MODULE(Transform, TopDownAffine, 1); + +} // namespace mmdeploy diff --git a/csrc/core/mpl/static_any.h b/csrc/core/mpl/static_any.h new file mode 100644 index 0000000000..a027fd63d3 --- /dev/null +++ b/csrc/core/mpl/static_any.h @@ -0,0 +1,489 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#ifndef MMDEPLOY_CSRC_CORE_MPL_STATIC_ANY_H_ +#define MMDEPLOY_CSRC_CORE_MPL_STATIC_ANY_H_ + +#include +#include +#include +#include +#include + +// re-implementation of std::any, relies on static type id instead of RTTI. +// adjusted from libc++-10 + +namespace mmdeploy { + +namespace traits { + +using type_id_t = uint64_t; + +template +struct TypeId { + static constexpr type_id_t value = 0; +}; + +template <> +struct TypeId { + static constexpr auto value = static_cast(-1); +}; + +// ! 
This only works when calling inside mmdeploy namespace +#define MMDEPLOY_REGISTER_TYPE_ID(type, id) \ + namespace traits { \ + template <> \ + struct TypeId { \ + static constexpr type_id_t value = id; \ + }; \ + } + +} // namespace traits + +namespace detail { + +template +struct is_in_place_type_impl : std::false_type {}; + +template +struct is_in_place_type_impl> : std::true_type {}; + +template +struct is_in_place_type : public is_in_place_type_impl {}; + +} // namespace detail + +class BadAnyCast : public std::bad_cast { + public: + const char* what() const noexcept override { return "BadAnyCast"; } +}; + +[[noreturn]] inline void ThrowBadAnyCast() { +#if __cpp_exceptions + throw BadAnyCast{}; +#else + std::abort(); +#endif +} + +// Forward declarations +class StaticAny; + +template +std::add_pointer_t> static_any_cast(const StaticAny*) noexcept; + +template +std::add_pointer_t static_any_cast(StaticAny*) noexcept; + +namespace __static_any_impl { + +using _Buffer = std::aligned_storage_t<3 * sizeof(void*), std::alignment_of_v>; + +template +using _IsSmallObject = + std::integral_constant % std::alignment_of_v == 0 && + std::is_nothrow_move_constructible_v>; + +enum class _Action { _Destroy, _Copy, _Move, _Get, _TypeInfo }; + +union _Ret { + void* ptr_; + traits::type_id_t type_id_; +}; + +template +struct _SmallHandler; +template +struct _LargeHandler; + +template +inline bool __compare_typeid(traits::type_id_t __id) { + if (__id && __id == traits::TypeId::value) { + return true; + } + return false; +} + +template +using _Handler = std::conditional_t<_IsSmallObject::value, _SmallHandler, _LargeHandler>; + +} // namespace __static_any_impl + +class StaticAny { + public: + constexpr StaticAny() noexcept : h_(nullptr) {} + + StaticAny(const StaticAny& other) : h_(nullptr) { + if (other.h_) { + other.__call(_Action::_Copy, this); + } + } + + StaticAny(StaticAny&& other) noexcept : h_(nullptr) { + if (other.h_) { + other.__call(_Action::_Move, this); + } + } + + template , + class = std::enable_if_t< + !std::is_same::value && !detail::is_in_place_type::value && + std::is_copy_constructible::value && traits::TypeId::value>> + explicit StaticAny(ValueType&& value); + + template < + class ValueType, class... Args, class T = std::decay_t, + class = std::enable_if_t::value && + std::is_copy_constructible::value && traits::TypeId::value>> + explicit StaticAny(std::in_place_type_t, Args&&... args); + + template , + class = std::enable_if_t< + std::is_constructible&, Args...>::value && + std::is_copy_constructible::value && traits::TypeId::value>> + explicit StaticAny(std::in_place_type_t, std::initializer_list, Args&&... args); + + ~StaticAny() { this->reset(); } + + StaticAny& operator=(const StaticAny& rhs) { + StaticAny(rhs).swap(*this); + return *this; + } + + StaticAny& operator=(StaticAny&& rhs) noexcept { + StaticAny(std::move(rhs)).swap(*this); + return *this; + } + + template < + class ValueType, class T = std::decay_t, + class = std::enable_if_t::value && + std::is_copy_constructible::value && traits::TypeId::value>> + StaticAny& operator=(ValueType&& v); + + template < + class ValueType, class... Args, class T = std::decay_t, + class = std::enable_if_t::value && + std::is_copy_constructible::value && traits::TypeId::value>> + T& emplace(Args&&... 
args); + + template , + class = std::enable_if_t< + std::is_constructible&, Args...>::value && + std::is_copy_constructible::value && traits::TypeId::value>> + T& emplace(std::initializer_list, Args&&...); + + void reset() noexcept { + if (h_) { + this->__call(_Action::_Destroy); + } + } + + void swap(StaticAny& rhs) noexcept; + + bool has_value() const noexcept { return h_ != nullptr; } + + traits::type_id_t type() const noexcept { + if (h_) { + return this->__call(_Action::_TypeInfo).type_id_; + } else { + return traits::TypeId::value; + } + } + + private: + using _Action = __static_any_impl::_Action; + using _Ret = __static_any_impl::_Ret; + using _HandleFuncPtr = _Ret (*)(_Action, const StaticAny*, StaticAny*, traits::type_id_t info); + + union _Storage { + constexpr _Storage() : ptr_(nullptr) {} + void* ptr_; + __static_any_impl::_Buffer buf_; + }; + + _Ret __call(_Action a, StaticAny* other = nullptr, traits::type_id_t info = 0) const { + return h_(a, this, other, info); + } + + _Ret __call(_Action a, StaticAny* other = nullptr, traits::type_id_t info = 0) { + return h_(a, this, other, info); + } + + template + friend struct __static_any_impl::_SmallHandler; + + template + friend struct __static_any_impl::_LargeHandler; + + template + friend std::add_pointer_t> static_any_cast(const StaticAny*) noexcept; + + template + friend std::add_pointer_t static_any_cast(StaticAny*) noexcept; + + _HandleFuncPtr h_ = nullptr; + _Storage s_; +}; + +namespace __static_any_impl { + +template +struct _SmallHandler { + static _Ret __handle(_Action action, const StaticAny* self, StaticAny* other, + traits::type_id_t info) { + _Ret ret; + ret.ptr_ = nullptr; + switch (action) { + case _Action::_Destroy: + __destroy(const_cast(*self)); + break; + case _Action::_Copy: + __copy(*self, *other); + break; + case _Action::_Move: + __move(const_cast(*self), *other); + break; + case _Action::_Get: + ret.ptr_ = __get(const_cast(*self), info); + break; + case _Action::_TypeInfo: + ret.type_id_ = __type_info(); + break; + } + return ret; + } + + template + static T& __create(StaticAny& dest, Args&&... 
args) { + T* ret = ::new (static_cast(&dest.s_.buf_)) T(std::forward(args)...); + dest.h_ = &_SmallHandler::__handle; + return *ret; + } + + private: + template + static void __destroy(StaticAny& self) { + T& value = *static_cast(static_cast(&self.s_.buf_)); + value.~T(); + self.h_ = nullptr; + } + + template + static void __copy(const StaticAny& self, StaticAny& dest) { + _SmallHandler::__create(dest, *static_cast(static_cast(&self.s_.buf_))); + } + + static void __move(StaticAny& self, StaticAny& dest) { + _SmallHandler::__create(dest, std::move(*static_cast(static_cast(&self.s_.buf_)))); + __destroy(self); + } + + static void* __get(StaticAny& self, traits::type_id_t info) { + if (__static_any_impl::__compare_typeid(info)) { + return static_cast(&self.s_.buf_); + } + return nullptr; + } + + static traits::type_id_t __type_info() { return traits::TypeId::value; } +}; + +template +struct _LargeHandler { + static _Ret __handle(_Action action, const StaticAny* self, StaticAny* other, + traits::type_id_t info) { + _Ret ret; + ret.ptr_ = nullptr; + switch (action) { + case _Action::_Destroy: + __destroy(const_cast(*self)); + break; + case _Action::_Copy: + __copy(*self, *other); + break; + case _Action::_Move: + __move(const_cast(*self), *other); + break; + case _Action::_Get: + ret.ptr_ = __get(const_cast(*self), info); + break; + case _Action::_TypeInfo: + ret.type_id_ = __type_info(); + break; + } + return ret; + } + + template + static T& __create(StaticAny& dest, Args&&... args) { + using _Alloc = std::allocator; + _Alloc alloc; + auto dealloc = [&](T* p) { alloc.deallocate(p, 1); }; + std::unique_ptr hold(alloc.allocate(1), dealloc); + T* ret = ::new ((void*)hold.get()) T(std::forward(args)...); + dest.s_.ptr_ = hold.release(); + dest.h_ = &_LargeHandler::__handle; + return *ret; + } + + private: + static void __destroy(StaticAny& self) { + delete static_cast(self.s_.ptr_); + self.h_ = nullptr; + } + + static void __copy(const StaticAny& self, StaticAny& dest) { + _LargeHandler::__create(dest, *static_cast(self.s_.ptr_)); + } + + static void __move(StaticAny& self, StaticAny& dest) { + dest.s_.ptr_ = self.s_.ptr_; + dest.h_ = &_LargeHandler::__handle; + self.h_ = nullptr; + } + + static void* __get(StaticAny& self, traits::type_id_t info) { + if (__static_any_impl::__compare_typeid(info)) { + return static_cast(self.s_.ptr_); + } + return nullptr; + } + + static traits::type_id_t __type_info() { return traits::TypeId::value; } +}; + +} // namespace __static_any_impl + +template +StaticAny::StaticAny(ValueType&& v) : h_(nullptr) { + __static_any_impl::_Handler::__create(*this, std::forward(v)); +} + +template +StaticAny::StaticAny(std::in_place_type_t, Args&&... args) { + __static_any_impl::_Handler::__create(*this, std::forward(args)...); +} + +template +StaticAny::StaticAny(std::in_place_type_t, std::initializer_list il, Args&&... args) { + __static_any_impl::_Handler::__create(*this, il, std::forward(args)...); +} + +template +inline StaticAny& StaticAny::operator=(ValueType&& v) { + StaticAny(std::forward(v)).swap(*this); + return *this; +} + +template +inline T& StaticAny::emplace(Args&&... args) { + reset(); + return __static_any_impl::_Handler::__create(*this, std::forward(args)...); +} + +template +inline T& StaticAny::emplace(std::initializer_list il, Args&&... 
args) { + reset(); + return __static_any_impl::_Handler::_create(*this, il, std::forward(args)...); +} + +inline void StaticAny::swap(StaticAny& rhs) noexcept { + if (this == &rhs) { + return; + } + if (h_ && rhs.h_) { + StaticAny tmp; + rhs.__call(_Action::_Move, &tmp); + this->__call(_Action::_Move, &rhs); + tmp.__call(_Action::_Move, this); + } else if (h_) { + this->__call(_Action::_Move, &rhs); + } else if (rhs.h_) { + rhs.__call(_Action::_Move, this); + } +} + +inline void swap(StaticAny& lhs, StaticAny& rhs) noexcept { lhs.swap(rhs); } + +template +inline StaticAny make_static_any(Args&&... args) { + return StaticAny(std::in_place_type, std::forward(args)...); +} + +template +StaticAny make_static_any(std::initializer_list il, Args&&... args) { + return StaticAny(std::in_place_type, il, std::forward(args)...); +} + +template +ValueType static_any_cast(const StaticAny& v) { + using _RawValueType = std::remove_cv_t>; + static_assert(std::is_constructible::value, + "ValueType is required to be a const lvalue reference " + "or a CopyConstructible type"); + auto tmp = static_any_cast>(&v); + if (tmp == nullptr) { + ThrowBadAnyCast(); + } + return static_cast(*tmp); +} + +template +inline ValueType static_any_cast(StaticAny& v) { + using _RawValueType = std::remove_cv_t>; + static_assert(std::is_constructible::value, + "ValueType is required to be an lvalue reference " + "or a CopyConstructible type"); + auto tmp = static_any_cast<_RawValueType>(&v); + if (tmp == nullptr) { + ThrowBadAnyCast(); + } + return static_cast(*tmp); +} + +template +inline ValueType static_any_cast(StaticAny&& v) { + using _RawValueType = std::remove_cv_t>; + static_assert(std::is_constructible::value, + "ValueType is required to be an rvalue reference " + "or a CopyConstructible type"); + auto tmp = static_any_cast<_RawValueType>(&v); + if (tmp == nullptr) { + ThrowBadAnyCast(); + } + return static_cast(std::move(*tmp)); +} + +template +inline std::add_pointer_t> static_any_cast( + const StaticAny* __any) noexcept { + static_assert(!std::is_reference::value, "ValueType may not be a reference."); + return static_any_cast(const_cast(__any)); +} + +template +inline RetType __pointer_or_func_test(void* p, std::false_type) noexcept { + return static_cast(p); +} + +template +inline RetType __pointer_or_func_test(void*, std::true_type) noexcept { + return nullptr; +} + +template +std::add_pointer_t static_any_cast(StaticAny* any) noexcept { + using __static_any_impl::_Action; + static_assert(!std::is_reference::value, "ValueType may not be a reference."); + using ReturnType = std::add_pointer_t; + if (any && any->h_) { + void* p = any->__call(_Action::_Get, nullptr, traits::TypeId::value).ptr_; + return __pointer_or_func_test(p, std::is_function{}); + } + return nullptr; +} + +} // namespace mmdeploy + +#endif // MMDEPLOY_CSRC_CORE_MPL_STATIC_ANY_H_ diff --git a/csrc/core/value.h b/csrc/core/value.h index 4ee0119ddc..fe5ae559e9 100644 --- a/csrc/core/value.h +++ b/csrc/core/value.h @@ -3,7 +3,6 @@ #ifndef MMDEPLOY_TYPES_VALUE_H_ #define MMDEPLOY_TYPES_VALUE_H_ -#include #include #include #include @@ -16,6 +15,7 @@ #include "core/logger.h" #include "core/status_code.h" #include "mpl/priority_tag.h" +#include "mpl/static_any.h" #include "mpl/type_traits.h" namespace mmdeploy { @@ -169,6 +169,14 @@ struct is_cast_by_erasure : std::true_type {}; template <> struct is_cast_by_erasure : std::true_type {}; +MMDEPLOY_REGISTER_TYPE_ID(Device, 1); +MMDEPLOY_REGISTER_TYPE_ID(Buffer, 2); +MMDEPLOY_REGISTER_TYPE_ID(Stream, 3); 
+MMDEPLOY_REGISTER_TYPE_ID(Event, 4); +MMDEPLOY_REGISTER_TYPE_ID(Model, 5); +MMDEPLOY_REGISTER_TYPE_ID(Tensor, 6); +MMDEPLOY_REGISTER_TYPE_ID(Mat, 7); + template struct is_value : std::is_same {}; @@ -209,8 +217,8 @@ class Value { using Array = std::vector; using Object = std::map; using Pointer = std::shared_ptr; - using Dynamic = mmdeploy::Dynamic; - using Any = std::any; + using Dynamic = ::mmdeploy::Dynamic; + using Any = ::mmdeploy::StaticAny; using ValueRef = detail::ValueRef; static constexpr const auto kNull = ValueType::kNull; @@ -354,7 +362,7 @@ class Value { if constexpr (std::is_void_v) { return true; } else { - return typeid(T) == data_.any->type(); + return traits::TypeId::value == data_.any->type(); } } @@ -445,11 +453,11 @@ class Value { template T* get_erased_ptr(EraseType*) noexcept { - return _is_any() ? std::any_cast(data_.any) : nullptr; + return _is_any() ? static_any_cast(data_.any) : nullptr; } template const T* get_erased_ptr(const EraseType*) const noexcept { - return _is_any() ? std::any_cast(const_cast(data_.any)) : nullptr; + return _is_any() ? static_any_cast(const_cast(data_.any)) : nullptr; } template diff --git a/csrc/device/cuda/CMakeLists.txt b/csrc/device/cuda/CMakeLists.txt index 04f392d855..7fdddd5ed9 100644 --- a/csrc/device/cuda/CMakeLists.txt +++ b/csrc/device/cuda/CMakeLists.txt @@ -10,7 +10,6 @@ set(SRCS cuda_device.cpp cuda_builtin_kernels.cu) mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") -target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_INCLUDE_DIRS}) -target_link_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/lib64) +target_include_directories(${PROJECT_NAME} PRIVATE ${CUDA_INCLUDE_DIRS}) target_link_libraries(${PROJECT_NAME} PRIVATE cudart cuda) add_library(mmdeploy::device::cuda ALIAS ${PROJECT_NAME}) diff --git a/csrc/net/ort/CMakeLists.txt b/csrc/net/ort/CMakeLists.txt index b4b78eff47..5d4740db69 100644 --- a/csrc/net/ort/CMakeLists.txt +++ b/csrc/net/ort/CMakeLists.txt @@ -2,12 +2,14 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_ort_net) +include(${CMAKE_SOURCE_DIR}/cmake/modules/FindONNXRUNTIME.cmake) + if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES) include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) mmdeploy_add_module(${PROJECT_NAME} ort_net.cpp) target_include_directories(${PROJECT_NAME} PRIVATE ${ONNXRUNTIME_DIR}/include) - target_link_directories(${PROJECT_NAME} PUBLIC ${ONNXRUNTIME_DIR}/lib) target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_onnxruntime_ops_obj) + target_link_libraries(${PROJECT_NAME} PUBLIC onnxruntime) add_library(mmdeploy::ort_net ALIAS ${PROJECT_NAME}) else () message(ERROR "'ort_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}") diff --git a/csrc/net/trt/CMakeLists.txt b/csrc/net/trt/CMakeLists.txt index 8c71bd46c1..9ceb49006e 100644 --- a/csrc/net/trt/CMakeLists.txt +++ b/csrc/net/trt/CMakeLists.txt @@ -11,8 +11,7 @@ target_include_directories(${PROJECT_NAME} PRIVATE ${TENSORRT_INCLUDE_DIR}) target_include_directories(${PROJECT_NAME} PRIVATE ${CUDNN_DIR}/include) target_include_directories(${PROJECT_NAME} PRIVATE ${CUDA_TOOLKIT_ROOT_DIR}/include) -target_link_directories(${PROJECT_NAME} PUBLIC ${CUDNN_DIR}/lib64 ${CUDNN_DIR}/lib/x64) target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_tensorrt_ops_obj) -target_link_libraries(${PROJECT_NAME} PUBLIC ${TENSORRT_LIBRARY} cudnn) +target_link_libraries(${PROJECT_NAME} PUBLIC ${TENSORRT_LIBS} cudnn) add_library(mmdeploy::trt_net ALIAS ${PROJECT_NAME}) diff --git a/csrc/preprocess/cuda/CMakeLists.txt 
b/csrc/preprocess/cuda/CMakeLists.txt index 2ccf77638d..1c83cf309b 100644 --- a/csrc/preprocess/cuda/CMakeLists.txt +++ b/csrc/preprocess/cuda/CMakeLists.txt @@ -23,5 +23,5 @@ mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::transform ${PPLCV_LIBRARIES}) target_include_directories(${PROJECT_NAME} - PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/include ${PPLCV_INCLUDE_DIRS}) + PRIVATE ${CUDA_TOOLKIT_ROOT_DIR}/include ${PPLCV_INCLUDE_DIRS}) add_library(mmdeploy::transform_impl::cuda ALIAS ${PROJECT_NAME}) diff --git a/demo/csrc/CMakeLists.txt b/demo/csrc/CMakeLists.txt index 3e1bdcc6fb..71d49f3199 100644 --- a/demo/csrc/CMakeLists.txt +++ b/demo/csrc/CMakeLists.txt @@ -20,4 +20,5 @@ add_example(image_classification) add_example(object_detection) add_example(image_restorer) add_example(image_segmentation) +add_example(pose_detection) add_example(ocr) diff --git a/demo/csrc/pose_detection.cpp b/demo/csrc/pose_detection.cpp new file mode 100644 index 0000000000..14fa9c7391 --- /dev/null +++ b/demo/csrc/pose_detection.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include + +#include "pose_detector.h" + +int main(int argc, char *argv[]) { + if (argc != 4) { + fprintf(stderr, "usage:\n pose_detection device_name model_path image_path\n"); + return 1; + } + auto device_name = argv[1]; + auto model_path = argv[2]; + auto image_path = argv[3]; + cv::Mat img = cv::imread(image_path); + if (!img.data) { + fprintf(stderr, "failed to load image: %s\n", image_path); + return 1; + } + + mm_handle_t pose_estimator{}; + int status{}; + status = mmdeploy_pose_detector_create_by_path(model_path, device_name, 0, &pose_estimator); + if (status != MM_SUCCESS) { + fprintf(stderr, "failed to create pose_estimator, code: %d\n", (int)status); + return 1; + } + + mm_mat_t mat{img.data, img.rows, img.cols, 3, MM_BGR, MM_INT8}; + + mm_pose_detect_t *res{}; + int *res_count{}; + status = mmdeploy_pose_detector_apply(pose_estimator, &mat, 1, &res, &res_count); + if (status != MM_SUCCESS) { + fprintf(stderr, "failed to apply pose estimator, code: %d\n", (int)status); + return 1; + } + + for (int i = 0; i < res->length; i++) { + cv::circle(img, {(int)res->point[i].x, (int)res->point[i].y}, 1, {0, 255, 0}, 2); + } + cv::imwrite("output_pose.png", img); + + mmdeploy_pose_detector_release_result(res, 1); + mmdeploy_pose_detector_destroy(pose_estimator); + + return 0; +} diff --git a/docs/en/backends/onnxruntime.md b/docs/en/backends/onnxruntime.md index a47569d802..181e655094 100644 --- a/docs/en/backends/onnxruntime.md +++ b/docs/en/backends/onnxruntime.md @@ -57,7 +57,6 @@ make -j$(nproc) | Operator | CPU | GPU | MMDeploy Releases | | :--------------------------------------------------------------------------- | :---: | :---: | :---------------- | -| [RoIAlign](../ops/onnxruntime.md#roialign) | Y | N | master | | [grid_sampler](../ops/onnxruntime.md#grid_sampler) | Y | N | master | | [MMCVModulatedDeformConv2d](../ops/onnxruntime.md#mmcvmodulateddeformconv2d) | Y | N | master | diff --git a/docs/en/backends/openvino.md b/docs/en/backends/openvino.md index 12a6686d36..a33d64d528 100644 --- a/docs/en/backends/openvino.md +++ b/docs/en/backends/openvino.md @@ -63,6 +63,28 @@ Notes: the RoiAlign operation is replaced with the [ExperimentalDetectronROIFeatureExtractor](https://docs.openvinotoolkit.org/latest/openvino_docs_ops_detection_ExperimentalDetectronROIFeatureExtractor_6.html) operation in the ONNX graph. 
- Models "VFNet" and "Faster R-CNN + DCN" use the custom "DeformableConv2D" operation. +### Deployment config + +With the deployment config, you can specify additional options for the Model Optimizer. +To do this, add the necessary parameters to the `backend_config.mo_options` in the fields `args` (for parameters with values) and `flags` (for flags). + +Example: +```python +backend_config = dict( + mo_options=dict( + args=dict({ + '--mean_values': [0, 0, 0], + '--scale_values': [255, 255, 255], + '--data_type': 'FP32', + }), + flags=['--disable_fusing'], + ) +) +``` + +Information about the possible parameters for the Model Optimizer can be found in the [documentation](https://docs.openvino.ai/latest/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model.html). + + ### FAQs - None diff --git a/docs/en/backends/torchscript.md b/docs/en/backends/torchscript.md new file mode 100644 index 0000000000..30449444a1 --- /dev/null +++ b/docs/en/backends/torchscript.md @@ -0,0 +1,54 @@ +## TorchScript support + +### Introduction of TorchScript + +**TorchScript** a way to create serializable and optimizable models from PyTorch code. Any TorchScript program can be saved from a Python process and loaded in a process where there is no Python dependency. Check the [Introduction to TorchScript](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) for more details. + +### Build custom ops + +#### Prerequisite + +- Download libtorch from the official website [here](https://pytorch.org/get-started/locally/). + +*Please note that only **Pre-cxx11 ABI** and **version 1.8.1+** on Linux platform are supported by now.* + +For previous versions of libtorch, users can find through the [issue comment](https://github.com/pytorch/pytorch/issues/40961#issuecomment-1017317786). Libtorch1.8.1+cu111 as an example, extract it, expose `Torch_DIR` and add the lib path to `LD_LIBRARY_PATH` as below: + +```bash +wget https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.8.1%2Bcu111.zip + +unzip libtorch-shared-with-deps-1.8.1+cu111.zip +cd libtorch +export Torch_DIR=$(pwd) +export LD_LIBRARY_PATH=$Torch_DIR/lib:$LD_LIBRARY_PATH +``` + +Note: + +- If you want to save libtorch env variables to bashrc, you could run + + ```bash + echo '# set env for libtorch' >> ~/.bashrc + echo "export Torch_DIR=${Torch_DIR}" >> ~/.bashrc + echo 'export LD_LIBRARY_PATH=$Torch_DIR/lib:$LD_LIBRARY_PATH' >> ~/.bashrc + source ~/.bashrc + ``` + +#### Build on Linux + +```bash +cd ${MMDEPLOY_DIR} # To MMDeploy root directory +mkdir -p build && cd build +cmake -DMMDEPLOY_TARGET_BACKENDS=torchscript -DTorch_DIR=${Torch_DIR} .. +make -j$(nproc) +``` + +### How to convert a model + +- You could follow the instructions of tutorial [How to convert model](../tutorials/how_to_convert_model.md) + +### FAQs + +- Error: `projects/thirdparty/libtorch/share/cmake/Caffe2/Caffe2Config.cmake:96 (message):Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN libraries. Please set the proper cuDNN prefixes and / or install cuDNN.` + + May export CUDNN_ROOT=/root/path/to/cudnn to resolve the build error. 
diff --git a/docs/en/benchmark.md b/docs/en/benchmark.md index 1ea09b5264..9e66f19b85 100644 --- a/docs/en/benchmark.md +++ b/docs/en/benchmark.md @@ -690,6 +690,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut MMCls PyTorch + TorchScript ONNX Runtime TensorRT PPLNN @@ -702,6 +703,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Task Metrics fp32 + seresnet fp32 fp32 fp16 @@ -714,6 +716,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Classification top-1 69.90 + 69.90 69.88 69.88 69.86 @@ -724,6 +727,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut top-5 89.43 + 89.43 89.34 89.34 89.33 @@ -737,6 +741,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 77.90 77.90 77.90 + 77.90 - 77.78 77.89 @@ -747,6 +752,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 93.66 93.66 93.66 + 93.66 - 93.64 93.65 @@ -758,6 +764,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 77.74 77.74 77.74 + 77.74 77.75 77.63 77.73 @@ -768,6 +775,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 93.84 93.84 93.84 + 93.84 93.83 93.72 93.84 @@ -780,6 +788,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 68.13 68.13 68.13 + 68.13 67.71 68.11 $MMCLS_DIR/configs/shufflenet_v1/shufflenet_v1_1x_b64x16_linearlr_bn_nowd_imagenet.py @@ -790,6 +799,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 87.81 87.81 87.81 + 87.81 87.58 87.80 @@ -800,6 +810,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 69.55 69.55 69.55 + 69.55 69.54 69.10 69.54 @@ -810,6 +821,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 88.92 88.92 88.92 + 88.92 88.91 88.58 88.92 @@ -821,6 +833,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 71.86 71.86 71.86 + 71.86 71.87 70.91 71.84 @@ -831,6 +844,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 90.42 90.42 90.42 + 90.42 90.40 89.85 90.41 @@ -848,6 +862,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut MMDet Pytorch + TorchScript ONNXRuntime TensorRT PPLNN @@ -864,6 +879,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -876,6 +892,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 33.7 + 33.7 - 33.5 33.5 @@ -890,6 +907,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 25.5 + 25.5 - 25.5 25.5 @@ -904,6 +922,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 36.5 + 36.4 - 36.4 36.4 @@ -919,6 +938,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut box AP 36.6 - + - 36.6 36.5 - @@ -932,6 +952,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 37.4 + 37.4 - 37.4 37.4 @@ -946,6 +967,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 40.5 + 40.3 - 40.3 40.3 @@ -960,6 +982,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 37.4 + 37.3 - 37.3 37.3 @@ -975,6 +998,7 @@ Users 
can directly test the performance through [how_to_evaluate_a_model.md](tut box AP 39.4 - + - 39.4 39.4 - @@ -989,6 +1013,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut box AP 40.4 - + - 40.4 40.4 - @@ -1016,6 +1041,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 38.2 + 38.1 - 38.1 38.1 @@ -1027,6 +1053,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut mask AP 34.7 + 34.7 - 33.7 33.7 @@ -1047,6 +1074,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut MMEdit Pytorch + TorchScript ONNX Runtime TensorRT PPLNN @@ -1054,7 +1082,6 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut - Model Task @@ -1063,6 +1090,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -1074,6 +1102,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 28.4316 + 28.4120 28.4323 28.4323 28.4286 @@ -1084,6 +1113,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut SSIM 0.8099 + 0.8106 0.8097 0.8097 0.8096 @@ -1096,6 +1126,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 28.2700 + 28.2619 28.2592 28.2592 - @@ -1106,6 +1137,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut SSIM 0.7778 + 0.7784 0.7764 0.7774 - @@ -1118,6 +1150,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 30.6428 + 30.6306 30.6444 30.6430 - @@ -1126,8 +1159,9 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut $MMEDIT_DIR/configs/restorers/esrgan/esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py - + SSIM 0.8559 + 0.8565 0.8558 0.8558 - @@ -1140,16 +1174,18 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 27.9499 + 27.9252 27.9408 27.9408 - - 27.9388 - $MMEDIT_DIR/configs/restorers/srresnet_srgan/srgan_x4c64b16_g1_1000k_div2k.pyy + $MMEDIT_DIR/configs/restorers/srresnet_srgan/srgan_x4c64b16_g1_1000k_div2k.py SSIM 0.7846 + 0.7851 0.7839 0.7839 - @@ -1162,6 +1198,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 30.2252 + 30.2069 30.2300 30.2300 - @@ -1170,8 +1207,9 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut $MMEDIT_DIR/configs/restorers/srresnet_srgan/msrresnet_x4c64b16_g1_1000k_div2k.py - + SSIM 0.8491 + 0.8497 0.8488 0.8488 - @@ -1184,6 +1222,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 28.0297 + - 27.7016 27.7016 - @@ -1194,6 +1233,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut SSIM 0.8236 + - 0.8122 0.8122 - @@ -1206,6 +1246,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 30.2223 + 30.2192 30.2214 30.2214 30.2211 @@ -1216,6 +1257,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut SSIM 0.8500 + 0.8507 0.8497 0.8497 0.8497 @@ -1235,6 +1277,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut MMOCR Pytorch + TorchScript ONNXRuntime TensorRT PPLNN @@ -1251,6 +1294,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -1263,6 +1307,7 @@ Users can directly test the 
performance through [how_to_evaluate_a_model.md](tut ICDAR2015 recall 0.7310 + 0.7308 0.7304 0.7198 0.7179 @@ -1275,6 +1320,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut precision 0.8714 0.8718 + 0.8714 0.8677 0.8674 0.8688 @@ -1285,6 +1331,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut hmean 0.7950 0.7949 + 0.7950 0.7868 0.7856 0.7821 @@ -1299,6 +1346,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 0.8067 0.8067 0.8067 + 0.8067 0.8063 0.8067 0.8067 @@ -1311,6 +1359,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut IIIT5K acc 0.9517 + - 0.9287 - - @@ -1332,6 +1381,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut MMSeg Pytorch + TorchScript ONNXRuntime TensorRT PPLNN @@ -1346,6 +1396,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -1356,6 +1407,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 72.25 + 72.36 - 72.36 72.35 @@ -1368,6 +1420,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 78.55 + 78.66 - 78.26 78.24 @@ -1380,6 +1433,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 79.09 + 79.12 - 79.12 79.12 @@ -1392,6 +1446,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 79.61 + 79.60 - 79.60 79.60 @@ -1404,6 +1459,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 70.96 + 70.96 - 70.93 70.92 @@ -1417,12 +1473,247 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut mIoU 69.10 - + - 69.10 69.10 68.95 - $MMSEG_DIR/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py + + ANN + Cityscapes + mIoU + 77.40 + - + - + 77.32 + 77.32 + - + - + $MMSEG_DIR/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py + + + APCNet + Cityscapes + mIoU + 77.40 + - + - + 77.32 + 77.32 + - + - + $MMSEG_DIR/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py + + + BiSeNetV1 + Cityscapes + mIoU + 74.44 + - + - + 74.44 + 74.43 + - + - + $MMSEG_DIR/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py + + + BiSeNetV2 + Cityscapes + mIoU + 73.21 + - + - + 73.21 + 73.21 + - + - + $MMSEG_DIR/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py + + + CGNet + Cityscapes + mIoU + 68.25 + - + - + 68.27 + 68.27 + - + - + $MMSEG_DIR/configs/cgnet/cgnet_512x1024_60k_cityscapes.py + + + EMANet + Cityscapes + mIoU + 77.59 + - + - + 77.59 + 77.6 + - + - + $MMSEG_DIR/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py + + + EncNet + Cityscapes + mIoU + 75.67 + - + - + 75.66 + 75.66 + - + - + $MMSEG_DIR/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py + + + ERFNet + Cityscapes + mIoU + 71.08 + - + - + 71.08 + 71.07 + - + - + $MMSEG_DIR/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py + + + FastFCN + Cityscapes + mIoU + 79.12 + - + - + 79.12 + 79.12 + - + - + $MMSEG_DIR/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py + + + GCNet + Cityscapes + mIoU + 77.69 + - + - + 77.69 + 77.69 + - + - + $MMSEG_DIR/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py + + + ICNet + Cityscapes + mIoU + 76.29 + - + - + 76.36 + 76.36 + - + - + $MMSEG_DIR/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py + + + ISANet + 
Cityscapes + mIoU + 78.49 + - + - + 78.49 + 78.49 + - + - + $MMSEG_DIR/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py + + + OCRNet + Cityscapes + mIoU + 74.30 + - + - + 73.66 + 73.67 + - + - + $MMSEG_DIR/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py + + + PointRend + Cityscapes + mIoU + 76.47 + - + - + 76.41 + 76.42 + - + - + $MMSEG_DIR/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py + + + Semantic FPN + Cityscapes + mIoU + 74.52 + - + - + 74.52 + 74.52 + - + - + $MMSEG_DIR/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py + + + STDC + Cityscapes + mIoU + 75.10 + - + - + 75.10 + 75.10 + - + - + $MMSEG_DIR/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py + + + STDC + Cityscapes + mIoU + 77.17 + - + - + 77.17 + 77.17 + - + - + $MMSEG_DIR/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py + + + UPerNet + Cityscapes + mIoU + 77.10 + - + - + 77.19 + 77.18 + - + - + $MMSEG_DIR/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py + diff --git a/docs/en/build/android.md b/docs/en/build/android.md index 40fa4ac71b..c5bbb3eb17 100644 --- a/docs/en/build/android.md +++ b/docs/en/build/android.md @@ -198,6 +198,7 @@ mkdir -p build && cd build cmake .. \ -DOpenCV_DIR=${OPENCV_ANDROID_SDK_DIR}/sdk/native/jni/abi-arm64-v8a \ -Dspdlog_DIR=${SPDLOG_DIR}/lib/cmake/spdlog \ + -Dncnn_DIR=${NCNN_DIR}/build/install/lib/cmake/ncnn \ -DMMDeploy_DIR=${MMDEPLOY_DIR}/build/install/lib/cmake/MMDeploy \ -DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \ -DANDROID_ABI=arm64-v8a \ diff --git a/docs/en/build/windows.md b/docs/en/build/windows.md index 253247ec80..f84fa10239 100644 --- a/docs/en/build/windows.md +++ b/docs/en/build/windows.md @@ -294,6 +294,7 @@ mkdir build -ErrorAction SilentlyContinue cd build cmake .. -G "Visual Studio 16 2019" -A x64 -T v142 -DMMDEPLOY_TARGET_BACKENDS="ort" -DONNXRUNTIME_DIR="$env:ONNXRUNTIME_DIR" cmake --build . --config Release -- /m +``` - **TensorRT** Custom Ops @@ -302,6 +303,7 @@ mkdir build -ErrorAction SilentlyContinue cd build cmake .. -G "Visual Studio 16 2019" -A x64 -T v142 -DMMDEPLOY_TARGET_BACKENDS="trt" -DTENSORRT_DIR="$env:TENSORRT_DIR" -DCUDNN_DIR="$env:CUDNN_DIR" cmake --build . --config Release -- /m +``` - **ncnn** Custom Ops diff --git a/docs/en/codebases/mmdet3d.md b/docs/en/codebases/mmdet3d.md new file mode 100644 index 0000000000..fdf1d4f5bc --- /dev/null +++ b/docs/en/codebases/mmdet3d.md @@ -0,0 +1,43 @@ +## MMDetection3d Support + +MMDetection3d is a next-generation platform for general 3D object detection. It is a part of the [OpenMMLab](https://openmmlab.com/) project. + +### MMDetection3d installation tutorial + +Please refer to [getting_started.md](https://github.com/open-mmlab/mmdetection3d/blob/master/docs/en/getting_started.md) for installation. 
+ +### Example + +```bash +python tools/deploy.py \ + configs/mmdet3d/voxel-detection/voxel-detection_tensorrt_dynamic.py \ + ${MMDET3D_DIR}/configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py \ + checkpoints/point_pillars.pth \ + ${MMDET3D_DIR}/demo/data/kitti/kitti_000008.bin \ + --work-dir \ + work_dir \ + --show \ + --device \ + cuda:0 +``` +### List of MMDetection3d models supported by MMDeploy + +| Model | Task | OnnxRuntime | TensorRT | NCNN | PPLNN | OpenVINO | Model config | +| :----------------: | :------------------: | :---------: | :------: | :---: | :---: | :------: | :------------------------------------------------------------------------------------------------------: | +| PointPillars | VoxelDetection | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/pointpillars) | + +### Reminder + +Voxel detection onnx model excludes model.voxelize layer and model post process, and you can use python api to call these func. + +Example: + +```python +from mmdeploy.codebase.mmdet3d.deploy import VoxelDetectionModel +VoxelDetectionModel.voxelize(...) +VoxelDetectionModel.post_process(...) +``` + +### FAQs + +None diff --git a/docs/en/codebases/mmseg.md b/docs/en/codebases/mmseg.md index 30bbba2082..8cb30994b0 100644 --- a/docs/en/codebases/mmseg.md +++ b/docs/en/codebases/mmseg.md @@ -15,7 +15,34 @@ Please refer to [get_started.md](https://github.com/open-mmlab/mmsegmentation/bl | DeepLabV3 | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) | | DeepLabV3+ | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) | | Fast-SCNN[*](#static_shape) | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) | -| UNet[*](#static_shape) | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | +| UNet | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | +| ANN[*](#static_shape) | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) | +| APCNet | Y | Y | Y | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/apcnet) | +| BiSeNetV1 | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv1) | +| BiSeNetV2 | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv2) | +| CGNet | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/cgnet) | +| DMNet | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dmnet) | +| DNLNet | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dnlnet) | +| EMANet | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/emanet) | +| EncNet | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/encnet) | +| ERFNet | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/erfnet) | +| FastFCN | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastfcn) | +| GCNet | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) | +| ICNet[*](#static_shape) | Y | Y | N | N | Y | 
+| ISANet | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/isanet) |
+| NonLocal Net | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) |
+| OCRNet | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) |
+| PointRend | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/point_rend) |
+| Semantic FPN | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/sem_fpn) |
+| STDC | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/stdc) |
+| UPerNet[*](#static_shape) | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) |
+| DANet | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) |
+| Segmenter[*](#static_shape) | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/segmenter) |
+| SegFormer[*](#static_shape) | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/segformer) |
+| SETR | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/setr) |
+| CCNet | N | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ccnet) |
+| PSANet | N | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/psanet) |
+| DPT | N | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dpt) |
 
 ### Reminder
diff --git a/docs/en/faq.md b/docs/en/faq.md
index 899561a34a..b39042b437 100644
--- a/docs/en/faq.md
+++ b/docs/en/faq.md
@@ -33,3 +33,17 @@
 - Error: `libtorch/share/cmake/Caffe2/Caffe2Config.cmake:96 (message):Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN libraries. Please set the proper cuDNN prefixes and / or install cuDNN.`
 
     May `export CUDNN_ROOT=/root/path/to/cudnn` to resolve the build error.
+
+
+### Windows
+- Error: similar to this `OSError: [WinError 1455] The paging file is too small for this operation to complete. Error loading "C:\Users\cx\miniconda3\lib\site-packages\torch\lib\cudnn_cnn_infer64_8.dll" or one of its dependencies`
+
+  Solution: according to this [post](https://stackoverflow.com/questions/64837376/how-to-efficiently-run-multiple-pytorch-processes-models-at-once-traceback), the issue may be caused by NVIDIA and will be fixed in *CUDA release 11.7*. For now, one could use the [fixNvPe.py](https://gist.github.com/cobryan05/7d1fe28dd370e110a372c4d268dcb2e5) script to modify the NVIDIA DLLs in the PyTorch lib directory.
+
+  `python fixNvPe.py --input=C:\Users\user\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\lib\*.dll`
+
+  You can find your PyTorch installation path with:
+  ``` python
+  import torch
+  print(torch.__file__)
+  ```
diff --git a/docs/en/index.rst b/docs/en/index.rst
index bc7e10d91f..717011adb0 100644
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
@@ -46,6 +46,7 @@ You can switch between Chinese and English documents in the lower-left corner of
    backends/openvino.md
    backends/ncnn.md
    backends/pplnn.md
+   backends/torchscript.md
 
..
toctree:: :maxdepth: 1 diff --git a/docs/en/ops/onnxruntime.md b/docs/en/ops/onnxruntime.md index 2e4d741e0d..51791ebc9f 100644 --- a/docs/en/ops/onnxruntime.md +++ b/docs/en/ops/onnxruntime.md @@ -3,64 +3,21 @@ - [ONNX Runtime Ops](#onnx-runtime-ops) - - [RoIAlign](#roialign) + - [grid_sampler](#grid_sampler) - [Description](#description) - [Parameters](#parameters) - [Inputs](#inputs) - [Outputs](#outputs) - [Type Constraints](#type-constraints) - - [grid_sampler](#grid_sampler) + - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) - [Description](#description-1) - [Parameters](#parameters-1) - [Inputs](#inputs-1) - [Outputs](#outputs-1) - [Type Constraints](#type-constraints-1) - - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) - - [Description](#description-2) - - [Parameters](#parameters-2) - - [Inputs](#inputs-2) - - [Outputs](#outputs-2) - - [Type Constraints](#type-constraints-2) -### RoIAlign - -#### Description - -Perform RoIAlign on output feature, used in bbox_head of most two-stage detectors. - -#### Parameters - -| Type | Parameter | Description | -| ------- | ---------------- | ------------------------------------------------------------------------------------------------------------- | -| `int` | `output_height` | height of output roi | -| `int` | `output_width` | width of output roi | -| `float` | `spatial_scale` | used to scale the input boxes | -| `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | -| `str` | `mode` | pooling mode in each bin. `avg` or `max` | -| `int` | `aligned` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. | - -#### Inputs - -
-<dl>
-<dt>input: T</dt>
-<dd>Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.</dd>
-<dt>rois: T</dt>
-<dd>RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are the coordinate system of input.</dd>
-</dl>
-
-#### Outputs
-
-<dl>
-<dt>feat: T</dt>
-<dd>RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].</dd>
-</dl>
- -#### Type Constraints - -- T:tensor(float32) - ### grid_sampler #### Description diff --git a/docs/en/supported_models.md b/docs/en/supported_models.md index 0672e23df7..06982b829c 100644 --- a/docs/en/supported_models.md +++ b/docs/en/supported_models.md @@ -2,47 +2,70 @@ The table below lists the models that are guaranteed to be exportable to other backends. -| Model | Codebase | OnnxRuntime | TensorRT | NCNN | PPLNN | OpenVINO | Model config | -| :------------------------ | :--------------- | :---------: | :------: | :---: | :---: | :------: | :--------------------------------------------------------------------------------------------: | -| RetinaNet | MMDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) | -| Faster R-CNN | MMDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | -| YOLOv3 | MMDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) | -| YOLOX | MMDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) | -| FCOS | MMDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos) | -| FSAF | MMDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | -| Mask R-CNN | MMDetection | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) | -| SSD[*](#note) | MMDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) | -| FoveaBox | MMDetection | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox) | -| ATSS | MMDetection | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss) | -| GFL | MMDetection | Y | Y | N | ? 
| Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl) | -| Cascade R-CNN | MMDetection | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | -| Cascade Mask R-CNN | MMDetection | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | -| VFNet | MMDetection | N | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/vfnet) | -| ResNet | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) | -| ResNeXt | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) | -| SE-ResNet | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) | -| MobileNetV2 | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) | -| ShuffleNetV1 | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) | -| ShuffleNetV2 | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) | -| FCN | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) | -| PSPNet[*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) | -| DeepLabV3 | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) | -| DeepLabV3+ | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) | -| Fast-SCNN[*static](#note) | MMSegmentation | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) | -| UNet[*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | -| SRCNN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srcnn) | -| ESRGAN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) | -| SRGAN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | -| SRResNet | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | -| Real-ESRGAN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/real_esrgan) | -| EDSR | MMEditing | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/edsr) | -| RDN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/rdn) | -| DBNet | MMOCR | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/dbnet) | -| CRNN | MMOCR | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/crnn) | -| SAR | MMOCR | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/sar) | -| HRNet | MMPose | Y | Y | Y | N | 
Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#hrnet-cvpr-2019) | -| MSPN | MMPose | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#mspn-arxiv-2019) | -| LiteHRNet | MMPose | Y | Y | N | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#litehrnet-cvpr-2021) | +| Model | Codebase | TorchScript | OnnxRuntime | TensorRT | NCNN | PPLNN | OpenVINO | Model config | +| :------------------------ | :--------------- | :---------: | :---------: | :------: | :---: | :---: | :------: | :--------------------------------------------------------------------------------------------: | +| RetinaNet | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) | +| Faster R-CNN | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | +| YOLOv3 | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) | +| YOLOX | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) | +| FCOS | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos) | +| FSAF | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | +| Mask R-CNN | MMDetection | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) | +| SSD[*](#note) | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) | +| FoveaBox | MMDetection | Y | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox) | +| ATSS | MMDetection | N | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss) | +| GFL | MMDetection | N | Y | Y | N | ? 
| Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl) | +| Cascade R-CNN | MMDetection | N | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | +| Cascade Mask R-CNN | MMDetection | N | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | +| VFNet | MMDetection | N | N | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/vfnet) | +| ResNet | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) | +| ResNeXt | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) | +| SE-ResNet | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) | +| MobileNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) | +| ShuffleNetV1 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) | +| ShuffleNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) | +| FCN | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) | +| PSPNet[*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) | +| DeepLabV3 | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) | +| DeepLabV3+ | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) | +| Fast-SCNN[*static](#note) | MMSegmentation | Y | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) | +| UNet | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | +| ANN[*](#note) | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) | +| APCNet | MMSegmentation | ? | Y | Y | Y | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/apcnet) | +| BiSeNetV1 | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv1) | +| BiSeNetV2 | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv2) | +| CGNet | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/cgnet) | +| DMNet | MMSegmentation | ? | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dmnet) | +| DNLNet | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dnlnet) | +| EMANet | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/emanet) | +| EncNet | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/encnet) | +| ERFNet | MMSegmentation | ? 
| Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/erfnet) | +| FastFCN | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastfcn) | +| GCNet | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) | +| ICNet[*](#note) | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/icnet) | +| ISANet | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/isanet) | +| NonLocal Net | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) | +| OCRNet | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) | +| PointRend | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/point_rend) | +| Semantic FPN | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/sem_fpn) | +| STDC | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/stdc) | +| UPerNet[*](#note) | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) | +| DANet | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) | +| SRCNN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srcnn) | +| ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) | +| SRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | +| SRResNet | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | +| Real-ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/real_esrgan) | +| EDSR | MMEditing | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/edsr) | +| RDN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/rdn) | +| DBNet | MMOCR | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/dbnet) | +| CRNN | MMOCR | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/crnn) | +| SAR | MMOCR | N | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/sar) | +| HRNet | MMPose | N | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#hrnet-cvpr-2019) | +| MSPN | MMPose | N | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#mspn-arxiv-2019) | +| LiteHRNet | MMPose | N | Y | Y | N | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#litehrnet-cvpr-2021) | +| PointPillars | MMDetection3d | ? 
| Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/pointpillars) | +| CenterPoint (pillar) | MMDetection3d | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/centerpoint) | ### Note diff --git a/docs/en/tutorials/how_to_support_new_backends.md b/docs/en/tutorials/how_to_support_new_backends.md index 85319b8d91..c18cd86148 100644 --- a/docs/en/tutorials/how_to_support_new_backends.md +++ b/docs/en/tutorials/how_to_support_new_backends.md @@ -218,6 +218,7 @@ Although the backend engines are usually implemented in C/C++, it is convenient def _build_wrapper(backend: Backend, backend_files: Sequence[str], device: str, + input_names: Optional[Sequence[str]] = None, output_names: Optional[Sequence[str]] = None): if backend == Backend.ONNXRUNTIME: from mmdeploy.backend.onnxruntime import ORTWrapper diff --git a/docs/zh_cn/benchmark.md b/docs/zh_cn/benchmark.md index 98d3f4f873..3225c44fd8 100644 --- a/docs/zh_cn/benchmark.md +++ b/docs/zh_cn/benchmark.md @@ -682,6 +682,7 @@ GPU: ncnn, TensorRT, PPLNN 用户可以直接通过[如何测试性能](tutorials/how_to_evaluate_a_model.md)获得想要的性能测试结果。下面是我们环境中的测试结果: +
MMCls
@@ -691,6 +692,7 @@ GPU: ncnn, TensorRT, PPLNN MMCls PyTorch + TorchScript ONNX Runtime TensorRT PPLNN @@ -703,6 +705,7 @@ GPU: ncnn, TensorRT, PPLNN Task Metrics fp32 + seresnet fp32 fp32 fp16 @@ -715,6 +718,7 @@ GPU: ncnn, TensorRT, PPLNN Classification top-1 69.90 + 69.90 69.88 69.88 69.86 @@ -725,6 +729,7 @@ GPU: ncnn, TensorRT, PPLNN top-5 89.43 + 89.43 89.34 89.34 89.33 @@ -738,6 +743,7 @@ GPU: ncnn, TensorRT, PPLNN 77.90 77.90 77.90 + 77.90 - 77.78 77.89 @@ -748,6 +754,7 @@ GPU: ncnn, TensorRT, PPLNN 93.66 93.66 93.66 + 93.66 - 93.64 93.65 @@ -759,6 +766,7 @@ GPU: ncnn, TensorRT, PPLNN 77.74 77.74 77.74 + 77.74 77.75 77.63 77.73 @@ -769,6 +777,7 @@ GPU: ncnn, TensorRT, PPLNN 93.84 93.84 93.84 + 93.84 93.83 93.72 93.84 @@ -781,6 +790,7 @@ GPU: ncnn, TensorRT, PPLNN 68.13 68.13 68.13 + 68.13 67.71 68.11 $MMCLS_DIR/configs/shufflenet_v1/shufflenet_v1_1x_b64x16_linearlr_bn_nowd_imagenet.py @@ -791,6 +801,7 @@ GPU: ncnn, TensorRT, PPLNN 87.81 87.81 87.81 + 87.81 87.58 87.80 @@ -801,6 +812,7 @@ GPU: ncnn, TensorRT, PPLNN 69.55 69.55 69.55 + 69.55 69.54 69.10 69.54 @@ -811,6 +823,7 @@ GPU: ncnn, TensorRT, PPLNN 88.92 88.92 88.92 + 88.92 88.91 88.58 88.92 @@ -822,6 +835,7 @@ GPU: ncnn, TensorRT, PPLNN 71.86 71.86 71.86 + 71.86 71.87 70.91 71.84 @@ -832,6 +846,7 @@ GPU: ncnn, TensorRT, PPLNN 90.42 90.42 90.42 + 90.42 90.40 89.85 90.41 @@ -849,6 +864,7 @@ GPU: ncnn, TensorRT, PPLNN MMDet Pytorch + TorchScript ONNXRuntime TensorRT PPLNN @@ -865,6 +881,7 @@ GPU: ncnn, TensorRT, PPLNN fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -877,6 +894,7 @@ GPU: ncnn, TensorRT, PPLNN COCO2017 box AP 33.7 + 33.7 - 33.5 33.5 @@ -891,6 +909,7 @@ GPU: ncnn, TensorRT, PPLNN COCO2017 box AP 25.5 + 25.5 - 25.5 25.5 @@ -905,6 +924,7 @@ GPU: ncnn, TensorRT, PPLNN COCO2017 box AP 36.5 + 36.4 - 36.4 36.4 @@ -920,6 +940,7 @@ GPU: ncnn, TensorRT, PPLNN box AP 36.6 - + - 36.6 36.5 - @@ -933,6 +954,7 @@ GPU: ncnn, TensorRT, PPLNN COCO2017 box AP 37.4 + 37.4 - 37.4 37.4 @@ -947,6 +969,7 @@ GPU: ncnn, TensorRT, PPLNN COCO2017 box AP 40.5 + 40.3 - 40.3 40.3 @@ -961,6 +984,7 @@ GPU: ncnn, TensorRT, PPLNN COCO2017 box AP 37.4 + 37.3 - 37.3 37.3 @@ -976,6 +1000,7 @@ GPU: ncnn, TensorRT, PPLNN box AP 39.4 - + - 39.4 39.4 - @@ -990,6 +1015,7 @@ GPU: ncnn, TensorRT, PPLNN box AP 40.4 - + - 40.4 40.4 - @@ -1003,6 +1029,7 @@ GPU: ncnn, TensorRT, PPLNN COCO2017 box AP 38.2 + 38.1 - 38.1 38.1 @@ -1014,6 +1041,7 @@ GPU: ncnn, TensorRT, PPLNN mask AP 34.7 + 34.7 - 33.7 33.7 @@ -1034,6 +1062,7 @@ GPU: ncnn, TensorRT, PPLNN MMEdit Pytorch + TorchScript ONNX Runtime TensorRT PPLNN @@ -1041,7 +1070,6 @@ GPU: ncnn, TensorRT, PPLNN - Model Task @@ -1050,6 +1078,7 @@ GPU: ncnn, TensorRT, PPLNN fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -1061,6 +1090,7 @@ GPU: ncnn, TensorRT, PPLNN Set5 PSNR 28.4316 + 28.4120 28.4323 28.4323 28.4286 @@ -1071,6 +1101,7 @@ GPU: ncnn, TensorRT, PPLNN SSIM 0.8099 + 0.8106 0.8097 0.8097 0.8096 @@ -1083,6 +1114,7 @@ GPU: ncnn, TensorRT, PPLNN Set5 PSNR 28.2700 + 28.2619 28.2592 28.2592 - @@ -1093,6 +1125,7 @@ GPU: ncnn, TensorRT, PPLNN SSIM 0.7778 + 0.7784 0.7764 0.7774 - @@ -1105,6 +1138,7 @@ GPU: ncnn, TensorRT, PPLNN Set5 PSNR 30.6428 + 30.6306 30.6444 30.6430 - @@ -1113,8 +1147,9 @@ GPU: ncnn, TensorRT, PPLNN $MMEDIT_DIR/configs/restorers/esrgan/esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py - + SSIM 0.8559 + 0.8565 0.8558 0.8558 - @@ -1127,16 +1162,18 @@ GPU: ncnn, TensorRT, PPLNN Set5 PSNR 27.9499 + 27.9252 27.9408 27.9408 - - 27.9388 - $MMEDIT_DIR/configs/restorers/srresnet_srgan/srgan_x4c64b16_g1_1000k_div2k.pyy + 
$MMEDIT_DIR/configs/restorers/srresnet_srgan/srgan_x4c64b16_g1_1000k_div2k.py SSIM 0.7846 + 0.7851 0.7839 0.7839 - @@ -1149,6 +1186,7 @@ GPU: ncnn, TensorRT, PPLNN Set5 PSNR 30.2252 + 30.2069 30.2300 30.2300 - @@ -1157,8 +1195,9 @@ GPU: ncnn, TensorRT, PPLNN $MMEDIT_DIR/configs/restorers/srresnet_srgan/msrresnet_x4c64b16_g1_1000k_div2k.py - + SSIM 0.8491 + 0.8497 0.8488 0.8488 - @@ -1171,6 +1210,7 @@ GPU: ncnn, TensorRT, PPLNN Set5 PSNR 28.0297 + - 27.7016 27.7016 - @@ -1181,6 +1221,7 @@ GPU: ncnn, TensorRT, PPLNN SSIM 0.8236 + - 0.8122 0.8122 - @@ -1193,6 +1234,7 @@ GPU: ncnn, TensorRT, PPLNN Set5 PSNR 30.2223 + 30.2192 30.2214 30.2214 30.2211 @@ -1203,6 +1245,7 @@ GPU: ncnn, TensorRT, PPLNN SSIM 0.8500 + 0.8507 0.8497 0.8497 0.8497 @@ -1222,6 +1265,7 @@ GPU: ncnn, TensorRT, PPLNN MMOCR Pytorch + TorchScript ONNXRuntime TensorRT PPLNN @@ -1238,6 +1282,7 @@ GPU: ncnn, TensorRT, PPLNN fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -1250,6 +1295,7 @@ GPU: ncnn, TensorRT, PPLNN ICDAR2015 recall 0.7310 + 0.7308 0.7304 0.7198 0.7179 @@ -1262,6 +1308,7 @@ GPU: ncnn, TensorRT, PPLNN precision 0.8714 0.8718 + 0.8714 0.8677 0.8674 0.8688 @@ -1272,6 +1319,7 @@ GPU: ncnn, TensorRT, PPLNN hmean 0.7950 0.7949 + 0.7950 0.7868 0.7856 0.7821 @@ -1286,6 +1334,7 @@ GPU: ncnn, TensorRT, PPLNN 0.8067 0.8067 0.8067 + 0.8067 0.8063 0.8067 0.8067 @@ -1298,6 +1347,7 @@ GPU: ncnn, TensorRT, PPLNN IIIT5K acc 0.9517 + - 0.9287 - - @@ -1319,6 +1369,7 @@ GPU: ncnn, TensorRT, PPLNN MMSeg Pytorch + TorchScript ONNXRuntime TensorRT PPLNN @@ -1333,6 +1384,7 @@ GPU: ncnn, TensorRT, PPLNN fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -1343,6 +1395,7 @@ GPU: ncnn, TensorRT, PPLNN Cityscapes mIoU 72.25 + 72.36 - 72.36 72.35 @@ -1355,6 +1408,7 @@ GPU: ncnn, TensorRT, PPLNN Cityscapes mIoU 78.55 + 78.66 - 78.26 78.24 @@ -1367,6 +1421,7 @@ GPU: ncnn, TensorRT, PPLNN Cityscapes mIoU 79.09 + 79.12 - 79.12 79.12 @@ -1379,6 +1434,7 @@ GPU: ncnn, TensorRT, PPLNN Cityscapes mIoU 79.61 + 79.60 - 79.60 79.60 @@ -1391,6 +1447,7 @@ GPU: ncnn, TensorRT, PPLNN Cityscapes mIoU 70.96 + 70.96 - 70.93 70.92 @@ -1404,12 +1461,247 @@ GPU: ncnn, TensorRT, PPLNN mIoU 69.10 - + - 69.10 69.10 68.95 - $MMSEG_DIR/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py + + ANN + Cityscapes + mIoU + 77.40 + - + - + 77.32 + 77.32 + - + - + $MMSEG_DIR/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py + + + APCNet + Cityscapes + mIoU + 77.40 + - + - + 77.32 + 77.32 + - + - + $MMSEG_DIR/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py + + + BiSeNetV1 + Cityscapes + mIoU + 74.44 + - + - + 74.44 + 74.43 + - + - + $MMSEG_DIR/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py + + + BiSeNetV2 + Cityscapes + mIoU + 73.21 + - + - + 73.21 + 73.21 + - + - + $MMSEG_DIR/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py + + + CGNet + Cityscapes + mIoU + 68.25 + - + - + 68.27 + 68.27 + - + - + $MMSEG_DIR/configs/cgnet/cgnet_512x1024_60k_cityscapes.py + + + EMANet + Cityscapes + mIoU + 77.59 + - + - + 77.59 + 77.6 + - + - + $MMSEG_DIR/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py + + + EncNet + Cityscapes + mIoU + 75.67 + - + - + 75.66 + 75.66 + - + - + $MMSEG_DIR/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py + + + ERFNet + Cityscapes + mIoU + 71.08 + - + - + 71.08 + 71.07 + - + - + $MMSEG_DIR/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py + + + FastFCN + Cityscapes + mIoU + 79.12 + - + - + 79.12 + 79.12 + - + - + $MMSEG_DIR/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py + + + GCNet + 
Cityscapes + mIoU + 77.69 + - + - + 77.69 + 77.69 + - + - + $MMSEG_DIR/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py + + + ICNet + Cityscapes + mIoU + 76.29 + - + - + 76.36 + 76.36 + - + - + $MMSEG_DIR/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py + + + ISANet + Cityscapes + mIoU + 78.49 + - + - + 78.49 + 78.49 + - + - + $MMSEG_DIR/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py + + + OCRNet + Cityscapes + mIoU + 74.30 + - + - + 73.66 + 73.67 + - + - + $MMSEG_DIR/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py + + + PointRend + Cityscapes + mIoU + 76.47 + - + - + 76.41 + 76.42 + - + - + $MMSEG_DIR/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py + + + Semantic FPN + Cityscapes + mIoU + 74.52 + - + - + 74.52 + 74.52 + - + - + $MMSEG_DIR/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py + + + STDC + Cityscapes + mIoU + 75.10 + - + - + 75.10 + 75.10 + - + - + $MMSEG_DIR/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py + + + STDC + Cityscapes + mIoU + 77.17 + - + - + 77.17 + 77.17 + - + - + $MMSEG_DIR/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py + + + UPerNet + Cityscapes + mIoU + 77.10 + - + - + 77.19 + 77.18 + - + - + $MMSEG_DIR/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py +
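The mIoU rows added above were measured on converted models; they can in principle be re-checked with MMDeploy's test tool that the tutorial linked before the table describes. The command below is only a hedged sketch: the deploy config name, the `work_dir/end2end.engine` file and the exact flags are illustrative assumptions and may differ between MMDeploy versions.

```bash
# Illustrative sketch only: evaluate a converted TensorRT segmentation model on
# Cityscapes. Paths, config names and flags are assumptions; check
# `python tools/test.py --help` for the options available in your version.
python tools/test.py \
    configs/mmseg/segmentation_tensorrt_static-512x1024.py \
    ${MMSEG_DIR}/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py \
    --model work_dir/end2end.engine \
    --metrics mIoU \
    --device cuda:0
```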
diff --git a/docs/zh_cn/build/android.md b/docs/zh_cn/build/android.md index bbf126e60c..9bbe67ea3b 100644 --- a/docs/zh_cn/build/android.md +++ b/docs/zh_cn/build/android.md @@ -193,6 +193,7 @@ mkdir -p build && cd build cmake .. \ -DOpenCV_DIR=${OPENCV_ANDROID_SDK_DIR}/sdk/native/jni/abi-arm64-v8a \ -Dspdlog_DIR=${SPDLOG_DIR}/lib/cmake/spdlog \ + -Dncnn_DIR=${NCNN_DIR}/build/install/lib/cmake/ncnn \ -DMMDeploy_DIR=${MMDEPLOY_DIR}/build/install/lib/cmake/MMDeploy \ -DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \ -DANDROID_ABI=arm64-v8a \ diff --git a/docs/zh_cn/build/windows.md b/docs/zh_cn/build/windows.md index 4cbacba81c..406f3300f7 100644 --- a/docs/zh_cn/build/windows.md +++ b/docs/zh_cn/build/windows.md @@ -96,9 +96,9 @@ cd ../.. 1. 从这里下载 OpenCV 3+。 2. 您可以下载并安装 OpenCV 预编译包到指定的目录下。也可以选择源码编译安装的方式 - 3. 在安装目录中,找到 OpenCVConfig.cmake,并把它的路径添加到环境变量 PATH 中。像这样: + 3. 在安装目录中,找到 OpenCVConfig.cmake,并把它的路径添加到环境变量 PATH 中。像这样:
$env:path = "\the\path\where\OpenCVConfig.cmake\locates;" + "$env:path"
- + pplcv diff --git a/mmdeploy/apis/__init__.py b/mmdeploy/apis/__init__.py index 70e6e479d5..48b1339d15 100644 --- a/mmdeploy/apis/__init__.py +++ b/mmdeploy/apis/__init__.py @@ -3,11 +3,12 @@ from .extract_model import extract_model from .inference import inference_model from .pytorch2onnx import torch2onnx, torch2onnx_impl +from .pytorch2torchscript import torch2torchscript, torch2torchscript_impl from .utils import build_task_processor, get_predefined_partition_cfg from .visualize import visualize_model __all__ = [ 'create_calib_table', 'extract_model', 'inference_model', 'torch2onnx', - 'torch2onnx_impl', 'build_task_processor', 'get_predefined_partition_cfg', - 'visualize_model' + 'torch2onnx_impl', 'torch2torchscript', 'torch2torchscript_impl', + 'build_task_processor', 'get_predefined_partition_cfg', 'visualize_model' ] diff --git a/mmdeploy/apis/openvino/__init__.py b/mmdeploy/apis/openvino/__init__.py index f7fbe9a370..97f6ade95d 100644 --- a/mmdeploy/apis/openvino/__init__.py +++ b/mmdeploy/apis/openvino/__init__.py @@ -6,7 +6,8 @@ if is_available(): from mmdeploy.backend.openvino.onnx2openvino import (get_output_model_file, onnx2openvino) - from .utils import get_input_info_from_cfg + from .utils import get_input_info_from_cfg, get_mo_options_from_cfg __all__ += [ - 'onnx2openvino', 'get_output_model_file', 'get_input_info_from_cfg' + 'onnx2openvino', 'get_output_model_file', 'get_input_info_from_cfg', + 'get_mo_options_from_cfg' ] diff --git a/mmdeploy/apis/openvino/utils.py b/mmdeploy/apis/openvino/utils.py index 79710eff21..72317595fd 100644 --- a/mmdeploy/apis/openvino/utils.py +++ b/mmdeploy/apis/openvino/utils.py @@ -3,8 +3,9 @@ import mmcv +from mmdeploy.backend.openvino import ModelOptimizerOptions from mmdeploy.utils import get_model_inputs -from mmdeploy.utils.config_utils import get_ir_config +from mmdeploy.utils.config_utils import get_backend_config, get_ir_config def update_input_names(input_info: Dict[str, List], @@ -50,3 +51,19 @@ def get_input_info_from_cfg(deploy_cfg: mmcv.Config) -> Dict[str, List]: input_info = dict(zip(input_names, input_info)) input_info = update_input_names(input_info, input_names) return input_info + + +def get_mo_options_from_cfg(deploy_cfg: mmcv.Config) -> ModelOptimizerOptions: + """Get additional parameters for the Model Optimizer from the deploy + config. + + Args: + deploy_cfg (mmcv.Config): Deployment config. + + Returns: + ModelOptimizerOptions: A class that will contain additional arguments. + """ + backend_config = get_backend_config(deploy_cfg) + mo_options = backend_config.get('mo_options', None) + mo_options = ModelOptimizerOptions(mo_options) + return mo_options diff --git a/mmdeploy/apis/pytorch2onnx.py b/mmdeploy/apis/pytorch2onnx.py index 43383fc452..e9912bc89b 100644 --- a/mmdeploy/apis/pytorch2onnx.py +++ b/mmdeploy/apis/pytorch2onnx.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import os.path as osp -from typing import Any, Optional, Union +from typing import Any, Optional, Tuple, Union import mmcv import torch @@ -10,13 +10,13 @@ get_onnx_config, load_config) -def torch2onnx_impl(model: torch.nn.Module, input: torch.Tensor, +def torch2onnx_impl(model: torch.nn.Module, input: Union[torch.Tensor, Tuple], deploy_cfg: Union[str, mmcv.Config], output_file: str): """Converting torch model to ONNX. Args: model (torch.nn.Module): Input pytorch model. - input (torch.Tensor): Input tensor used to convert model. + input (torch.Tensor | Tuple): Input tensor used to convert model. 
deploy_cfg (str | mmcv.Config): Deployment config file or Config object. output_file (str): Output file to save ONNX model. @@ -103,7 +103,7 @@ def torch2onnx(img: Any, torch_model = task_processor.init_pytorch_model(model_checkpoint) data, model_inputs = task_processor.create_input(img, input_shape) - if not isinstance(model_inputs, torch.Tensor): + if not isinstance(model_inputs, torch.Tensor) and len(model_inputs) == 1: model_inputs = model_inputs[0] torch2onnx_impl( diff --git a/mmdeploy/apis/pytorch2torchscript.py b/mmdeploy/apis/pytorch2torchscript.py new file mode 100644 index 0000000000..8b54ce4ce8 --- /dev/null +++ b/mmdeploy/apis/pytorch2torchscript.py @@ -0,0 +1,111 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from typing import Any, Optional, Sequence, Union + +import mmcv +import torch +from packaging.version import parse as version_parse + +from mmdeploy.backend.torchscript import get_ops_path +from mmdeploy.core import RewriterContext, patch_model +from mmdeploy.utils import (IR, get_backend, get_input_shape, get_root_logger, + load_config) + + +def torch2torchscript_impl(model: torch.nn.Module, + inputs: Union[torch.Tensor, Sequence[torch.Tensor]], + deploy_cfg: Union[str, + mmcv.Config], output_file: str): + """Converting torch model to torchscript. + + Args: + model (torch.nn.Module): Input pytorch model. + inputs (torch.Tensor | Sequence[torch.Tensor]): Input tensors used to + convert model. + deploy_cfg (str | mmcv.Config): Deployment config file or + Config object. + output_file (str): Output file to save torchscript model. + """ + # load custom ops if exist + custom_ops_path = get_ops_path() + if osp.exists(custom_ops_path): + torch.ops.load_library(custom_ops_path) + + deploy_cfg = load_config(deploy_cfg)[0] + + backend = get_backend(deploy_cfg).value + + patched_model = patch_model(model, cfg=deploy_cfg, backend=backend) + + with RewriterContext( + cfg=deploy_cfg, backend=backend, + ir=IR.TORCHSCRIPT), torch.no_grad(), torch.jit.optimized_execution( + True): + # for exporting models with weight that depends on inputs + patched_model(*inputs) if isinstance(inputs, Sequence) \ + else patched_model(inputs) + ts_model = torch.jit.trace(patched_model, inputs) + + # perform optimize, note that optimizing models may trigger errors when + # loading the saved .pt file, as described in + # https://github.com/pytorch/pytorch/issues/62706 + logger = get_root_logger() + logger.info('perform torchscript optimizer.') + try: + # custom optimizer + from mmdeploy.backend.torchscript import ts_optimizer + logger = get_root_logger() + ts_optimizer.optimize_for_backend( + ts_model._c, ir=IR.TORCHSCRIPT.value, backend=backend) + except Exception: + # use pytorch builtin optimizer + ts_model = torch.jit.freeze(ts_model) + torch_version = version_parse(torch.__version__) + if torch_version.minor >= 9: + ts_model = torch.jit.optimize_for_inference(ts_model) + + # save model + torch.jit.save(ts_model, output_file) + + +def torch2torchscript(img: Any, + work_dir: str, + save_file: str, + deploy_cfg: Union[str, mmcv.Config], + model_cfg: Union[str, mmcv.Config], + model_checkpoint: Optional[str] = None, + device: str = 'cuda:0'): + """Convert PyTorch model to torchscript model. + + Args: + img (str | np.ndarray | torch.Tensor): Input image used to assist + converting model. + work_dir (str): A working directory to save files. + save_file (str): Filename to save torchscript model. + deploy_cfg (str | mmcv.Config): Deployment config file or + Config object. 
+ model_cfg (str | mmcv.Config): Model config file or Config object. + model_checkpoint (str): A checkpoint path of PyTorch model, + defaults to `None`. + device (str): A string specifying device type, defaults to 'cuda:0'. + """ + # load deploy_cfg if necessary + deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + mmcv.mkdir_or_exist(osp.abspath(work_dir)) + output_file = osp.join(work_dir, save_file) + + input_shape = get_input_shape(deploy_cfg) + + from mmdeploy.apis import build_task_processor + task_processor = build_task_processor(model_cfg, deploy_cfg, device) + + torch_model = task_processor.init_pytorch_model(model_checkpoint) + _, model_inputs = task_processor.create_input(img, input_shape) + if not isinstance(model_inputs, torch.Tensor): + model_inputs = model_inputs[0] + + torch2torchscript_impl( + torch_model, + model_inputs, + deploy_cfg=deploy_cfg, + output_file=output_file) diff --git a/mmdeploy/apis/visualize.py b/mmdeploy/apis/visualize.py index d4b315770c..ade0a21fe8 100644 --- a/mmdeploy/apis/visualize.py +++ b/mmdeploy/apis/visualize.py @@ -67,7 +67,6 @@ def visualize_model(model_cfg: Union[str, mmcv.Config], model = task_processor.init_backend_model(model) model_inputs, _ = task_processor.create_input(img, input_shape) - with torch.no_grad(): result = task_processor.run_inference(model, model_inputs)[0] diff --git a/mmdeploy/backend/openvino/__init__.py b/mmdeploy/backend/openvino/__init__.py index cb084b5589..7314e48df0 100644 --- a/mmdeploy/backend/openvino/__init__.py +++ b/mmdeploy/backend/openvino/__init__.py @@ -13,5 +13,8 @@ def is_available() -> bool: if is_available(): from .onnx2openvino import get_output_model_file + from .utils import ModelOptimizerOptions from .wrapper import OpenVINOWrapper - __all__ = ['OpenVINOWrapper', 'get_output_model_file'] + __all__ = [ + 'OpenVINOWrapper', 'get_output_model_file', 'ModelOptimizerOptions' + ] diff --git a/mmdeploy/backend/openvino/onnx2openvino.py b/mmdeploy/backend/openvino/onnx2openvino.py index a482b7c845..7252efabbd 100644 --- a/mmdeploy/backend/openvino/onnx2openvino.py +++ b/mmdeploy/backend/openvino/onnx2openvino.py @@ -2,12 +2,13 @@ import os.path as osp import subprocess from subprocess import PIPE, CalledProcessError, run -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union import mmcv import torch from mmdeploy.utils import get_root_logger +from .utils import ModelOptimizerOptions def get_mo_command() -> str: @@ -55,7 +56,10 @@ def get_output_model_file(onnx_path: str, work_dir: str) -> str: def onnx2openvino(input_info: Dict[str, Union[List[int], torch.Size]], - output_names: List[str], onnx_path: str, work_dir: str): + output_names: List[str], + onnx_path: str, + work_dir: str, + mo_options: Optional[ModelOptimizerOptions] = None): """Convert ONNX to OpenVINO. Examples: @@ -72,8 +76,9 @@ def onnx2openvino(input_info: Dict[str, Union[List[int], torch.Size]], output_names (List[str]): Output names. Example: ['dets', 'labels']. onnx_path (str): The path to the onnx model. work_dir (str): The path to the directory for saving the results. + mo_options (None | ModelOptimizerOptions): The class with + additional arguments for the Model Optimizer. 
""" - input_names = ','.join(input_info.keys()) input_shapes = ','.join(str(list(elem)) for elem in input_info.values()) output = ','.join(output_names) @@ -88,8 +93,10 @@ def onnx2openvino(input_info: Dict[str, Union[List[int], torch.Size]], f'--output_dir="{work_dir}" ' \ f'--output="{output}" ' \ f'--input="{input_names}" ' \ - f'--input_shape="{input_shapes}" ' \ - f'--disable_fusing ' + f'--input_shape="{input_shapes}" ' + if mo_options is not None: + mo_args += mo_options.get_options() + command = f'{mo_command} {mo_args}' logger = get_root_logger() diff --git a/mmdeploy/backend/openvino/utils.py b/mmdeploy/backend/openvino/utils.py new file mode 100644 index 0000000000..7aa9dc3b37 --- /dev/null +++ b/mmdeploy/backend/openvino/utils.py @@ -0,0 +1,45 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Dict, List, Optional, Union + + +class ModelOptimizerOptions: + """A class to make it easier to support additional arguments for the Model + Optimizer that can be passed through the deployment configuration. + + Example: + >>> deploy_cfg = load_config(deploy_cfg_path) + >>> mo_options = deploy_cfg.get('mo_options', None) + >>> mo_options = ModelOptimizerOptions(mo_options) + >>> mo_args = mo_options.get_options() + """ + + def __init__(self, + mo_options: Optional[Dict[str, Union[Dict, List]]] = None): + self.args = '' + self.flags = '' + if mo_options is not None: + self.args = self.__parse_args(mo_options) + self.flags = self.__parse_flags(mo_options) + + def __parse_args(self, mo_options: Dict[str, Union[Dict, List]]) -> str: + """Parses a dictionary with arguments into a string.""" + mo_args_str = '' + if 'args' in mo_options: + for key, value in mo_options['args'].items(): + value_str = f'"{value}"' if isinstance(value, list) else value + mo_args_str += f'{key}={value_str} ' + return mo_args_str + + def __parse_flags(self, mo_options: Dict[str, Union[Dict, List]]) -> str: + """Parses a list with flags into a string.""" + mo_flags_str = '' + if 'flags' in mo_options: + mo_flags_str += ' '.join(mo_options['flags']) + return mo_flags_str + + def get_options(self) -> str: + """Returns a string with additional arguments for the Model Optimizer. + + If there are no additional arguments, it will return an empty string. + """ + return self.args + self.flags diff --git a/mmdeploy/backend/openvino/wrapper.py b/mmdeploy/backend/openvino/wrapper.py index 589906f345..7a41db24ad 100644 --- a/mmdeploy/backend/openvino/wrapper.py +++ b/mmdeploy/backend/openvino/wrapper.py @@ -42,7 +42,11 @@ def __init__(self, self.net = self.ie.read_network(ir_model_file, bin_path) for input in self.net.input_info.values(): batch_size = input.input_data.shape[0] - assert batch_size == 1, 'Only batch 1 is supported.' + dims = len(input.input_data.shape) + # if input is a image, it has (B,C,H,W) channels, + # need batch_size==1 + assert not dims == 4 or batch_size == 1, \ + 'Only batch 1 is supported.' self.device = 'cpu' self.sess = self.ie.load_network( network=self.net, device_name=self.device.upper(), num_requests=1) diff --git a/mmdeploy/backend/torchscript/__init__.py b/mmdeploy/backend/torchscript/__init__.py new file mode 100644 index 0000000000..9179ef3da6 --- /dev/null +++ b/mmdeploy/backend/torchscript/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# flake8: noqa +from .init_plugins import get_ops_path, ops_available + + +def is_available(): + """Torchscript available. + + Returns: + bool: Always True. 
+ """ + return True + + +__all__ = ['get_ops_path', 'ops_available'] + +if is_available(): + from .wrapper import TorchscriptWrapper + + __all__ += ['TorchscriptWrapper'] diff --git a/mmdeploy/backend/torchscript/init_plugins.py b/mmdeploy/backend/torchscript/init_plugins.py new file mode 100644 index 0000000000..ec0371b59a --- /dev/null +++ b/mmdeploy/backend/torchscript/init_plugins.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import glob +import os.path as osp + + +def get_ops_path() -> str: + """Get path of the torchscript extension library. + + Returns: + str: A path of the torchscript extension library. + """ + wildcard = osp.abspath( + osp.join( + osp.dirname(__file__), + '../../../build/lib/libmmdeploy_torchscript_ops.so')) + + paths = glob.glob(wildcard) + lib_path = paths[0] if len(paths) > 0 else '' + return lib_path + + +def ops_available() -> bool: + """Return whether ops are available. + + Returns: + bool: Whether ops are available. + """ + return osp.exists(get_ops_path()) diff --git a/mmdeploy/backend/torchscript/wrapper.py b/mmdeploy/backend/torchscript/wrapper.py new file mode 100644 index 0000000000..668ab23aa0 --- /dev/null +++ b/mmdeploy/backend/torchscript/wrapper.py @@ -0,0 +1,108 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from typing import Dict, Optional, Sequence, Union + +import torch + +from mmdeploy.utils import Backend +from mmdeploy.utils.timer import TimeCounter +from ..base import BACKEND_WRAPPER, BaseWrapper +from .init_plugins import get_ops_path + + +@BACKEND_WRAPPER.register_module(Backend.TORCHSCRIPT.value) +class TorchscriptWrapper(BaseWrapper): + """Torchscript engine wrapper for inference. + + Args: + model (torch.jit.RecursiveScriptModule): torchscript engine to wrap. + input_names (Sequence[str] | None): Names of model inputs in order. + Defaults to `None` and the wrapper will accept list or Tensor. + output_names (Sequence[str] | None): Names of model outputs in order. + Defaults to `None` and the wrapper will return list or Tensor. + + Note: + If the engine is converted from onnx model. The input_names and + output_names should be the same as onnx model. + + Examples: + >>> from mmdeploy.backend.torchscript import TorchscriptWrapper + >>> engine_file = 'resnet.engine' + >>> model = TorchscriptWrapper(engine_file, input_names=['input'], \ + >>> output_names=['output']) + >>> inputs = dict(input=torch.randn(1, 3, 224, 224)) + >>> outputs = model(inputs) + >>> print(outputs) + """ + + def __init__(self, + model: Union[str, torch.jit.RecursiveScriptModule], + input_names: Optional[Sequence[str]] = None, + output_names: Optional[Sequence[str]] = None): + # load custom ops if exist + custom_ops_path = get_ops_path() + if osp.exists(custom_ops_path): + torch.ops.load_library(custom_ops_path) + super().__init__(output_names) + self.ts_model = model + if isinstance(self.ts_model, str): + self.ts_model = torch.jit.load(self.ts_model) + + assert isinstance(self.ts_model, torch.jit.RecursiveScriptModule + ), 'failed to load torchscript model.' + + self._input_names = input_names + self._output_names = output_names + + def forward( + self, inputs: Union[torch.Tensor, Sequence[torch.Tensor], + Dict[str, torch.Tensor]] + ) -> Union[torch.Tensor, Sequence[torch.Tensor], Dict[str, torch.Tensor]]: + """Run forward inference. + + Args: + inputs (torch.Tensor | Sequence[torch.Tensor] | Dict[str, + torch.Tensor]): The input tensor, or tensor sequence, or pairs + of input names and tensors. 
+ + Return: + outputs (torch.Tensor | Sequence[torch.Tensor] | Dict[str, + torch.Tensor]): The input tensor, or tensor sequence, or pairs + of input names and tensors. + """ + + is_dict_inputs = isinstance(inputs, Dict) + if is_dict_inputs: + # inputs to dict + assert self._input_names is not None, \ + 'input names have not been given.' + inputs = [inputs[input_name] for input_name in self._input_names] + elif isinstance(inputs, torch.Tensor): + inputs = [inputs] + + outputs = self.__torchscript_execute(inputs) + + if self._output_names is not None and is_dict_inputs: + # output to dict + if isinstance(outputs, torch.Tensor): + outputs = [outputs] + outputs = dict(zip(self._output_names, outputs)) + + if isinstance(outputs, tuple) and self._output_names is not None: + assert len(outputs) == len(self._output_names) + outputs = dict(zip(self._output_names, outputs)) + return outputs + + @TimeCounter.count_time() + def __torchscript_execute( + self, inputs: Sequence[torch.Tensor]) -> Sequence[torch.Tensor]: + """Run inference with TorchScript. + + Args: + inputs (Sequence[torch.Tensor]): A list of integer binding the + input/output. + Returns: + torch.Tensor | Sequence[torch.Tensor]: The inference outputs from + TorchScript. + """ + return self.ts_model(*inputs) diff --git a/mmdeploy/codebase/base/backend_model.py b/mmdeploy/codebase/base/backend_model.py index 6a21440693..93dc2fe74f 100644 --- a/mmdeploy/codebase/base/backend_model.py +++ b/mmdeploy/codebase/base/backend_model.py @@ -35,8 +35,11 @@ def __init__(self, def _build_wrapper(backend: Backend, backend_files: Sequence[str], device: str, + input_names: Optional[Sequence[str]] = None, output_names: Optional[Sequence[str]] = None, - deploy_cfg: Optional[mmcv.Config] = None): + deploy_cfg: Optional[mmcv.Config] = None, + *args, + **kwargs): """The default methods to build backend wrappers. Args: @@ -44,6 +47,8 @@ def _build_wrapper(backend: Backend, beckend_files (Sequence[str]): Paths to all required backend files( e.g. '.onnx' for ONNX Runtime, '.param' and '.bin' for ncnn). device (str): A string specifying device type. + input_names (Sequence[str] | None): Names of model inputs in + order. Defaults to `None`. output_names (Sequence[str] | None): Names of model outputs in order. Defaults to `None` and the wrapper will load the output names from the model. 
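To make the new `input_names` argument concrete, here is a minimal, hypothetical usage sketch (not part of this patch): the import path, the `work_dir/end2end.pt` file name and the tensor names are assumptions chosen for illustration only.

```python
# Hypothetical sketch: build and call a TorchScript backend wrapper through the
# extended helper. All names and paths below are illustrative assumptions.
import torch

from mmdeploy.codebase.base import BaseBackendModel
from mmdeploy.utils import Backend

# With input_names given, dict-style inputs can be mapped onto the positional
# inputs of the traced module; outputs come back keyed by output_names.
wrapper = BaseBackendModel._build_wrapper(
    backend=Backend.TORCHSCRIPT,
    backend_files=['work_dir/end2end.pt'],
    device='cpu',
    input_names=['input'],
    output_names=['output'])

outputs = wrapper(dict(input=torch.randn(1, 3, 224, 224)))
```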
@@ -85,6 +90,12 @@ def _build_wrapper(backend: Backend, model_file=backend_files[0], task_name=task_name, device=device) + elif backend == Backend.TORCHSCRIPT: + from mmdeploy.backend.torchscript import TorchscriptWrapper + return TorchscriptWrapper( + model=backend_files[0], + input_names=input_names, + output_names=output_names) else: raise NotImplementedError(f'Unknown backend type: {backend.value}') diff --git a/mmdeploy/codebase/mmcls/deploy/classification_model.py b/mmdeploy/codebase/mmcls/deploy/classification_model.py index 260d72a80b..bf6dcbca2a 100644 --- a/mmdeploy/codebase/mmcls/deploy/classification_model.py +++ b/mmdeploy/codebase/mmcls/deploy/classification_model.py @@ -55,6 +55,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) diff --git a/mmdeploy/codebase/mmcls/models/backbones/shufflenet_v2.py b/mmdeploy/codebase/mmcls/models/backbones/shufflenet_v2.py index 58e7030cab..2d26318af6 100644 --- a/mmdeploy/codebase/mmcls/models/backbones/shufflenet_v2.py +++ b/mmdeploy/codebase/mmcls/models/backbones/shufflenet_v2.py @@ -3,13 +3,17 @@ from mmcls.models.utils import channel_shuffle from mmdeploy.core import FUNCTION_REWRITER +from mmdeploy.utils import Backend # torch.chunk will export dynamic shape slice, which will lead integer input # on ncnn backend. So the model needs to rewrite. @FUNCTION_REWRITER.register_rewriter( func_name='mmcls.models.backbones.shufflenet_v2.InvertedResidual.forward', - backend='ncnn') + backend=Backend.NCNN.value) +@FUNCTION_REWRITER.register_rewriter( + func_name='mmcls.models.backbones.shufflenet_v2.InvertedResidual.forward', + backend=Backend.TORCHSCRIPT.value) def shufflenetv2_backbone__forward__ncnn(ctx, self, x): """Rewrite `forward` of InvertedResidual used in shufflenet_v2 for ncnn backend. diff --git a/mmdeploy/codebase/mmdet/__init__.py b/mmdeploy/codebase/mmdet/__init__.py index ea1376b931..949b533407 100644 --- a/mmdeploy/codebase/mmdet/__init__.py +++ b/mmdeploy/codebase/mmdet/__init__.py @@ -1,10 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .core import * # noqa: F401,F403 from .deploy import (MMDetection, ObjectDetection, clip_bboxes, - get_post_processing_params, pad_with_value) + get_post_processing_params, pad_with_value, + pad_with_value_if_necessary) from .models import * # noqa: F401,F403 __all__ = [ 'get_post_processing_params', 'clip_bboxes', 'pad_with_value', - 'MMDetection', 'ObjectDetection' + 'pad_with_value_if_necessary', 'MMDetection', 'ObjectDetection' ] diff --git a/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py b/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py index 2580d37ef8..ee7a1403d7 100644 --- a/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py +++ b/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py @@ -5,7 +5,7 @@ import mmdeploy from mmdeploy.core import FUNCTION_REWRITER, mark from mmdeploy.mmcv.ops import ONNXNMSop, TRTBatchedNMSop -from mmdeploy.utils import is_dynamic_batch +from mmdeploy.utils import Backend, is_dynamic_batch def select_nms_index(scores: torch.Tensor, @@ -269,3 +269,60 @@ def multiclass_nms(*args, **kwargs): """Wrapper function for `_multiclass_nms`.""" return mmdeploy.codebase.mmdet.core.post_processing._multiclass_nms( *args, **kwargs) + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmdeploy.codebase.mmdet.core.post_processing._multiclass_nms', + backend=Backend.TORCHSCRIPT.value) +def multiclass_nms__torchscript(ctx, + boxes: Tensor, + scores: Tensor, + max_output_boxes_per_class: int = 1000, + iou_threshold: float = 0.5, + score_threshold: float = 0.05, + pre_top_k: int = -1, + keep_top_k: int = -1): + """rewrite for torchscript batched nms. + + Use batched_nms from torchvision instead of custom nms. + """ + # TODO: simplify inference for non-batch model + from torchvision.ops import batched_nms + batch_size = scores.shape[0] + num_boxes = scores.shape[1] + num_classes = scores.shape[2] + box_per_cls = len(boxes.shape) == 4 + scores = torch.where(scores > score_threshold, scores, scores.new_zeros(1)) + + # pre-topk + if pre_top_k > 0: + max_scores, _ = scores.max(-1) + _, topk_inds = max_scores.topk(pre_top_k) + batch_inds = torch.arange(batch_size).view( + -1, 1).expand_as(topk_inds).long() + boxes = boxes[batch_inds, topk_inds, ...] + scores = scores[batch_inds, topk_inds, :] + num_boxes = scores.shape[1] + + idxs = torch.arange(0, batch_size, device=scores.device).unsqueeze(1) + idxs = idxs.repeat(1, num_boxes).view(-1) + + keeps = [None] * num_classes + for cls_id in range(num_classes): + box = boxes if not box_per_cls else boxes[:, :, cls_id, :] + score = scores[:, :, cls_id] + box = box.view(-1, 4) + score = score.view(-1) + box_keep = batched_nms(box, score, idxs, iou_threshold=iou_threshold) + box_keep = box_keep[:max_output_boxes_per_class * batch_size] + batch_keep = idxs[box_keep] + cls_keep = torch.ones_like(box_keep) * cls_id + box_keep = box_keep - batch_keep * num_boxes + keeps[cls_id] = torch.stack([batch_keep, cls_keep, box_keep], dim=1) + + keeps = torch.cat(keeps) + scores = scores.permute(0, 2, 1) + dets, labels = select_nms_index( + scores, boxes, keeps, batch_size, keep_top_k=keep_top_k) + + return dets, labels diff --git a/mmdeploy/codebase/mmdet/deploy/__init__.py b/mmdeploy/codebase/mmdet/deploy/__init__.py index f6fff39d09..cd48bb7ad3 100644 --- a/mmdeploy/codebase/mmdet/deploy/__init__.py +++ b/mmdeploy/codebase/mmdet/deploy/__init__.py @@ -1,9 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .mmdetection import MMDetection from .object_detection import ObjectDetection -from .utils import clip_bboxes, get_post_processing_params, pad_with_value +from .utils import (clip_bboxes, get_post_processing_params, pad_with_value, + pad_with_value_if_necessary) __all__ = [ 'get_post_processing_params', 'clip_bboxes', 'pad_with_value', - 'MMDetection', 'ObjectDetection' + 'pad_with_value_if_necessary', 'MMDetection', 'ObjectDetection' ] diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 79cdae9ecc..b368d10972 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -78,6 +78,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) @@ -424,13 +425,14 @@ def _init_wrapper(self, backend, backend_files, device): backend, backend_files[0:n], device, - partition0_output_names, + output_names=partition0_output_names, deploy_cfg=self.deploy_cfg) self.second_wrapper = BaseBackendModel._build_wrapper( backend, backend_files[n:2 * n], - device, ['cls_score', 'bbox_pred'], + device, + output_names=['cls_score', 'bbox_pred'], deploy_cfg=self.deploy_cfg) def partition0_postprocess(self, x: Sequence[torch.Tensor], diff --git a/mmdeploy/codebase/mmdet/deploy/utils.py b/mmdeploy/codebase/mmdet/deploy/utils.py index 1ecd451e2f..5fd5b7ab78 100644 --- a/mmdeploy/codebase/mmdet/deploy/utils.py +++ b/mmdeploy/codebase/mmdet/deploy/utils.py @@ -5,7 +5,9 @@ import torch from torch import Tensor -from mmdeploy.utils import load_config +from mmdeploy.core import FUNCTION_REWRITER +from mmdeploy.core.rewriters.rewriter_utils import LibVersionChecker +from mmdeploy.utils import Backend, load_config def get_post_processing_params(deploy_cfg: Union[str, mmcv.Config]): @@ -69,6 +71,33 @@ def clip_bboxes(x1: Tensor, y1: Tensor, x2: Tensor, y2: Tensor, return x1, y1, x2, y2 +@FUNCTION_REWRITER.register_rewriter( + func_name='mmdeploy.codebase.mmdet.deploy.utils.clip_bboxes', + backend='tensorrt', + extra_checkers=LibVersionChecker('tensorrt', min_version='8')) +def clip_bboxes__trt8(ctx, x1: Tensor, y1: Tensor, x2: Tensor, y2: Tensor, + max_shape: Union[Tensor, Sequence[int]]): + """Clip bboxes for onnx. From TensorRT 8 we can do the operators on the + tensors directly. + + Args: + ctx (ContextCaller): The context with additional information. + x1 (Tensor): The x1 for bounding boxes. + y1 (Tensor): The y1 for bounding boxes. + x2 (Tensor): The x2 for bounding boxes. + y2 (Tensor): The y2 for bounding boxes. + max_shape (Tensor | Sequence[int]): The (H,W) of original image. + Returns: + tuple(Tensor): The clipped x1, y1, x2, y2. + """ + assert len(max_shape) == 2, '`max_shape` should be [h, w]' + x1 = torch.clamp(x1, 0, max_shape[1]) + y1 = torch.clamp(y1, 0, max_shape[0]) + x2 = torch.clamp(x2, 0, max_shape[1]) + y2 = torch.clamp(y2, 0, max_shape[0]) + return x1, y1, x2, y2 + + def pad_with_value(x: Tensor, pad_dim: int, pad_size: int, @@ -98,3 +127,62 @@ def pad_with_value(x: Tensor, x_pad = x_pad.repeat(*repeat_size) x = torch.cat([x, x_pad], dim=pad_dim) return x + + +def pad_with_value_if_necessary(x: Tensor, + pad_dim: int, + pad_size: int, + pad_value: Optional[Any] = None): + """Pad a tensor with a value along some dim if necessary. + + Args: + x (Tensor): Input tensor. 
+ pad_dim (int): Along which dim to pad. + pad_size (int): To which size to pad. + pad_value (Any): Filled value for padding. Defaults to `None`. + + Returns: + Tensor: Padded tensor. + """ + return __pad_with_value_if_necessary( + x, pad_dim, pad_size=pad_size, pad_value=pad_value) + + +def __pad_with_value_if_necessary(x: Tensor, + pad_dim: int, + pad_size: int, + pad_value: Optional[Any] = None): + """Pad a tensor with a value along some dim, do nothing on default. + + Args: + x (Tensor): Input tensor. + pad_dim (int): Along which dim to pad. + pad_size (int): To which size to pad. + pad_value (Any): Filled value for padding. Defaults to `None`. + + Returns: + Tensor: Padded tensor. + """ + return x + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdeploy.codebase.mmdet.deploy.utils.__pad_with_value_if_necessary', + backend=Backend.TENSORRT.value) +def __pad_with_value_if_necessary__tensorrt(ctx, + x: Tensor, + pad_dim: int, + pad_size: int, + pad_value: Optional[Any] = None): + """Pad a tensor with a value along some dim. + + Args: + x (Tensor): Input tensor. + pad_dim (int): Along which dim to pad. + pad_size (int): To which size to pad. + pad_value (Any): Filled value for padding. Defaults to `None`. + + Returns: + Tensor: Padded tensor. + """ + return pad_with_value(x, pad_dim, pad_size=pad_size, pad_value=pad_value) diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py index 3c94c16250..cd224a4548 100644 --- a/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py +++ b/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py @@ -5,10 +5,11 @@ from mmdet.core.bbox.transforms import distance2bbox from mmdeploy.codebase.mmdet import (get_post_processing_params, - multiclass_nms, pad_with_value) + multiclass_nms, + pad_with_value_if_necessary) from mmdeploy.codebase.mmdet.core.ops import ncnn_detection_output_forward from mmdeploy.core import FUNCTION_REWRITER -from mmdeploy.utils import Backend, get_backend, is_dynamic_shape +from mmdeploy.utils import Backend, is_dynamic_shape @FUNCTION_REWRITER.register_rewriter( @@ -60,7 +61,6 @@ def base_dense_head__get_bbox(ctx, """ deploy_cfg = ctx.cfg is_dynamic_flag = is_dynamic_shape(deploy_cfg) - backend = get_backend(deploy_cfg) num_levels = len(cls_scores) featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] @@ -98,10 +98,8 @@ def base_dense_head__get_bbox(ctx, self.cls_out_channels) if self.use_sigmoid_cls: scores = scores.sigmoid() - nms_pre_score = scores else: scores = scores.softmax(-1) - nms_pre_score = scores if with_score_factors: score_factors = score_factors.permute(0, 2, 3, 1).reshape(batch_size, @@ -112,16 +110,16 @@ def base_dense_head__get_bbox(ctx, priors = priors.data priors = priors.expand(batch_size, -1, priors.size(-1)) if pre_topk > 0: + priors = pad_with_value_if_necessary(priors, 1, pre_topk) + bbox_pred = pad_with_value_if_necessary(bbox_pred, 1, pre_topk) + scores = pad_with_value_if_necessary(scores, 1, pre_topk, 0.) + if with_score_factors: + score_factors = pad_with_value_if_necessary( + score_factors, 1, pre_topk, 0.) + + nms_pre_score = scores if with_score_factors: nms_pre_score = nms_pre_score * score_factors - if backend == Backend.TENSORRT: - priors = pad_with_value(priors, 1, pre_topk) - bbox_pred = pad_with_value(bbox_pred, 1, pre_topk) - scores = pad_with_value(scores, 1, pre_topk, 0.) - nms_pre_score = pad_with_value(nms_pre_score, 1, pre_topk, 0.) 
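# Illustrative sketch (not part of the patch): why the TensorRT-only rewrite of
# __pad_with_value_if_necessary pads before topk. torch.topk requires k <= size(dim),
# so padding the candidate dimension up to pre_topk keeps the exported graph valid
# when fewer priors than pre_topk are present. Values below are toy examples.
import torch

pre_topk = 5
scores = torch.tensor([[0.9, 0.2, 0.7]])      # only 3 candidates; scores.topk(5) would fail

padded = torch.cat(
    [scores, scores.new_zeros(1, pre_topk - scores.shape[1])], dim=1)
values, inds = padded.topk(pre_topk)          # k now matches the padded size
print(values, inds)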
- if with_score_factors: - score_factors = pad_with_value(score_factors, 1, pre_topk, - 0.) # Get maximum scores for foreground classes. if self.use_sigmoid_cls: @@ -180,7 +178,7 @@ def base_dense_head__get_bbox(ctx, @FUNCTION_REWRITER.register_rewriter( func_name='mmdet.models.dense_heads.base_dense_head.BaseDenseHead' '.get_bboxes', - backend='ncnn') + backend=Backend.NCNN.value) def base_dense_head__get_bboxes__ncnn(ctx, self, cls_scores, diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/rpn_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/rpn_head.py index e7f60c322d..5523c44b47 100644 --- a/mmdeploy/codebase/mmdet/models/dense_heads/rpn_head.py +++ b/mmdeploy/codebase/mmdet/models/dense_heads/rpn_head.py @@ -2,9 +2,10 @@ import torch from mmdeploy.codebase.mmdet import (get_post_processing_params, - multiclass_nms, pad_with_value) + multiclass_nms, + pad_with_value_if_necessary) from mmdeploy.core import FUNCTION_REWRITER -from mmdeploy.utils import Backend, get_backend, is_dynamic_shape +from mmdeploy.utils import Backend, is_dynamic_shape @FUNCTION_REWRITER.register_rewriter( @@ -95,13 +96,11 @@ def rpn_head__get_bboxes(ctx, anchors = anchors.expand_as(bbox_pred) - backend = get_backend(deploy_cfg) # topk in tensorrt does not support shape 0: _, topk_inds = scores.squeeze(2).topk(pre_topk) @@ -145,7 +144,7 @@ def rpn_head__get_bboxes(ctx, @FUNCTION_REWRITER.register_rewriter( - 'mmdet.models.dense_heads.RPNHead.get_bboxes', backend='ncnn') + 'mmdet.models.dense_heads.RPNHead.get_bboxes', backend=Backend.NCNN.value) def rpn_head__get_bboxes__ncnn(ctx, self, cls_scores, diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py index 5d8e9a9937..cb57cddfc3 100644 --- a/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py +++ b/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py @@ -3,9 +3,10 @@ import torch from mmdeploy.codebase.mmdet import (get_post_processing_params, - multiclass_nms, pad_with_value) + multiclass_nms, + pad_with_value_if_necessary) from mmdeploy.core import FUNCTION_REWRITER -from mmdeploy.utils import Backend, get_backend, is_dynamic_shape +from mmdeploy.utils import Backend, is_dynamic_shape @FUNCTION_REWRITER.register_rewriter( @@ -90,13 +91,11 @@ def yolov3_head__get_bboxes(ctx, conf_pred = torch.sigmoid(pred_map[..., 4]) cls_pred = torch.sigmoid(pred_map[..., 5:]).view( batch_size, -1, self.num_classes) # Cls pred one-hot. 
- backend = get_backend(ctx.cfg) # topk in tensorrt does not support shape 0: _, topk_inds = conf_pred.topk(pre_topk) @@ -161,7 +160,8 @@ def yolov3_head__get_bboxes(ctx, @FUNCTION_REWRITER.register_rewriter( - func_name='mmdet.models.dense_heads.YOLOV3Head.get_bboxes', backend='ncnn') + func_name='mmdet.models.dense_heads.YOLOV3Head.get_bboxes', + backend=Backend.NCNN.value) def yolov3_head__get_bboxes__ncnn(ctx, self, pred_maps, diff --git a/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py b/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py index f87bdeabe8..f91ca48ad0 100644 --- a/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py +++ b/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py @@ -5,6 +5,8 @@ from mmdeploy.core.optimizers import mark from mmdeploy.core.rewriters import FUNCTION_REWRITER +from mmdeploy.utils import get_backend +from mmdeploy.utils.constants import Backend class MultiLevelRoiAlign(Function): @@ -108,14 +110,18 @@ def single_roi_extractor__forward(ctx, roi_scale_factor=None): """Rewrite `forward` of SingleRoIExtractor for default backend. - Rewrite this function to enable exporting to onnx even though the input + Rewrite this function to: + 1. enable exporting to IR even though the input image contains no targets. Note that, `ScatterND` of onnx may conflict with `Reshape` if a tensor have a dim size of 0. Thus, we have to cat zeros to the dim 0 of `roi_feats` and recover back after all roi align finished. - Besides, this function adds mark for roi_extractor forward and remove - unnecessary code of origin forward function. + 2. this function adds mark for roi_extractor forward and remove + unnecessary code of origin forward function when using ONNX as IR. + + 3. use the roi align in torhcvision to accelerate the inference. 
""" + backend = get_backend(ctx.cfg) out_size = self.roi_layers[0].output_size num_levels = len(feats) roi_feats = feats[0].new_zeros(rois.shape[0], self.out_channels, *out_size) @@ -128,29 +134,29 @@ def single_roi_extractor__forward(ctx, if roi_scale_factor is not None: rois = self.roi_rescale(rois, roi_scale_factor) - # concat len num_levels * 2 of zero tensors to dim 0 of roi_feats + # concate zeros to rois and roi_feats for empty tensor cases roi_feats = torch.cat( (roi_feats.new_zeros(num_levels * 2, *roi_feats.shape[-3:]), roi_feats)) + rois = torch.cat((rois.new_zeros(num_levels * 2, 5), rois)) + _tmp = torch.linspace( + 0, + num_levels - 1, + num_levels, + dtype=target_lvls.dtype, + device=target_lvls.device) + target_lvls = torch.cat((_tmp, _tmp, target_lvls)) for i in range(num_levels): mask = target_lvls == i inds = mask.nonzero(as_tuple=False).squeeze(1) - - # concat len 2 zero tensors to dim 0 of roi_feats - rois_i = torch.cat((rois.new_zeros(2, 5), rois[inds])) - - roi_feats_t = self.roi_layers[i](feats[i], rois_i) - - # correspondingly change the inds - inds = torch.cat([ - torch.tensor([2 * i, 2 * i + 1], - device=inds.device, - dtype=inds.dtype), inds + num_levels * 2 - ]) + rois_t = rois[inds] + # use the roi align in torhcvision + if backend == Backend.TORCHSCRIPT: + self.roi_layers[i].use_torchvision = True + roi_feats_t = self.roi_layers[i](feats[i], rois_t) roi_feats[inds] = roi_feats_t - - # slice and recover tensors - roi_feats = roi_feats[num_levels * (2):] + # slice to recover original size + roi_feats = roi_feats[num_levels * 2:] return roi_feats diff --git a/mmdeploy/codebase/mmdet3d/__init__.py b/mmdeploy/codebase/mmdet3d/__init__.py new file mode 100644 index 0000000000..1974ef569c --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .deploy import MMDetection3d, VoxelDetection +from .models import * # noqa: F401,F403 + +__all__ = ['MMDetection3d', 'VoxelDetection'] diff --git a/mmdeploy/codebase/mmdet3d/deploy/__init__.py b/mmdeploy/codebase/mmdet3d/deploy/__init__.py new file mode 100644 index 0000000000..60ef615aca --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/deploy/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .mmdetection3d import MMDetection3d +from .voxel_detection import VoxelDetection +from .voxel_detection_model import VoxelDetectionModel + +__all__ = ['MMDetection3d', 'VoxelDetection', 'VoxelDetectionModel'] diff --git a/mmdeploy/codebase/mmdet3d/deploy/mmdetection3d.py b/mmdeploy/codebase/mmdet3d/deploy/mmdetection3d.py new file mode 100644 index 0000000000..01f9fbf28e --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/deploy/mmdetection3d.py @@ -0,0 +1,114 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Optional, Union + +import mmcv +from mmcv.utils import Registry +from torch.utils.data import DataLoader, Dataset + +from mmdeploy.codebase.base import CODEBASE, BaseTask, MMCodebase +from mmdeploy.utils import Codebase, get_task_type + + +def __build_mmdet3d_task(model_cfg: mmcv.Config, deploy_cfg: mmcv.Config, + device: str, registry: Registry) -> BaseTask: + task = get_task_type(deploy_cfg) + return registry.module_dict[task.value](model_cfg, deploy_cfg, device) + + +MMDET3D_TASK = Registry('mmdet3d_tasks', build_func=__build_mmdet3d_task) + + +@CODEBASE.register_module(Codebase.MMDET3D.value) +class MMDetection3d(MMCodebase): + + task_registry = MMDET3D_TASK + + def __init__(self): + super().__init__() + + @staticmethod + def build_task_processor(model_cfg: mmcv.Config, deploy_cfg: mmcv.Config, + device: str) -> BaseTask: + """The interface to build the task processors of mmdet3d. + + Args: + model_cfg (str | mmcv.Config): Model config file. + deploy_cfg (str | mmcv.Config): Deployment config file. + device (str): A string specifying device type. + + Returns: + BaseTask: A task processor. + """ + return MMDET3D_TASK.build(model_cfg, deploy_cfg, device) + + @staticmethod + def build_dataset(dataset_cfg: Union[str, mmcv.Config], *args, + **kwargs) -> Dataset: + """Build dataset for detection3d. + + Args: + dataset_cfg (str | mmcv.Config): The input dataset config. + + Returns: + Dataset: A PyTorch dataset. + """ + from mmdet3d.datasets import build_dataset as build_dataset_mmdet3d + + from mmdeploy.utils import load_config + dataset_cfg = load_config(dataset_cfg)[0] + data = dataset_cfg.data + + dataset = build_dataset_mmdet3d(data.test) + return dataset + + @staticmethod + def build_dataloader(dataset: Dataset, + samples_per_gpu: int, + workers_per_gpu: int, + num_gpus: int = 1, + dist: bool = False, + shuffle: bool = False, + seed: Optional[int] = None, + runner_type: str = 'EpochBasedRunner', + persistent_workers: bool = True, + **kwargs) -> DataLoader: + """Build dataloader for detection3d. + + Args: + dataset (Dataset): Input dataset. + samples_per_gpu (int): Number of training samples on each GPU, i.e. + ,batch size of each GPU. + workers_per_gpu (int): How many subprocesses to use for data + loading for each GPU. + num_gpus (int): Number of GPUs. Only used in non-distributed + training. + dist (bool): Distributed training/test or not. + Defaults to `False`. + shuffle (bool): Whether to shuffle the data at every epoch. + Defaults to `False`. + seed (int): An integer set to be seed. Default is `None`. + runner_type (str): Type of runner. Default: `EpochBasedRunner`. + persistent_workers (bool): If True, the data loader will not + shutdown the worker processes after a dataset has been consumed + once. This allows to maintain the workers `Dataset` instances + alive. This argument is only valid when PyTorch>=1.7.0. + Default: False. + kwargs: Any other keyword argument to be used to initialize + DataLoader. + + Returns: + DataLoader: A PyTorch dataloader. 
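# Illustrative sketch (not part of the patch): how an mmcv Registry with a custom
# build_func dispatches to a registered class by name, which is the pattern behind
# MMDET3D_TASK above. The registry name and DummyTask class are invented for this
# example; mmcv passes `registry=self` as a keyword argument to the build_func.
from mmcv.utils import Registry


def _build(cls_name, registry, *args, **kwargs):
    return registry.module_dict[cls_name](*args, **kwargs)


TOY_TASK = Registry('toy_tasks', build_func=_build)


@TOY_TASK.register_module('VoxelDetection')
class DummyTask:

    def __init__(self, cfg):
        self.cfg = cfg


task = TOY_TASK.build('VoxelDetection', cfg=dict(backend='onnxruntime'))
print(type(task).__name__, task.cfg)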
+ """ + from mmdet3d.datasets import \ + build_dataloader as build_dataloader_mmdet3d + return build_dataloader_mmdet3d( + dataset, + samples_per_gpu, + workers_per_gpu, + num_gpus=num_gpus, + dist=dist, + shuffle=shuffle, + seed=seed, + runner_type=runner_type, + persistent_workers=persistent_workers, + **kwargs) diff --git a/mmdeploy/codebase/mmdet3d/deploy/voxel_detection.py b/mmdeploy/codebase/mmdet3d/deploy/voxel_detection.py new file mode 100644 index 0000000000..63eb87b7ab --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/deploy/voxel_detection.py @@ -0,0 +1,301 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union + +import mmcv +import numpy as np +import torch +import torch.nn as nn +from mmcv.parallel import collate, scatter +from mmdet3d.core.bbox import get_box_type +from mmdet3d.datasets.pipelines import Compose +from torch.utils.data import DataLoader, Dataset + +from mmdeploy.codebase.base import BaseTask +from mmdeploy.codebase.mmdet3d.deploy.mmdetection3d import MMDET3D_TASK +from mmdeploy.utils import Task, get_root_logger, load_config +from .voxel_detection_model import VoxelDetectionModel + + +@MMDET3D_TASK.register_module(Task.VOXEL_DETECTION.value) +class VoxelDetection(BaseTask): + + def __init__(self, model_cfg: mmcv.Config, deploy_cfg: mmcv.Config, + device: str): + super().__init__(model_cfg, deploy_cfg, device) + + def init_backend_model(self, + model_files: Sequence[str] = None, + **kwargs) -> torch.nn.Module: + """Initialize backend model. + + Args: + model_files (Sequence[str]): Input model files. + + Returns: + nn.Module: An initialized backend model. + """ + from .voxel_detection_model import build_voxel_detection_model + model = build_voxel_detection_model( + model_files, self.model_cfg, self.deploy_cfg, device=self.device) + return model + + def init_pytorch_model(self, + model_checkpoint: Optional[str] = None, + cfg_options: Optional[Dict] = None, + **kwargs) -> torch.nn.Module: + """Initialize torch model. + + Args: + model_checkpoint (str): The checkpoint file of torch model, + defaults to `None`. + cfg_options (dict): Optional config key-pair parameters. + Returns: + nn.Module: An initialized torch model generated by other OpenMMLab + codebases. + """ + from mmdet3d.apis import init_model + device = self.device + model = init_model(self.model_cfg, model_checkpoint, device) + return model.eval() + + def create_input(self, pcd: str, *args) -> Tuple[Dict, torch.Tensor]: + """Create input for detector. + + Args: + pcd (str): Input pcd file path. + + Returns: + tuple: (data, input), meta information for the input pcd + and model input. + """ + data = VoxelDetection.read_pcd_file(pcd, self.model_cfg, self.device) + voxels, num_points, coors = VoxelDetectionModel.voxelize( + self.model_cfg, data['points'][0]) + return data, (voxels, num_points, coors) + + def visualize(self, + model: torch.nn.Module, + image: str, + result: list, + output_file: str, + window_name: str, + show_result: bool = False, + score_thr: float = 0.3): + """Visualize predictions of a model. + + Args: + model (nn.Module): Input model. + image (str): Pcd file to draw predictions on. + result (list): A list of predictions. + output_file (str): Output file to save result. + window_name (str): The name of visualization window. Defaults to + an empty string. + show_result (bool): Whether to show result in windows, defaults + to `False`. + score_thr (float): The score threshold to display the bbox. + Defaults to 0.3. 
+ """ + from mmdet3d.apis import show_result_meshlab + data = VoxelDetection.read_pcd_file(image, self.model_cfg, self.device) + show_result_meshlab( + data, + result, + output_file, + score_thr, + show=show_result, + snapshot=1 - show_result, + task='det') + + @staticmethod + def read_pcd_file(pcd: str, model_cfg: Union[str, mmcv.Config], + device: str) -> Dict: + """Read data from pcd file and run test pipeline. + + Args: + pcd (str): Pcd file path. + model_cfg (str | mmcv.Config): The model config. + device (str): A string specifying device type. + + Returns: + dict: meta information for the input pcd. + """ + if isinstance(pcd, (list, tuple)): + pcd = pcd[0] + model_cfg = load_config(model_cfg)[0] + test_pipeline = Compose(model_cfg.data.test.pipeline) + box_type_3d, box_mode_3d = get_box_type( + model_cfg.data.test.box_type_3d) + data = dict( + pts_filename=pcd, + box_type_3d=box_type_3d, + box_mode_3d=box_mode_3d, + # for ScanNet demo we need axis_align_matrix + ann_info=dict(axis_align_matrix=np.eye(4)), + sweeps=[], + # set timestamp = 0 + timestamp=[0], + img_fields=[], + bbox3d_fields=[], + pts_mask_fields=[], + pts_seg_fields=[], + bbox_fields=[], + mask_fields=[], + seg_fields=[]) + data = test_pipeline(data) + data = collate([data], samples_per_gpu=1) + data['img_metas'] = [ + img_metas.data[0] for img_metas in data['img_metas'] + ] + data['points'] = [point.data[0] for point in data['points']] + if device != 'cpu': + data = scatter(data, [device])[0] + return data + + @staticmethod + def run_inference(model: nn.Module, + model_inputs: Dict[str, torch.Tensor]) -> List: + """Run inference once for a object detection model of mmdet3d. + + Args: + model (nn.Module): Input model. + model_inputs (dict): A dict containing model inputs tensor and + meta info. + + Returns: + list: The predictions of model inference. + """ + result = model( + return_loss=False, + points=model_inputs['points'], + img_metas=model_inputs['img_metas']) + return [result] + + @staticmethod + def evaluate_outputs(model_cfg, + outputs: Sequence, + dataset: Dataset, + metrics: Optional[str] = None, + out: Optional[str] = None, + metric_options: Optional[dict] = None, + format_only: bool = False, + log_file: Optional[str] = None): + if out: + logger = get_root_logger() + logger.info(f'\nwriting results to {out}') + mmcv.dump(outputs, out) + kwargs = {} if metric_options is None else metric_options + if format_only: + dataset.format_results(outputs, **kwargs) + if metrics: + eval_kwargs = model_cfg.get('evaluation', {}).copy() + # hard-code way to remove EvalHook args + for key in [ + 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', + 'rule' + ]: + eval_kwargs.pop(key, None) + eval_kwargs.pop(key, None) + eval_kwargs.update(dict(metric=metrics, **kwargs)) + dataset.evaluate(outputs, **eval_kwargs) + + def get_model_name(self) -> str: + """Get the model name. + + Return: + str: the name of the model. + """ + raise NotImplementedError + + def get_tensor_from_input(self, input_data: Dict[str, Any], + **kwargs) -> torch.Tensor: + """Get input tensor from input data. + + Args: + input_data (dict): Input data containing meta info and image + tensor. + Returns: + torch.Tensor: An image in `Tensor`. + """ + raise NotImplementedError + + def get_partition_cfg(partition_type: str, **kwargs) -> Dict: + """Get a certain partition config for mmdet. + + Args: + partition_type (str): A string specifying partition type. + + Returns: + dict: A dictionary of partition config. 
+ """ + raise NotImplementedError + + def get_postprocess(self) -> Dict: + """Get the postprocess information for SDK. + + Return: + dict: Composed of the postprocess information. + """ + raise NotImplementedError + + def get_preprocess(self) -> Dict: + """Get the preprocess information for SDK. + + Return: + dict: Composed of the preprocess information. + """ + raise NotImplementedError + + def single_gpu_test(self, + model: nn.Module, + data_loader: DataLoader, + show: bool = False, + out_dir: Optional[str] = None, + **kwargs) -> List: + """Run test with single gpu. + + Args: + model (nn.Module): Input model from nn.Module. + data_loader (DataLoader): PyTorch data loader. + show (bool): Specifying whether to show plotted results. Defaults + to `False`. + out_dir (str): A directory to save results, defaults to `None`. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(data['points'][0].data, + data['img_metas'][0].data, False) + if show: + # Visualize the results of MMDetection3D model + # 'show_results' is MMdetection3D visualization API + if out_dir is None: + model.module.show_result( + data, + result, + out_dir='', + file_name='', + show=show, + snapshot=False, + score_thr=0.3) + else: + model.module.show_result( + data, + result, + out_dir=out_dir, + file_name=f'model_output{i}', + show=show, + snapshot=True, + score_thr=0.3) + results.extend(result) + + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results diff --git a/mmdeploy/codebase/mmdet3d/deploy/voxel_detection_model.py b/mmdeploy/codebase/mmdet3d/deploy/voxel_detection_model.py new file mode 100644 index 0000000000..c5696ef50a --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/deploy/voxel_detection_model.py @@ -0,0 +1,263 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Dict, List, Sequence, Union + +import mmcv +import torch +from mmcv.utils import Registry +from torch.nn import functional as F + +from mmdeploy.codebase.base import BaseBackendModel +from mmdeploy.core import RewriterContext +from mmdeploy.utils import (Backend, get_backend, get_codebase_config, + get_root_logger, load_config) + + +def __build_backend_voxel_model(cls_name: str, registry: Registry, *args, + **kwargs): + return registry.module_dict[cls_name](*args, **kwargs) + + +__BACKEND_MODEL = mmcv.utils.Registry( + 'backend_voxel_detectors', build_func=__build_backend_voxel_model) + + +@__BACKEND_MODEL.register_module('end2end') +class VoxelDetectionModel(BaseBackendModel): + """End to end model for inference of 3d voxel detection. + + Args: + backend (Backend): The backend enum, specifying backend type. + backend_files (Sequence[str]): Paths to all required backend files + (e.g. '.onnx' for ONNX Runtime, '.param' and '.bin' for ncnn). + device (str): A string specifying device type. + model_cfg (str | mmcv.Config): The model config. + deploy_cfg (str|mmcv.Config): Deployment config file or loaded Config + object. 
+ """ + + def __init__(self, + backend: Backend, + backend_files: Sequence[str], + device: str, + model_cfg: mmcv.Config, + deploy_cfg: Union[str, mmcv.Config] = None): + super().__init__(deploy_cfg=deploy_cfg) + self.deploy_cfg = deploy_cfg + self.model_cfg = model_cfg + self.device = device + self._init_wrapper( + backend=backend, backend_files=backend_files, device=device) + + def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], + device: str): + """Initialize backend wrapper. + + Args: + backend (Backend): The backend enum, specifying backend type. + backend_files (Sequence[str]): Paths to all required backend files + (e.g. '.onnx' for ONNX Runtime, '.param' and '.bin' for ncnn). + device (str): A string specifying device type. + """ + output_names = self.output_names + self.wrapper = BaseBackendModel._build_wrapper( + backend=backend, + backend_files=backend_files, + device=device, + output_names=output_names, + deploy_cfg=self.deploy_cfg) + + def forward(self, + points: Sequence[torch.Tensor], + img_metas: Sequence[dict], + return_loss=False): + """Run forward inference. + + Args: + points (Sequence[torch.Tensor]): A list contains input pcd(s) + in [N, ndim] float tensor. points[:, :3] contain xyz points + and points[:, 3:] contain other information like reflectivity + img_metas (Sequence[dict]): A list of meta info for image(s). + return_loss (Bool): Consistent with the pytorch model. + Default = False. + + Returns: + list: A list contains predictions. + """ + result_list = [] + for i in range(len(img_metas)): + voxels, num_points, coors = VoxelDetectionModel.voxelize( + self.model_cfg, points[i]) + input_dict = { + 'voxels': voxels, + 'num_points': num_points, + 'coors': coors + } + outputs = self.wrapper(input_dict) + result = VoxelDetectionModel.post_process(self.model_cfg, + self.deploy_cfg, outputs, + img_metas[i], + self.device)[0] + result_list.append(result) + return result_list + + def show_result(self, + data: Dict, + result: List, + out_dir: str, + file_name: str, + show=False, + snapshot=False, + **kwargs): + from mmcv.parallel import DataContainer as DC + from mmdet3d.core import show_result + if isinstance(data['points'][0], DC): + points = data['points'][0]._data[0][0].numpy() + elif mmcv.is_list_of(data['points'][0], torch.Tensor): + points = data['points'][0][0] + else: + ValueError(f"Unsupported data type {type(data['points'][0])} " + f'for visualization!') + pred_bboxes = result[0]['boxes_3d'] + pred_labels = result[0]['labels_3d'] + pred_bboxes = pred_bboxes.tensor.cpu().numpy() + show_result( + points, + None, + pred_bboxes, + out_dir, + file_name, + show=show, + snapshot=snapshot, + pred_labels=pred_labels) + + @staticmethod + def voxelize(model_cfg: Union[str, mmcv.Config], points: torch.Tensor): + """convert kitti points(N, >=3) to voxels. + + Args: + model_cfg (str | mmcv.Config): The model config. + points (torch.Tensor): [N, ndim] float tensor. points[:, :3] + contain xyz points and points[:, 3:] contain other information + like reflectivity. + + Returns: + voxels: [M, max_points, ndim] float tensor. only contain points + and returned when max_points != -1. + coordinates: [M, 3] int32 tensor, always returned. + num_points_per_voxel: [M] int32 tensor. Only returned when + max_points != -1. 
+ """ + from mmcv.ops import Voxelization + model_cfg = load_config(model_cfg)[0] + if 'voxel_layer' in model_cfg.model.keys(): + voxel_layer = model_cfg.model['voxel_layer'] + elif 'pts_voxel_layer' in model_cfg.model.keys(): + voxel_layer = model_cfg.model['pts_voxel_layer'] + else: + raise + voxel_layer = Voxelization(**voxel_layer) + voxels, coors, num_points = [], [], [] + for res in points: + res_voxels, res_coors, res_num_points = voxel_layer(res) + voxels.append(res_voxels) + coors.append(res_coors) + num_points.append(res_num_points) + voxels = torch.cat(voxels, dim=0) + num_points = torch.cat(num_points, dim=0) + coors_batch = [] + for i, coor in enumerate(coors): + coor_pad = F.pad(coor, (1, 0), mode='constant', value=i) + coors_batch.append(coor_pad) + coors_batch = torch.cat(coors_batch, dim=0) + return voxels, num_points, coors_batch + + @staticmethod + def post_process(model_cfg: Union[str, mmcv.Config], + deploy_cfg: Union[str, mmcv.Config], + outs: torch.Tensor, + img_metas: Dict, + device: str, + rescale=False): + """model post process. + + Args: + model_cfg (str | mmcv.Config): The model config. + deploy_cfg (str|mmcv.Config): Deployment config file or loaded + Config object. + outs (torch.Tensor): Output of model's head. + img_metas(Dict): Meta info for pcd. + device (str): A string specifying device type. + rescale (list[torch.Tensor]): whether th rescale bbox. + Returns: + list: A list contains predictions, include bboxes, scores, labels. + """ + from mmdet3d.core import bbox3d2result + from mmdet3d.models.builder import build_head + model_cfg = load_config(model_cfg)[0] + deploy_cfg = load_config(deploy_cfg)[0] + if 'bbox_head' in model_cfg.model.keys(): + head_cfg = dict(**model_cfg.model['bbox_head']) + elif 'pts_bbox_head' in model_cfg.model.keys(): + head_cfg = dict(**model_cfg.model['pts_bbox_head']) + else: + raise NotImplementedError('Not supported model.') + head_cfg['train_cfg'] = None + head_cfg['test_cfg'] = model_cfg.model['test_cfg'] + head = build_head(head_cfg) + if device == 'cpu': + logger = get_root_logger() + logger.warning( + 'Don\'t suggest using CPU device. Post process can\'t support.' + ) + if torch.cuda.is_available(): + device = 'cuda' + else: + raise NotImplementedError( + 'Post process don\'t support device=cpu') + cls_scores = [outs['scores'].to(device)] + bbox_preds = [outs['bbox_preds'].to(device)] + dir_scores = [outs['dir_scores'].to(device)] + with RewriterContext( + cfg=deploy_cfg, + backend=deploy_cfg.backend_config.type, + opset=deploy_cfg.onnx_config.opset_version): + bbox_list = head.get_bboxes( + cls_scores, bbox_preds, dir_scores, img_metas, rescale=False) + bbox_results = [ + bbox3d2result(bboxes, scores, labels) + for bboxes, scores, labels in bbox_list + ] + return bbox_results + + +def build_voxel_detection_model(model_files: Sequence[str], + model_cfg: Union[str, mmcv.Config], + deploy_cfg: Union[str, + mmcv.Config], device: str): + """Build 3d voxel object detection model for different backends. + + Args: + model_files (Sequence[str]): Input model file(s). + model_cfg (str | mmcv.Config): Input model config file or Config + object. + deploy_cfg (str | mmcv.Config): Input deployment config file or + Config object. + device (str): Device to input model + + Returns: + VoxelDetectionModel: Detector for a configured backend. 
+ """ + deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + + backend = get_backend(deploy_cfg) + model_type = get_codebase_config(deploy_cfg).get('model_type', 'end2end') + + backend_detector = __BACKEND_MODEL.build( + model_type, + backend=backend, + backend_files=model_files, + device=device, + model_cfg=model_cfg, + deploy_cfg=deploy_cfg) + + return backend_detector diff --git a/mmdeploy/codebase/mmdet3d/models/__init__.py b/mmdeploy/codebase/mmdet3d/models/__init__.py new file mode 100644 index 0000000000..8de0c41b3c --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/models/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base import * # noqa: F401,F403 +from .centerpoint import * # noqa: F401,F403 +from .mvx_two_stage import * # noqa: F401,F403 +from .pillar_encode import * # noqa: F401,F403 +from .pillar_scatter import * # noqa: F401,F403 +from .voxelnet import * # noqa: F401,F403 diff --git a/mmdeploy/codebase/mmdet3d/models/base.py b/mmdeploy/codebase/mmdet3d/models/base.py new file mode 100644 index 0000000000..e8d7000e47 --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/models/base.py @@ -0,0 +1,23 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.detectors.base.Base3DDetector.forward_test') +def base3ddetector__forward_test(ctx, + self, + voxels, + num_points, + coors, + img_metas=None, + img=None, + rescale=False): + """Rewrite this function to run simple_test directly.""" + return self.simple_test(voxels, num_points, coors, img_metas, img) + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.detectors.base.Base3DDetector.forward') +def base3ddetector__forward(ctx, self, *args): + """Rewrite this function to run the model directly.""" + return self.forward_test(*args) diff --git a/mmdeploy/codebase/mmdet3d/models/centerpoint.py b/mmdeploy/codebase/mmdet3d/models/centerpoint.py new file mode 100644 index 0000000000..6c9f5fc8c4 --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/models/centerpoint.py @@ -0,0 +1,189 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmdet3d.core import circle_nms + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.detectors.centerpoint.CenterPoint.extract_pts_feat') +def centerpoint__extract_pts_feat(ctx, self, voxels, num_points, coors, + img_feats, img_metas): + """Extract features from points. Rewrite this func to remove voxelize op. + + Args: + voxels (torch.Tensor): Point features or raw points in shape (N, M, C). + num_points (torch.Tensor): Number of points in each voxel. + coors (torch.Tensor): Coordinates of each voxel. + img_feats (list[torch.Tensor], optional): Image features used for + multi-modality fusion. Defaults to None. + img_metas (list[dict]): Meta information of samples. + + Returns: + torch.Tensor: Points feature. + """ + if not self.with_pts_bbox: + return None + + voxel_features = self.pts_voxel_encoder(voxels, num_points, coors) + batch_size = coors[-1, 0] + 1 + x = self.pts_middle_encoder(voxel_features, coors, batch_size) + x = self.pts_backbone(x) + if self.with_pts_neck: + x = self.pts_neck(x) + return x + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.detectors.centerpoint.CenterPoint.simple_test_pts') +def centerpoint__simple_test_pts(ctx, self, x, img_metas, rescale=False): + """Rewrite this func to format model outputs. + + Args: + x (torch.Tensor): Input points feature. 
+ img_metas (list[dict]): Meta information of samples. + rescale (bool): Whether need rescale. + + Returns: + List: Result of model. + """ + outs = self.pts_bbox_head(x) + bbox_preds, scores, dir_scores = [], [], [] + for task_res in outs: + bbox_preds.append(task_res[0]['reg']) + bbox_preds.append(task_res[0]['height']) + bbox_preds.append(task_res[0]['dim']) + if 'vel' in task_res[0].keys(): + bbox_preds.append(task_res[0]['vel']) + scores.append(task_res[0]['heatmap']) + dir_scores.append(task_res[0]['rot']) + bbox_preds = torch.cat(bbox_preds, dim=1) + scores = torch.cat(scores, dim=1) + dir_scores = torch.cat(dir_scores, dim=1) + return scores, bbox_preds, dir_scores + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.dense_heads.centerpoint_head.CenterHead.get_bboxes') +def centerpoint__get_bbox(ctx, + self, + cls_scores, + bbox_preds, + dir_scores, + img_metas, + img=None, + rescale=False): + """Rewrite this func to format func inputs. + + Args + cls_scores (list[torch.Tensor]): Classification predicts results. + bbox_preds (list[torch.Tensor]): Bbox predicts results. + dir_scores (list[torch.Tensor]): Dir predicts results. + img_metas (list[dict]): Point cloud and image's meta info. + img (torch.Tensor): Input image. + rescale (Bool): Whether need rescale. + + Returns: + list[dict]: Decoded bbox, scores and labels after nms. + """ + rets = [] + scores_range = [0] + bbox_range = [0] + dir_range = [0] + self.test_cfg = self.test_cfg['pts'] + for i, task_head in enumerate(self.task_heads): + scores_range.append(scores_range[i] + self.num_classes[i]) + bbox_range.append(bbox_range[i] + 8) + dir_range.append(dir_range[i] + 2) + for task_id in range(len(self.num_classes)): + num_class_with_bg = self.num_classes[task_id] + + batch_heatmap = cls_scores[ + 0][:, scores_range[task_id]:scores_range[task_id + 1], + ...].sigmoid() + + batch_reg = bbox_preds[0][:, + bbox_range[task_id]:bbox_range[task_id] + 2, + ...] + batch_hei = bbox_preds[0][:, bbox_range[task_id] + + 2:bbox_range[task_id] + 3, ...] + + if self.norm_bbox: + batch_dim = torch.exp(bbox_preds[0][:, bbox_range[task_id] + + 3:bbox_range[task_id] + 6, + ...]) + else: + batch_dim = bbox_preds[0][:, bbox_range[task_id] + + 3:bbox_range[task_id] + 6, ...] + + batch_vel = bbox_preds[0][:, bbox_range[task_id] + + 6:bbox_range[task_id + 1], ...] 
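# Illustrative sketch (not part of the patch): packing per-task CenterHead outputs into
# a single tensor and slicing them back with running channel offsets, which is what
# centerpoint__simple_test_pts and centerpoint__get_bbox above do for scores,
# bbox_preds and dir_scores. Two toy tasks with 1 and 2 classes are used here.
import torch

num_classes = [1, 2]
heatmaps = [torch.rand(1, c, 4, 4) for c in num_classes]
scores = torch.cat(heatmaps, dim=1)                  # shape (1, 3, 4, 4)

scores_range = [0]
for i, c in enumerate(num_classes):
    scores_range.append(scores_range[i] + c)

for task_id in range(len(num_classes)):
    task_heatmap = scores[:, scores_range[task_id]:scores_range[task_id + 1]]
    print(task_id, task_heatmap.shape)               # (1, 1, 4, 4) then (1, 2, 4, 4)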
+ + batch_rots = dir_scores[0][:, + dir_range[task_id]:dir_range[task_id + 1], + ...][:, 0].unsqueeze(1) + batch_rotc = dir_scores[0][:, + dir_range[task_id]:dir_range[task_id + 1], + ...][:, 1].unsqueeze(1) + + temp = self.bbox_coder.decode( + batch_heatmap, + batch_rots, + batch_rotc, + batch_hei, + batch_dim, + batch_vel, + reg=batch_reg, + task_id=task_id) + assert self.test_cfg['nms_type'] in ['circle', 'rotate'] + batch_reg_preds = [box['bboxes'] for box in temp] + batch_cls_preds = [box['scores'] for box in temp] + batch_cls_labels = [box['labels'] for box in temp] + if self.test_cfg['nms_type'] == 'circle': + + boxes3d = temp[0]['bboxes'] + scores = temp[0]['scores'] + labels = temp[0]['labels'] + centers = boxes3d[:, [0, 1]] + boxes = torch.cat([centers, scores.view(-1, 1)], dim=1) + keep = torch.tensor( + circle_nms( + boxes.detach().cpu().numpy(), + self.test_cfg['min_radius'][task_id], + post_max_size=self.test_cfg['post_max_size']), + dtype=torch.long, + device=boxes.device) + + boxes3d = boxes3d[keep] + scores = scores[keep] + labels = labels[keep] + ret = dict(bboxes=boxes3d, scores=scores, labels=labels) + ret_task = [ret] + rets.append(ret_task) + else: + rets.append( + self.get_task_detections(num_class_with_bg, batch_cls_preds, + batch_reg_preds, batch_cls_labels, + img_metas)) + + # Merge branches results + num_samples = len(rets[0]) + + ret_list = [] + for i in range(num_samples): + for k in rets[0][i].keys(): + if k == 'bboxes': + bboxes = torch.cat([ret[i][k] for ret in rets]) + bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5 + bboxes = img_metas[i]['box_type_3d'](bboxes, + self.bbox_coder.code_size) + elif k == 'scores': + scores = torch.cat([ret[i][k] for ret in rets]) + elif k == 'labels': + flag = 0 + for j, num_class in enumerate(self.num_classes): + rets[j][i][k] += flag + flag += num_class + labels = torch.cat([ret[i][k].int() for ret in rets]) + ret_list.append([bboxes, scores, labels]) + return ret_list diff --git a/mmdeploy/codebase/mmdet3d/models/mvx_two_stage.py b/mmdeploy/codebase/mmdet3d/models/mvx_two_stage.py new file mode 100644 index 0000000000..3018a2f732 --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/models/mvx_two_stage.py @@ -0,0 +1,54 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.detectors.mvx_two_stage.MVXTwoStageDetector.simple_test') +def mvxtwostagedetector__simple_test(ctx, + self, + voxels, + num_points, + coors, + img_metas, + img=None, + rescale=False): + """Rewrite this func to remove voxelize op. + + Args: + voxels (torch.Tensor): Point features or raw points in shape (N, M, C). + num_points (torch.Tensor): Number of points in each voxel. + coors (torch.Tensor): Coordinates of each voxel. + img_metas (list[dict]): Meta information of samples. + img (torch.Tensor): Input image. + rescale (Bool): Whether need rescale. + + Returns: + list[dict]: Decoded bbox, scores and labels after nms. + """ + _, pts_feats = self.extract_feat( + voxels, num_points, coors, img=img, img_metas=img_metas) + if pts_feats and self.with_pts_bbox: + bbox_pts = self.simple_test_pts(pts_feats, img_metas, rescale=rescale) + return bbox_pts + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.detectors.mvx_two_stage.MVXTwoStageDetector.extract_feat') +def mvxtwostagedetector__extract_feat(ctx, self, voxels, num_points, coors, + img, img_metas): + """Rewrite this func to remove voxelize op. 
+ + Args: + voxels (torch.Tensor): Point features or raw points in shape (N, M, C). + num_points (torch.Tensor): Number of points in each voxel. + coors (torch.Tensor): Coordinates of each voxel. + img (torch.Tensor): Input image. + img_metas (list[dict]): Meta information of samples. + + Returns: + tuple(torch.Tensor) : image feature and points feather. + """ + img_feats = self.extract_img_feat(img, img_metas) + pts_feats = self.extract_pts_feat(voxels, num_points, coors, img_feats, + img_metas) + return (img_feats, pts_feats) diff --git a/mmdeploy/codebase/mmdet3d/models/pillar_encode.py b/mmdeploy/codebase/mmdet3d/models/pillar_encode.py new file mode 100644 index 0000000000..71a30647b7 --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/models/pillar_encode.py @@ -0,0 +1,65 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmdet3d.models.voxel_encoders.utils import get_paddings_indicator + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.voxel_encoders.pillar_encoder.PillarFeatureNet.forward') +def pillar_encoder__forward(ctx, self, features, num_points, coors): + """Rewrite this func to optimize node. Modify the code at + _with_voxel_center and use slice instead of the original operation. + + Args: + features (torch.Tensor): Point features or raw points in shape + (N, M, C). + num_points (torch.Tensor): Number of points in each pillar. + coors (torch.Tensor): Coordinates of each voxel. + + Returns: + torch.Tensor: Features of pillars. + """ + features_ls = [features] + # Find distance of x, y, and z from cluster center + if self._with_cluster_center: + points_mean = features[:, :, :3].sum( + dim=1, keepdim=True) / num_points.type_as(features).view(-1, 1, 1) + f_cluster = features[:, :, :3] - points_mean + features_ls.append(f_cluster) + + # Find distance of x, y, and z from pillar center + device = features.device + if self._with_voxel_center: + if not self.legacy: + f_center = features[..., :3] - ( + coors * torch.tensor([1, self.vz, self.vy, self.vx]).to(device) + + + torch.tensor([1, self.z_offset, self.y_offset, self.x_offset + ]).to(device)).unsqueeze(1).flip(2)[..., :3] + else: + f_center = features[..., :3] - ( + coors * torch.tensor([1, self.vz, self.vy, self.vx]).to(device) + + + torch.tensor([1, self.z_offset, self.y_offset, self.x_offset + ]).to(device)).unsqueeze(1).flip(2)[..., :3] + features_ls[0] = torch.cat((f_center, features[..., 3:]), dim=-1) + features_ls.append(f_center) + + if self._with_distance: + points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) + features_ls.append(points_dist) + + # Combine together feature decorations + features = torch.cat(features_ls, dim=-1) + # The feature decorations were calculated without regard to whether + # pillar was empty. Need to ensure that + # empty pillars remain set to zeros. + voxel_count = features.shape[1] + mask = get_paddings_indicator(num_points, voxel_count, axis=0) + mask = torch.unsqueeze(mask, -1).type_as(features) + features *= mask + for pfn in self.pfn_layers: + features = pfn(features, num_points) + + return features.squeeze(1) diff --git a/mmdeploy/codebase/mmdet3d/models/pillar_scatter.py b/mmdeploy/codebase/mmdet3d/models/pillar_scatter.py new file mode 100644 index 0000000000..7056e3d481 --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/models/pillar_scatter.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
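# Illustrative sketch (not part of the patch): the broadcasted pillar-center offset used
# in the PillarFeatureNet rewrite above. Multiplying the (batch, z, y, x) pillar indices
# by the voxel sizes, adding the grid offsets, then flipping the last axis yields
# per-pillar (x, y, z) centers in one expression. Voxel sizes, offsets and tensors
# below are toy values.
import torch

vx, vy, vz = 0.16, 0.16, 4.0             # voxel sizes
x_off, y_off, z_off = 0.08, -39.6, -1.0  # voxel-center offsets

features = torch.rand(2, 3, 4)           # (num_pillars, max_points, xyz + extra)
coors = torch.tensor([[0., 0., 5., 7.],  # (batch, z, y, x) index per pillar
                      [0., 0., 2., 1.]])

centers = (coors * torch.tensor([1., vz, vy, vx]) +
           torch.tensor([1., z_off, y_off, x_off]))      # (batch + 1, zc, yc, xc)
f_center = features[..., :3] - centers.unsqueeze(1).flip(2)[..., :3]
print(f_center.shape)                    # (2, 3, 3): per-point (x, y, z) offsets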
+import torch + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.middle_encoders.pillar_scatter.' + 'PointPillarsScatter.forward_batch', ) +def pointpillarsscatter__forward(ctx, + self, + voxel_features, + coors, + batch_size=1): + """Scatter features of single sample. + + Args: + voxel_features (torch.Tensor): Voxel features from voxel encoder layer. + coors (torch.Tensor): Coordinates of each voxel. + The first column indicates the sample ID. + batch_size (int): Number of samples in the current batch. + """ + canvas = torch.zeros( + self.in_channels, + self.nx * self.ny, + dtype=voxel_features.dtype, + device=voxel_features.device) + + indices = coors[:, 2] * self.nx + coors[:, 3] + indices = indices.long() + voxels = voxel_features.t() + # Now scatter the blob back to the canvas. + canvas[:, indices] = voxels + # Undo the column stacking to final 4-dim tensor + canvas = canvas.view(1, self.in_channels, self.ny, self.nx) + return canvas diff --git a/mmdeploy/codebase/mmdet3d/models/voxelnet.py b/mmdeploy/codebase/mmdet3d/models/voxelnet.py new file mode 100644 index 0000000000..e5d285bb2f --- /dev/null +++ b/mmdeploy/codebase/mmdet3d/models/voxelnet.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.detectors.voxelnet.VoxelNet.simple_test') +def voxelnet__simple_test(ctx, + self, + voxels, + num_points, + coors, + img_metas=None, + imgs=None, + rescale=False): + """Test function without augmentaiton. Rewrite this func to remove model + post process. + + Args: + voxels (torch.Tensor): Point features or raw points in shape (N, M, C). + num_points (torch.Tensor): Number of points in each pillar. + coors (torch.Tensor): Coordinates of each voxel. + input_metas (list[dict]): Contain pcd meta info. + + Returns: + List: Result of model. + """ + x = self.extract_feat(voxels, num_points, coors, img_metas) + bbox_preds, scores, dir_scores = self.bbox_head(x) + return bbox_preds, scores, dir_scores + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet3d.models.detectors.voxelnet.VoxelNet.extract_feat') +def voxelnet__extract_feat(ctx, + self, + voxels, + num_points, + coors, + img_metas=None): + """Extract features from points. Rewrite this func to remove voxelize op. + + Args: + voxels (torch.Tensor): Point features or raw points in shape (N, M, C). + num_points (torch.Tensor): Number of points in each pillar. + coors (torch.Tensor): Coordinates of each voxel. + input_metas (list[dict]): Contain pcd meta info. + + Returns: + torch.Tensor: Features from points. 
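# Illustrative sketch (not part of the patch): scattering pillar features onto a flat
# BEV canvas with linear indices and reshaping to (1, C, ny, nx), as the
# PointPillarsScatter rewrite above does for a single sample. All sizes are toy values.
import torch

in_channels, ny, nx = 4, 3, 5
voxel_features = torch.rand(2, in_channels)          # two non-empty pillars
coors = torch.tensor([[0, 0, 1, 2],                  # (batch, z, y, x)
                      [0, 0, 2, 4]])

canvas = torch.zeros(in_channels, nx * ny, dtype=voxel_features.dtype)
indices = (coors[:, 2] * nx + coors[:, 3]).long()    # flat index = y * nx + x
canvas[:, indices] = voxel_features.t()
canvas = canvas.view(1, in_channels, ny, nx)
print(canvas.shape, torch.allclose(canvas[0, :, 1, 2], voxel_features[0]))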
+ """ + voxel_features = self.voxel_encoder(voxels, num_points, coors) + batch_size = coors[-1, 0] + 1 # refactor + assert batch_size == 1 + x = self.middle_encoder(voxel_features, coors, batch_size) + x = self.backbone(x) + if self.with_neck: + x = self.neck(x) + return x diff --git a/mmdeploy/codebase/mmedit/deploy/super_resolution_model.py b/mmdeploy/codebase/mmedit/deploy/super_resolution_model.py index e00ea7ef8c..ade5d0beea 100644 --- a/mmdeploy/codebase/mmedit/deploy/super_resolution_model.py +++ b/mmdeploy/codebase/mmedit/deploy/super_resolution_model.py @@ -54,6 +54,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) diff --git a/mmdeploy/codebase/mmocr/deploy/text_detection_model.py b/mmdeploy/codebase/mmocr/deploy/text_detection_model.py index 31861b66e4..d6917161d9 100644 --- a/mmdeploy/codebase/mmocr/deploy/text_detection_model.py +++ b/mmdeploy/codebase/mmocr/deploy/text_detection_model.py @@ -67,6 +67,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) diff --git a/mmdeploy/codebase/mmocr/deploy/text_recognition_model.py b/mmdeploy/codebase/mmocr/deploy/text_recognition_model.py index 7f07dbba63..de9d18154f 100644 --- a/mmdeploy/codebase/mmocr/deploy/text_recognition_model.py +++ b/mmdeploy/codebase/mmocr/deploy/text_recognition_model.py @@ -72,6 +72,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) diff --git a/mmdeploy/codebase/mmpose/deploy/pose_detection.py b/mmdeploy/codebase/mmpose/deploy/pose_detection.py index 52bb1632bc..0405523400 100644 --- a/mmdeploy/codebase/mmpose/deploy/pose_detection.py +++ b/mmdeploy/codebase/mmpose/deploy/pose_detection.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. +import copy import logging import os from typing import Any, Dict, Optional, Sequence, Tuple, Union @@ -12,7 +13,60 @@ from mmdeploy.codebase.base import BaseTask from mmdeploy.codebase.mmpose.deploy.mmpose import MMPOSE_TASK -from mmdeploy.utils import Task +from mmdeploy.utils import Task, get_input_shape + + +def process_model_config( + model_cfg: mmcv.Config, + imgs: Union[Sequence[str], Sequence[np.ndarray]], + input_shape: Optional[Sequence[int]] = None, +): + """Process the model config. + + Args: + model_cfg (mmcv.Config): The model config. + imgs (Sequence[str] | Sequence[np.ndarray]): Input image(s), accepted + data type are List[str], List[np.ndarray]. + input_shape (list[int]): A list of two integer in (width, height) + format specifying input shape. Default: None. + + Returns: + mmcv.Config: the model config after processing. 
+ """ + cfg = copy.deepcopy(model_cfg) + test_pipeline = cfg.data.test.pipeline + sdk_pipeline = [] + color_type = 'color' + channel_order = 'rgb' + + idx = 0 + while idx < len(test_pipeline): + trans = test_pipeline[idx] + if trans.type == 'ToTensor': + assert idx + 1 < len(test_pipeline) and \ + test_pipeline[idx + 1].type == 'NormalizeTensor' + trans = test_pipeline[idx + 1] + trans.type = 'Normalize' + trans['to_rgb'] = (channel_order == 'rgb') + trans['mean'] = [x * 255 for x in trans['mean']] + trans['std'] = [x * 255 for x in trans['std']] + sdk_pipeline.append(trans) + sdk_pipeline.append({'type': 'ImageToTensor', 'keys': ['img']}) + idx = idx + 2 + continue + + if trans.type == 'LoadImageFromFile': + if 'color_type' in trans: + color_type = trans['color_type'] # NOQA + if 'channel_order' in trans: + channel_order = trans['channel_order'] + if trans.type == 'TopDownAffine': + trans['image_size'] = input_shape + + sdk_pipeline.append(trans) + idx = idx + 1 + cfg.data.test.pipeline = sdk_pipeline + return cfg @MMPOSE_TASK.register_module(Task.POSE_DETECTION.value) @@ -130,7 +184,7 @@ def create_input(self, 'rotation': 0, 'ann_info': { - 'image_size': image_size, + 'image_size': np.array(image_size), 'num_joints': cfg.data_cfg['num_joints'], 'flip_pairs': flip_pairs } @@ -198,6 +252,7 @@ def evaluate_outputs(model_cfg: mmcv.Config, out: Optional[str] = None, metric_options: Optional[dict] = None, format_only: bool = False, + log_file: Optional[str] = None, **kwargs): """Perform post-processing to predictions of model. @@ -215,10 +270,15 @@ def evaluate_outputs(model_cfg: mmcv.Config, evaluation. It is useful when you want to format the result to a specific format and submit it to the test server. Defaults to `False`. + log_file (str | None): The file to write the evaluation results. + Defaults to `None` and the results will only print on stdout. """ + from mmcv.utils import get_logger + logger = get_logger('test', log_file=log_file, log_level=logging.INFO) + res_folder = '.' if out: - logging.info(f'\nwriting results to {out}') + logger.info(f'\nwriting results to {out}') mmcv.dump(outputs, out) res_folder, _ = os.path.split(out) os.makedirs(res_folder, exist_ok=True) @@ -229,7 +289,7 @@ def evaluate_outputs(model_cfg: mmcv.Config, results = dataset.evaluate(outputs, res_folder, **eval_config) for k, v in sorted(results.items()): - print(f'{k}: {v}') + logger.info(f'{k}: {v:.4f}') def get_model_name(self) -> str: """Get the model name. @@ -251,12 +311,24 @@ def get_partition_cfg(partition_type: str, **kwargs) -> Dict: raise NotImplementedError('Not supported yet.') def get_preprocess(self) -> Dict: - """Get the preprocess information for SDK.""" - raise NotImplementedError('Not supported yet.') + """Get the preprocess information for SDK. + + Return: + dict: Composed of the preprocess information. 
+ """ + input_shape = get_input_shape(self.deploy_cfg) + model_cfg = process_model_config(self.model_cfg, [''], input_shape) + preprocess = model_cfg.data.test.pipeline + return preprocess def get_postprocess(self) -> Dict: """Get the postprocess information for SDK.""" - raise NotImplementedError('Not supported yet.') + postprocess = {'type': 'UNKNOWN'} + if self.model_cfg.model.type == 'TopDown': + postprocess[ + 'type'] = self.model_cfg.model.keypoint_head.type + 'Decode' + postprocess.update(self.model_cfg.model.test_cfg) + return postprocess @staticmethod def get_tensor_from_input(input_data: Dict[str, Any], diff --git a/mmdeploy/codebase/mmpose/deploy/pose_detection_model.py b/mmdeploy/codebase/mmpose/deploy/pose_detection_model.py index e54a2f9494..1844c5cc10 100644 --- a/mmdeploy/codebase/mmpose/deploy/pose_detection_model.py +++ b/mmdeploy/codebase/mmpose/deploy/pose_detection_model.py @@ -4,11 +4,22 @@ import mmcv import numpy as np import torch +from mmcv.utils import Registry from mmdeploy.codebase.base import BaseBackendModel -from mmdeploy.utils import Backend, get_backend, load_config +from mmdeploy.utils import (Backend, get_backend, get_codebase_config, + load_config) +def __build_backend_model(cls_name: str, registry: Registry, *args, **kwargs): + return registry.module_dict[cls_name](*args, **kwargs) + + +__BACKEND_MODEL = mmcv.utils.Registry( + 'backend_pose_detectors', build_func=__build_backend_model) + + +@__BACKEND_MODEL.register_module('end2end') class End2EndModel(BaseBackendModel): """End to end model for inference of pose detection. @@ -31,15 +42,14 @@ def __init__(self, model_cfg: Union[str, mmcv.Config] = None, **kwargs): super(End2EndModel, self).__init__(deploy_cfg=deploy_cfg) - from mmpose.models.heads.topdown_heatmap_base_head import \ - TopdownHeatmapBaseHead + from mmpose.models import builder self.deploy_cfg = deploy_cfg self.model_cfg = model_cfg self._init_wrapper( backend=backend, backend_files=backend_files, device=device) # create base_head for decoding heatmap - base_head = TopdownHeatmapBaseHead() + base_head = builder.build_head(model_cfg.model.keypoint_head) base_head.test_cfg = model_cfg.model.test_cfg self.base_head = base_head @@ -57,7 +67,9 @@ def _init_wrapper(self, backend, backend_files, device): backend=backend, backend_files=backend_files, device=device, - output_names=output_names) + input_names=[self.input_name], + output_names=output_names, + deploy_cfg=self.deploy_cfg) def forward(self, img: torch.Tensor, img_metas: Sequence[Sequence[dict]], *args, **kwargs): @@ -73,10 +85,12 @@ def forward(self, img: torch.Tensor, img_metas: Sequence[Sequence[dict]], Returns: list: A list contains predictions. 
""" + batch_size, _, img_height, img_width = img.shape input_img = img.contiguous() outputs = self.forward_test(input_img, img_metas, *args, **kwargs) heatmaps = outputs[0] - key_points = self.base_head.decode(img_metas, heatmaps) + key_points = self.base_head.decode( + img_metas, heatmaps, img_size=[img_width, img_height]) return key_points def forward_test(self, imgs: torch.Tensor, *args, **kwargs) -> \ @@ -136,6 +150,80 @@ def show_result(self, win_name=win_name) +@__BACKEND_MODEL.register_module('sdk') +class SDKEnd2EndModel(End2EndModel): + """SDK inference class, converts SDK output to mmcls format.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _cs2xyxy(self, + _center: np.ndarray, + _scale: np.ndarray, + padding: float = 1.25): + """This encodes (center, scale) to fake bbox(x,y,x,y) The dataloader in + mmpose convert the bbox of image to (center, scale) and use these + information in the pre/post process of model. Some setting of + dataloader will not collect bbox key. While in practice, we receive + image and bbox as input. Therefore this method try to convert the + (center, scale) back to bbox. It can not restore the real box with just + (center, scale) information, but sdk can handle the fake bbox normally. + + Args: + _center: (np.ndarray[float32](2,)) Center of the bbox (x, y) + _scale: (np.ndarray[float32](2,)) Scale of the bbox w & h + + Returns: + - np.ndarray[float32](4,): fake box if keypoint, the process in + topdown_affine will calculate original center, scale. + """ + scale = _scale.copy() + scale = scale / padding * 200 + center = _center.copy() + # fake box + box = np.array([center - 0.5 * scale, + center + 0.5 * scale - 1]).flatten() + return box + + def forward(self, img: List[torch.Tensor], *args, **kwargs) -> list: + """Run forward inference. + + Args: + img (List[torch.Tensor]): A list contains input image(s) + in [N x C x H x W] format. + *args: Other arguments. + **kwargs: Other key-pair arguments. + + Returns: + list: A list contains predictions. 
+ """ + image_paths = [] + boxes = np.zeros(shape=(img.shape[0], 6)) + bbox_ids = [] + sdk_boxes = [] + for i, img_meta in enumerate(kwargs['img_metas']): + center = img_meta['center'] + scale = img_meta['scale'] + boxes[i, :2] = center + boxes[i, 2:4] = scale + boxes[i, 4] = np.prod(scale * 200.0) + boxes[i, 5] = img_meta[ + 'bbox_score'] if 'bbox_score' in img_meta else 1.0 + sdk_boxes.append(self._cs2xyxy(center, scale)) + image_paths.append(img_meta['image_file']) + bbox_ids.append(img_meta['bbox_id']) + + pred = self.wrapper.handle( + [img[0].contiguous().detach().cpu().numpy()], sdk_boxes)[0] + + result = dict( + preds=pred, + boxes=boxes, + image_paths=image_paths, + bbox_ids=bbox_ids) + return result + + def build_pose_detection_model(model_files: Sequence[str], model_cfg: Union[str, mmcv.Config], deploy_cfg: Union[str, mmcv.Config], @@ -157,12 +245,14 @@ def build_pose_detection_model(model_files: Sequence[str], deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) backend = get_backend(deploy_cfg) - backend_pose_model = End2EndModel( - backend, - model_files, - device, - deploy_cfg=deploy_cfg, + model_type = get_codebase_config(deploy_cfg).get('model_type', 'end2end') + + backend_pose_model = __BACKEND_MODEL.build( + model_type, + backend=backend, + backend_files=model_files, + device=device, model_cfg=model_cfg, - **kwargs) + deploy_cfg=deploy_cfg) return backend_pose_model diff --git a/mmdeploy/codebase/mmpose/models/heads/__init__.py b/mmdeploy/codebase/mmpose/models/heads/__init__.py index f462d37c75..f48ba2e343 100644 --- a/mmdeploy/codebase/mmpose/models/heads/__init__.py +++ b/mmdeploy/codebase/mmpose/models/heads/__init__.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +from .deeppose_regression_head import deeppose_regression_head__inference_model from .topdown_heatmap_multi_stage_head import \ topdown_heatmap_msmu_head__inference_model from .topdown_heatmap_simple_head import \ @@ -6,5 +7,6 @@ __all__ = [ 'topdown_heatmap_simple_head__inference_model', - 'topdown_heatmap_msmu_head__inference_model' + 'topdown_heatmap_msmu_head__inference_model', + 'deeppose_regression_head__inference_model' ] diff --git a/mmdeploy/codebase/mmpose/models/heads/deeppose_regression_head.py b/mmdeploy/codebase/mmpose/models/heads/deeppose_regression_head.py new file mode 100644 index 0000000000..c484fa05da --- /dev/null +++ b/mmdeploy/codebase/mmpose/models/heads/deeppose_regression_head.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmpose.models.heads.DeepposeRegressionHead.inference_model') +def deeppose_regression_head__inference_model(ctx, self, x, flip_pairs=None): + """Rewrite `forward_test` of TopDown for default backend. + + Rewrite this function to run forward directly. And we don't need to + transform result to np.ndarray. + + Args: + x (torch.Tensor[N,K,H,W]): Input features. + flip_pairs (None | list[tuple]): + Pairs of keypoints which are mirrored. + + Returns: + output_heatmap (torch.Tensor): Output heatmaps. 
+ """ + assert flip_pairs is None + output = self.forward(x) + return output diff --git a/mmdeploy/codebase/mmpose/models/heads/topdown_heatmap_multi_stage_head.py b/mmdeploy/codebase/mmpose/models/heads/topdown_heatmap_multi_stage_head.py index 5bb1014a43..b00d4af460 100644 --- a/mmdeploy/codebase/mmpose/models/heads/topdown_heatmap_multi_stage_head.py +++ b/mmdeploy/codebase/mmpose/models/heads/topdown_heatmap_multi_stage_head.py @@ -24,3 +24,29 @@ def topdown_heatmap_msmu_head__inference_model(ctx, self, x, flip_pairs=None): assert isinstance(output, list) output = output[-1] return output + + +@FUNCTION_REWRITER.register_rewriter( + 'mmpose.models.heads.TopdownHeatmapMultiStageHead.inference_model') +def topdown_heatmap_multi_stage_head__inference_model(ctx, + self, + x, + flip_pairs=None): + """Rewrite ``inference_model`` for default backend. + + Rewrite this function to run forward directly. And we don't need to + transform result to np.ndarray. + + Args: + x (list[torch.Tensor[N,K,H,W]]): Input features. + flip_pairs (None | list[tuple]): + Pairs of keypoints which are mirrored. + + Returns: + output_heatmap (torch.Tensor): Output heatmaps. + """ + assert flip_pairs is None + output = self.forward(x) + assert isinstance(output, list) + output = output[-1] + return output diff --git a/mmdeploy/codebase/mmseg/deploy/segmentation_model.py b/mmdeploy/codebase/mmseg/deploy/segmentation_model.py index 46e0789031..a57cb9a70b 100644 --- a/mmdeploy/codebase/mmseg/deploy/segmentation_model.py +++ b/mmdeploy/codebase/mmseg/deploy/segmentation_model.py @@ -59,6 +59,7 @@ def _init_wrapper(self, backend, backend_files, device): backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) diff --git a/mmdeploy/codebase/mmseg/models/__init__.py b/mmdeploy/codebase/mmseg/models/__init__.py index 77b260b1c3..f8c63589a9 100644 --- a/mmdeploy/codebase/mmseg/models/__init__.py +++ b/mmdeploy/codebase/mmseg/models/__init__.py @@ -1,3 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. from .decode_heads import * # noqa: F401,F403 from .segmentors import * # noqa: F401,F403 +from .utils import * # noqa: F401,F403 diff --git a/mmdeploy/codebase/mmseg/models/decode_heads/__init__.py b/mmdeploy/codebase/mmseg/models/decode_heads/__init__.py index 5d505fa8bc..e893f20460 100644 --- a/mmdeploy/codebase/mmseg/models/decode_heads/__init__.py +++ b/mmdeploy/codebase/mmseg/models/decode_heads/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. from .aspp_head import aspp_head__forward +from .ema_head import ema_module__forward from .psp_head import ppm__forward -__all__ = ['aspp_head__forward', 'ppm__forward'] +__all__ = ['aspp_head__forward', 'ppm__forward', 'ema_module__forward'] diff --git a/mmdeploy/codebase/mmseg/models/decode_heads/ema_head.py b/mmdeploy/codebase/mmseg/models/decode_heads/ema_head.py new file mode 100644 index 0000000000..5d839691b7 --- /dev/null +++ b/mmdeploy/codebase/mmseg/models/decode_heads/ema_head.py @@ -0,0 +1,45 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn.functional as F + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmseg.models.decode_heads.ema_head.EMAModule.forward') +def ema_module__forward(ctx, self, feats): + """Rewrite `forward` for default backend. + + Replace torch.einsum with other operations. + + Args: + ctx (ContextCaller): The context with additional information. 
+ self: The instance of the original class. + feats (Tensor): Input feature. + + Returns: + torch.Tensor: Output feature. + """ + batch_size, channels, height, width = feats.size() + # [batch_size, channels, height*width] + feats = feats.view(batch_size, channels, height * width) + # [batch_size, channels, num_bases] + bases = self.bases.repeat(batch_size, 1, 1) + + with torch.no_grad(): + for i in range(self.num_stages): + # [batch_size, height*width, num_bases] + attention = torch.bmm(feats.transpose(1, 2), bases) + # attention = torch.einsum('bcn,bck->bnk', feats, bases) + attention = F.softmax(attention, dim=2) + # l1 norm + attention_normed = F.normalize(attention, dim=1, p=1) + # [batch_size, channels, num_bases] + bases = torch.bmm(feats, attention_normed) + # bases = torch.einsum('bcn,bnk->bck', feats, attention_normed) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + feats_recon = torch.bmm(bases, attention.transpose(1, 2)) + # feats_recon = torch.einsum('bck,bnk->bcn', bases, attention) + feats_recon = feats_recon.view(batch_size, channels, height, width) + return feats_recon diff --git a/mmdeploy/codebase/mmseg/models/decode_heads/psp_head.py b/mmdeploy/codebase/mmseg/models/decode_heads/psp_head.py index 81e10ad160..c792237029 100644 --- a/mmdeploy/codebase/mmseg/models/decode_heads/psp_head.py +++ b/mmdeploy/codebase/mmseg/models/decode_heads/psp_head.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. + +import torch.nn as nn from mmseg.ops import resize from mmdeploy.core import FUNCTION_REWRITER @@ -30,6 +32,15 @@ def ppm__forward(ctx, self, x): ppm_outs = [] for ppm in self: + if isinstance(ppm[0], nn.AdaptiveAvgPool2d) and \ + ppm[0].output_size != 1: + assert not is_dynamic_flag, 'AdaptiveAvgPool2d is not \ + supported with dynamic shape in backends' + + # replace AdaptiveAvgPool2d with AvgPool2d explicitly + output_size = 2 * [ppm[0].output_size] + k = [int(size[i] / output_size[i]) for i in range(0, len(size))] + ppm[0] = nn.AvgPool2d(k, stride=k, padding=0, ceil_mode=False) ppm_out = ppm(x) upsampled_ppm_out = resize( ppm_out, diff --git a/mmdeploy/codebase/mmseg/models/segmentors/encoder_decoder.py b/mmdeploy/codebase/mmseg/models/segmentors/encoder_decoder.py index b50778e461..bca614ae86 100644 --- a/mmdeploy/codebase/mmseg/models/segmentors/encoder_decoder.py +++ b/mmdeploy/codebase/mmseg/models/segmentors/encoder_decoder.py @@ -24,8 +24,7 @@ def encoder_decoder__simple_test(ctx, self, img, img_meta, **kwargs): Returns: torch.Tensor: Output segmentation map pf shape [N, 1, H, W]. """ - x = self.extract_feat(img) - seg_logit = self._decode_head_forward_test(x, img_meta) + seg_logit = self.encode_decode(img, img_meta) seg_logit = resize( input=seg_logit, size=img_meta['img_shape'], diff --git a/mmdeploy/codebase/mmseg/models/utils/__init__.py b/mmdeploy/codebase/mmseg/models/utils/__init__.py new file mode 100644 index 0000000000..954eaa3487 --- /dev/null +++ b/mmdeploy/codebase/mmseg/models/utils/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .up_conv_block import up_conv_block__forward + +__all__ = ['up_conv_block__forward'] diff --git a/mmdeploy/codebase/mmseg/models/utils/up_conv_block.py b/mmdeploy/codebase/mmseg/models/utils/up_conv_block.py new file mode 100644 index 0000000000..2ca7592851 --- /dev/null +++ b/mmdeploy/codebase/mmseg/models/utils/up_conv_block.py @@ -0,0 +1,40 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
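The EMAModule rewrite above only swaps the `torch.einsum` contractions for `torch.bmm` plus a transpose, so that backends without einsum support can run the head; a quick numerical check of that equivalence, with made-up tensor sizes:

import torch

batch, channels, pixels, num_bases = 2, 8, 16, 4
feats = torch.rand(batch, channels, pixels)
bases = torch.rand(batch, channels, num_bases)

# 'bcn,bck->bnk' is a batched matmul of feats^T and bases
attention = torch.bmm(feats.transpose(1, 2), bases)
assert torch.allclose(
    attention, torch.einsum('bcn,bck->bnk', feats, bases), atol=1e-6)

# 'bcn,bnk->bck' is a plain batched matmul
new_bases = torch.bmm(feats, attention)
assert torch.allclose(
    new_bases, torch.einsum('bcn,bnk->bck', feats, attention), atol=1e-6)

The third contraction, 'bck,bnk->bcn', becomes torch.bmm(bases, attention.transpose(1, 2)) by the same argument.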
+ +import torch + +from mmdeploy.core import FUNCTION_REWRITER +from mmdeploy.utils import is_dynamic_shape + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmseg.models.utils.UpConvBlock.forward') +def up_conv_block__forward(ctx, self, skip, x): + """Rewrite `forward` for default backend. + + To support dynamic shape for UNet backbone, + upsample feature maps with `size` instead of `scale_factor` + + Args: + ctx (ContextCaller): The context with additional information. + self: The instance of the original class. + skip (Tensor): Skip branch feature. + x (Tensor): Input feature to be upsampled. + + Returns: + Tensor: Upsampled output feature map. + """ + from mmcv.cnn import ConvModule + + # only valid when self.upsample is from build_upsample_layer + if is_dynamic_shape(ctx.cfg) and not isinstance(self.upsample, ConvModule): + # upsample with `size` instead of `scale_factor` + from mmseg.ops import Upsample + for c in self.upsample.interp_upsample: + if isinstance(c, Upsample): + c.size = skip.shape[-2:] + c.scale_factor = None + + x = self.upsample(x) + out = torch.cat([skip, x], dim=1) + out = self.conv_block(out) + return out diff --git a/mmdeploy/core/optimizers/function_marker.py b/mmdeploy/core/optimizers/function_marker.py index 98a46f6e73..5ad0501593 100644 --- a/mmdeploy/core/optimizers/function_marker.py +++ b/mmdeploy/core/optimizers/function_marker.py @@ -5,7 +5,7 @@ import torch from mmdeploy.core.rewriters import FUNCTION_REWRITER -from mmdeploy.utils import cfg_apply_marks, get_partition_config +from mmdeploy.utils import IR, cfg_apply_marks, get_partition_config MARK_FUNCTION_COUNT = dict() @@ -180,6 +180,20 @@ def impl(ys, prefix, level): return impl(xs, (), level) +@FUNCTION_REWRITER.register_rewriter( + 'mmdeploy.core.optimizers.function_marker.mark_tensors', ir=IR.TORCHSCRIPT) +def remove_mark__torchscript(ctx, xs: Any, *args, **kwargs): + """Disable all marks for TorchScript backend. + + As the Node `mark` is not able to be traced, we just return original input + for the function `mark_tensors`. + + Args: + xs (Any): Input structure which contains tensor. + """ + return xs + + def mark(func_name: Optional[str] = None, inputs: Optional[Sequence[str]] = None, outputs: Optional[Sequence[str]] = None, diff --git a/mmdeploy/core/rewriters/function_rewriter.py b/mmdeploy/core/rewriters/function_rewriter.py index 674361f634..e80ed41d06 100644 --- a/mmdeploy/core/rewriters/function_rewriter.py +++ b/mmdeploy/core/rewriters/function_rewriter.py @@ -1,8 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. 
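The UpConvBlock rewrite above works because upsampling to an explicit `size` always matches the skip branch, whereas a fixed `scale_factor` can be off by one pixel once the encoder has seen an odd spatial size. A minimal illustration (shapes invented):

import torch
import torch.nn.functional as F

skip = torch.rand(1, 8, 25, 25)  # encoder feature before a 2x2 max-pool
x = torch.rand(1, 8, 12, 12)     # decoder feature after that pool (25 // 2)

# scale_factor=2 yields 24x24, which cannot be concatenated with 25x25
up_by_factor = F.interpolate(x, scale_factor=2, mode='bilinear',
                             align_corners=False)
# size=skip.shape[-2:] always matches the skip branch
up_by_size = F.interpolate(x, size=skip.shape[-2:], mode='bilinear',
                           align_corners=False)

print(up_by_factor.shape, up_by_size.shape)  # 24x24 vs 25x25
torch.cat([skip, up_by_size], dim=1)         # cat with up_by_factor would fail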
-from typing import Callable, Dict +from typing import Callable, Dict, List, Optional, Union -from mmdeploy.utils import Backend, get_root_logger -from .rewriter_utils import ContextCaller, RewriterRegistry, import_function +from mmdeploy.utils import IR, Backend, get_root_logger +from .rewriter_utils import (Checker, ContextCaller, RewriterRegistry, + import_function) def _set_func(origin_func_path: str, rewrite_func: Callable): @@ -66,32 +67,33 @@ class FunctionRewriter: def __init__(self): self._registry = RewriterRegistry() - def add_backend(self, backend: str): - """Add a backend by calling the _registry.add_backend.""" - self._registry.add_backend(backend) - - def register_rewriter(self, - func_name: str, - backend: str = Backend.DEFAULT.value, - **kwargs): + def register_rewriter( + self, + func_name: str, + backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, + extra_checkers: Optional[Union[Checker, List[Checker]]] = None, + **kwargs): """The interface of function rewriter decorator. Args: func_name (str): The function name/path to rewrite. - backend (str): The inference engine name. + backend (str): The rewriter will be activated on which backend. + ir (IR): The rewriter will be activated on which IR. + extra_checkers (Checker | List[Checker] | None): Other requirements + defined by Checker. + Returns: Callable: The process of registering function. """ - return self._registry.register_object(func_name, backend, **kwargs) + return self._registry.register_object(func_name, backend, ir, + extra_checkers, **kwargs) - def enter(self, - cfg: Dict = dict(), - backend: str = Backend.DEFAULT.value, - **kwargs): + def enter(self, cfg: Dict = dict(), env: Dict = dict(), **kwargs): """The implementation of function rewrite.""" # Get current records - functions_records = self._registry.get_records(backend) + functions_records = self._registry.get_records(env) self._origin_functions = list() self._additional_functions = list() diff --git a/mmdeploy/core/rewriters/module_rewriter.py b/mmdeploy/core/rewriters/module_rewriter.py index 43720443c6..d0961809a0 100644 --- a/mmdeploy/core/rewriters/module_rewriter.py +++ b/mmdeploy/core/rewriters/module_rewriter.py @@ -1,11 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. import inspect +from typing import Dict, List, Optional, Union import mmcv from torch import nn -from mmdeploy.utils.constants import Backend -from .rewriter_utils import RewriterRegistry, eval_with_import +from mmdeploy.utils.constants import IR, Backend +from .rewriter_utils import (Checker, RewriterRegistry, collect_env, + eval_with_import) class ModuleRewriter: @@ -26,29 +28,33 @@ class ModuleRewriter: def __init__(self): self._registry = RewriterRegistry() - def add_backend(self, backend: str): - """Add a backend by calling the _registry.add_backend.""" - self._registry.add_backend(backend) - - def register_rewrite_module(self, - module_type: str, - backend: str = Backend.DEFAULT.value, - **kwargs): + def register_rewrite_module( + self, + module_type: str, + backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, + extra_checkers: Optional[Union[Checker, List[Checker]]] = None, + **kwargs): """The interface of module rewriter decorator. Args: module_type (str): The module type name to rewrite. - backend (str): The inference engine name. + backend (str): The rewriter will be activated on which backend. + ir (IR): The rewriter will be activated on which IR. + extra_checkers (Checker | List[Checker] | None): Other requirements + defined by Checker. 
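To make the extended `register_rewriter` signature concrete, a hypothetical registration that combines `backend`, `ir` and `extra_checkers` could look like the following sketch; the target path, the rewriter body and the version bound are invented, only the keyword arguments mirror the interface above:

from mmdeploy.core import FUNCTION_REWRITER
from mmdeploy.core.rewriters.rewriter_utils import LibVersionChecker
from mmdeploy.utils import IR, Backend

@FUNCTION_REWRITER.register_rewriter(
    'torchvision.ops.nms',                 # hypothetical rewrite target
    backend=Backend.TENSORRT.value,        # active only when exporting for TensorRT
    ir=IR.ONNX,                            # ... through the ONNX IR ...
    extra_checkers=LibVersionChecker('tensorrt', min_version='8.0'))
def nms__tensorrt(ctx, boxes, scores, iou_threshold):
    # a real rewriter would emit a backend-friendly graph; this one just
    # falls back to the original implementation
    return ctx.origin_func(boxes, scores, iou_threshold)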
Returns: - nn.Module: THe rewritten model. + nn.Module: The rewritten model. """ - return self._registry.register_object(module_type, backend, **kwargs) + return self._registry.register_object(module_type, backend, ir, + extra_checkers, **kwargs) def patch_model(self, model: nn.Module, cfg: mmcv.Config, backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, recursive: bool = True, **kwargs) -> nn.Module: """Replace the models that was registered. @@ -57,6 +63,7 @@ def patch_model(self, model (torch.nn.Module): The model to patch. cfg (Dict): Config dictionary of deployment. backend (str): The inference engine name. + ir (IR): The intermeditate representation name. recursive (bool): The flag to enable recursive patching. Returns: @@ -67,7 +74,9 @@ def patch_model(self, >>> patched_model = patch_model(model, cfg=deploy_cfg, >>> backend=backend) """ - self._collect_record(backend) + # TODO: Make the type of parameter backend to Backend + env = collect_env(Backend.get(backend), ir) + self._collect_record(env) return self._replace_module(model, cfg, recursive, **kwargs) def _replace_one_module(self, module, cfg, **kwargs): @@ -103,9 +112,9 @@ def _replace_module_impl(model, cfg, **kwargs): return _replace_module_impl(model, cfg, **kwargs) - def _collect_record(self, backend: str): + def _collect_record(self, env: Dict): """Collect models in registry.""" self._records = {} - records = self._registry.get_records(backend) + records = self._registry.get_records(env) for name, kwargs in records: self._records[eval_with_import(name)] = kwargs diff --git a/mmdeploy/core/rewriters/rewriter_manager.py b/mmdeploy/core/rewriters/rewriter_manager.py index df7e82703d..de3acaffd2 100644 --- a/mmdeploy/core/rewriters/rewriter_manager.py +++ b/mmdeploy/core/rewriters/rewriter_manager.py @@ -4,9 +4,10 @@ import mmcv import torch.nn as nn -from mmdeploy.utils.constants import Backend +from mmdeploy.utils.constants import IR, Backend from .function_rewriter import FunctionRewriter from .module_rewriter import ModuleRewriter +from .rewriter_utils import collect_env from .symbolic_rewriter import SymbolicRewriter @@ -18,20 +19,8 @@ def __init__(self): self.function_rewriter = FunctionRewriter() self.symbolic_rewriter = SymbolicRewriter() - def add_backend(self, backend: str): - """Add backend to all rewriters. - - Args: - backend (str): The backend to support. - """ - self.module_rewriter.add_backend(backend) - self.function_rewriter.add_backend(backend) - self.symbolic_rewriter.add_backend(backend) - REWRITER_MANAGER = RewriterManager() -for backend in Backend: - REWRITER_MANAGER.add_backend(backend.value) MODULE_REWRITER = REWRITER_MANAGER.module_rewriter FUNCTION_REWRITER = REWRITER_MANAGER.function_rewriter @@ -41,6 +30,7 @@ def add_backend(self, backend: str): def patch_model(model: nn.Module, cfg: mmcv.Config, backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, recursive: bool = True, **kwargs) -> nn.Module: """Patch the model, replace the modules that can be rewritten. Note that @@ -50,6 +40,7 @@ def patch_model(model: nn.Module, model (torch.nn.Module): The model to patch. cfg (Dict): Config dictionary of deployment. backend (str): The inference engine name. + ir (IR): The intermeditate representation name. recursive (bool): The flag to enable recursive patching. 
Returns: @@ -59,7 +50,7 @@ def patch_model(model: nn.Module, >>> from mmdeploy.core import patch_model >>> patched_model = patch_model(model, cfg=deploy_cfg, backend=backend) """ - return MODULE_REWRITER.patch_model(model, cfg, backend, recursive, + return MODULE_REWRITER.patch_model(model, cfg, backend, ir, recursive, **kwargs) @@ -71,6 +62,7 @@ class RewriterContext: Args: cfg (Dict): Config dictionary of deployment. backend (str): The inference engine name. + ir (IR): The intermeditate representation name. rewrite_manager (RewriterManager): An RewriteManager that consists of several rewriters @@ -84,20 +76,19 @@ class RewriterContext: def __init__(self, cfg: Dict = dict(), backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, rewriter_manager: RewriterManager = REWRITER_MANAGER, **kwargs): self._cfg = cfg - self._backend = backend self._kwargs = kwargs self._rewriter_manager = rewriter_manager + self._env = collect_env(Backend.get(backend), ir) def enter(self): """Call the enter() of rewriters.""" - self._rewriter_manager.function_rewriter.enter(self._cfg, - self._backend, + self._rewriter_manager.function_rewriter.enter(self._cfg, self._env, **self._kwargs) - self._rewriter_manager.symbolic_rewriter.enter(self._cfg, - self._backend, + self._rewriter_manager.symbolic_rewriter.enter(self._cfg, self._env, **self._kwargs) def exit(self): diff --git a/mmdeploy/core/rewriters/rewriter_utils.py b/mmdeploy/core/rewriters/rewriter_utils.py index 701078144a..a80fd84738 100644 --- a/mmdeploy/core/rewriters/rewriter_utils.py +++ b/mmdeploy/core/rewriters/rewriter_utils.py @@ -1,8 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. import inspect -from typing import Any, Callable, Dict, List, Optional, Tuple +import warnings +from abc import ABCMeta, abstractmethod +from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from mmdeploy.utils.constants import Backend +import mmdeploy +from mmdeploy.utils.constants import IR, Backend def eval_with_import(path: str) -> Any: @@ -56,6 +59,127 @@ def import_function(path: str) -> Tuple[Callable, Optional[type]]: return obj, None +def collect_env(backend: Backend, ir: IR, **kwargs) -> Dict: + """Collect current environment information, including backend, ir, codebase + version, etc. Rewriters will be checked according to env infos. + + Args: + backend (Backend): Current backend. + ir (IR): Current IR. + + Returns: + Dict: Record the value of Backend and IR as well as the versions of + libraries. + """ + from mmdeploy.utils import get_backend_version, get_codebase_version + env = dict(backend=backend, ir=ir) + env['mmdeploy'] = mmdeploy.__version__ + env.update(get_backend_version()) + env.update(get_codebase_version()) + env.update(kwargs) + return env + + +class Checker(metaclass=ABCMeta): + """The interface for checking whether a rewriter is valid.""" + + def __init__(self): + pass + + @abstractmethod + def check(self, env: Dict) -> bool: + """Check the if the rewriter is valid according to environment. + + Args: + env (Dict): The backend, IR info and version info. + """ + pass + + +class BackendChecker(Checker): + """Checker that determines which backend the rewriter must run on. + + Args: + required_backend (Backend): The rewriter will be activated on + which backend. + """ + + def __init__(self, required_backend: Backend): + super().__init__() + self.required_backend = required_backend + + def check(self, env: Dict) -> bool: + """Check the if the rewriter is valid according to backend. 
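Every checker receives the `env` dictionary assembled by `collect_env` above. For an ONNX Runtime export it looks roughly like the following; the concrete version strings depend on the local installation, only the key set is fixed by the code:

from mmdeploy.utils import IR, Backend

env = {
    'backend': Backend.ONNXRUNTIME,
    'ir': IR.ONNX,
    'mmdeploy': '0.4.0',
    'tensorrt': None,        # backend libraries that cannot be imported map to None
    'onnxruntime': '1.8.1',
    'ncnn': None,
    'mmdet': '2.20.0',       # codebase versions are collected the same way
    'mmseg': None,
    # ... one entry per Codebase enum value
}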
+ + Args: + env (Dict): The backend, IR info and version info. + """ + return env['backend'] == self.required_backend + + +class IRChecker(Checker): + """Checker that determines which IR the rewriter must run on. + + Args: + required_ir (IR): The rewriter will be activated on which IR. + """ + + def __init__(self, required_ir: IR): + super().__init__() + self.required_ir = required_ir + + def check(self, env: Dict) -> bool: + """Check the if the rewriter is valid according to IR. + + Args: + env (Dict): The backend, IR info and version info. + """ + return env['ir'] == self.required_ir + + +class LibVersionChecker(Checker): + """Checker that determines which IR the rewriter must run on. + + Args: + lib (str): The name of library. + min_version (str | None): The rewriter should no lower than which + version. Default to `None`. + max_version (str | None): The rewriter should no greater than which + version. Default to `None`. + """ + + def __init__(self, + lib: str, + min_version: Optional[str] = None, + max_version: Optional[str] = None): + super().__init__() + self.lib = lib + self.min_version = min_version + self.max_version = max_version + + def check(self, env: Dict) -> bool: + """Check the if the rewriter is valid according to library version. + + Args: + env (Dict): The backend, IR info and version info. + """ + # If the library has not been installed + if env[self.lib] is None: + return False + + from packaging import version + valid = True + # The version should no less than min version and no greater than + # max version. + if self.min_version is not None: + if version.parse(env[self.lib]) < version.parse(self.min_version): + valid = False + if self.max_version is not None: + if version.parse(env[self.lib]) > version.parse(self.max_version): + valid = False + return valid + + class RewriterRegistry: """A registry that recoreds rewrite objects. @@ -75,58 +199,128 @@ class RewriterRegistry: >>> records = FUNCTION_REGISTRY.get_record("default") """ - # TODO: replace backend string with "Backend" constant def __init__(self): self._rewrite_records = dict() - self.add_backend(Backend.DEFAULT.value) - - def _check_backend(self, backend: str): - """Check if a backend has been supported.""" - if backend not in self._rewrite_records: - raise Exception('Backend is not supported by registry.') - - def add_backend(self, backend: str): - """Add a backend dictionary.""" - if backend not in self._rewrite_records: - self._rewrite_records[backend] = dict() - - def get_records(self, backend: str) -> List: - """Get all registered records in record table.""" - self._check_backend(backend) - - if backend != Backend.DEFAULT.value: - # Update dict A with dict B. - # Then convert the result dict to a list, while keeping the order - # of A and B: the elements only belong to B should alwarys come - # after the elements only belong to A. - # The complexity is O(n + m). - dict_a = self._rewrite_records[Backend.DEFAULT.value] - dict_b = self._rewrite_records[backend] - records = [] - for k, v in dict_a.items(): - if k in dict_b: - records.append((k, dict_b[k])) + + def get_records(self, env: Dict) -> List: + """Get all registered records that are valid in the given environment + from record table. + + If the backend and IR of rewriter are set to 'default', then the + rewriter is regarded as default rewriter. The default rewriter will be + activated only when all other rewriters are not valid. 
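The version bounds in `LibVersionChecker` above rely on `packaging.version.parse`, which orders release strings numerically rather than lexicographically. The same check in isolation:

from packaging import version

def in_range(installed, min_version=None, max_version=None):
    # mirrors LibVersionChecker.check for a single library
    if installed is None:               # library is not importable
        return False
    v = version.parse(installed)
    if min_version is not None and v < version.parse(min_version):
        return False
    if max_version is not None and v > version.parse(max_version):
        return False
    return True

print(in_range('1.10.0', min_version='1.9'))  # True: 1.10 > 1.9 numerically
print(in_range('1.10.0', max_version='1.9'))  # False
print(in_range(None, min_version='1.9'))      # False: treated as not installed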
If there are + multiple rewriters are valid (except default rewriter), we will + activate the first one (The order is determined by the time when + rewriters are loaded). + + Args: + env (dict): Environment dictionary that includes backend, IR, + codebase version, etc. + + Returns: + List: A list that includes valid records. + """ + default_records = list() + records = list() + + for origin_function, rewriter_records in self._rewrite_records.items(): + default_rewriter = None + final_rewriter = None + for record in rewriter_records: + # Get the checkers of current rewriter + checkers: List[Checker] = record['_checkers'] + + # Check if the rewriter is default rewriter + if len(checkers) == 0: + # Process the default rewriter exceptionally + if default_rewriter is None: + default_rewriter = record + else: + warnings.warn( + 'Detect multiple valid rewriters for' + f'{origin_function}, use the first rewriter.') else: - records.append((k, v)) - for k, v in dict_b.items(): - if k not in dict_a: - records.append((k, v)) - else: - records = list( - self._rewrite_records[Backend.DEFAULT.value].items()) - return records - - def _register(self, name: str, backend: str, **kwargs): + # Check if the checker is valid. + # The checker is valid only if all the checks are passed + valid = True + for checker in checkers: + if not checker.check(env): + valid = False + break + + if valid: + # Check if there are multiple valid rewriters + if final_rewriter is not None: + warnings.warn( + 'Detect multiple valid rewriters for' + f'{origin_function}, use the first rewriter.') + else: + final_rewriter = record + + # Append final rewriter. + # If there is no valid rewriter, try not apply default rewriter + if final_rewriter is not None: + records.append((origin_function, final_rewriter)) + elif default_rewriter is not None: + default_records.append((origin_function, default_rewriter)) + + # Make the default records como to the front of list because we may + # want the non-default records to override them. + return default_records + records + + def _register(self, name: str, backend: Backend, ir: IR, + extra_checkers: List[Checker], **kwargs): """The implementation of register.""" - self._check_backend(backend) - self._rewrite_records[backend][name] = kwargs - def register_object(self, name: str, backend: str, **kwargs) -> Callable: - """The decorator to register an object.""" - self._check_backend(backend) + # Merge checkers to kwargs + record_dict = kwargs + + # Try to create a checker according to 'backend' field + if backend != Backend.DEFAULT: + extra_checkers.append(BackendChecker(backend)) + + # Try to create a checker according to 'ir' field + if ir != IR.DEFAULT: + extra_checkers.append(IRChecker(ir)) + + record_dict['_checkers'] = extra_checkers + + # There may be multiple rewriters of a function/module. We use a list + # to store the rewriters of a function/module. + if name not in self._rewrite_records: + self._rewrite_records[name] = list() + self._rewrite_records[name].append(record_dict) + + def register_object(self, + name: str, + backend: str, + ir: IR, + extra_checkers: Optional[Union[Checker, + List[Checker]]] = None, + **kwargs) -> Callable: + """The decorator to register an object. + + Args: + name (str): The import path to access the function/module. + backend (str): The rewriter will be activated on which backend. + ir (IR): The rewriter will be activated on which ir. + extra_chekcers (None | Checker | List[Checker]): Other requirements + for the rewriters. Default to `None`. 
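One consequence of the `get_records` logic above: for a given function, a rewriter that carries checkers always shadows the checker-less default, and the default records are returned first precisely so that more specific records applied later can override them. A registration sketch (the target path is a placeholder and is only resolved once a RewriterContext is entered):

from mmdeploy.core import FUNCTION_REWRITER

@FUNCTION_REWRITER.register_rewriter('some_lib.some_func')   # default record
def some_func__default(ctx, *args, **kwargs):
    return ctx.origin_func(*args, **kwargs)

@FUNCTION_REWRITER.register_rewriter('some_lib.some_func', backend='tensorrt')
def some_func__tensorrt(ctx, *args, **kwargs):                # gets a BackendChecker
    return ctx.origin_func(*args, **kwargs)

# In a RewriterContext with backend='tensorrt' the second record is selected;
# with backend='onnxruntime' only the default record applies.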
+ + Returns: + Callable: The decorator. + """ + + if extra_checkers is None: + extra_checkers = [] + elif isinstance(extra_checkers, Checker): + extra_checkers = [extra_checkers] + + backend = Backend.get(backend) def decorator(object): - self._register(name, backend, _object=object, **kwargs) + self._register( + name, backend, ir, extra_checkers, _object=object, **kwargs) return object return decorator diff --git a/mmdeploy/core/rewriters/symbolic_rewriter.py b/mmdeploy/core/rewriters/symbolic_rewriter.py index c9c16d071d..dd47cd8d58 100644 --- a/mmdeploy/core/rewriters/symbolic_rewriter.py +++ b/mmdeploy/core/rewriters/symbolic_rewriter.py @@ -1,13 +1,14 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Callable, Dict, Optional, Sequence +from typing import Callable, Dict, List, Optional, Sequence, Union from torch.autograd import Function from torch.onnx.symbolic_helper import parse_args from torch.onnx.symbolic_registry import _registry as pytorch_registry from torch.onnx.symbolic_registry import register_op -from mmdeploy.utils import Backend, get_root_logger -from .rewriter_utils import ContextCaller, RewriterRegistry, eval_with_import +from mmdeploy.utils import IR, Backend, get_root_logger +from .rewriter_utils import (Checker, ContextCaller, RewriterRegistry, + eval_with_import) class SymbolicRewriter: @@ -35,25 +36,27 @@ class SymbolicRewriter: def __init__(self) -> None: self._registry = RewriterRegistry() - def add_backend(self, backend: str): - """Add a backend by calling the _registry.add_backend.""" - self._registry.add_backend(backend) - def register_symbolic(self, func_name: str, backend: str = Backend.DEFAULT.value, is_pytorch: bool = False, arg_descriptors: Optional[Sequence[str]] = None, + ir: IR = IR.DEFAULT, + extra_checkers: Optional[Union[ + Checker, List[Checker]]] = None, **kwargs) -> Callable: """The decorator of the custom symbolic. Args: func_name (str): The function name/path to override the symbolic. - backend (str): The inference engine name. + backend (str): The rewriter will be activated on which backend. is_pytorch (bool): Enable this flag if func_name is the name of \ a pytorch builtin function. arg_descriptors (Sequence[str]): The argument descriptors of the \ symbol. + ir (IR): The rewriter will be activated on which IR. + extra_checkers (Checker | List[Checker] | None): Other requirements + defined by Checker. Returns: Callable: The process of registered symbolic. 
@@ -61,18 +64,20 @@ def register_symbolic(self, return self._registry.register_object( func_name, backend, + ir, + extra_checkers, is_pytorch=is_pytorch, arg_descriptors=arg_descriptors, **kwargs) def enter(self, cfg: Dict = dict(), - backend: str = Backend.DEFAULT.value, + env: Dict = dict(), opset: int = 11, **kwargs): """The implementation of symbolic register.""" # Get current records - symbolic_records = self._registry.get_records(backend) + symbolic_records = self._registry.get_records(env) self._pytorch_symbolic = list() self._extra_symbolic = list() diff --git a/mmdeploy/mmcv/ops/deform_conv.py b/mmdeploy/mmcv/ops/deform_conv.py index c7bdbc43cb..ccd0542678 100644 --- a/mmdeploy/mmcv/ops/deform_conv.py +++ b/mmdeploy/mmcv/ops/deform_conv.py @@ -2,6 +2,33 @@ from mmdeploy.core import SYMBOLIC_REWRITER +@SYMBOLIC_REWRITER.register_symbolic( + 'mmcv.ops.deform_conv.DeformConv2dFunction') +def deform_conv__default(ctx, + g, + input, + offset, + weight, + stride, + padding, + dilation, + groups, + deform_groups, + bias=False, + im2col_step=32): + """Rewrite symbolic function for default backend.""" + return g.op( + 'mmdeploy::MMCVDeformConv2d', + input, + offset, + weight, + stride_i=stride, + padding_i=[p for pair in zip(padding, padding) for p in pair], + dilation_i=dilation, + groups_i=groups, + deformable_groups_i=deform_groups) + + @SYMBOLIC_REWRITER.register_symbolic( 'mmcv.ops.deform_conv.DeformConv2dFunction', backend='openvino') def deform_conv_openvino(ctx, diff --git a/mmdeploy/mmcv/ops/roi_align.py b/mmdeploy/mmcv/ops/roi_align.py index c6da740fe5..33cd7342d5 100644 --- a/mmdeploy/mmcv/ops/roi_align.py +++ b/mmdeploy/mmcv/ops/roi_align.py @@ -1,10 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. from typing import List +import torch from torch import Tensor from mmdeploy.core import SYMBOLIC_REWRITER -from mmdeploy.utils import Backend, get_backend +from mmdeploy.utils import Backend, get_backend, get_ir_config # Here using mmcv.ops.roi_align.__self__ to find @@ -17,7 +18,10 @@ def roi_align_default(ctx, g, input: Tensor, rois: Tensor, sampling_ratio: int, pool_mode: str, aligned: bool): """Rewrite symbolic function for default backend. - Replace onnx::RoiAlign with mmdeploy::MMCVRoiAlign. + Replace onnx::RoiAlign with mmcv::MMCVRoiAlign for PPLNN. For ONNXRuntime, + align operation get done outside the inference engine for opset versions + lower than 16. By default, onnx::RoiAlign get replaced to + mmdeploy::MMCVRoiAlign. Args: ctx (ContextCaller): The context with additional information. @@ -40,6 +44,58 @@ def roi_align_default(ctx, g, input: Tensor, rois: Tensor, backend = get_backend(ctx.cfg) if backend == Backend.PPLNN: domain = 'mmcv' + elif backend == Backend.ONNXRUNTIME: + from torch.onnx.symbolic_opset9 import _cast_Long + from torch.onnx.symbolic_opset11 import add, select, squeeze + batch_indices = _cast_Long( + g, + squeeze( + g, + select( + g, rois, 1, + g.op( + 'Constant', + value_t=torch.tensor([0], dtype=torch.long))), 1), + False) + rois = select( + g, rois, 1, + g.op( + 'Constant', + value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long))) + ir_cfg = get_ir_config(ctx.cfg) + opset_version = ir_cfg.get('opset_version', 11) + if opset_version < 16: + # preprocess rois to make compatible with opset 16- + # as for opset 16+, `aligned` get implemented inside onnxruntime. 
+ if aligned is True: + rois = add( + g, rois, + g.op( + 'Constant', + value_t=torch.tensor([-0.5 / spatial_scale], + dtype=torch.float))) + return g.op( + 'RoiAlign', + input, + rois, + batch_indices, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=sampling_ratio, + mode_s=pool_mode) + else: + return g.op( + 'RoiAlign', + input, + rois, + batch_indices, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=sampling_ratio, + mode_s=pool_mode, + aligned_i=aligned) else: domain = 'mmdeploy' return g.op( diff --git a/mmdeploy/pytorch/functions/topk.py b/mmdeploy/pytorch/functions/topk.py index be05fe93e0..64a416b28c 100644 --- a/mmdeploy/pytorch/functions/topk.py +++ b/mmdeploy/pytorch/functions/topk.py @@ -4,6 +4,7 @@ import torch from mmdeploy.core import FUNCTION_REWRITER +from mmdeploy.utils import get_root_logger @FUNCTION_REWRITER.register_rewriter(func_name='torch.topk', backend='default') @@ -47,7 +48,8 @@ def topk__tensorrt(ctx, TensorRT does not support topk with dynamic k. This function cast k to constant integer. """ - + # https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#topKsetup + MAX_TOPK_K = 3840 if dim is None: dim = int(input.ndim - 1) size = input.shape[dim] @@ -55,4 +57,11 @@ def topk__tensorrt(ctx, k = size if not isinstance(k, int): k = int(k) + if k > MAX_TOPK_K: + logger = get_root_logger() + logger.warning( + f'Maximum K of TopK in TensorRT is {MAX_TOPK_K}, but given {k}.' + f' Note that k will be set to {MAX_TOPK_K}.') + k = MAX_TOPK_K + return ctx.origin_func(input, k, dim=dim, largest=largest, sorted=sorted) diff --git a/mmdeploy/utils/__init__.py b/mmdeploy/utils/__init__.py index b4b05bd070..4847ba7b09 100644 --- a/mmdeploy/utils/__init__.py +++ b/mmdeploy/utils/__init__.py @@ -6,8 +6,9 @@ get_model_inputs, get_onnx_config, get_partition_config, get_task_type, is_dynamic_batch, is_dynamic_shape, load_config) -from .constants import SDK_TASK_MAP, Backend, Codebase, Task +from .constants import IR, SDK_TASK_MAP, Backend, Codebase, Task from .device import parse_cuda_device_id, parse_device_id +from .env import get_backend_version, get_codebase_version, get_library_version from .utils import get_file_path, get_root_logger, target_wrapper __all__ = [ @@ -18,5 +19,6 @@ 'get_model_inputs', 'cfg_apply_marks', 'get_input_shape', 'parse_device_id', 'parse_cuda_device_id', 'get_codebase_config', 'get_backend_config', 'get_root_logger', 'get_dynamic_axes', - 'target_wrapper', 'SDK_TASK_MAP', 'get_file_path' + 'target_wrapper', 'SDK_TASK_MAP', 'get_library_version', + 'get_codebase_version', 'get_backend_version', 'IR', 'get_file_path' ] diff --git a/mmdeploy/utils/config_utils.py b/mmdeploy/utils/config_utils.py index f1842a6672..3aab4e29e4 100644 --- a/mmdeploy/utils/config_utils.py +++ b/mmdeploy/utils/config_utils.py @@ -4,6 +4,7 @@ import mmcv from .constants import Backend, Codebase, Task +from .utils import deprecate def load_config(*args) -> List[mmcv.Config]: @@ -126,6 +127,7 @@ def get_ir_config(deploy_cfg: Union[str, mmcv.Config]) -> Dict: return ir_config +@deprecate(dst_obj=get_ir_config) def get_onnx_config(deploy_cfg: Union[str, mmcv.Config]) -> Dict: """Get the onnx parameters in export() from config. 
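The ONNX Runtime branch of the RoiAlign symbolic above does two things that are easier to see in eager mode: it splits the mmcv-style rois into batch indices plus four box columns, and, for aligned=True with opset < 16, it shifts the boxes so that a standard (unaligned) RoiAlign reproduces aligned behaviour. A sketch with invented values:

import torch

# rois in mmcv layout: (batch_index, x1, y1, x2, y2)
rois = torch.tensor([[0., 10., 20., 50., 60.],
                     [1., 15., 25., 55., 65.]])
spatial_scale, aligned, opset_version = 0.25, True, 11

batch_indices = rois[:, 0].long()   # what the select/squeeze/_cast_Long ops build
boxes = rois[:, 1:5]                # four-column rois for the ONNX RoiAlign op
if aligned and opset_version < 16:
    # aligned RoiAlign uses x * spatial_scale - 0.5; shifting the boxes by
    # -0.5 / spatial_scale beforehand gives the unaligned op the same start
    boxes = boxes - 0.5 / spatial_scale

print(batch_indices.tolist())  # [0, 1]
print(boxes[0].tolist())       # [8.0, 18.0, 48.0, 58.0]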
@@ -135,7 +137,6 @@ def get_onnx_config(deploy_cfg: Union[str, mmcv.Config]) -> Dict: Returns: Dict: The config dictionary of onnx parameters """ - onnx_config = get_ir_config(deploy_cfg=deploy_cfg) ir_type = onnx_config.get('type', None) assert ir_type is None or ir_type == 'onnx', 'Expect IR type is ONNX,'\ @@ -193,6 +194,10 @@ def is_dynamic_shape(deploy_cfg: Union[str, mmcv.Config], bool: Is config set dynamic shape (axis 2 and 3). """ + # Always dynamic for exporting torchscript + if get_backend(deploy_cfg) == Backend.TORCHSCRIPT: + return True + deploy_cfg = load_config(deploy_cfg)[0] ir_config = get_ir_config(deploy_cfg) @@ -353,18 +358,21 @@ def get_dynamic_axes( Dictionary with dynamic axes. """ deploy_cfg = load_config(deploy_cfg)[0] + ir_config = get_ir_config(deploy_cfg) + + # TODO onnx will be deprecated in the future onnx_config = deploy_cfg.get('onnx_config', None) - if onnx_config is None: + if onnx_config is None and ir_config == {}: raise KeyError( 'Field \'onnx_config\' was not found in \'deploy_cfg\'.') - dynamic_axes = onnx_config.get('dynamic_axes', None) + dynamic_axes = ir_config.get('dynamic_axes', None) if dynamic_axes and not isinstance(dynamic_axes, Dict): if axes_names is None: axes_names = [] - input_names = onnx_config.get('input_names', None) + input_names = ir_config.get('input_names', None) if input_names: axes_names += input_names - output_names = onnx_config.get('output_names', None) + output_names = ir_config.get('output_names', None) if output_names: axes_names += output_names if not axes_names: diff --git a/mmdeploy/utils/constants.py b/mmdeploy/utils/constants.py index da07cb28e7..bddd09de85 100644 --- a/mmdeploy/utils/constants.py +++ b/mmdeploy/utils/constants.py @@ -24,6 +24,7 @@ class Task(AdvancedEnum): CLASSIFICATION = 'Classification' OBJECT_DETECTION = 'ObjectDetection' INSTANCE_SEGMENTATION = 'InstanceSegmentation' + VOXEL_DETECTION = 'VoxelDetection' POSE_DETECTION = 'PoseDetection' @@ -34,9 +35,17 @@ class Codebase(AdvancedEnum): MMCLS = 'mmcls' MMOCR = 'mmocr' MMEDIT = 'mmedit' + MMDET3D = 'mmdet3d' MMPOSE = 'mmpose' +class IR(AdvancedEnum): + """Define intermediate representation enumerations.""" + ONNX = 'onnx' + TORCHSCRIPT = 'torchscript' + DEFAULT = 'default' + + class Backend(AdvancedEnum): """Define backend enumerations.""" PYTORCH = 'pytorch' @@ -46,6 +55,7 @@ class Backend(AdvancedEnum): NCNN = 'ncnn' OPENVINO = 'openvino' SDK = 'sdk' + TORCHSCRIPT = 'torchscript' DEFAULT = 'default' @@ -63,5 +73,7 @@ class Backend(AdvancedEnum): Task.TEXT_DETECTION: dict(component='TextDetHead', cls_name='TextDetector'), Task.TEXT_RECOGNITION: - dict(component='CTCConvertor', cls_name='TextRecognizer') + dict(component='CTCConvertor', cls_name='TextRecognizer'), + Task.POSE_DETECTION: + dict(component='Detector', cls_name='PoseDetector') } diff --git a/mmdeploy/utils/env.py b/mmdeploy/utils/env.py new file mode 100644 index 0000000000..8cc2cbd3d5 --- /dev/null +++ b/mmdeploy/utils/env.py @@ -0,0 +1,49 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import importlib + +from mmdeploy.utils import Codebase + + +def get_library_version(lib): + """Try to get the version of a library if it has been installed. + + Args: + lib (str): The name of library. + + Returns: + None | str: If the library has been installed, return version. 
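Since `get_dynamic_axes` now reads from the IR config, dynamic axes can be declared under `ir_config` regardless of the chosen IR; a minimal illustrative deploy config (axis names and values invented):

import mmcv

from mmdeploy.utils import get_dynamic_axes

deploy_cfg = mmcv.Config(
    dict(
        ir_config=dict(
            type='onnx',
            input_names=['input'],
            output_names=['output'],
            dynamic_axes={
                'input': {0: 'batch', 2: 'height', 3: 'width'},
                'output': {0: 'batch'}
            }),
        backend_config=dict(type='onnxruntime')))

print(get_dynamic_axes(deploy_cfg))   # the dict above is returned unchanged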
+ """ + try: + lib = importlib.import_module(lib) + except Exception: + version = None + else: + version = lib.__version__ + + return version + + +def get_codebase_version(): + """Get the version dictionary of all supported codebases. + + Returns: + Dict: The name and the version of supported codebases. + """ + version_dict = dict() + for enum in Codebase: + codebase = enum.value + version_dict[codebase] = get_library_version(codebase) + return version_dict + + +def get_backend_version(): + """Get the version dictionary of some supported backend. + + Returns: + Dict: The name and the version of some supported backend. + """ + backend_library_list = ['tensorrt', 'onnxruntime', 'ncnn'] + version_dict = dict() + for backend in backend_library_list: + version_dict[backend] = get_library_version(backend) + return version_dict diff --git a/mmdeploy/utils/export_info.py b/mmdeploy/utils/export_info.py index 70e14a67c8..2ae6dd2c29 100644 --- a/mmdeploy/utils/export_info.py +++ b/mmdeploy/utils/export_info.py @@ -1,14 +1,14 @@ # Copyright (c) OpenMMLab. All rights reserved. import importlib +import re from typing import Dict, List, Tuple, Union import mmcv from mmdeploy.apis import build_task_processor from mmdeploy.utils import (Backend, Task, get_backend, get_codebase, - get_common_config, get_onnx_config, - get_root_logger, get_task_type, is_dynamic_batch, - load_config) + get_common_config, get_ir_config, get_root_logger, + get_task_type, is_dynamic_batch, load_config) from mmdeploy.utils.constants import SDK_TASK_MAP as task_map @@ -89,12 +89,25 @@ def get_models(deploy_cfg: Union[str, mmcv.Config], """ name, _ = get_model_name_customs(deploy_cfg, model_cfg, work_dir) precision = 'FP32' - onnx_name = get_onnx_config(deploy_cfg)['save_file'] - net = onnx_name + ir_name = get_ir_config(deploy_cfg)['save_file'] + net = ir_name weights = '' backend = get_backend(deploy_cfg=deploy_cfg) + + def replace_suffix(file_name: str, dst_suffix: str) -> str: + """Replace the suffix to the destination one. + + Args: + file_name (str): The file name to be operated. + dst_suffix (str): The destination suffix. + + Return: + str: The file name of which the suffix has been replaced. 
+ """ + return re.sub(r'\.[a-z]+', dst_suffix, file_name) + if backend == Backend.TENSORRT: - net = onnx_name.replace('.onnx', '.engine') + net = replace_suffix(ir_name, '.engine') common_cfg = get_common_config(deploy_cfg) fp16_mode = common_cfg.get('fp16_mode', False) int8_mode = common_cfg.get('int8_mode', False) @@ -104,17 +117,17 @@ def get_models(deploy_cfg: Union[str, mmcv.Config], precision = 'INT8' elif backend == Backend.PPLNN: precision = 'FP16' - weights = onnx_name.replace('.onnx', '.json') - net = onnx_name + weights = replace_suffix(ir_name, '.json') + net = ir_name elif backend == Backend.OPENVINO: - net = onnx_name.replace('.onnx', '.xml') - weights = onnx_name.replace('.onnx', '.bin') + net = replace_suffix(ir_name, '.xml') + weights = replace_suffix(ir_name, '.bin') elif backend == Backend.NCNN: - net = onnx_name.replace('.onnx', '.param') - weights = onnx_name.replace('.onnx', '.bin') + net = replace_suffix(ir_name, '.param') + weights = replace_suffix(ir_name, '.bin') if 'precision' in deploy_cfg['backend_config']: precision = deploy_cfg['backend_config']['precision'] - elif backend == Backend.ONNXRUNTIME: + elif backend in [Backend.ONNXRUNTIME, Backend.TORCHSCRIPT]: pass else: raise NotImplementedError(f'Not supported backend: {backend.value}.') @@ -151,8 +164,8 @@ def get_inference_info(deploy_cfg: mmcv.Config, model_cfg: mmcv.Config, module = 'Net' input = ['prep_output'] output = ['infer_output'] - onnx_config = get_onnx_config(deploy_cfg) - input_names = onnx_config.get('input_names', None) + ir_config = get_ir_config(deploy_cfg) + input_names = ir_config.get('input_names', None) input_name = input_names[0] if input_names else 'input' input_map = dict(img=input_name) return_dict = dict( @@ -323,14 +336,14 @@ def get_detail(deploy_cfg: mmcv.Config, model_cfg: mmcv.Config, codebase['pth'] = pth codebase['config'] = model_cfg.filename codebase_config = deploy_cfg.get('codebase_config', dict()) - onnx_config = get_onnx_config(deploy_cfg) + ir_config = get_ir_config(deploy_cfg) backend_config = deploy_cfg.get('backend_config', dict()) calib_config = deploy_cfg.get('calib_config', dict()) return dict( version=version, codebase=codebase, codebase_config=codebase_config, - onnx_config=onnx_config, + onnx_config=ir_config, backend_config=backend_config, calib_config=calib_config) diff --git a/mmdeploy/utils/test.py b/mmdeploy/utils/test.py index 3fd7457f0c..6f1decaa6b 100644 --- a/mmdeploy/utils/test.py +++ b/mmdeploy/utils/test.py @@ -14,7 +14,7 @@ import mmdeploy.codebase # noqa: F401,F403 from mmdeploy.core import RewriterContext, patch_model -from mmdeploy.utils import (Backend, get_backend, get_dynamic_axes, +from mmdeploy.utils import (IR, Backend, get_backend, get_dynamic_axes, get_ir_config, get_onnx_config) @@ -93,6 +93,8 @@ def check_backend(backend: Backend, require_plugin: bool = False): from mmdeploy.apis.ncnn import is_plugin_available elif backend == Backend.OPENVINO: from mmdeploy.apis.openvino import is_available + elif backend == Backend.TORCHSCRIPT: + from mmdeploy.backend.torchscript import ops_available as is_available else: warnings.warn('The backend checker is not available') return @@ -331,9 +333,12 @@ def get_flatten_inputs( if isinstance(value, torch.Tensor): flatten_inputs[name] = value elif isinstance(value, (list, tuple)): - for i, tensor in enumerate(value): - name_i = f'{name}_{i}' - flatten_inputs[name_i] = tensor + if len(value) == 1: + flatten_inputs[name] = value[0] + else: + for i, tensor in enumerate(value): + name_i = f'{name}_{i}' + 
flatten_inputs[name_i] = tensor return flatten_inputs @@ -356,15 +361,29 @@ def get_onnx_model(wrapped_model: nn.Module, patched_model = patch_model( wrapped_model, cfg=deploy_cfg, backend=backend.value) flatten_model_inputs = get_flatten_inputs(model_inputs) - input_names = [k for k, v in flatten_model_inputs.items() if k != 'ctx'] + input_names = onnx_cfg.get('input_names', None) + if input_names is None: + input_names = [ + k for k, v in flatten_model_inputs.items() if k != 'ctx' + ] output_names = onnx_cfg.get('output_names', None) dynamic_axes = get_dynamic_axes(deploy_cfg, input_names) + class DummyModel(torch.nn.Module): + + def __init__(self): + super(DummyModel, self).__init__() + self.model = patched_model + + def forward(self, inputs: dict): + return self.model(**inputs) + + model = DummyModel().eval() + with RewriterContext( cfg=deploy_cfg, backend=backend.value, opset=11), torch.no_grad(): torch.onnx.export( - patched_model, - tuple([v for k, v in model_inputs.items()]), + model, (model_inputs, {}), onnx_file_path, export_params=True, input_names=input_names, @@ -375,14 +394,40 @@ def get_onnx_model(wrapped_model: nn.Module, return onnx_file_path -def get_backend_outputs(onnx_file_path: str, +def get_ts_model(wrapped_model: nn.Module, + model_inputs: Dict[str, Union[Tuple, List, torch.Tensor]], + deploy_cfg: mmcv.Config) -> str: + """To get path to onnx model after export. + + Args: + wrapped_model (nn.Module): The input model. + model_inputs (dict): Inputs for model. + deploy_cfg (mmcv.Config): Deployment config. + + Returns: + str: The path to the TorchScript model file. + """ + ir_file_path = tempfile.NamedTemporaryFile(suffix='.pt').name + backend = get_backend(deploy_cfg) + patched_model = patch_model( + wrapped_model, cfg=deploy_cfg, backend=backend.value) + + from mmdeploy.apis.pytorch2torchscript import torch2torchscript_impl + torch2torchscript_impl( + patched_model, [v for _, v in model_inputs.items()], + deploy_cfg=deploy_cfg, + output_file=ir_file_path) + return ir_file_path + + +def get_backend_outputs(ir_file_path: str, model_inputs: Dict[str, Union[Tuple, List, torch.Tensor]], deploy_cfg: mmcv.Config) -> Union[Any, None]: """To get backend outputs of model. Args: - onnx_file_path (str): The path to the ONNX file. + ir_file_path (str): The path to the IR file. model_inputs (dict): Inputs for model. deploy_cfg (mmcv.Config): Deployment config. 
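The `DummyModel` wrapper above exists because `torch.onnx.export` feeds the model positional arguments, while the patched models expect keyword inputs; packing the whole input dict as the single positional argument (with a trailing empty dict to signal "no kwargs") sidesteps that. A minimal sketch of the same trick with an invented two-input model:

import torch

class TwoInputModel(torch.nn.Module):
    def forward(self, img, mask):
        return img * mask

class DictWrapper(torch.nn.Module):
    # adapts a kwargs-style forward to the positional calling convention
    # expected by torch.onnx.export
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, inputs: dict):
        return self.model(**inputs)

model_inputs = {'img': torch.rand(1, 3, 8, 8), 'mask': torch.rand(1, 3, 8, 8)}
wrapped = DictWrapper(TwoInputModel()).eval()
torch.onnx.export(
    wrapped, (model_inputs, {}), 'two_input.onnx',
    input_names=['img', 'mask'], output_names=['out'], opset_version=11)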
@@ -393,8 +438,13 @@ def get_backend_outputs(onnx_file_path: str, """ backend = get_backend(deploy_cfg) flatten_model_inputs = get_flatten_inputs(model_inputs) - input_names = [k for k, v in flatten_model_inputs.items() if k != 'ctx'] - output_names = get_ir_config(deploy_cfg).get('output_names', None) + ir_config = get_ir_config(deploy_cfg) + input_names = ir_config.get('input_names', None) + output_names = ir_config.get('output_names', None) + if input_names is None: + input_names = [ + k for k, v in flatten_model_inputs.items() if k != 'ctx' + ] # prepare backend model and input features if backend == Backend.TENSORRT: @@ -408,7 +458,7 @@ def get_backend_outputs(onnx_file_path: str, trt_file_path, 0, deploy_cfg=deploy_cfg, - onnx_model=onnx_file_path) + onnx_model=ir_file_path) backend_files = [trt_file_path] for k, v in model_inputs.items(): model_inputs[k] = model_inputs[k].cuda() @@ -441,7 +491,7 @@ def get_backend_outputs(onnx_file_path: str, backend_feats[input_names[i]] = feature_list[i] else: backend_feats[str(i)] = feature_list[i] - backend_files = [onnx_file_path] + backend_files = [ir_file_path] device = 'cpu' elif backend == Backend.NCNN: import mmdeploy.apis.ncnn as ncnn_apis @@ -449,8 +499,8 @@ def get_backend_outputs(onnx_file_path: str, return None work_dir = tempfile.TemporaryDirectory().name param_path, bin_path = ncnn_apis.get_output_model_file( - onnx_file_path, work_dir) - ncnn_apis.onnx2ncnn(onnx_file_path, param_path, bin_path) + ir_file_path, work_dir) + ncnn_apis.onnx2ncnn(ir_file_path, param_path, bin_path) backend_files = [param_path, bin_path] backend_feats = flatten_model_inputs device = 'cpu' @@ -459,27 +509,38 @@ def get_backend_outputs(onnx_file_path: str, import mmdeploy.apis.openvino as openvino_apis if not openvino_apis.is_available(): return None + from mmdeploy.apis.openvino import get_mo_options_from_cfg openvino_work_dir = tempfile.TemporaryDirectory().name openvino_file_path = openvino_apis.get_output_model_file( - onnx_file_path, openvino_work_dir) + ir_file_path, openvino_work_dir) input_info = { name: value.shape for name, value in flatten_model_inputs.items() } - openvino_apis.onnx2openvino(input_info, output_names, onnx_file_path, - openvino_work_dir) + mo_options = get_mo_options_from_cfg(deploy_cfg) + openvino_apis.onnx2openvino(input_info, output_names, ir_file_path, + openvino_work_dir, mo_options) backend_files = [openvino_file_path] backend_feats = flatten_model_inputs device = 'cpu' + elif backend == Backend.DEFAULT: return None + elif backend == Backend.TORCHSCRIPT: + backend_files = [ir_file_path] + device = 'cpu' + backend_feats = [v for _, v in model_inputs.items()] else: raise NotImplementedError( f'Unimplemented backend type: {backend.value}') from mmdeploy.codebase.base import BaseBackendModel - backend_model = BaseBackendModel._build_wrapper(backend, backend_files, - device, output_names) + backend_model = BaseBackendModel._build_wrapper( + backend, + backend_files, + device, + input_names=input_names, + output_names=output_names) with torch.no_grad(): backend_outputs = backend_model(backend_feats) backend_outputs = backend_model.output_to_list(backend_outputs) @@ -511,11 +572,15 @@ def get_rewrite_outputs(wrapped_model: nn.Module, cfg=deploy_cfg, backend=backend.value, opset=11), torch.no_grad(): ctx_outputs = wrapped_model(**model_inputs) - onnx_file_path = get_onnx_model(wrapped_model, model_inputs, deploy_cfg) + ir_type = get_ir_config(deploy_cfg).get('type', None) + if ir_type == IR.TORCHSCRIPT.value: + ir_file_path = 
get_ts_model(wrapped_model, model_inputs, deploy_cfg) + else: # TODO onnx as default, make it strict when more IR types involved + ir_file_path = get_onnx_model(wrapped_model, model_inputs, deploy_cfg) backend_outputs = None if run_with_backend: - backend_outputs = get_backend_outputs(onnx_file_path, model_inputs, + backend_outputs = get_backend_outputs(ir_file_path, model_inputs, deploy_cfg) if backend_outputs is None: diff --git a/mmdeploy/utils/utils.py b/mmdeploy/utils/utils.py index 47a5a18c5b..10d6d02bd0 100644 --- a/mmdeploy/utils/utils.py +++ b/mmdeploy/utils/utils.py @@ -4,7 +4,7 @@ import os import sys import traceback -from typing import Callable, Optional +from typing import Callable, Optional, Union import torch.multiprocessing as mp from mmcv.utils import get_logger @@ -60,6 +60,53 @@ def get_root_logger(log_file=None, log_level=logging.INFO) -> logging.Logger: return logger +def deprecate(status: str = 'future', + dst_obj: Optional[Union[object, str]] = None, + msg: str = '', + *args, + **kwargs) -> None: + """Deprecate a function or a class. + + Args: + status (str, optional): The status of the function or class. + Defaults to future. + dst_obj (str, object, optional): The object that will replace + the original one. Defaults to None. + msg (str): Additional message to be printed. + + Examples: + >>> from math import ceil + >>> from mmdeploy.utils.utils import deprecate + >>> @deprecate(status='past', dst_obj=ceil, msg='') + >>> def my_ceil(num): + >>> num = num if(num==int(num)) else int(num) + 1 + >>> return num + """ + logger = get_root_logger() + + def _register(src_obj): + + def fun(*args, **kwargs): + if status == 'future': + logger.warning( + f'DeprecationWarning: {src_obj.__name__} will be ' + f'deprecated in the future. {msg}') + elif status == 'past': + assert dst_obj is not None, 'for deprecated object, there' + ' must be a destination object' + logger.warning( + f'DeprecationWarning: {src_obj.__name__} was deprecated,' + f' use {dst_obj.__name__} instead. {msg}') + else: + raise KeyError(f'Unexpected key {status}') + result = src_obj(*args, **kwargs) + return result + + return fun + + return _register + + def get_file_path(prefix, candidates) -> str: """Search for file in candidates. diff --git a/mmdeploy/version.py b/mmdeploy/version.py index f694e0d9de..821f44df9c 100644 --- a/mmdeploy/version.py +++ b/mmdeploy/version.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from typing import Tuple -__version__ = '0.3.0' +__version__ = '0.4.0' short_version = __version__ diff --git a/tests/test_apis/test_onnx2openvino.py b/tests/test_apis/test_onnx2openvino.py index 43fa623cd2..885d00b312 100644 --- a/tests/test_apis/test_onnx2openvino.py +++ b/tests/test_apis/test_onnx2openvino.py @@ -60,9 +60,28 @@ def get_outputs(pytorch_model, openvino_model_path, input, input_name, return output_pytorch, openvino_output +def get_base_deploy_cfg(): + deploy_cfg = mmcv.Config(dict(backend_config=dict(type='openvino'))) + return deploy_cfg + + +def get_deploy_cfg_with_mo_args(): + deploy_cfg = mmcv.Config( + dict( + backend_config=dict( + type='openvino', + mo_options=dict( + args={'--data_type': 'FP32'}, flags=['--disable_fusing' + ])))) + return deploy_cfg + + +@pytest.mark.parametrize('get_deploy_cfg', + [get_base_deploy_cfg, get_deploy_cfg_with_mo_args]) @backend_checker(Backend.OPENVINO) -def test_onnx2openvino(): - from mmdeploy.apis.openvino import get_output_model_file, onnx2openvino +def test_onnx2openvino(get_deploy_cfg): + from mmdeploy.apis.openvino import (get_mo_options_from_cfg, + get_output_model_file, onnx2openvino) pytorch_model = TestModel().eval() export_img = torch.rand([1, 3, 8, 8]) onnx_file = tempfile.NamedTemporaryFile(suffix='.onnx').name @@ -74,7 +93,10 @@ def test_onnx2openvino(): input_info = {input_name: export_img.shape} output_names = [output_name] openvino_dir = tempfile.TemporaryDirectory().name - onnx2openvino(input_info, output_names, onnx_file, openvino_dir) + deploy_cfg = get_deploy_cfg() + mo_options = get_mo_options_from_cfg(deploy_cfg) + onnx2openvino(input_info, output_names, onnx_file, openvino_dir, + mo_options) openvino_model_path = get_output_model_file(onnx_file, openvino_dir) assert osp.exists(openvino_model_path), \ 'The file (.xml) for OpenVINO IR has not been created.' diff --git a/tests/test_apis/test_torch2onnx.py b/tests/test_apis/test_torch2onnx.py index 16dc2b0d50..349a9c642a 100644 --- a/tests/test_apis/test_torch2onnx.py +++ b/tests/test_apis/test_torch2onnx.py @@ -53,9 +53,8 @@ def get_deploy_cfg(input_name, output_name, dynamic_axes): input_names=[input_name], output_names=[output_name], input_shape=None), - codebase_config=dict(type='mmedit', task=''), # useless - backend_config=dict(type='onnxruntime') # useless - )) + codebase_config=dict(type='mmedit', task=''), + backend_config=dict(type='onnxruntime'))) @pytest.mark.parametrize('input_name', [input_name]) diff --git a/tests/test_apis/test_torch2torchscript.py b/tests/test_apis/test_torch2torchscript.py new file mode 100644 index 0000000000..4bb1c5c998 --- /dev/null +++ b/tests/test_apis/test_torch2torchscript.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import importlib +import os.path as osp +import tempfile + +import mmcv +import pytest + +from mmdeploy.apis import torch2torchscript +from mmdeploy.utils import IR, Backend +from mmdeploy.utils.test import get_random_name + +ts_file = tempfile.NamedTemporaryFile(suffix='.pt').name +input_name = get_random_name() +output_name = get_random_name() + + +def get_deploy_cfg(input_name, output_name): + return mmcv.Config( + dict( + ir_config=dict( + type=IR.TORCHSCRIPT.value, + input_names=[input_name], + output_names=[output_name], + input_shape=None), + codebase_config=dict(type='mmedit', task='SuperResolution'), + backend_config=dict(type=Backend.TORCHSCRIPT.value))) + + +def get_model_cfg(): + return mmcv.Config( + dict( + model=dict( + pretrained=None, + type='BasicRestorer', + generator=dict( + type='RRDBNet', + in_channels=3, + out_channels=3, + mid_channels=64, + num_blocks=23, + growth_channels=32), + pixel_loss=dict( + type='L1Loss', loss_weight=1.0, reduction='mean')), + test_cfg=dict(metrics='PSNR'), + test_pipeline=[ + dict( + type='LoadImageFromFile', + io_backend='disk', + key='lq', + flag='unchanged'), + dict( + type='LoadImageFromFile', + io_backend='disk', + key='gt', + flag='unchanged'), + dict(type='RescaleToZeroOne', keys=['lq', 'gt']), + dict( + type='Normalize', + keys=['lq', 'gt'], + mean=[0, 0, 0], + std=[1, 1, 1], + to_rgb=True), + dict( + type='Collect', + keys=['lq', 'gt'], + meta_keys=['lq_path', 'lq_path']), + dict(type='ImageToTensor', keys=['lq', 'gt']) + ])) + + +@pytest.mark.parametrize('input_name', [input_name]) +@pytest.mark.parametrize('output_name', [output_name]) +@pytest.mark.skipif( + not importlib.util.find_spec('mmedit'), reason='requires mmedit') +def test_torch2torchscript(input_name, output_name): + import numpy as np + deploy_cfg = get_deploy_cfg(input_name, output_name) + torch2torchscript( + np.random.rand(8, 8, 3), + '', + ts_file, + deploy_cfg, + model_cfg=get_model_cfg(), + device='cpu') + + assert osp.exists(ts_file) diff --git a/tests/test_backend/test_wrapper.py b/tests/test_backend/test_wrapper.py index f42d915131..b177a2ee58 100644 --- a/tests/test_backend/test_wrapper.py +++ b/tests/test_backend/test_wrapper.py @@ -10,6 +10,7 @@ from mmdeploy.utils.test import check_backend onnx_file = tempfile.NamedTemporaryFile(suffix='.onnx').name +ts_file = tempfile.NamedTemporaryFile(suffix='.pt').name test_img = torch.rand(1, 3, 8, 8) output_names = ['output'] input_names = ['input'] @@ -44,6 +45,18 @@ def generate_onnx_file(): dynamic_axes=None) +@pytest.fixture(autouse=True, scope='module') +def generate_torchscript_file(): + import mmcv + + from mmdeploy.apis import torch2torchscript_impl + deploy_cfg = mmcv.Config( + {'backend_config': dict(type=Backend.TORCHSCRIPT.value)}) + with torch.no_grad(): + torch2torchscript_impl(model, torch.rand(1, 3, 8, 8), deploy_cfg, + ts_file) + + def onnx2backend(backend, onnx_file): if backend == Backend.TENSORRT: from mmdeploy.backend.tensorrt import (create_trt_engine, @@ -107,6 +120,11 @@ def create_wrapper(backend, model_files): from mmdeploy.backend.openvino import OpenVINOWrapper openvino_model = OpenVINOWrapper(model_files, output_names) return openvino_model + elif backend == Backend.TORCHSCRIPT: + from mmdeploy.backend.torchscript import TorchscriptWrapper + torchscript_model = TorchscriptWrapper( + model_files, input_names=input_names, output_names=output_names) + return torchscript_model else: raise NotImplementedError(f'Unknown backend type: {backend.value}') @@ -134,20 +152,26 @@ def 
run_wrapper(backend, wrapper, input): results = wrapper({'input': input})['output'] results = results.detach().cpu() return results + elif backend == Backend.TORCHSCRIPT: + results = wrapper({'input': input})['output'] + return results else: raise NotImplementedError(f'Unknown backend type: {backend.value}') ALL_BACKEND = [ Backend.TENSORRT, Backend.ONNXRUNTIME, Backend.PPLNN, Backend.NCNN, - Backend.OPENVINO + Backend.OPENVINO, Backend.TORCHSCRIPT ] @pytest.mark.parametrize('backend', ALL_BACKEND) def test_wrapper(backend): check_backend(backend, True) - model_files = onnx2backend(backend, onnx_file) + if backend == Backend.TORCHSCRIPT: + model_files = ts_file + else: + model_files = onnx2backend(backend, onnx_file) assert model_files is not None wrapper = create_wrapper(backend, model_files) assert wrapper is not None diff --git a/tests/test_codebase/test_mmdet/test_mmdet_models.py b/tests/test_codebase/test_mmdet/test_mmdet_models.py index b3c8d90098..b3a1a94173 100644 --- a/tests/test_codebase/test_mmdet/test_mmdet_models.py +++ b/tests/test_codebase/test_mmdet/test_mmdet_models.py @@ -11,6 +11,7 @@ from mmdeploy.codebase import import_codebase from mmdeploy.utils import Backend, Codebase +from mmdeploy.utils.config_utils import get_ir_config from mmdeploy.utils.test import (WrapModel, check_backend, get_model_outputs, get_rewrite_outputs) @@ -222,6 +223,7 @@ def test_l2norm_forward(backend_type): dict( backend_config=dict(type=backend_type.value), onnx_config=dict(input_shape=None))) + seed_everything(1234) feat = torch.rand(1, 16, s, s) model_outputs = [l2norm_neck.forward(feat)] wrapped_model = WrapModel(l2norm_neck, 'forward') @@ -544,7 +546,6 @@ def test_single_roi_extractor(backend_type: Backend): wrapped_model=single_roi_extractor, model_inputs=model_inputs, deploy_cfg=deploy_cfg) - if isinstance(backend_outputs, dict): backend_outputs = backend_outputs.values() for model_output, backend_output in zip(model_outputs[0], backend_outputs): @@ -1227,26 +1228,14 @@ def test_get_bboxes_of_vfnet_head(backend_type: Backend): assert rewrite_outputs is not None -@pytest.mark.parametrize('backend_type', - [Backend.ONNXRUNTIME, Backend.OPENVINO]) -def test_base_dense_head_get_bboxes(backend_type: Backend): - """Test get_bboxes rewrite of base dense head.""" - check_backend(backend_type) - anchor_head = get_anchor_head_model() - anchor_head.cpu().eval() - s = 128 - img_metas = [{ - 'scale_factor': np.ones(4), - 'pad_shape': (s, s, 3), - 'img_shape': (s, s, 3) - }] - - output_names = ['dets', 'labels'] - - deploy_cfg = mmcv.Config( +def get_deploy_cfg(backend_type: Backend, ir_type: str): + return mmcv.Config( dict( backend_config=dict(type=backend_type.value), - onnx_config=dict(output_names=output_names, input_shape=None), + onnx_config=dict( + type=ir_type, + output_names=['dets', 'labels'], + input_shape=None), codebase_config=dict( type='mmdet', task='ObjectDetection', @@ -1259,6 +1248,26 @@ def test_base_dense_head_get_bboxes(backend_type: Backend): background_label_id=-1, )))) + +@pytest.mark.parametrize('backend_type, ir_type', + [(Backend.ONNXRUNTIME, 'onnx'), + (Backend.OPENVINO, 'onnx'), + (Backend.TORCHSCRIPT, 'torchscript')]) +def test_base_dense_head_get_bboxes(backend_type: Backend, ir_type: str): + """Test get_bboxes rewrite of base dense head.""" + check_backend(backend_type) + anchor_head = get_anchor_head_model() + anchor_head.cpu().eval() + s = 128 + img_metas = [{ + 'scale_factor': np.ones(4), + 'pad_shape': (s, s, 3), + 'img_shape': (s, s, 3) + }] + + deploy_cfg = 
get_deploy_cfg(backend_type, ir_type) + output_names = get_ir_config(deploy_cfg).get('output_names', None) + # the cls_score's size: (1, 36, 32, 32), (1, 36, 16, 16), # (1, 36, 8, 8), (1, 36, 4, 4), (1, 36, 2, 2). # the bboxes's size: (1, 36, 32, 32), (1, 36, 16, 16), diff --git a/tests/test_codebase/test_mmdet/test_mmdet_utils.py b/tests/test_codebase/test_mmdet/test_mmdet_utils.py index f80c28f6d9..13db44a010 100644 --- a/tests/test_codebase/test_mmdet/test_mmdet_utils.py +++ b/tests/test_codebase/test_mmdet/test_mmdet_utils.py @@ -5,7 +5,8 @@ from mmdeploy.codebase import import_codebase from mmdeploy.codebase.mmdet import (clip_bboxes, get_post_processing_params, - pad_with_value) + pad_with_value, + pad_with_value_if_necessary) from mmdeploy.utils import Codebase import_codebase(Codebase.MMDET) @@ -29,6 +30,15 @@ def test_pad_with_value(): assert np.allclose(padded_x.sum(), x.sum(), rtol=1e-03, atol=1e-05) +def test_pad_with_value_if_necessary(): + x = torch.rand(3, 2) + padded_x = pad_with_value_if_necessary( + x, pad_dim=1, pad_size=4, pad_value=0) + assert np.allclose( + padded_x.shape, torch.Size([3, 2]), rtol=1e-03, atol=1e-05) + assert np.allclose(padded_x.sum(), x.sum(), rtol=1e-03, atol=1e-05) + + config_with_mmdet_params = mmcv.Config( dict( codebase_config=dict( diff --git a/tests/test_codebase/test_mmdet3d/data/kitti/kitti_000008.bin b/tests/test_codebase/test_mmdet3d/data/kitti/kitti_000008.bin new file mode 100644 index 0000000000..24cefd327f Binary files /dev/null and b/tests/test_codebase/test_mmdet3d/data/kitti/kitti_000008.bin differ diff --git a/tests/test_codebase/test_mmdet3d/data/kitti/kitti_infos_val.pkl b/tests/test_codebase/test_mmdet3d/data/kitti/kitti_infos_val.pkl new file mode 100644 index 0000000000..f2acbd3dcc Binary files /dev/null and b/tests/test_codebase/test_mmdet3d/data/kitti/kitti_infos_val.pkl differ diff --git a/tests/test_codebase/test_mmdet3d/data/model_cfg.py b/tests/test_codebase/test_mmdet3d/data/model_cfg.py new file mode 100644 index 0000000000..0c46aad4b8 --- /dev/null +++ b/tests/test_codebase/test_mmdet3d/data/model_cfg.py @@ -0,0 +1,230 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
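The new pad_with_value_if_necessary test above pins down the helper's eager-mode contract: with a static input it is expected to leave the tensor untouched, so padding presumably only kicks in during export with dynamic shapes. A minimal sketch of that expectation (aside, not part of the patch):

# Sketch of the behaviour asserted by the new test.
import torch

from mmdeploy.codebase import import_codebase
from mmdeploy.codebase.mmdet import pad_with_value_if_necessary
from mmdeploy.utils import Codebase

import_codebase(Codebase.MMDET)

x = torch.rand(3, 2)
padded = pad_with_value_if_necessary(x, pad_dim=1, pad_size=4, pad_value=0)
assert padded.shape == torch.Size([3, 2])  # untouched outside of export
assert torch.isclose(padded.sum(), x.sum())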
+voxel_size = [0.16, 0.16, 4] + +model = dict( + type='VoxelNet', + voxel_layer=dict( + max_num_points=32, # max_points_per_voxel + point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], + voxel_size=voxel_size, + max_voxels=(16000, 40000) # (training, testing) max_voxels + ), + voxel_encoder=dict( + type='PillarFeatureNet', + in_channels=4, + feat_channels=[64], + with_distance=False, + voxel_size=voxel_size, + point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), + middle_encoder=dict( + type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), + backbone=dict( + type='SECOND', + in_channels=64, + layer_nums=[3, 5, 5], + layer_strides=[2, 2, 2], + out_channels=[64, 128, 256]), + neck=dict( + type='SECONDFPN', + in_channels=[64, 128, 256], + upsample_strides=[1, 2, 4], + out_channels=[128, 128, 128]), + test_cfg=dict( + use_rotate_nms=True, + nms_across_levels=False, + nms_thr=0.01, + score_thr=0.1, + min_bbox_size=0, + nms_pre=100, + max_num=50), + bbox_head=dict( + type='Anchor3DHead', + num_classes=3, + in_channels=384, + feat_channels=384, + use_direction_classifier=True, + anchor_generator=dict( + type='AlignedAnchor3DRangeGenerator', + ranges=[ + [0, -39.68, -0.6, 69.12, 39.68, -0.6], + [0, -39.68, -0.6, 69.12, 39.68, -0.6], + [0, -39.68, -1.78, 69.12, 39.68, -1.78], + ], + sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + rotations=[0, 1.57], + reshape_out=False), + diff_rad_by_sin=True, + bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), + loss_dir=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2))) +point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1] +# dataset settings +data_root = 'tests/test_codebase/test_mmdet3d/data/kitti/' +dataset_type = 'KittiDataset' +class_names = ['Pedestrian', 'Cyclist', 'Car'] +input_modality = dict(use_lidar=True, use_camera=False) +# PointPillars adopted a different sampling strategies among classes +db_sampler = dict( + data_root=data_root, + info_path=data_root + 'kitti_dbinfos_train.pkl', + rate=1.0, + prepare=dict( + filter_by_difficulty=[-1], + filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), + classes=class_names, + sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10)) +train_pipeline = [ + dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4), + dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), + dict(type='ObjectSample', db_sampler=db_sampler), + dict( + type='ObjectNoise', + num_try=100, + translation_std=[0.25, 0.25, 0.25], + global_rot_range=[0.0, 0.0], + rot_range=[-0.15707963267, 0.15707963267]), + dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), + dict( + type='GlobalRotScaleTrans', + rot_range=[-0.78539816, 0.78539816], + scale_ratio_range=[0.95, 1.05]), + dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), + dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), + dict(type='PointShuffle'), + dict(type='DefaultFormatBundle3D', class_names=class_names), + dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) +] +test_pipeline = [ + dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4), + dict( + type='MultiScaleFlipAug3D', + img_scale=(1333, 800), + pts_scale_ratio=1, + flip=False, + transforms=[ + dict( + type='GlobalRotScaleTrans', + rot_range=[0, 0], + scale_ratio_range=[1., 1.], + 
translation_std=[0, 0, 0]), + dict(type='RandomFlip3D'), + dict( + type='PointsRangeFilter', point_cloud_range=point_cloud_range), + dict( + type='DefaultFormatBundle3D', + class_names=class_names, + with_label=False), + dict(type='Collect3D', keys=['points']) + ]) +] +data = dict( + train=dict( + dataset=dict( + pipeline=train_pipeline, classes=class_names, + box_type_3d='LiDAR')), + val=dict(pipeline=test_pipeline, classes=class_names, box_type_3d='LiDAR'), + test=dict( + type=dataset_type, + data_root=data_root, + ann_file=data_root + 'kitti_infos_val.pkl', + split='training', + pts_prefix='velodyne_reduced', + pipeline=test_pipeline, + modality=input_modality, + classes=class_names, + test_mode=True, + box_type_3d='LiDAR')) + +point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0] +centerpoint_model = dict( + pts_voxel_layer=dict( + max_num_points=20, + voxel_size=voxel_size, + max_voxels=(30000, 40000), + point_cloud_range=point_cloud_range), + pts_voxel_encoder=dict( + type='PillarFeatureNet', + in_channels=4, + feat_channels=[64], + with_distance=False, + voxel_size=(0.2, 0.2, 8), + norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), + legacy=False), + pts_middle_encoder=dict( + type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), + pts_backbone=dict( + type='SECOND', + in_channels=64, + out_channels=[64, 128, 256], + layer_nums=[3, 5, 5], + layer_strides=[2, 2, 2], + norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), + conv_cfg=dict(type='Conv2d', bias=False)), + pts_neck=dict( + type='SECONDFPN', + in_channels=[64, 128, 256], + out_channels=[128, 128, 128], + upsample_strides=[0.5, 1, 2], + norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), + upsample_cfg=dict(type='deconv', bias=False), + use_conv_for_no_stride=True), + pts_bbox_head=dict( + type='CenterHead', + in_channels=sum([128, 128, 128]), + tasks=[ + dict(num_class=1, class_names=['car']), + dict(num_class=2, class_names=['truck', 'construction_vehicle']), + dict(num_class=2, class_names=['bus', 'trailer']), + dict(num_class=1, class_names=['barrier']), + dict(num_class=2, class_names=['motorcycle', 'bicycle']), + dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), + ], + common_heads=dict( + reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), + share_conv_channel=64, + bbox_coder=dict( + type='CenterPointBBoxCoder', + post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], + pc_range=point_cloud_range[:2], + max_num=500, + score_threshold=0.1, + out_size_factor=4, + voxel_size=voxel_size[:2], + code_size=9), + separate_head=dict( + type='SeparateHead', init_bias=-2.19, final_kernel=3), + loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), + loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), + norm_bbox=True), + # model training and testing settings + train_cfg=dict( + pts=dict( + grid_size=[512, 512, 1], + voxel_size=voxel_size, + out_size_factor=4, + dense_reg=1, + gaussian_overlap=0.1, + max_objs=500, + min_radius=2, + code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), + test_cfg=dict( + pts=dict( + post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], + max_per_img=500, + max_pool_nms=False, + min_radius=[4, 12, 10, 1, 0.85, 0.175], + pc_range=point_cloud_range[:2], + score_threshold=0.1, + out_size_factor=4, + voxel_size=voxel_size[:2], + nms_type='circle', + pre_max_size=1000, + post_max_size=83, + nms_thr=0.2))) diff --git a/tests/test_codebase/test_mmdet3d/test_mmdet3d_models.py 
b/tests/test_codebase/test_mmdet3d/test_mmdet3d_models.py new file mode 100644 index 0000000000..afddebf1ab --- /dev/null +++ b/tests/test_codebase/test_mmdet3d/test_mmdet3d_models.py @@ -0,0 +1,160 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import mmcv +import numpy as np +import pytest +import torch + +from mmdeploy.codebase import import_codebase +from mmdeploy.utils import Backend, Codebase, Task, load_config +from mmdeploy.utils.test import WrapModel, check_backend, get_rewrite_outputs + +try: + import_codebase(Codebase.MMDET3D) +except ImportError: + pytest.skip( + f'{Codebase.MMDET3D} is not installed.', allow_module_level=True) +model_cfg = load_config( + 'tests/test_codebase/test_mmdet3d/data/model_cfg.py')[0] + + +def get_pillar_encoder(): + from mmdet3d.models.voxel_encoders import PillarFeatureNet + model = PillarFeatureNet( + in_channels=4, + feat_channels=(64, ), + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + voxel_size=(0.2, 0.2, 4), + point_cloud_range=(0, -40, -3, 70.4, 40, 1), + norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), + mode='max') + model.requires_grad_(False) + return model + + +def get_pointpillars_scatter(): + from mmdet3d.models.middle_encoders import PointPillarsScatter + model = PointPillarsScatter(in_channels=64, output_shape=(16, 16)) + model.requires_grad_(False) + return model + + +@pytest.mark.parametrize('backend_type', [Backend.ONNXRUNTIME]) +def test_pillar_encoder(backend_type: Backend): + check_backend(backend_type, True) + model = get_pillar_encoder() + model.cpu().eval() + + deploy_cfg = mmcv.Config( + dict( + backend_config=dict(type=backend_type.value), + onnx_config=dict( + input_shape=None, + input_names=['features', 'num_points', 'coors'], + output_names=['outputs']), + codebase_config=dict( + type=Codebase.MMDET3D.value, task=Task.VOXEL_DETECTION.value))) + features = torch.rand(3945, 32, 4) * 100 + num_points = torch.randint(0, 32, (3945, ), dtype=torch.int32) + coors = torch.randint(0, 10, (3945, 4), dtype=torch.int32) + model_outputs = [model.forward(features, num_points, coors)] + wrapped_model = WrapModel(model, 'forward') + rewrite_inputs = { + 'features': features, + 'num_points': num_points, + 'coors': coors + } + rewrite_outputs, is_backend_output = get_rewrite_outputs( + wrapped_model=wrapped_model, + model_inputs=rewrite_inputs, + deploy_cfg=deploy_cfg) + if isinstance(rewrite_outputs, dict): + rewrite_outputs = rewrite_outputs['output'] + for model_output, rewrite_output in zip(model_outputs, rewrite_outputs): + if isinstance(rewrite_output, torch.Tensor): + rewrite_output = rewrite_output.cpu().numpy() + assert np.allclose( + model_output.shape, rewrite_output.shape, rtol=1e-03, atol=1e-03) + + +@pytest.mark.parametrize('backend_type', [Backend.ONNXRUNTIME]) +def test_pointpillars_scatter(backend_type: Backend): + check_backend(backend_type, True) + model = get_pointpillars_scatter() + model.cpu().eval() + + deploy_cfg = mmcv.Config( + dict( + backend_config=dict(type=backend_type.value), + onnx_config=dict( + input_shape=None, + input_names=['voxel_features', 'coors'], + output_names=['outputs']), + codebase_config=dict( + type=Codebase.MMDET3D.value, task=Task.VOXEL_DETECTION.value))) + voxel_features = torch.rand(16 * 16, 64) * 100 + coors = torch.randint(0, 10, (16 * 16, 4), dtype=torch.int32) + model_outputs = [model.forward_batch(voxel_features, coors, 1)] + wrapped_model = WrapModel(model, 'forward_batch') + rewrite_inputs = {'voxel_features': voxel_features, 'coors': 
coors} + rewrite_outputs, is_backend_output = get_rewrite_outputs( + wrapped_model=wrapped_model, + model_inputs=rewrite_inputs, + deploy_cfg=deploy_cfg) + if isinstance(rewrite_outputs, dict): + rewrite_outputs = rewrite_outputs['output'] + for model_output, rewrite_output in zip(model_outputs, rewrite_outputs): + if isinstance(rewrite_output, torch.Tensor): + rewrite_output = rewrite_output.cpu().numpy() + assert np.allclose( + model_output.shape, rewrite_output.shape, rtol=1e-03, atol=1e-03) + + +def get_centerpoint(): + from mmdet3d.models.detectors.centerpoint import CenterPoint + + model = CenterPoint(**model_cfg.centerpoint_model) + model.requires_grad_(False) + return model + + +def get_centerpoint_head(): + from mmdet3d.models import builder + model_cfg.centerpoint_model.pts_bbox_head.test_cfg = model_cfg.\ + centerpoint_model.test_cfg + head = builder.build_head(model_cfg.centerpoint_model.pts_bbox_head) + head.requires_grad_(False) + return head + + +@pytest.mark.parametrize('backend_type', [Backend.ONNXRUNTIME]) +def test_centerpoint(backend_type: Backend): + from mmdeploy.codebase.mmdet3d.deploy.voxel_detection import VoxelDetection + from mmdeploy.core import RewriterContext + check_backend(backend_type, True) + model = get_centerpoint() + model.cpu().eval() + deploy_cfg = mmcv.Config( + dict( + backend_config=dict(type=backend_type.value), + onnx_config=dict( + input_shape=None, + opset_version=11, + input_names=['voxels', 'num_points', 'coors'], + output_names=['outputs']), + codebase_config=dict( + type=Codebase.MMDET3D.value, task=Task.VOXEL_DETECTION.value))) + voxeldetection = VoxelDetection(model_cfg, deploy_cfg, 'cpu') + inputs, data = voxeldetection.create_input( + 'tests/test_codebase/test_mmdet3d/data/kitti/kitti_000008.bin') + + with RewriterContext( + cfg=deploy_cfg, + backend=deploy_cfg.backend_config.type, + opset=deploy_cfg.onnx_config.opset_version): + outputs = model.forward(*data) + head = get_centerpoint_head() + rewrite_outputs = head.get_bboxes(*[[i] for i in outputs], + inputs['img_metas'][0]) + assert rewrite_outputs is not None diff --git a/tests/test_codebase/test_mmdet3d/test_voxel_detection.py b/tests/test_codebase/test_mmdet3d/test_voxel_detection.py new file mode 100644 index 0000000000..aec5c5901c --- /dev/null +++ b/tests/test_codebase/test_mmdet3d/test_voxel_detection.py @@ -0,0 +1,152 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
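The single-module rewrite tests above and the VoxelDetection task tests that follow all build their deploy configs from the same ingredients: a backend_config, an onnx_config naming the voxelized inputs, and a codebase_config selecting the VoxelDetection task. A condensed sketch, with values copied from those tests (aside, not part of the patch):

# Condensed from the deploy configs used by the new mmdet3d tests.
import mmcv

deploy_cfg = mmcv.Config(
    dict(
        backend_config=dict(type='onnxruntime'),
        onnx_config=dict(
            type='onnx',
            opset_version=11,
            input_shape=None,
            input_names=['voxels', 'num_points', 'coors'],
            output_names=['scores', 'bbox_preds', 'dir_scores']),
        codebase_config=dict(type='mmdet3d', task='VoxelDetection')))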
+import os +from tempfile import NamedTemporaryFile, TemporaryDirectory + +import mmcv +import pytest +import torch +from torch.utils.data import DataLoader +from torch.utils.data.dataset import Dataset + +import mmdeploy.backend.onnxruntime as ort_apis +from mmdeploy.apis import build_task_processor +from mmdeploy.codebase import import_codebase +from mmdeploy.utils import Codebase, load_config +from mmdeploy.utils.test import DummyModel, SwitchBackendWrapper + +try: + import_codebase(Codebase.MMDET3D) +except ImportError: + pytest.skip( + f'{Codebase.MMDET3D} is not installed.', allow_module_level=True) + +model_cfg_path = 'tests/test_codebase/test_mmdet3d/data/model_cfg.py' +pcd_path = 'tests/test_codebase/test_mmdet3d/data/kitti/kitti_000008.bin' +model_cfg = load_config(model_cfg_path)[0] +deploy_cfg = mmcv.Config( + dict( + backend_config=dict(type='onnxruntime'), + codebase_config=dict(type='mmdet3d', task='VoxelDetection'), + onnx_config=dict( + type='onnx', + export_params=True, + keep_initializers_as_inputs=False, + opset_version=11, + input_shape=None, + input_names=['voxels', 'num_points', 'coors'], + output_names=['scores', 'bbox_preds', 'dir_scores']))) +onnx_file = NamedTemporaryFile(suffix='.onnx').name +task_processor = build_task_processor(model_cfg, deploy_cfg, 'cpu') + + +def test_init_pytorch_model(): + from mmdet3d.models import Base3DDetector + model = task_processor.init_pytorch_model(None) + assert isinstance(model, Base3DDetector) + + +@pytest.fixture +def backend_model(): + from mmdeploy.backend.onnxruntime import ORTWrapper + ort_apis.__dict__.update({'ORTWrapper': ORTWrapper}) + wrapper = SwitchBackendWrapper(ORTWrapper) + wrapper.set( + outputs={ + 'scores': torch.rand(1, 18, 32, 32), + 'bbox_preds': torch.rand(1, 42, 32, 32), + 'dir_scores': torch.rand(1, 12, 32, 32) + }) + + yield task_processor.init_backend_model(['']) + + wrapper.recover() + + +def test_init_backend_model(backend_model): + from mmdeploy.codebase.mmdet3d.deploy.voxel_detection_model import \ + VoxelDetectionModel + assert isinstance(backend_model, VoxelDetectionModel) + + +@pytest.mark.parametrize('device', ['cpu', 'cuda:0']) +def test_create_input(device): + if device == 'cuda:0' and not torch.cuda.is_available(): + pytest.skip('cuda is not available') + original_device = task_processor.device + task_processor.device = device + inputs = task_processor.create_input(pcd_path) + assert len(inputs) == 2 + task_processor.device = original_device + + +@pytest.mark.skipif( + reason='Only support GPU test', condition=not torch.cuda.is_available()) +def test_run_inference(backend_model): + task_processor.device = 'cuda:0' + torch_model = task_processor.init_pytorch_model(None) + input_dict, _ = task_processor.create_input(pcd_path) + torch_results = task_processor.run_inference(torch_model, input_dict) + backend_results = task_processor.run_inference(backend_model, input_dict) + assert torch_results is not None + assert backend_results is not None + assert len(torch_results[0]) == len(backend_results[0]) + task_processor.device = 'cpu' + + +@pytest.mark.skipif( + reason='Only support GPU test', condition=not torch.cuda.is_available()) +def test_visualize(): + task_processor.device = 'cuda:0' + input_dict, _ = task_processor.create_input(pcd_path) + torch_model = task_processor.init_pytorch_model(None) + results = task_processor.run_inference(torch_model, input_dict) + with TemporaryDirectory() as dir: + filename = dir + 'tmp.bin' + task_processor.visualize(torch_model, pcd_path, results[0], 
filename, + 'test', False) + assert os.path.exists(filename) + task_processor.device = 'cpu' + + +def test_build_dataset_and_dataloader(): + dataset = task_processor.build_dataset( + dataset_cfg=model_cfg, dataset_type='test') + assert isinstance(dataset, Dataset), 'Failed to build dataset' + dataloader = task_processor.build_dataloader(dataset, 1, 1) + assert isinstance(dataloader, DataLoader), 'Failed to build dataloader' + + +@pytest.mark.skipif( + reason='Only support GPU test', condition=not torch.cuda.is_available()) +def test_single_gpu_test_and_evaluate(): + from mmcv.parallel import MMDataParallel + task_processor.device = 'cuda:0' + + class DummyDataset(Dataset): + + def __getitem__(self, index): + return 0 + + def __len__(self): + return 0 + + def evaluate(self, *args, **kwargs): + return 0 + + def format_results(self, *args, **kwargs): + return 0 + + dataset = DummyDataset() + # Prepare dataloader + dataloader = DataLoader(dataset) + + # Prepare dummy model + model = DummyModel(outputs=[torch.rand([1, 10, 5]), torch.rand([1, 10])]) + model = MMDataParallel(model, device_ids=[0]) + # Run test + outputs = task_processor.single_gpu_test(model, dataloader) + assert isinstance(outputs, list) + output_file = NamedTemporaryFile(suffix='.pkl').name + task_processor.evaluate_outputs( + model_cfg, outputs, dataset, 'bbox', out=output_file, format_only=True) + task_processor.device = 'cpu' diff --git a/tests/test_codebase/test_mmdet3d/test_voxel_detection_model.py b/tests/test_codebase/test_mmdet3d/test_voxel_detection_model.py new file mode 100644 index 0000000000..5946f7b762 --- /dev/null +++ b/tests/test_codebase/test_mmdet3d/test_voxel_detection_model.py @@ -0,0 +1,99 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +import mmcv +import pytest +import torch + +import mmdeploy.backend.onnxruntime as ort_apis +from mmdeploy.codebase import import_codebase +from mmdeploy.utils import Backend, Codebase +from mmdeploy.utils.test import SwitchBackendWrapper, backend_checker + +try: + import_codebase(Codebase.MMDET3D) +except ImportError: + pytest.skip( + f'{Codebase.MMDET3D} is not installed.', allow_module_level=True) +from mmdeploy.codebase.mmdet3d.deploy.voxel_detection import VoxelDetection + +pcd_path = 'tests/test_codebase/test_mmdet3d/data/kitti/kitti_000008.bin' +model_cfg = 'tests/test_codebase/test_mmdet3d/data/model_cfg.py' + + +@backend_checker(Backend.ONNXRUNTIME) +class TestVoxelDetectionModel: + + @classmethod + def setup_class(cls): + # force add backend wrapper regardless of plugins + from mmdeploy.backend.onnxruntime import ORTWrapper + ort_apis.__dict__.update({'ORTWrapper': ORTWrapper}) + + # simplify backend inference + cls.wrapper = SwitchBackendWrapper(ORTWrapper) + cls.outputs = { + 'scores': torch.rand(1, 18, 32, 32), + 'bbox_preds': torch.rand(1, 42, 32, 32), + 'dir_scores': torch.rand(1, 12, 32, 32) + } + cls.wrapper.set(outputs=cls.outputs) + deploy_cfg = mmcv.Config({ + 'onnx_config': { + 'input_names': ['voxels', 'num_points', 'coors'], + 'output_names': ['scores', 'bbox_preds', 'dir_scores'], + 'opset_version': 11 + }, + 'backend_config': { + 'type': 'tensorrt' + } + }) + + from mmdeploy.utils import load_config + model_cfg_path = 'tests/test_codebase/test_mmdet3d/data/model_cfg.py' + model_cfg = load_config(model_cfg_path)[0] + from mmdeploy.codebase.mmdet3d.deploy.voxel_detection_model import \ + VoxelDetectionModel + cls.end2end_model = VoxelDetectionModel( + Backend.ONNXRUNTIME, [''], + device='cuda', + deploy_cfg=deploy_cfg, + 
model_cfg=model_cfg) + + @pytest.mark.skipif( + reason='Only support GPU test', + condition=not torch.cuda.is_available()) + def test_forward_and_show_result(self): + data = VoxelDetection.read_pcd_file(pcd_path, model_cfg, 'cuda') + results = self.end2end_model.forward(data['points'], data['img_metas']) + assert results is not None + from tempfile import TemporaryDirectory + with TemporaryDirectory() as dir: + self.end2end_model.show_result( + data, results, dir, 'backend_output.bin', show=False) + assert osp.exists(dir + '/backend_output.bin') + + +@backend_checker(Backend.ONNXRUNTIME) +def test_build_voxel_detection_model(): + from mmdeploy.utils import load_config + model_cfg_path = 'tests/test_codebase/test_mmdet3d/data/model_cfg.py' + model_cfg = load_config(model_cfg_path)[0] + deploy_cfg = mmcv.Config( + dict( + backend_config=dict(type=Backend.ONNXRUNTIME.value), + onnx_config=dict( + output_names=['scores', 'bbox_preds', 'dir_scores']), + codebase_config=dict(type=Codebase.MMDET3D.value))) + + from mmdeploy.backend.onnxruntime import ORTWrapper + ort_apis.__dict__.update({'ORTWrapper': ORTWrapper}) + + # simplify backend inference + with SwitchBackendWrapper(ORTWrapper) as wrapper: + wrapper.set(model_cfg=model_cfg, deploy_cfg=deploy_cfg) + from mmdeploy.codebase.mmdet3d.deploy.voxel_detection_model import ( + VoxelDetectionModel, build_voxel_detection_model) + voxeldetector = build_voxel_detection_model([''], model_cfg, + deploy_cfg, 'cpu') + assert isinstance(voxeldetector, VoxelDetectionModel) diff --git a/tests/test_codebase/test_mmseg/test_mmseg_models.py b/tests/test_codebase/test_mmseg/test_mmseg_models.py index 71dbf86187..dfcd5b4cdb 100644 --- a/tests/test_codebase/test_mmseg/test_mmseg_models.py +++ b/tests/test_codebase/test_mmseg/test_mmseg_models.py @@ -9,7 +9,7 @@ from mmseg.models.decode_heads.decode_head import BaseDecodeHead from mmdeploy.codebase import import_codebase -from mmdeploy.utils import Backend, Codebase +from mmdeploy.utils import Backend, Codebase, Task from mmdeploy.utils.test import (WrapModel, check_backend, get_model_outputs, get_rewrite_outputs) @@ -232,3 +232,85 @@ def test_psphead_forward(backend): rewrite_outputs = rewrite_outputs.to(model_outputs).reshape( model_outputs.shape) assert torch.allclose(rewrite_outputs, model_outputs, rtol=1, atol=1) + + +@pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME]) +def test_emamodule_forward(backend): + check_backend(backend) + from mmseg.models.decode_heads.ema_head import EMAModule + head = EMAModule(8, 2, 2, 1.0).eval() + + deploy_cfg = mmcv.Config( + dict( + backend_config=dict(type=backend.value), + onnx_config=dict( + output_names=['result'], input_shape=(1, 8, 16, 16)), + codebase_config=dict(type='mmseg', task='Segmentation'))) + feats = torch.randn(1, 8, 16, 16) + model_inputs = {'feats': feats} + with torch.no_grad(): + model_outputs = get_model_outputs(head, 'forward', model_inputs) + wrapped_model = WrapModel(head, 'forward') + rewrite_outputs, is_backend_output = get_rewrite_outputs( + wrapped_model=wrapped_model, + model_inputs=model_inputs, + deploy_cfg=deploy_cfg) + if is_backend_output: + rewrite_outputs = rewrite_outputs[0] + rewrite_outputs = rewrite_outputs.to(model_outputs).reshape( + model_outputs.shape) + assert torch.allclose( + rewrite_outputs, model_outputs, rtol=1e-03, atol=1e-05) + + +@pytest.mark.parametrize('is_dynamic_shape', [True, False]) +@pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME]) +def test_upconvblock_forward(backend, is_dynamic_shape): + 
check_backend(backend) + from mmseg.models.backbones.unet import BasicConvBlock + from mmseg.models.utils import UpConvBlock + + head = UpConvBlock(BasicConvBlock, 16, 8, 8).eval() + dynamic_axes = { + 'x': { + 0: 'b', + 2: 'h', + 3: 'w' + }, + 'skip': { + 0: 'b', + 2: 'h', + 3: 'w' + }, + 'output': { + 0: 'b', + 2: 'h', + 3: 'w' + }, + } if is_dynamic_shape else None + deploy_cfg = mmcv.Config( + dict( + backend_config=dict(type=backend.value), + onnx_config=dict( + input_names=['skip', 'x'], + output_names=['output'], + dynamic_axes=dynamic_axes), + codebase_config=dict( + type=Codebase.MMSEG.value, task=Task.SEGMENTATION.value))) + x = torch.randn(1, 16, 16, 16) + skip = torch.randn(1, 8, 32, 32) + model_inputs = {'x': x, 'skip': skip} + with torch.no_grad(): + model_outputs = get_model_outputs(head, 'forward', model_inputs) + + wrapped_model = WrapModel(head, 'forward') + rewrite_outputs, is_backend_output = get_rewrite_outputs( + wrapped_model=wrapped_model, + model_inputs=model_inputs, + deploy_cfg=deploy_cfg) + if is_backend_output: + rewrite_outputs = rewrite_outputs[0] + rewrite_outputs = rewrite_outputs.to(model_outputs).reshape( + model_outputs.shape) + assert torch.allclose( + rewrite_outputs, model_outputs, rtol=1e-03, atol=1e-05) diff --git a/tests/test_core/test_function_rewriter.py b/tests/test_core/test_function_rewriter.py index b9b43fb688..97a814e929 100644 --- a/tests/test_core/test_function_rewriter.py +++ b/tests/test_core/test_function_rewriter.py @@ -3,7 +3,8 @@ from mmdeploy.core import FUNCTION_REWRITER, RewriterContext from mmdeploy.core.rewriters.function_rewriter import FunctionRewriter -from mmdeploy.utils.constants import Backend +from mmdeploy.core.rewriters.rewriter_utils import collect_env +from mmdeploy.utils.constants import IR, Backend def test_function_rewriter(): @@ -97,7 +98,6 @@ def test_rewrite_homonymic_functions(self): assert package.module.func() == 1 function_rewriter = FunctionRewriter() - function_rewriter.add_backend(Backend.NCNN.value) @function_rewriter.register_rewriter(func_name=path1) def func_2(ctx): @@ -108,7 +108,7 @@ def func_2(ctx): def func_3(ctx): return 3 - function_rewriter.enter(backend=Backend.NCNN.value) + function_rewriter.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) # This is a feature assert package.func() == 2 assert package.module.func() == 3 @@ -118,7 +118,6 @@ def func_3(ctx): assert package.module.func() == 1 function_rewriter2 = FunctionRewriter() - function_rewriter2.add_backend(Backend.NCNN.value) @function_rewriter2.register_rewriter( func_name=path1, backend=Backend.NCNN.value) @@ -129,7 +128,7 @@ def func_4(ctx): def func_5(ctx): return 5 - function_rewriter2.enter(backend=Backend.NCNN.value) + function_rewriter2.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) # This is a feature assert package.func() == 4 assert package.module.func() == 5 @@ -146,7 +145,6 @@ def test_rewrite_homonymic_methods(self): c = package.C() function_rewriter = FunctionRewriter() - function_rewriter.add_backend(Backend.NCNN.value) assert c.method() == 1 @@ -159,14 +157,13 @@ def func_2(ctx, self): def func_3(ctx, self): return 3 - function_rewriter.enter(backend=Backend.NCNN.value) + function_rewriter.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) assert c.method() == 3 function_rewriter.exit() assert c.method() == 1 function_rewriter2 = FunctionRewriter() - function_rewriter2.add_backend(Backend.NCNN.value) @function_rewriter2.register_rewriter( func_name=path1, backend=Backend.NCNN.value) @@ -177,7 +174,7 @@ def 
func_4(ctx, self): def func_5(ctx, self): return 5 - function_rewriter2.enter(backend=Backend.NCNN.value) + function_rewriter2.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) assert c.method() == 4 function_rewriter2.exit() @@ -196,7 +193,6 @@ def test_rewrite_derived_methods(): assert derived_obj.method() == 1 function_rewriter = FunctionRewriter() - function_rewriter.add_backend(Backend.NCNN.value) @function_rewriter.register_rewriter(func_name=path1) def func_2(ctx, self): @@ -207,12 +203,12 @@ def func_2(ctx, self): def func_3(ctx, self): return 3 - function_rewriter.enter() + function_rewriter.enter(env=collect_env(Backend.DEFAULT, ir=IR.DEFAULT)) assert base_obj.method() == 2 assert derived_obj.method() == 2 function_rewriter.exit() - function_rewriter.enter(backend=Backend.NCNN.value) + function_rewriter.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) assert base_obj.method() == 2 assert derived_obj.method() == 3 function_rewriter.exit() @@ -221,7 +217,7 @@ def func_3(ctx, self): assert derived_obj.method() == 1 # Check if the recovery is correct - function_rewriter.enter() + function_rewriter.enter(env=collect_env(Backend.DEFAULT, ir=IR.DEFAULT)) assert base_obj.method() == 2 assert derived_obj.method() == 2 function_rewriter.exit() diff --git a/tests/test_core/test_mark.py b/tests/test_core/test_mark.py index 5c0990f90f..fb85472ee1 100644 --- a/tests/test_core/test_mark.py +++ b/tests/test_core/test_mark.py @@ -4,8 +4,9 @@ import onnx import torch -from mmdeploy.core import mark +from mmdeploy.core import RewriterContext, mark from mmdeploy.core.optimizers import attribute_to_dict +from mmdeploy.utils.constants import IR, Backend output_file = tempfile.NamedTemporaryFile(suffix='.onnx').name @@ -68,3 +69,9 @@ def forward(self, x, y): type='output', name='c', shape=[2, 3, 4]) + + with RewriterContext( + cfg=None, backend=Backend.TORCHSCRIPT.value, + ir=IR.TORCHSCRIPT), torch.no_grad(), torch.jit.optimized_execution( + True): + torch.jit.trace(model, (x, y)) diff --git a/tests/test_core/test_rewriter_registry.py b/tests/test_core/test_rewriter_registry.py deleted file mode 100644 index b577d02623..0000000000 --- a/tests/test_core/test_rewriter_registry.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
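All of the rewriter tests above migrate from the removed add_backend / enter(backend=...) interface to the environment-based one. A minimal sketch of the new call pattern (aside, not part of the patch; 'torch.nn.functional.relu' is only a stand-in target, any importable function path works the same way):

# Sketch of the migrated FunctionRewriter interface.
from mmdeploy.core.rewriters.function_rewriter import FunctionRewriter
from mmdeploy.core.rewriters.rewriter_utils import collect_env
from mmdeploy.utils.constants import IR, Backend

function_rewriter = FunctionRewriter()

@function_rewriter.register_rewriter(
    func_name='torch.nn.functional.relu', backend=Backend.NCNN.value)
def relu_rewrite(ctx, input, *args, **kwargs):
    # trivial replacement, active only while the rewriter is entered
    return input

# before this patch: function_rewriter.enter(backend=Backend.NCNN.value)
function_rewriter.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT))
# ... rewritten functions are in effect here ...
function_rewriter.exit()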
-import pytest - -from mmdeploy.core.rewriters.rewriter_utils import RewriterRegistry -from mmdeploy.utils.constants import Backend - - -def test_check_backend(): - with pytest.raises(Exception): - registry = RewriterRegistry() - registry._check_backend(Backend.ONNXRUNTIME.value) - - -def test_add_backend(): - registry = RewriterRegistry() - registry.add_backend(Backend.ONNXRUNTIME.value) - assert Backend.ONNXRUNTIME.value in registry._rewrite_records - assert Backend.DEFAULT.value in registry._rewrite_records - assert Backend.TENSORRT.value not in registry._rewrite_records - - -def test_register_object(): - registry = RewriterRegistry() - - @registry.register_object('add', backend=Backend.DEFAULT.value) - def add(a, b): - return a + b - - records = registry._rewrite_records[Backend.DEFAULT.value] - assert records is not None - assert records['add'] is not None - assert records['add']['_object'] is not None - add_func = records['add']['_object'] - assert add_func(123, 456) == 123 + 456 - - -def test_get_records(): - registry = RewriterRegistry() - registry.add_backend(Backend.TENSORRT.value) - - @registry.register_object('add', backend=Backend.DEFAULT.value) - def add(a, b): - return a + b - - @registry.register_object('minus', backend=Backend.DEFAULT.value) - def minus(a, b): - return a - b - - @registry.register_object('add', backend=Backend.TENSORRT.value) - def fake_add(a, b): - return a * b - - default_records = dict(registry.get_records(Backend.DEFAULT.value)) - assert default_records['add']['_object'](1, 1) == 2 - assert default_records['minus']['_object'](1, 1) == 0 - - tensorrt_records = dict(registry.get_records(Backend.TENSORRT.value)) - assert tensorrt_records['add']['_object'](1, 1) == 1 - assert tensorrt_records['minus']['_object'](1, 1) == 0 diff --git a/tests/test_core/test_rewriter_utils.py b/tests/test_core/test_rewriter_utils.py new file mode 100644 index 0000000000..4954a573d8 --- /dev/null +++ b/tests/test_core/test_rewriter_utils.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
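The deleted test above exercised the old per-backend record tables (add_backend plus get_records(backend)). Its replacement, test_rewriter_utils.py below, matches records against an environment dict instead. A small sketch of that dict, following the fields checked by the new test (aside, not part of the patch):

# Sketch of the environment dict that replaces the per-backend tables.
import mmdeploy
from mmdeploy.core.rewriters.rewriter_utils import collect_env
from mmdeploy.utils.constants import IR, Backend

env = collect_env(Backend.ONNXRUNTIME, IR.ONNX, version='1.0')
assert env['backend'] == Backend.ONNXRUNTIME
assert env['ir'] == IR.ONNX
assert env['version'] == '1.0'
assert env['mmdeploy'] == mmdeploy.__version__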
+import mmdeploy +import mmdeploy.core.rewriters.rewriter_utils as rewriter_utils +from mmdeploy.core.rewriters.rewriter_utils import (BackendChecker, + RewriterRegistry, + collect_env) +from mmdeploy.utils.constants import IR, Backend + + +def test_collect_env(): + env_dict = collect_env(Backend.ONNXRUNTIME, IR.ONNX, version='1.0') + assert env_dict['backend'] == Backend.ONNXRUNTIME + assert env_dict['ir'] == IR.ONNX + assert env_dict['version'] == '1.0' + assert env_dict['mmdeploy'] == mmdeploy.__version__ + + +class TestChecker: + env = collect_env(Backend.ONNXRUNTIME, IR.ONNX) + + def test_backend_checker(self): + true_checker = rewriter_utils.BackendChecker(Backend.ONNXRUNTIME) + assert true_checker.check(self.env) is True + + false_checker = rewriter_utils.BackendChecker(Backend.TENSORRT) + assert false_checker.check(self.env) is False + + def test_ir_checker(self): + true_checker = rewriter_utils.IRChecker(IR.ONNX) + assert true_checker.check(self.env) is True + + false_checker = rewriter_utils.IRChecker(IR.TORCHSCRIPT) + assert false_checker.check(self.env) is False + + def test_lib_version_checker(self): + true_checker = rewriter_utils.LibVersionChecker( + 'mmdeploy', mmdeploy.__version__, mmdeploy.__version__) + assert true_checker.check(self.env) is True + + false_checker = rewriter_utils.LibVersionChecker( + 'mmdeploy', max_version='0.0.0') + assert false_checker.check(self.env) is False + + +def test_register_object(): + registry = RewriterRegistry() + checker = rewriter_utils.BackendChecker(Backend.ONNXRUNTIME) + + @registry.register_object( + 'add', + backend=Backend.DEFAULT.value, + ir=IR.DEFAULT, + extra_checkers=checker) + def add(a, b): + return a + b + + records = registry._rewrite_records + assert records is not None + assert records['add'] is not None + assert isinstance(records['add'][0]['_checkers'], list) + assert isinstance(records['add'][0]['_checkers'][0], BackendChecker) + assert records['add'][0]['_object'] is not None + add_func = records['add'][0]['_object'] + assert add_func(123, 456) == 123 + 456 + + +def test_get_records(): + registry = RewriterRegistry() + + @registry.register_object( + 'get_num', backend=Backend.ONNXRUNTIME.value, ir=IR.ONNX) + def get_num_1(): + return 1 + + @registry.register_object( + 'get_num', backend=Backend.ONNXRUNTIME.value, ir=IR.TORCHSCRIPT) + def get_num_2(): + return 2 + + @registry.register_object( + 'get_num', backend=Backend.TENSORRT.value, ir=IR.ONNX) + def get_num_3(): + return 3 + + @registry.register_object( + 'get_num', backend=Backend.TENSORRT.value, ir=IR.TORCHSCRIPT) + def get_num_4(): + return 4 + + @registry.register_object( + 'get_num', backend=Backend.DEFAULT.value, ir=IR.DEFAULT) + def get_num_5(): + return 5 + + records = dict( + registry.get_records(collect_env(Backend.ONNXRUNTIME, IR.ONNX))) + assert records['get_num']['_object']() == 1 + + records = dict( + registry.get_records(collect_env(Backend.ONNXRUNTIME, IR.TORCHSCRIPT))) + assert records['get_num']['_object']() == 2 + + records = dict( + registry.get_records(collect_env(Backend.TENSORRT, IR.ONNX))) + assert records['get_num']['_object']() == 3 + + records = dict( + registry.get_records(collect_env(Backend.TENSORRT, IR.TORCHSCRIPT))) + assert records['get_num']['_object']() == 4 + + records = dict(registry.get_records(collect_env(Backend.NCNN, IR.ONNX))) + assert records['get_num']['_object']() == 5 diff --git a/tests/test_csrc/CMakeLists.txt b/tests/test_csrc/CMakeLists.txt index 34cc0349dd..d7026ffec2 100644 --- a/tests/test_csrc/CMakeLists.txt 
+++ b/tests/test_csrc/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.14) project(tests) +if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES) + include(${CMAKE_SOURCE_DIR}/cmake/cuda.cmake) +endif() include(${CMAKE_SOURCE_DIR}/cmake/opencv.cmake) diff --git a/tests/test_csrc/core/test_value.cpp b/tests/test_csrc/core/test_value.cpp index 0ecc1c629b..f5cdf0075b 100644 --- a/tests/test_csrc/core/test_value.cpp +++ b/tests/test_csrc/core/test_value.cpp @@ -283,12 +283,22 @@ struct Doge { int value; }; +namespace mmdeploy { + +MMDEPLOY_REGISTER_TYPE_ID(Meow, 1234); +MMDEPLOY_REGISTER_TYPE_ID(Doge, 3456); + +} // namespace mmdeploy + template <> struct mmdeploy::is_cast_by_erasure : std::true_type {}; TEST_CASE("test dynamic interface for value", "[value]") { Value meow(Meow{100}); REQUIRE(meow.is_any()); + REQUIRE(meow.is_any()); + REQUIRE_FALSE(meow.is_any()); + REQUIRE_FALSE(meow.is_any()); REQUIRE(meow.get().value == 100); REQUIRE(meow.get_ref().value == 100); REQUIRE(meow.get_ptr() == &meow.get_ref()); diff --git a/tests/test_ops/test_ops.py b/tests/test_ops/test_ops.py index 22a4640d68..54ab2d7b12 100644 --- a/tests/test_ops/test_ops.py +++ b/tests/test_ops/test_ops.py @@ -16,7 +16,7 @@ TEST_NCNN = TestNCNNExporter() -@pytest.mark.parametrize('backend', [TEST_ONNXRT, TEST_TENSORRT]) +@pytest.mark.parametrize('backend', [TEST_TENSORRT]) @pytest.mark.parametrize('pool_h,pool_w,spatial_scale,sampling_ratio', [(2, 2, 1.0, 2), (4, 4, 2.0, 4)]) def test_roi_align(backend, @@ -214,6 +214,52 @@ def test_modulated_deform_conv(backend, save_dir=save_dir) +@pytest.mark.parametrize('backend', [TEST_TENSORRT]) +@pytest.mark.parametrize('in_channels,out_channels,stride,padding,' + 'dilation,groups,deform_groups,kernel_size', + [(3, 64, 1, 0, 1, 1, 1, 3), + (1, 32, 3, 2, 1, 1, 1, 3)]) +def test_deform_conv(backend, + in_channels, + out_channels, + stride, + padding, + dilation, + groups, + deform_groups, + kernel_size, + input_list=None, + save_dir=None): + backend.check_env() + + if input_list is None: + input = torch.rand( + 1, in_channels, 28, 28, requires_grad=False) # (n, c, h, w) + else: + input = torch.tensor(input_list[0]) + conv_offset = nn.Conv2d( + in_channels=in_channels, + out_channels=deform_groups * 2 * kernel_size * kernel_size, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=True) + offset = conv_offset(input) + + from mmcv.ops import DeformConv2d + model = DeformConv2d(in_channels, out_channels, kernel_size, stride, + padding, dilation, groups, deform_groups).eval() + + with RewriterContext(cfg={}, backend=backend.backend_name, opset=11): + backend.run_and_validate( + model, [input, offset], + 'deform_conv', + input_names=['input', 'offset'], + output_names=['output'], + save_dir=save_dir) + + @pytest.mark.parametrize('backend', [TEST_TENSORRT]) @pytest.mark.parametrize('dynamic_export', [True, False]) @pytest.mark.parametrize('fp16_mode', [True, False]) diff --git a/tests/test_utils/test_util.py b/tests/test_utils/test_util.py index e9f5ad33c2..d4e6764eec 100644 --- a/tests/test_utils/test_util.py +++ b/tests/test_utils/test_util.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
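A detail worth spelling out in the new test_deform_conv case above: the auxiliary convolution that predicts the offsets has to emit deform_groups * 2 * kernel_size * kernel_size channels, i.e. two offsets per kernel sampling location per deformable group. For the first parametrization that works out as:

# offset channels for (deform_groups=1, kernel_size=3) in the test above
deform_groups, kernel_size = 1, 3
offset_channels = deform_groups * 2 * kernel_size * kernel_size  # == 18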
+import importlib import logging import os import tempfile @@ -144,7 +145,8 @@ def test_get_onnx_config(self): class TestIsDynamic: - config_with_onnx_config = mmcv.Config(dict(onnx_config=dict())) + config_with_onnx_config = mmcv.Config( + dict(onnx_config=dict(), backend_config=dict(type='default'))) config_with_dynamic_axes = mmcv.Config( dict( @@ -154,7 +156,8 @@ class TestIsDynamic: 0: 'batch', 2: 'height', 3: 'width' - }}))) + }}), + backend_config=dict(type='default'))) config_with_dynamic_axes_and_input_names = mmcv.Config( dict( @@ -165,12 +168,14 @@ class TestIsDynamic: 0: 'batch', 2: 'height', 3: 'width' - }}))) + }}), + backend_config=dict(type='default'))) config_with_dynamic_axes_list = mmcv.Config( dict( onnx_config=dict( - type='onnx', input_names=['image'], dynamic_axes=[[0, 2, 3]]))) + type='onnx', input_names=['image'], dynamic_axes=[[0, 2, 3]]), + backend_config=dict(type='default'))) def test_is_dynamic_batch_none(self): assert util.is_dynamic_batch( @@ -440,3 +445,25 @@ def test_get_root_logger(): from mmdeploy.utils import get_root_logger logger = get_root_logger() logger.info('This is a test message') + + +def test_get_library_version(): + assert util.get_library_version('abcdefg') is None + try: + lib = importlib.import_module('setuptools') + except ImportError: + pass + else: + assert util.get_library_version('setuptools') == lib.__version__ + + +def test_get_codebase_version(): + versions = util.get_codebase_version() + for k, v in versions.items(): + assert v == util.get_library_version(k) + + +def test_get_backend_version(): + versions = util.get_backend_version() + for k, v in versions.items(): + assert v == util.get_library_version(k) diff --git a/tools/check_env.py b/tools/check_env.py index 68aa2799e7..3718db1bd5 100644 --- a/tools/check_env.py +++ b/tools/check_env.py @@ -4,49 +4,36 @@ from mmcv.utils import get_git_hash import mmdeploy -from mmdeploy.utils import get_root_logger +from mmdeploy.utils import (get_backend_version, get_codebase_version, + get_root_logger) def collect_env(): """Collect the information of the running environments.""" env_info = collect_base_env() - env_info['MMDeployment'] = f'{mmdeploy.__version__}+{get_git_hash()[:7]}' + env_info['MMDeploy'] = f'{mmdeploy.__version__}+{get_git_hash()[:7]}' return env_info def check_backend(): - try: - import onnxruntime as ort - except ImportError: - ort_version = None - else: - ort_version = ort.__version__ + backend_versions = get_backend_version() + ort_version = backend_versions['onnxruntime'] + trt_version = backend_versions['tensorrt'] + ncnn_version = backend_versions['ncnn'] + import mmdeploy.apis.onnxruntime as ort_apis logger = get_root_logger() - logger.info(f'onnxruntime: {ort_version} ops_is_avaliable : ' + logger.info(f'onnxruntime: {ort_version}\tops_is_avaliable : ' f'{ort_apis.is_available()}') - try: - import tensorrt as trt - except ImportError: - trt_version = None - else: - trt_version = trt.__version__ import mmdeploy.apis.tensorrt as trt_apis - logger.info( - f'tensorrt: {trt_version} ops_is_avaliable : {trt_apis.is_available()}' - ) - - try: - import ncnn - except ImportError: - ncnn_version = None - else: - ncnn_version = ncnn.__version__ + logger.info(f'tensorrt: {trt_version}\tops_is_avaliable : ' + f'{trt_apis.is_available()}') + import mmdeploy.apis.ncnn as ncnn_apis logger.info( - f'ncnn: {ncnn_version} ops_is_avaliable : {ncnn_apis.is_available()}') + f'ncnn: {ncnn_version}\tops_is_avaliable : {ncnn_apis.is_available()}') import mmdeploy.apis.pplnn as pplnn_apis 
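The rewritten check_env.py above leans on the new version helpers that test_util.py now covers. A rough usage sketch (aside, not part of the patch); the assumption here, consistent with how check_env.py indexes the result, is that libraries which are not installed show up with a None value:

# Sketch: both helpers return {library_name: version_or_None} mappings.
from mmdeploy.utils import (get_backend_version, get_codebase_version,
                            get_root_logger)

logger = get_root_logger()
versions = {**get_backend_version(), **get_codebase_version()}
for name, version in versions.items():
    logger.info(f'{name}:\t{version}')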
logger.info(f'pplnn_is_avaliable: {pplnn_apis.is_available()}') @@ -56,45 +43,9 @@ def check_backend(): def check_codebase(): - try: - import mmcls - except ImportError: - mmcls_version = None - else: - mmcls_version = mmcls.__version__ - logger.info(f'mmcls: {mmcls_version}') - - try: - import mmdet - except ImportError: - mmdet_version = None - else: - mmdet_version = mmdet.__version__ - logger.info(f'mmdet: {mmdet_version}') - - try: - import mmedit - except ImportError: - mmedit_version = None - else: - mmedit_version = mmedit.__version__ - logger.info(f'mmedit: {mmedit_version}') - - try: - import mmocr - except ImportError: - mmocr_version = None - else: - mmocr_version = mmocr.__version__ - logger.info(f'mmocr: {mmocr_version}') - - try: - import mmseg - except ImportError: - mmseg_version = None - else: - mmseg_version = mmseg.__version__ - logger.info(f'mmseg: {mmseg_version}') + codebase_versions = get_codebase_version() + for k, v in codebase_versions.items(): + logger.info(f'{k}:\t{v}') if __name__ == '__main__': diff --git a/tools/deploy.py b/tools/deploy.py index 80f7805b92..ca835a8611 100644 --- a/tools/deploy.py +++ b/tools/deploy.py @@ -10,9 +10,9 @@ from mmdeploy.apis import (create_calib_table, extract_model, get_predefined_partition_cfg, torch2onnx, - visualize_model) -from mmdeploy.utils import (Backend, get_backend, get_calib_filename, - get_ir_config, get_model_inputs, get_onnx_config, + torch2torchscript, visualize_model) +from mmdeploy.utils import (IR, Backend, get_backend, get_calib_filename, + get_ir_config, get_model_inputs, get_partition_config, get_root_logger, load_config, target_wrapper) from mmdeploy.utils.export_info import dump_info @@ -67,6 +67,21 @@ def create_process(name, target, args, kwargs, ret_value=None): logger.info(f'{name} success.') +def torch2ir(ir_type: IR): + """Return the conversion function from torch to the intermediate + representation. + + Args: + ir_type (IR): The type of the intermediate representation. 
+ """ + if ir_type == IR.ONNX: + return torch2onnx + elif ir_type == IR.TORCHSCRIPT: + return torch2torchscript + else: + raise KeyError(f'Unexpected IR type {ir_type}') + + def main(): args = parse_args() set_start_method('spawn') @@ -88,18 +103,20 @@ def main(): ret_value = mp.Value('d', 0, lock=False) - # convert onnx - onnx_save_file = get_onnx_config(deploy_cfg)['save_file'] + # convert to IR + ir_config = get_ir_config(deploy_cfg) + ir_save_file = ir_config['save_file'] + ir_type = IR.get(ir_config['type']) create_process( - 'torch2onnx', - target=torch2onnx, - args=(args.img, args.work_dir, onnx_save_file, deploy_cfg_path, + f'torch2{ir_type.value}', + target=torch2ir(ir_type), + args=(args.img, args.work_dir, ir_save_file, deploy_cfg_path, model_cfg_path, checkpoint_path), kwargs=dict(device=args.device), ret_value=ret_value) # convert backend - onnx_files = [osp.join(args.work_dir, onnx_save_file)] + ir_files = [osp.join(args.work_dir, ir_save_file)] # partition model partition_cfgs = get_partition_config(deploy_cfg) @@ -113,8 +130,8 @@ def main(): partition_cfgs = get_predefined_partition_cfg( deploy_cfg, partition_cfgs['type']) - origin_onnx_file = onnx_files[0] - onnx_files = [] + origin_ir_file = ir_files[0] + ir_files = [] for partition_cfg in partition_cfgs: save_file = partition_cfg['save_file'] save_path = osp.join(args.work_dir, save_file) @@ -125,11 +142,11 @@ def main(): create_process( f'partition model {save_file} with start: {start}, end: {end}', extract_model, - args=(origin_onnx_file, start, end), + args=(origin_ir_file, start, end), kwargs=dict(dynamic_axes=dynamic_axes, save_file=save_path), ret_value=ret_value) - onnx_files.append(save_path) + ir_files.append(save_path) # calib data calib_filename = get_calib_filename(deploy_cfg) @@ -147,12 +164,12 @@ def main(): device=args.device), ret_value=ret_value) - backend_files = onnx_files + backend_files = ir_files # convert backend backend = get_backend(deploy_cfg) if backend == Backend.TENSORRT: model_params = get_model_inputs(deploy_cfg) - assert len(model_params) == len(onnx_files) + assert len(model_params) == len(ir_files) from mmdeploy.apis.tensorrt import is_available as trt_is_available from mmdeploy.apis.tensorrt import onnx2tensorrt @@ -161,7 +178,7 @@ def main(): + ' please install TensorRT and build TensorRT custom ops first.' backend_files = [] for model_id, model_param, onnx_path in zip( - range(len(onnx_files)), model_params, onnx_files): + range(len(ir_files)), model_params, ir_files): onnx_name = osp.splitext(osp.split(onnx_path)[1])[0] save_file = model_param.get('save_file', onnx_name + '.engine') @@ -187,7 +204,7 @@ def main(): from mmdeploy.apis.ncnn import get_output_model_file, onnx2ncnn backend_files = [] - for onnx_path in onnx_files: + for onnx_path in ir_files: model_param_path, model_bin_path = get_output_model_file( onnx_path, args.work_dir) create_process( @@ -205,17 +222,20 @@ def main(): 'OpenVINO is not available, please install OpenVINO first.' 
from mmdeploy.apis.openvino import (get_input_info_from_cfg, + get_mo_options_from_cfg, get_output_model_file, onnx2openvino) openvino_files = [] - for onnx_path in onnx_files: + for onnx_path in ir_files: model_xml_path = get_output_model_file(onnx_path, args.work_dir) input_info = get_input_info_from_cfg(deploy_cfg) output_names = get_ir_config(deploy_cfg).output_names + mo_options = get_mo_options_from_cfg(deploy_cfg) create_process( f'onnx2openvino with {onnx_path}', target=onnx2openvino, - args=(input_info, output_names, onnx_path, args.work_dir), + args=(input_info, output_names, onnx_path, args.work_dir, + mo_options), kwargs=dict(), ret_value=ret_value) openvino_files.append(model_xml_path) @@ -228,7 +248,7 @@ def main(): from mmdeploy.apis.pplnn import onnx2pplnn pplnn_files = [] - for onnx_path in onnx_files: + for onnx_path in ir_files: algo_file = onnx_path.replace('.onnx', '.json') model_inputs = get_model_inputs(deploy_cfg) assert 'opt_shape' in model_inputs, 'Expect opt_shape ' \