diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index db6b9b8c885..38064c446aa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,7 +50,7 @@ jobs: run: pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} - name: Install mmdet dependencies run: | - pip install mmcv + pip install mmcv-full==latest+torch${{matrix.torch}}+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html pip install -r requirements.txt pip install "git+https://github.com/open-mmlab/cocoapi.git#subdirectory=pycocotools" - name: Lint with flake8 @@ -60,7 +60,7 @@ jobs: - name: Format with yapf run: yapf -r -d mmdet/ tools/ configs/ tests/ - name: Check docstring - run: interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --exclude mmdet/ops --ignore-regex "__repr__" --fail-under 80 mmdet + run: interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 80 mmdet - name: Build and install env: CUDA_ARCH: ${{matrix.cuda_arch}} diff --git a/.isort.cfg b/.isort.cfg index 0fff944ee29..947555524bb 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -3,6 +3,6 @@ line_length = 79 multi_line_output = 0 known_standard_library = setuptools known_first_party = mmdet -known_third_party = PIL,asynctest,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnx,pycocotools,pytest,robustness_eval,roi_align,roi_pool,seaborn,six,terminaltables,torch,torchvision +known_third_party = PIL,asynctest,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnx,pycocotools,pytest,robustness_eval,seaborn,six,terminaltables,torch,torchvision no_lines_before = STDLIB,LOCALFOLDER default_section = THIRDPARTY diff --git a/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py index 97bdedb55dc..f90b78cef38 100644 --- a/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py +++ b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py @@ -38,7 +38,7 @@ stage_loss_weights=[1, 0.5, 0.25], bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=[ @@ -95,7 +95,7 @@ ], mask_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), mask_head=dict( @@ -195,6 +195,6 @@ min_bbox_size=0), rcnn=dict( score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5)) diff --git a/configs/_base_/models/cascade_rcnn_r50_fpn.py b/configs/_base_/models/cascade_rcnn_r50_fpn.py index dfe941500cb..303276b845f 100644 --- a/configs/_base_/models/cascade_rcnn_r50_fpn.py +++ b/configs/_base_/models/cascade_rcnn_r50_fpn.py @@ -38,7 +38,7 @@ stage_loss_weights=[1, 0.5, 0.25], bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=[ @@ -178,4 +178,6 @@ nms_thr=0.7, min_bbox_size=0), rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) diff --git 
a/configs/_base_/models/fast_rcnn_r50_fpn.py b/configs/_base_/models/fast_rcnn_r50_fpn.py index 69bcc92dae5..b8d9570deea 100644 --- a/configs/_base_/models/fast_rcnn_r50_fpn.py +++ b/configs/_base_/models/fast_rcnn_r50_fpn.py @@ -20,7 +20,7 @@ type='StandardRoIHead', bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( @@ -57,4 +57,6 @@ debug=False)) test_cfg = dict( rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) diff --git a/configs/_base_/models/faster_rcnn_r50_caffe_c4.py b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py index 2dd990f2b8c..5a381636382 100644 --- a/configs/_base_/models/faster_rcnn_r50_caffe_c4.py +++ b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py @@ -43,7 +43,7 @@ norm_eval=True), bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=1024, featmap_strides=[16]), bbox_head=dict( @@ -111,4 +111,6 @@ nms_thr=0.7, min_bbox_size=0), rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) diff --git a/configs/_base_/models/faster_rcnn_r50_fpn.py b/configs/_base_/models/faster_rcnn_r50_fpn.py index 92ed16359e6..338a5c6b604 100644 --- a/configs/_base_/models/faster_rcnn_r50_fpn.py +++ b/configs/_base_/models/faster_rcnn_r50_fpn.py @@ -35,7 +35,7 @@ type='StandardRoIHead', bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( @@ -103,7 +103,9 @@ nms_thr=0.7, min_bbox_size=0), rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100) # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) + # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) ) diff --git a/configs/_base_/models/mask_rcnn_r50_caffe_c4.py b/configs/_base_/models/mask_rcnn_r50_caffe_c4.py index c77f292c18d..b9b29b0b99d 100644 --- a/configs/_base_/models/mask_rcnn_r50_caffe_c4.py +++ b/configs/_base_/models/mask_rcnn_r50_caffe_c4.py @@ -43,7 +43,7 @@ norm_eval=True), bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=1024, featmap_strides=[16]), bbox_head=dict( @@ -122,6 +122,6 @@ min_bbox_size=0), rcnn=dict( score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5)) diff --git a/configs/_base_/models/mask_rcnn_r50_fpn.py b/configs/_base_/models/mask_rcnn_r50_fpn.py index 470653bd149..4472bd0a80d 100644 --- a/configs/_base_/models/mask_rcnn_r50_fpn.py +++ b/configs/_base_/models/mask_rcnn_r50_fpn.py @@ -36,7 +36,7 @@ type='StandardRoIHead', bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), + 
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( @@ -55,7 +55,7 @@ loss_bbox=dict(type='L1Loss', loss_weight=1.0)), mask_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), mask_head=dict( @@ -119,6 +119,6 @@ min_bbox_size=0), rcnn=dict( score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5)) diff --git a/configs/_base_/models/retinanet_r50_fpn.py b/configs/_base_/models/retinanet_r50_fpn.py index f51f0863ced..a08b14f6099 100644 --- a/configs/_base_/models/retinanet_r50_fpn.py +++ b/configs/_base_/models/retinanet_r50_fpn.py @@ -56,5 +56,5 @@ nms_pre=1000, min_bbox_size=0, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100) diff --git a/configs/_base_/models/ssd300.py b/configs/_base_/models/ssd300.py index fde4df3e6d0..ee7cf3adc8a 100644 --- a/configs/_base_/models/ssd300.py +++ b/configs/_base_/models/ssd300.py @@ -43,7 +43,7 @@ neg_pos_ratio=3, debug=False) test_cfg = dict( - nms=dict(type='nms', iou_thr=0.45), + nms=dict(type='nms', iou_threshold=0.45), min_bbox_size=0, score_thr=0.02, max_per_img=200) diff --git a/configs/atss/atss_r50_fpn_1x_coco.py b/configs/atss/atss_r50_fpn_1x_coco.py index f359f0bb9b4..e787622c24b 100644 --- a/configs/atss/atss_r50_fpn_1x_coco.py +++ b/configs/atss/atss_r50_fpn_1x_coco.py @@ -56,7 +56,7 @@ nms_pre=1000, min_bbox_size=0, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.6), + nms=dict(type='nms', iou_threshold=0.6), max_per_img=100) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py index 84651e7e566..081b998f6f5 100644 --- a/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py index a25627b88e1..3b3683af235 100644 --- a/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py index 2a9a87e77c2..daaa4729c82 100644 --- a/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py' model = dict( backbone=dict( - 
dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py index 0c1ebd7bbd0..a01df33c94e 100644 --- a/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py index e24c9dcc920..aa664bd61c7 100644 --- a/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py index 6ad09e9bc38..f5fee7e13cd 100644 --- a/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py index 44259a4a04e..8787088f27a 100644 --- a/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py index 40396b91c89..1b695f0e190 100644 --- a/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py +++ b/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py @@ -5,11 +5,8 @@ type='SingleRoIExtractor', roi_layer=dict( _delete_=True, - type='DeformRoIPoolingPack', - out_size=7, - out_channels=256, - no_trans=False, - group_size=1, - trans_std=0.1), + type='DeformRoIPoolPack', + output_size=7, + output_channels=256), out_channels=256, featmap_strides=[4, 8, 16, 32]))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py index 0452b7219a9..d1bcf3c102f 100644 --- a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py +++ b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCNv2', deform_groups=1, 
fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py index a18a5abe115..d0ab89c261f 100644 --- a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py +++ b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCNv2', deformable_groups=4, fallback_on_stride=False), + dcn=dict(type='DCNv2', deform_groups=4, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py index cfeb6d92a2f..ad7b0346a63 100644 --- a/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py +++ b/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py @@ -5,11 +5,8 @@ type='SingleRoIExtractor', roi_layer=dict( _delete_=True, - type='ModulatedDeformRoIPoolingPack', - out_size=7, - out_channels=256, - no_trans=False, - group_size=1, - trans_std=0.1), + type='ModulatedDeformRoIPoolPack', + output_size=7, + output_channels=256), out_channels=256, featmap_strides=[4, 8, 16, 32]))) diff --git a/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py index 936c034ff9f..8357766f50f 100644 --- a/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py @@ -11,5 +11,5 @@ frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py index 16e19212966..cb340022ea2 100644 --- a/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py index f942b5cde3d..ababe58dc3f 100644 --- a/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py +++ b/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py index 7c8cc1e2109..5ca2a67cde6 100644 --- a/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py +++ b/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py @@ -1,5 +1,5 @@ _base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' model = dict( backbone=dict( - dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git 
a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py index a2a1e2a85c9..bbefd27aa02 100644 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py @@ -12,5 +12,5 @@ stages=(False, False, True, True), position='after_conv2') ], - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py index 04ec96e7c12..b1f26c081da 100644 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py @@ -12,5 +12,5 @@ stages=(False, False, True, True), position='after_conv2') ], - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py index 05f61483e4b..8ba6b017ff6 100644 --- a/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py @@ -7,5 +7,5 @@ test_cfg = dict( rcnn=dict( score_thr=0.05, - nms=dict(type='soft_nms', iou_thr=0.5), + nms=dict(type='soft_nms', iou_threshold=0.5), max_per_img=100)) diff --git a/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_4x4_1x_coco.py index 2a764e13dd1..d83fa17f173 100644 --- a/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_4x4_1x_coco.py +++ b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -10,7 +10,7 @@ conv_bias=True, loss_bbox=dict(type='GIoULoss', loss_weight=1.0))) # training and testing settings -test_cfg = dict(nms=dict(type='nms', iou_thr=0.6)) +test_cfg = dict(nms=dict(type='nms', iou_threshold=0.6)) # dataset settings img_norm_cfg = dict( diff --git a/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_4x4_1x_coco.py b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_4x4_1x_coco.py index 31a89a8cc5c..67edb415c5f 100644 --- a/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_4x4_1x_coco.py +++ b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_4x4_1x_coco.py @@ -3,7 +3,7 @@ model = dict( pretrained='open-mmlab://detectron2/resnet50_caffe', backbone=dict( - dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True)), bbox_head=dict( norm_on_bbox=True, @@ -13,7 +13,7 @@ conv_bias=True, loss_bbox=dict(type='GIoULoss', loss_weight=1.0))) # training and testing settings -test_cfg = dict(nms=dict(type='nms', iou_thr=0.6)) +test_cfg = dict(nms=dict(type='nms', iou_threshold=0.6)) # dataset settings img_norm_cfg = dict( diff --git a/configs/fcos/fcos_r50_caffe_fpn_4x4_1x_coco.py b/configs/fcos/fcos_r50_caffe_fpn_4x4_1x_coco.py index 
b4a826fed36..4697e9e7efc 100644 --- a/configs/fcos/fcos_r50_caffe_fpn_4x4_1x_coco.py +++ b/configs/fcos/fcos_r50_caffe_fpn_4x4_1x_coco.py @@ -56,7 +56,7 @@ nms_pre=1000, min_bbox_size=0, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100) img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py index 9c17c171e70..b0bcad9e101 100644 --- a/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py +++ b/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -55,7 +55,7 @@ nms_pre=1000, min_bbox_size=0, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100) img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) diff --git a/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py b/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py index 9bafedd3d91..4b62c81212e 100644 --- a/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py +++ b/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py @@ -45,7 +45,7 @@ test_cfg = dict( nms_pre=1000, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100) data = dict(samples_per_gpu=4, workers_per_gpu=4) # optimizer diff --git a/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py b/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py index 235ea248506..eab622b2e8b 100644 --- a/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py +++ b/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py @@ -8,7 +8,7 @@ out_indices=(0, 1, 2, 3), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True), norm_eval=True, style='pytorch')) diff --git a/configs/gfl/gfl_r50_fpn_1x_coco.py b/configs/gfl/gfl_r50_fpn_1x_coco.py index 99ac7e43ef6..77a15ebce37 100644 --- a/configs/gfl/gfl_r50_fpn_1x_coco.py +++ b/configs/gfl/gfl_r50_fpn_1x_coco.py @@ -51,7 +51,7 @@ nms_pre=1000, min_bbox_size=0, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.6), + nms=dict(type='nms', iou_threshold=0.6), max_per_img=100) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py b/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py index 78948b65a13..a2370e234df 100644 --- a/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py +++ b/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py @@ -11,7 +11,7 @@ out_indices=(0, 1, 2, 3), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, False, True, True), norm_eval=True, style='pytorch')) diff --git a/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py index c4bc3859dd3..1b40e039c1e 100644 --- a/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py +++ b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py @@ -39,7 +39,7 @@ type='GridRoIHead', bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), + 
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( @@ -56,7 +56,7 @@ reg_class_agnostic=False), grid_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), grid_head=dict( @@ -119,7 +119,9 @@ nms_thr=0.7, min_bbox_size=0), rcnn=dict( - score_thr=0.03, nms=dict(type='nms', iou_thr=0.3), max_per_img=100)) + score_thr=0.03, + nms=dict(type='nms', iou_threshold=0.3), + max_per_img=100)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) diff --git a/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py b/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py index 74c01e561b3..0fc528bfd49 100644 --- a/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py +++ b/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py @@ -5,7 +5,7 @@ bbox_roi_extractor=dict( type='GenericRoIExtractor', aggregation='sum', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( diff --git a/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py b/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py index afc9ca9de50..8e4b4ab2351 100644 --- a/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py +++ b/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py @@ -5,7 +5,7 @@ bbox_roi_extractor=dict( type='GenericRoIExtractor', aggregation='sum', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( @@ -25,7 +25,7 @@ kv_stride=2)), grid_roi_extractor=dict( type='GenericRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( diff --git a/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py b/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py index d59e85f3b74..8b83722197c 100644 --- a/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py +++ b/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py @@ -5,7 +5,7 @@ bbox_roi_extractor=dict( type='GenericRoIExtractor', aggregation='sum', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( @@ -25,7 +25,7 @@ kv_stride=2)), mask_roi_extractor=dict( type='GenericRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( diff --git a/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py b/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py index 462fd5711dc..81dfb4873bd 100644 --- a/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py +++ b/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py @@ -5,7 +5,7 @@ bbox_roi_extractor=dict( type='GenericRoIExtractor', aggregation='sum', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + 
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( @@ -25,7 +25,7 @@ kv_stride=2)), mask_roi_extractor=dict( type='GenericRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( diff --git a/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py b/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py index d16d83493ef..852c5ca7c5c 100644 --- a/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py +++ b/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py @@ -5,7 +5,7 @@ bbox_roi_extractor=dict( type='GenericRoIExtractor', aggregation='sum', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( @@ -25,7 +25,7 @@ kv_stride=2)), mask_roi_extractor=dict( type='GenericRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( diff --git a/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py index 987f9a0f2f0..f6c487bf18f 100644 --- a/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py +++ b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py @@ -87,7 +87,7 @@ nms_pre=1000, min_bbox_size=0, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100) # dataset settings dataset_type = 'CocoDataset' diff --git a/configs/htc/htc_r50_fpn_1x_coco.py b/configs/htc/htc_r50_fpn_1x_coco.py index 6e1880fb9c4..929cf464f60 100644 --- a/configs/htc/htc_r50_fpn_1x_coco.py +++ b/configs/htc/htc_r50_fpn_1x_coco.py @@ -3,7 +3,7 @@ roi_head=dict( semantic_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=256, featmap_strides=[8]), semantic_head=dict( diff --git a/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py b/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py index a7bc763cd44..81ed3a8a03a 100644 --- a/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py +++ b/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py @@ -44,7 +44,7 @@ stage_loss_weights=[1, 0.5, 0.25], bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=[ @@ -101,7 +101,7 @@ ], mask_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), mask_head=[ @@ -216,7 +216,7 @@ min_bbox_size=0), rcnn=dict( score_thr=0.001, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5)) img_norm_cfg = dict( diff --git a/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py 
b/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py index 31415eee747..4a98ff28588 100644 --- a/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py +++ b/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py @@ -12,7 +12,7 @@ norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) # dataset settings img_norm_cfg = dict( diff --git a/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py index 3c4234cb2bc..5899444adf0 100644 --- a/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py +++ b/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py @@ -30,7 +30,10 @@ bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict( - type='RoIAlign', out_size=7, sample_num=2, aligned=False)), + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False)), bbox_head=[ dict( type='Shared2FCBBoxHead', @@ -69,5 +72,8 @@ mask_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict( - type='RoIAlign', out_size=14, sample_num=2, aligned=False)))) + type='RoIAlign', + output_size=14, + sampling_ratio=2, + aligned=False)))) dist_params = dict(backend='nccl', port=29515) diff --git a/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py index 482e1112838..1cb833cfbcd 100644 --- a/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py +++ b/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py @@ -22,7 +22,10 @@ bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict( - type='RoIAlign', out_size=7, sample_num=2, aligned=False), + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( diff --git a/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py index 5f519002844..0b200610191 100644 --- a/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py +++ b/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py @@ -13,11 +13,17 @@ bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict( - type='RoIAlign', out_size=7, sample_num=2, aligned=False)), + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False)), mask_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict( - type='RoIAlign', out_size=14, sample_num=2, aligned=False)), + type='RoIAlign', + output_size=14, + sampling_ratio=2, + aligned=False)), bbox_head=dict( bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)))) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py index d0aefcee236..431e5ab3367 100644 --- a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py @@ -7,12 +7,18 @@ roi_head=dict( bbox_roi_extractor=dict( roi_layer=dict( - type='RoIAlign', out_size=7, sample_num=2, aligned=False)), + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False)), bbox_head=dict( loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), mask_roi_extractor=dict( roi_layer=dict( - type='RoIAlign', out_size=14, sample_num=2, aligned=False)))) + 
type='RoIAlign', + output_size=14, + sampling_ratio=2, + aligned=False)))) # use caffe img_norm img_norm_cfg = dict( mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) diff --git a/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py index 0938fed7d3c..76dde57d8a4 100644 --- a/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py +++ b/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -22,7 +22,7 @@ add_extra_convs=True, num_outs=5, norm_cfg=dict(type='BN'), - conv_cfg=dict(type='DCNv2', deformable_groups=2)), + conv_cfg=dict(type='DCNv2', deform_groups=2)), bbox_head=dict( type='FCOSHead', num_classes=80, @@ -55,7 +55,7 @@ nms_pre=1000, min_bbox_size=0, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.6), + nms=dict(type='nms', iou_threshold=0.6), max_per_img=100) img_norm_cfg = dict( diff --git a/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py index 6fc61b060c6..a22f8f1998c 100644 --- a/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py +++ b/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -22,7 +22,7 @@ add_extra_convs=True, num_outs=5, norm_cfg=dict(type='BN'), - conv_cfg=dict(type='DCNv2', deformable_groups=2)), + conv_cfg=dict(type='DCNv2', deform_groups=2)), bbox_head=dict( type='NASFCOSHead', num_classes=80, @@ -54,7 +54,7 @@ nms_pre=1000, min_bbox_size=0, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.6), + nms=dict(type='nms', iou_threshold=0.6), max_per_img=100) img_norm_cfg = dict( diff --git a/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py index a61f40b7751..dc7f97554b2 100644 --- a/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py +++ b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py @@ -7,7 +7,8 @@ mask_roi_extractor=dict( type='GenericRoIExtractor', aggregation='concat', - roi_layer=dict(_delete_=True, type='SimpleRoIAlign', out_size=14), + roi_layer=dict( + _delete_=True, type='SimpleRoIAlign', output_size=14), out_channels=256, featmap_strides=[4]), mask_head=dict( diff --git a/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py index 39dd6f48f64..dd5153e6ef0 100644 --- a/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py +++ b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py @@ -2,5 +2,5 @@ model = dict( pretrained='open-mmlab://regnetx_3.2gf', backbone=dict( - dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py b/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py index 41be70e23c1..241754cfb45 100644 --- a/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py +++ b/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py @@ -3,5 +3,5 @@ pretrained='torchvision://resnet101', backbone=dict( depth=101, - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, 
True, True, True))) diff --git a/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py b/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py index a1a0c23b1e2..6d1c89b2082 100644 --- a/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py +++ b/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py @@ -62,6 +62,6 @@ nms_pre=1000, min_bbox_size=0, score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), + nms=dict(type='nms', iou_threshold=0.5), max_per_img=100) optimizer = dict(lr=0.01) diff --git a/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py b/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py index 34d577e6040..c33019da0cc 100644 --- a/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py +++ b/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py @@ -11,5 +11,5 @@ frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) diff --git a/docs/config.md b/docs/config.md index d4dd1ab5e76..a4d48935563 100644 --- a/docs/config.md +++ b/docs/config.md @@ -96,8 +96,8 @@ model = dict( type='SingleRoIExtractor', # Type of the RoI feature extractor, most of methods uses SingleRoIExtractor. Refer to https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/roi_heads/roi_extractors/single_level.py#L10 for details. roi_layer=dict( # Config of RoI Layer type='RoIAlign', # Type of RoI Layer, DeformRoIPoolingPack and ModulatedDeformRoIPoolingPack are also supported. Refer to https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/roi_align/roi_align.py#L79 for details. - out_size=7, # The output size of feature maps. - sample_num=0), # Sampling ratio when extracting the RoI features. 0 means adaptive ratio. + output_size=7, # The output size of feature maps. + sampling_ratio=0), # Sampling ratio when extracting the RoI features. 0 means adaptive ratio. out_channels=256, # output channels of the extracted feature. featmap_strides=[4, 8, 16, 32]), # Strides of multi-scale feature maps. It should be consistent to the architecture of the backbone. bbox_head=dict( # Config of box head in the RoIHead. @@ -122,8 +122,8 @@ model = dict( type='SingleRoIExtractor', # Type of the RoI feature extractor, most of methods uses SingleRoIExtractor. roi_layer=dict( # Config of RoI Layer that extracts features for instance segmentation type='RoIAlign', # Type of RoI Layer, DeformRoIPoolingPack and ModulatedDeformRoIPoolingPack are also supported - out_size=14, # The output size of feature maps. - sample_num=0), # Sampling ratio when extracting the RoI features. + output_size=14, # The output size of feature maps. + sampling_ratio=0), # Sampling ratio when extracting the RoI features. out_channels=256, # Output channels of the extracted feature. featmap_strides=[4, 8, 16, 32]), # Strides of multi-scale feature maps. mask_head=dict( # Mask prediction head diff --git a/docs/install.md b/docs/install.md index ebe8cef9763..1681a9ea48d 100644 --- a/docs/install.md +++ b/docs/install.md @@ -45,14 +45,24 @@ conda install pytorch=1.3.1 cudatoolkit=9.2 torchvision=0.4.2 -c pytorch If you build PyTorch from source instead of installing the prebuilt pacakge, you can use more CUDA versions such as 9.0. -c. Clone the mmdetection repository. +c. 
Install mmcv, you can [install](https://github.com/open-mmlab/mmcv#install-with-pip) the pre-build mmcv. +Or you can choose either to compile mmcv from source by the following command + +``` +git clone https://github.com/open-mmlab/mmcv.git +cd mmcv +pip install -e . +cd .. +``` + +d. Clone the mmdetection repository. ```shell git clone https://github.com/open-mmlab/mmdetection.git cd mmdetection ``` -d. Install build requirements and then install mmdetection. +e. Install build requirements and then install mmdetection. (We install our forked version of pycocotools via the github repo instead of pypi for better compatibility with our repo.) diff --git a/mmdet/VERSION b/mmdet/VERSION index c043eea7767..bce4c60e36c 100644 --- a/mmdet/VERSION +++ b/mmdet/VERSION @@ -1 +1 @@ -2.2.1 +2.3.0rc0 diff --git a/mmdet/apis/inference.py b/mmdet/apis/inference.py index 400e487f146..bfc74a19423 100644 --- a/mmdet/apis/inference.py +++ b/mmdet/apis/inference.py @@ -3,13 +3,13 @@ import matplotlib.pyplot as plt import mmcv import torch +from mmcv.ops import RoIAlign, RoIPool from mmcv.parallel import collate, scatter from mmcv.runner import load_checkpoint from mmdet.core import get_classes from mmdet.datasets.pipelines import Compose from mmdet.models import build_detector -from mmdet.ops import RoIAlign, RoIPool def init_detector(config, checkpoint=None, device='cuda:0'): diff --git a/mmdet/apis/test.py b/mmdet/apis/test.py index 62997201fa5..a73c54f57e1 100644 --- a/mmdet/apis/test.py +++ b/mmdet/apis/test.py @@ -100,7 +100,9 @@ def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): results.append(result) if rank == 0: - batch_size = len(data['img_metas'][0].data) + batch_size = ( + len(data['img_meta'].data) + if 'img_meta' in data else len(data['img_metas'][0].data)) for _ in range(batch_size * world_size): prog_bar.update() diff --git a/mmdet/core/bbox/samplers/score_hlr_sampler.py b/mmdet/core/bbox/samplers/score_hlr_sampler.py index 8885df985b3..3089451acf5 100644 --- a/mmdet/core/bbox/samplers/score_hlr_sampler.py +++ b/mmdet/core/bbox/samplers/score_hlr_sampler.py @@ -1,6 +1,6 @@ import torch +from mmcv.ops import nms_match -from mmdet.ops import nms_match from ..builder import BBOX_SAMPLERS from ..transforms import bbox2roi from .base_sampler import BaseSampler diff --git a/mmdet/core/mask/structures.py b/mmdet/core/mask/structures.py index 095c7aa69a5..b7888c39bbc 100644 --- a/mmdet/core/mask/structures.py +++ b/mmdet/core/mask/structures.py @@ -4,8 +4,7 @@ import numpy as np import pycocotools.mask as maskUtils import torch - -from mmdet.ops.roi_align import roi_align +from mmcv.ops.roi_align import roi_align class BaseInstanceMasks(metaclass=ABCMeta): @@ -280,7 +279,7 @@ def crop_and_resize(self, gt_masks_th = torch.from_numpy(self.masks).to(device).index_select( 0, inds).to(dtype=rois.dtype) targets = roi_align(gt_masks_th[:, None, :, :], rois, out_shape, - 1.0, 0, True).squeeze(1) + 1.0, 0, 'avg', True).squeeze(1) resized_masks = (targets >= 0.5).cpu().numpy() else: resized_masks = [] diff --git a/mmdet/core/post_processing/bbox_nms.py b/mmdet/core/post_processing/bbox_nms.py index a49e430e72f..b583dbffa82 100644 --- a/mmdet/core/post_processing/bbox_nms.py +++ b/mmdet/core/post_processing/bbox_nms.py @@ -1,6 +1,5 @@ import torch - -from mmdet.ops.nms import batched_nms +from mmcv.ops.nms import batched_nms def multiclass_nms(multi_bboxes, diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py index b4e8d62732a..ed203d32daa 100644 
--- a/mmdet/core/post_processing/merge_augs.py +++ b/mmdet/core/post_processing/merge_augs.py @@ -1,7 +1,7 @@ import numpy as np import torch +from mmcv.ops import nms -from mmdet.ops import nms from ..bbox import bbox_mapping_back @@ -36,7 +36,9 @@ def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): flip_direction) recovered_proposals.append(_proposals) aug_proposals = torch.cat(recovered_proposals, dim=0) - merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr) + merged_proposals, _ = nms(aug_proposals[:, :4].contiguous(), + aug_proposals[:, -1].contiguous(), + rpn_test_cfg.nms_thr) scores = merged_proposals[:, 4] _, order = scores.sort(0, descending=True) num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py index 53c0e966c86..5f9dee7bbc2 100644 --- a/mmdet/models/backbones/resnet.py +++ b/mmdet/models/backbones/resnet.py @@ -1,11 +1,10 @@ import torch.nn as nn import torch.utils.checkpoint as cp -from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init, - kaiming_init) +from mmcv.cnn import (build_conv_layer, build_norm_layer, build_plugin_layer, + constant_init, kaiming_init) from mmcv.runner import load_checkpoint from torch.nn.modules.batchnorm import _BatchNorm -from mmdet.ops import build_plugin_layer from mmdet.utils import get_root_logger from ..builder import BACKBONES from ..utils import ResLayer diff --git a/mmdet/models/dense_heads/fovea_head.py b/mmdet/models/dense_heads/fovea_head.py index 37047c7dfe0..afe70515410 100644 --- a/mmdet/models/dense_heads/fovea_head.py +++ b/mmdet/models/dense_heads/fovea_head.py @@ -1,9 +1,9 @@ import torch import torch.nn as nn from mmcv.cnn import ConvModule, normal_init +from mmcv.ops import DeformConv2d from mmdet.core import multi_apply, multiclass_nms -from mmdet.ops import DeformConv from ..builder import HEADS from .anchor_free_head import AnchorFreeHead @@ -16,17 +16,17 @@ def __init__(self, in_channels, out_channels, kernel_size=3, - deformable_groups=4): + deform_groups=4): super(FeatureAlign, self).__init__() offset_channels = kernel_size * kernel_size * 2 self.conv_offset = nn.Conv2d( - 4, deformable_groups * offset_channels, 1, bias=False) - self.conv_adaption = DeformConv( + 4, deform_groups * offset_channels, 1, bias=False) + self.conv_adaption = DeformConv2d( in_channels, out_channels, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, - deformable_groups=deformable_groups) + deform_groups=deform_groups) self.relu = nn.ReLU(inplace=True) def init_weights(self): @@ -53,13 +53,13 @@ def __init__(self, 512)), sigma=0.4, with_deform=False, - deformable_groups=4, + deform_groups=4, **kwargs): self.base_edge_list = base_edge_list self.scale_ranges = scale_ranges self.sigma = sigma self.with_deform = with_deform - self.deformable_groups = deformable_groups + self.deform_groups = deform_groups super().__init__(num_classes, in_channels, **kwargs) def _init_layers(self): @@ -95,7 +95,7 @@ def _init_layers(self): self.feat_channels, self.feat_channels, kernel_size=3, - deformable_groups=self.deformable_groups) + deform_groups=self.deform_groups) self.conv_cls = nn.Conv2d( int(self.feat_channels * 4), self.cls_out_channels, diff --git a/mmdet/models/dense_heads/ga_retina_head.py b/mmdet/models/dense_heads/ga_retina_head.py index e85c9ac474d..8822d1ca78e 100644 --- a/mmdet/models/dense_heads/ga_retina_head.py +++ b/mmdet/models/dense_heads/ga_retina_head.py @@ -1,7 +1,7 @@ import torch.nn as nn from mmcv.cnn 
import ConvModule, bias_init_with_prob, normal_init +from mmcv.ops import MaskedConv2d -from mmdet.ops import MaskedConv2d from ..builder import HEADS from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead @@ -55,12 +55,12 @@ def _init_layers(self): self.feat_channels, self.feat_channels, kernel_size=3, - deformable_groups=self.deformable_groups) + deform_groups=self.deform_groups) self.feature_adaption_reg = FeatureAdaption( self.feat_channels, self.feat_channels, kernel_size=3, - deformable_groups=self.deformable_groups) + deform_groups=self.deform_groups) self.retina_cls = MaskedConv2d( self.feat_channels, self.num_anchors * self.cls_out_channels, diff --git a/mmdet/models/dense_heads/ga_rpn_head.py b/mmdet/models/dense_heads/ga_rpn_head.py index 4e3a262c09d..d7a9d824803 100644 --- a/mmdet/models/dense_heads/ga_rpn_head.py +++ b/mmdet/models/dense_heads/ga_rpn_head.py @@ -2,8 +2,8 @@ import torch.nn as nn import torch.nn.functional as F from mmcv.cnn import normal_init +from mmcv.ops import nms -from mmdet.ops import nms from ..builder import HEADS from .guided_anchor_head import GuidedAnchorHead from .rpn_test_mixin import RPNTestMixin @@ -117,15 +117,14 @@ def _get_bboxes_single(self, as_tuple=False).squeeze() proposals = proposals[valid_inds, :] scores = scores[valid_inds] - proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1) # NMS in current level - proposals, _ = nms(proposals, cfg.nms_thr) + proposals, _ = nms(proposals, scores, cfg.nms_thr) proposals = proposals[:cfg.nms_post, :] mlvl_proposals.append(proposals) proposals = torch.cat(mlvl_proposals, 0) if cfg.nms_across_levels: # NMS across multi levels - proposals, _ = nms(proposals, cfg.nms_thr) + proposals, _ = nms(proposals[:, :4], proposals[:, -1], cfg.nms_thr) proposals = proposals[:cfg.max_num, :] else: scores = proposals[:, 4] diff --git a/mmdet/models/dense_heads/guided_anchor_head.py b/mmdet/models/dense_heads/guided_anchor_head.py index 63689735fdb..4be75830ef5 100644 --- a/mmdet/models/dense_heads/guided_anchor_head.py +++ b/mmdet/models/dense_heads/guided_anchor_head.py @@ -1,12 +1,12 @@ import torch import torch.nn as nn from mmcv.cnn import bias_init_with_prob, normal_init +from mmcv.ops import DeformConv2d, MaskedConv2d from mmdet.core import (anchor_inside_flags, build_anchor_generator, build_assigner, build_bbox_coder, build_sampler, calc_region, force_fp32, images_to_levels, multi_apply, multiclass_nms, unmap) -from mmdet.ops import DeformConv, MaskedConv2d from ..builder import HEADS, build_loss from .anchor_head import AnchorHead @@ -16,30 +16,30 @@ class FeatureAdaption(nn.Module): Feature Adaption Module is implemented based on DCN v1. It uses anchor shape prediction rather than feature map to - predict offsets of deformable conv layer. + predict offsets of deform conv layer. Args: in_channels (int): Number of channels in the input feature map. out_channels (int): Number of channels in the output feature map. kernel_size (int): Deformable conv kernel size. - deformable_groups (int): Deformable conv group size. + deform_groups (int): Deformable conv group size. 
""" def __init__(self, in_channels, out_channels, kernel_size=3, - deformable_groups=4): + deform_groups=4): super(FeatureAdaption, self).__init__() offset_channels = kernel_size * kernel_size * 2 self.conv_offset = nn.Conv2d( - 2, deformable_groups * offset_channels, 1, bias=False) - self.conv_adaption = DeformConv( + 2, deform_groups * offset_channels, 1, bias=False) + self.conv_adaption = DeformConv2d( in_channels, out_channels, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, - deformable_groups=deformable_groups) + deform_groups=deform_groups) self.relu = nn.ReLU(inplace=True) def init_weights(self): @@ -74,7 +74,7 @@ class GuidedAnchorHead(AnchorHead): square_anchor_generator (dict): Config dict for square generator anchor_coder (dict): Config dict for anchor coder bbox_coder (dict): Config dict for bbox coder - deformable_groups: (int): Group number of DCN in + deform_groups: (int): Group number of DCN in FeatureAdaption module. loc_filter_thr (float): Threshold to filter out unconcerned regions. background_label (int | None): Label ID of background, set as 0 for @@ -113,7 +113,7 @@ def __init__( target_stds=[1.0, 1.0, 1.0, 1.0] ), reg_decoded_bbox=False, - deformable_groups=4, + deform_groups=4, loc_filter_thr=0.01, background_label=None, train_cfg=None, @@ -133,7 +133,7 @@ def __init__( self.in_channels = in_channels self.num_classes = num_classes self.feat_channels = feat_channels - self.deformable_groups = deformable_groups + self.deform_groups = deform_groups self.loc_filter_thr = loc_filter_thr # build approx_anchor_generator and square_anchor_generator @@ -209,7 +209,7 @@ def _init_layers(self): self.in_channels, self.feat_channels, kernel_size=3, - deformable_groups=self.deformable_groups) + deform_groups=self.deform_groups) self.conv_cls = MaskedConv2d(self.feat_channels, self.num_anchors * self.cls_out_channels, 1) @@ -636,8 +636,8 @@ def loss_loc_single(self, loc_pred, loc_target, loc_weight, loc_avg_factor): loss_loc = self.loss_loc( loc_pred.reshape(-1, 1), - loc_target.reshape(-1, 1).long(), - loc_weight.reshape(-1, 1), + loc_target.reshape(-1).long(), + loc_weight.reshape(-1), avg_factor=loc_avg_factor) return loss_loc diff --git a/mmdet/models/dense_heads/nasfcos_head.py b/mmdet/models/dense_heads/nasfcos_head.py index 472ec73d3c4..994ce0455e1 100644 --- a/mmdet/models/dense_heads/nasfcos_head.py +++ b/mmdet/models/dense_heads/nasfcos_head.py @@ -23,7 +23,7 @@ def _init_layers(self): type='DCNv2', kernel_size=3, use_bias=True, - deformable_groups=2, + deform_groups=2, padding=1) conv3x3_config = dict(type='Conv', kernel_size=3, padding=1) conv1x1_config = dict(type='Conv', kernel_size=1) diff --git a/mmdet/models/dense_heads/reppoints_head.py b/mmdet/models/dense_heads/reppoints_head.py index 72c20c8a875..447b648c96c 100644 --- a/mmdet/models/dense_heads/reppoints_head.py +++ b/mmdet/models/dense_heads/reppoints_head.py @@ -2,10 +2,10 @@ import torch import torch.nn as nn from mmcv.cnn import ConvModule, bias_init_with_prob, normal_init +from mmcv.ops import DeformConv2d from mmdet.core import (PointGenerator, build_assigner, build_sampler, images_to_levels, multi_apply, multiclass_nms, unmap) -from mmdet.ops import DeformConv from ..builder import HEADS, build_loss from .anchor_free_head import AnchorFreeHead @@ -57,7 +57,7 @@ def __init__(self, self.use_grid_points = use_grid_points self.center_init = center_init - # we use deformable conv to extract points features + # we use deform conv to extract points features self.dcn_kernel = int(np.sqrt(num_points)) 
self.dcn_pad = int((self.dcn_kernel - 1) / 2) assert self.dcn_kernel * self.dcn_kernel == num_points, \ @@ -130,9 +130,10 @@ def _init_layers(self): conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg)) pts_out_dim = 4 if self.use_grid_points else 2 * self.num_points - self.reppoints_cls_conv = DeformConv(self.feat_channels, - self.point_feat_channels, - self.dcn_kernel, 1, self.dcn_pad) + self.reppoints_cls_conv = DeformConv2d(self.feat_channels, + self.point_feat_channels, + self.dcn_kernel, 1, + self.dcn_pad) self.reppoints_cls_out = nn.Conv2d(self.point_feat_channels, self.cls_out_channels, 1, 1, 0) self.reppoints_pts_init_conv = nn.Conv2d(self.feat_channels, @@ -140,10 +141,10 @@ def _init_layers(self): 1, 1) self.reppoints_pts_init_out = nn.Conv2d(self.point_feat_channels, pts_out_dim, 1, 1, 0) - self.reppoints_pts_refine_conv = DeformConv(self.feat_channels, - self.point_feat_channels, - self.dcn_kernel, 1, - self.dcn_pad) + self.reppoints_pts_refine_conv = DeformConv2d(self.feat_channels, + self.point_feat_channels, + self.dcn_kernel, 1, + self.dcn_pad) self.reppoints_pts_refine_out = nn.Conv2d(self.point_feat_channels, pts_out_dim, 1, 1, 0) diff --git a/mmdet/models/dense_heads/rpn_head.py b/mmdet/models/dense_heads/rpn_head.py index d118b743f5c..84a8cb10d77 100644 --- a/mmdet/models/dense_heads/rpn_head.py +++ b/mmdet/models/dense_heads/rpn_head.py @@ -2,8 +2,8 @@ import torch.nn as nn import torch.nn.functional as F from mmcv.cnn import normal_init +from mmcv.ops import batched_nms -from mmdet.ops import batched_nms from ..builder import HEADS from .anchor_head import AnchorHead from .rpn_test_mixin import RPNTestMixin @@ -163,6 +163,6 @@ def _get_bboxes_single(self, ids = ids[valid_inds] # TODO: remove the hard coded nms type - nms_cfg = dict(type='nms', iou_thr=cfg.nms_thr) + nms_cfg = dict(type='nms', iou_threshold=cfg.nms_thr) dets, keep = batched_nms(proposals, scores, ids, nms_cfg) return dets[:cfg.nms_post] diff --git a/mmdet/models/losses/focal_loss.py b/mmdet/models/losses/focal_loss.py index 3f42102e950..7f657072619 100644 --- a/mmdet/models/losses/focal_loss.py +++ b/mmdet/models/losses/focal_loss.py @@ -1,7 +1,7 @@ import torch.nn as nn import torch.nn.functional as F +from mmcv.ops import sigmoid_focal_loss as _sigmoid_focal_loss -from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss from ..builder import LOSSES from .utils import weight_reduce_loss @@ -67,7 +67,7 @@ def sigmoid_focal_loss(pred, """ # Function.apply does not accept keyword arguments, so the decorator # "weighted_loss" is not applicable - loss = _sigmoid_focal_loss(pred, target, gamma, alpha) + loss = _sigmoid_focal_loss(pred, target, gamma, alpha, None, 'none') if weight is not None: if weight.shape != loss.shape: if weight.size(0) == loss.size(0): diff --git a/mmdet/models/necks/bfp.py b/mmdet/models/necks/bfp.py index 2788f39bd99..863a55530ed 100644 --- a/mmdet/models/necks/bfp.py +++ b/mmdet/models/necks/bfp.py @@ -1,8 +1,8 @@ import torch.nn as nn import torch.nn.functional as F from mmcv.cnn import ConvModule, xavier_init +from mmcv.cnn.bricks import NonLocal2d -from mmdet.ops import NonLocal2D from ..builder import NECKS @@ -55,7 +55,7 @@ def __init__(self, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg) elif self.refine_type == 'non_local': - self.refine = NonLocal2D( + self.refine = NonLocal2d( self.in_channels, reduction=1, use_scale=False, diff --git a/mmdet/models/necks/fpn_carafe.py b/mmdet/models/necks/fpn_carafe.py index 1b17a40e10b..b97a6aa7343 100644 --- 
a/mmdet/models/necks/fpn_carafe.py +++ b/mmdet/models/necks/fpn_carafe.py @@ -1,7 +1,7 @@ import torch.nn as nn from mmcv.cnn import ConvModule, build_upsample_layer, xavier_init +from mmcv.ops.carafe import CARAFEPack -from mmdet.ops.carafe import CARAFEPack from ..builder import NECKS diff --git a/mmdet/models/necks/nas_fpn.py b/mmdet/models/necks/nas_fpn.py index ab34287e703..8e333ce65d4 100644 --- a/mmdet/models/necks/nas_fpn.py +++ b/mmdet/models/necks/nas_fpn.py @@ -1,7 +1,7 @@ import torch.nn as nn from mmcv.cnn import ConvModule, caffe2_xavier_init +from mmcv.ops.merge_cells import GlobalPoolingCell, SumCell -from mmdet.ops.merge_cells import GlobalPoolingCell, SumCell from ..builder import NECKS diff --git a/mmdet/models/necks/nasfcos_fpn.py b/mmdet/models/necks/nasfcos_fpn.py index 1110a98e097..2daf79ef591 100644 --- a/mmdet/models/necks/nasfcos_fpn.py +++ b/mmdet/models/necks/nasfcos_fpn.py @@ -1,8 +1,8 @@ import torch.nn as nn import torch.nn.functional as F from mmcv.cnn import ConvModule, caffe2_xavier_init +from mmcv.ops.merge_cells import ConcatCell -from mmdet.ops.merge_cells import ConcatCell from ..builder import NECKS diff --git a/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py b/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py index 7ac5a870951..98b741a089f 100644 --- a/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py +++ b/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py @@ -3,12 +3,12 @@ import torch.nn as nn import torch.nn.functional as F from mmcv.cnn import ConvModule, build_upsample_layer +from mmcv.ops import Conv2d +from mmcv.ops.carafe import CARAFEPack from torch.nn.modules.utils import _pair from mmdet.core import auto_fp16, force_fp32, mask_target from mmdet.models.builder import HEADS, build_loss -from mmdet.ops import Conv2d -from mmdet.ops.carafe import CARAFEPack BYTES_PER_FLOAT = 4 # TODO: This memory limit may be too much or too little. 
It would be better to diff --git a/mmdet/models/roi_heads/mask_heads/mask_point_head.py b/mmdet/models/roi_heads/mask_heads/mask_point_head.py index 78e1d4a2a73..f38a5c9d759 100644 --- a/mmdet/models/roi_heads/mask_heads/mask_point_head.py +++ b/mmdet/models/roi_heads/mask_heads/mask_point_head.py @@ -3,9 +3,9 @@ import torch import torch.nn as nn from mmcv.cnn import ConvModule, normal_init +from mmcv.ops import point_sample, rel_roi_point_to_rel_img_point from mmdet.models.builder import HEADS, build_loss -from mmdet.ops import point_sample, rel_roi_point_to_rel_img_point @HEADS.register_module() diff --git a/mmdet/models/roi_heads/mask_heads/maskiou_head.py b/mmdet/models/roi_heads/mask_heads/maskiou_head.py index f5722cd7ba3..1c09eae8f41 100644 --- a/mmdet/models/roi_heads/mask_heads/maskiou_head.py +++ b/mmdet/models/roi_heads/mask_heads/maskiou_head.py @@ -2,11 +2,11 @@ import torch import torch.nn as nn from mmcv.cnn import kaiming_init, normal_init +from mmcv.ops import Conv2d, Linear, MaxPool2d from torch.nn.modules.utils import _pair from mmdet.core import force_fp32 from mmdet.models.builder import HEADS, build_loss -from mmdet.ops import Conv2d, Linear, MaxPool2d @HEADS.register_module() diff --git a/mmdet/models/roi_heads/point_rend_roi_head.py b/mmdet/models/roi_heads/point_rend_roi_head.py index f23a2a81e06..309258f1f50 100644 --- a/mmdet/models/roi_heads/point_rend_roi_head.py +++ b/mmdet/models/roi_heads/point_rend_roi_head.py @@ -2,9 +2,9 @@ import torch import torch.nn.functional as F +from mmcv.ops import point_sample, rel_roi_point_to_rel_img_point from mmdet.core import bbox2roi, bbox_mapping, merge_aug_masks -from mmdet.ops import point_sample, rel_roi_point_to_rel_img_point from .. import builder from ..builder import HEADS from .standard_roi_head import StandardRoIHead diff --git a/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py b/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py index ae1f705be59..0e42b52f361 100644 --- a/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py +++ b/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py @@ -2,8 +2,7 @@ import torch import torch.nn as nn - -from mmdet import ops +from mmcv import ops class BaseRoIExtractor(nn.Module, metaclass=ABCMeta): @@ -35,7 +34,7 @@ def build_roi_layers(self, layer_cfg, featmap_strides): Args: layer_cfg (dict): Dictionary to construct and config RoI layer - operation. Options are modules under ``mmdet/ops`` such as + operation. Options are modules under ``mmcv/ops`` such as ``RoIAlign``. 
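As the docstring above says, RoI layers are now built from modules in mmcv.ops, using the new keyword names output_size and sampling_ratio from the config hunks earlier in this diff. A minimal sketch of what such a layer looks like for one feature level; passing spatial_scale as the reciprocal of the feature stride is an assumption here, not shown in this hunk:

    import torch
    from mmcv import ops

    layer_cfg = dict(type='RoIAlign', output_size=7, sampling_ratio=0)
    cfg = layer_cfg.copy()
    layer_cls = getattr(ops, cfg.pop('type'))
    roi_layer = layer_cls(spatial_scale=1 / 16, **cfg)

    feat = torch.randn(1, 256, 50, 50)
    rois = torch.tensor([[0., 32., 32., 160., 160.]])   # (batch_idx, x1, y1, x2, y2)
    print(roi_layer(feat, rois).shape)   # torch.Size([1, 256, 7, 7])
    print(roi_layer.output_size)         # (7, 7), the attribute the extractors read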
featmap_strides (int): The stride of input feature map w.r.t to the original image size, which would be used to scale RoI diff --git a/mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py b/mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py index 0e7aff0bad3..5b1db799d82 100644 --- a/mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py +++ b/mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py @@ -1,6 +1,7 @@ +from mmcv.cnn.bricks import build_plugin_layer + from mmdet.core import force_fp32 from mmdet.models.builder import ROI_EXTRACTORS -from mmdet.ops.plugin import build_plugin_layer from .base_roi_extractor import BaseRoIExtractor @@ -44,7 +45,7 @@ def forward(self, feats, rois, roi_scale_factor=None): if len(feats) == 1: return self.roi_layers[0](feats[0], rois) - out_size = self.roi_layers[0].out_size + out_size = self.roi_layers[0].output_size num_levels = len(feats) roi_feats = feats[0].new_zeros( rois.size(0), self.out_channels, *out_size) diff --git a/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py b/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py index 94097eaa40a..a52857c7e34 100644 --- a/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py +++ b/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py @@ -53,7 +53,7 @@ def map_roi_levels(self, rois, num_levels): @force_fp32(apply_to=('feats', ), out_fp16=True) def forward(self, feats, rois, roi_scale_factor=None): """Forward function.""" - out_size = self.roi_layers[0].out_size + out_size = self.roi_layers[0].output_size num_levels = len(feats) roi_feats = feats[0].new_zeros( rois.size(0), self.out_channels, *out_size) diff --git a/mmdet/ops/__init__.py b/mmdet/ops/__init__.py deleted file mode 100644 index e05334e76fc..00000000000 --- a/mmdet/ops/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -from .context_block import ContextBlock -from .conv_ws import ConvWS2d, conv_ws_2d -from .corner_pool import CornerPool -from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling, - DeformRoIPoolingPack, ModulatedDeformConv, - ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack, - deform_conv, deform_roi_pooling, modulated_deform_conv) -from .generalized_attention import GeneralizedAttention -from .masked_conv import MaskedConv2d -from .nms import batched_nms, nms, nms_match, soft_nms -from .non_local import NonLocal2D -from .plugin import build_plugin_layer -from .point_sample import (SimpleRoIAlign, point_sample, - rel_roi_point_to_rel_img_point) -from .roi_align import RoIAlign, roi_align -from .roi_pool import RoIPool, roi_pool -from .saconv import SAConv2d -from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss -from .utils import get_compiler_version, get_compiling_cuda_version -from .wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d - -__all__ = [ - 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', - 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', - 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', - 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', - 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', - 'MaskedConv2d', 'ContextBlock', 'GeneralizedAttention', 'NonLocal2D', - 'get_compiler_version', 'get_compiling_cuda_version', 'ConvWS2d', - 'conv_ws_2d', 'build_plugin_layer', 'batched_nms', 'Conv2d', - 'ConvTranspose2d', 'MaxPool2d', 'Linear', 'nms_match', 'CornerPool', - 'point_sample', 
'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign', - 'SAConv2d' -] diff --git a/mmdet/ops/carafe/__init__.py b/mmdet/ops/carafe/__init__.py deleted file mode 100644 index 029038f897f..00000000000 --- a/mmdet/ops/carafe/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive - -__all__ = ['carafe', 'carafe_naive', 'CARAFE', 'CARAFENaive', 'CARAFEPack'] diff --git a/mmdet/ops/carafe/carafe.py b/mmdet/ops/carafe/carafe.py deleted file mode 100644 index cd72f55e306..00000000000 --- a/mmdet/ops/carafe/carafe.py +++ /dev/null @@ -1,237 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from mmcv.cnn import UPSAMPLE_LAYERS, normal_init, xavier_init -from torch.autograd import Function -from torch.nn.modules.module import Module - -from . import carafe_ext, carafe_naive_ext - - -class CARAFENaiveFunction(Function): - - @staticmethod - def forward(ctx, features, masks, kernel_size, group_size, scale_factor): - assert scale_factor >= 1 - assert masks.size(1) == kernel_size * kernel_size * group_size - assert masks.size(-1) == features.size(-1) * scale_factor - assert masks.size(-2) == features.size(-2) * scale_factor - assert features.size(1) % group_size == 0 - assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 - ctx.kernel_size = kernel_size - ctx.group_size = group_size - ctx.scale_factor = scale_factor - ctx.feature_size = features.size() - ctx.mask_size = masks.size() - - n, c, h, w = features.size() - output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) - if features.is_cuda: - carafe_naive_ext.forward(features, masks, kernel_size, group_size, - scale_factor, output) - else: - raise NotImplementedError - - if features.requires_grad or masks.requires_grad: - ctx.save_for_backward(features, masks) - return output - - @staticmethod - def backward(ctx, grad_output): - assert grad_output.is_cuda - - features, masks = ctx.saved_tensors - kernel_size = ctx.kernel_size - group_size = ctx.group_size - scale_factor = ctx.scale_factor - - grad_input = torch.zeros_like(features) - grad_masks = torch.zeros_like(masks) - carafe_naive_ext.backward(grad_output.contiguous(), features, masks, - kernel_size, group_size, scale_factor, - grad_input, grad_masks) - - return grad_input, grad_masks, None, None, None - - -carafe_naive = CARAFENaiveFunction.apply - - -class CARAFENaive(Module): - - def __init__(self, kernel_size, group_size, scale_factor): - super(CARAFENaive, self).__init__() - - assert isinstance(kernel_size, int) and isinstance( - group_size, int) and isinstance(scale_factor, int) - self.kernel_size = kernel_size - self.group_size = group_size - self.scale_factor = scale_factor - - def forward(self, features, masks): - return CARAFENaiveFunction.apply(features, masks, self.kernel_size, - self.group_size, self.scale_factor) - - -class CARAFEFunction(Function): - - @staticmethod - def forward(ctx, features, masks, kernel_size, group_size, scale_factor): - assert scale_factor >= 1 - assert masks.size(1) == kernel_size * kernel_size * group_size - assert masks.size(-1) == features.size(-1) * scale_factor - assert masks.size(-2) == features.size(-2) * scale_factor - assert features.size(1) % group_size == 0 - assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 - ctx.kernel_size = kernel_size - ctx.group_size = group_size - ctx.scale_factor = scale_factor - ctx.feature_size = features.size() - ctx.mask_size = masks.size() - - n, c, h, w = features.size() - output = features.new_zeros((n, c, h 
* scale_factor, w * scale_factor)) - routput = features.new_zeros(output.size(), requires_grad=False) - rfeatures = features.new_zeros(features.size(), requires_grad=False) - rmasks = masks.new_zeros(masks.size(), requires_grad=False) - if features.is_cuda: - carafe_ext.forward(features, rfeatures, masks, rmasks, kernel_size, - group_size, scale_factor, routput, output) - else: - raise NotImplementedError - - if features.requires_grad or masks.requires_grad: - ctx.save_for_backward(features, masks, rfeatures) - return output - - @staticmethod - def backward(ctx, grad_output): - assert grad_output.is_cuda - - features, masks, rfeatures = ctx.saved_tensors - kernel_size = ctx.kernel_size - group_size = ctx.group_size - scale_factor = ctx.scale_factor - - rgrad_output = torch.zeros_like(grad_output, requires_grad=False) - rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False) - rgrad_input = torch.zeros_like(features, requires_grad=False) - rgrad_masks = torch.zeros_like(masks, requires_grad=False) - grad_input = torch.zeros_like(features, requires_grad=False) - grad_masks = torch.zeros_like(masks, requires_grad=False) - carafe_ext.backward(grad_output.contiguous(), rfeatures, masks, - kernel_size, group_size, scale_factor, - rgrad_output, rgrad_input_hs, rgrad_input, - rgrad_masks, grad_input, grad_masks) - return grad_input, grad_masks, None, None, None, None - - -carafe = CARAFEFunction.apply - - -class CARAFE(Module): - """ CARAFE: Content-Aware ReAssembly of FEatures - - Please refer to https://arxiv.org/abs/1905.02188 for more details. - - Args: - kernel_size (int): reassemble kernel size - group_size (int): reassemble group size - scale_factor (int): upsample ratio - - Returns: - upsampled feature map - """ - - def __init__(self, kernel_size, group_size, scale_factor): - super(CARAFE, self).__init__() - - assert isinstance(kernel_size, int) and isinstance( - group_size, int) and isinstance(scale_factor, int) - self.kernel_size = kernel_size - self.group_size = group_size - self.scale_factor = scale_factor - - def forward(self, features, masks): - return CARAFEFunction.apply(features, masks, self.kernel_size, - self.group_size, self.scale_factor) - - -@UPSAMPLE_LAYERS.register_module(name='carafe') -class CARAFEPack(nn.Module): - """A unified package of CARAFE upsampler that contains: 1) channel - compressor 2) content encoder 3) CARAFE op. - - Official implementation of ICCV 2019 paper - CARAFE: Content-Aware ReAssembly of FEatures - Please refer to https://arxiv.org/abs/1905.02188 for more details. 
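The CARAFE modules being deleted here are the ones re-imported from mmcv.ops.carafe earlier in this diff. A minimal sketch of the packed upsampler, assuming the mmcv version keeps the constructor arguments documented in this docstring and that a CUDA build of mmcv provides the carafe kernels:

    import torch
    from mmcv.ops.carafe import CARAFEPack

    up = CARAFEPack(channels=256, scale_factor=2, up_kernel=5, up_group=1).cuda()
    x = torch.randn(1, 256, 32, 32, device='cuda')
    print(up(x).shape)   # torch.Size([1, 256, 64, 64]), i.e. 2x upsampled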
- - Args: - channels (int): input feature channels - scale_factor (int): upsample ratio - up_kernel (int): kernel size of CARAFE op - up_group (int): group size of CARAFE op - encoder_kernel (int): kernel size of content encoder - encoder_dilation (int): dilation of content encoder - compressed_channels (int): output channels of channels compressor - - Returns: - upsampled feature map - """ - - def __init__(self, - channels, - scale_factor, - up_kernel=5, - up_group=1, - encoder_kernel=3, - encoder_dilation=1, - compressed_channels=64): - super(CARAFEPack, self).__init__() - self.channels = channels - self.scale_factor = scale_factor - self.up_kernel = up_kernel - self.up_group = up_group - self.encoder_kernel = encoder_kernel - self.encoder_dilation = encoder_dilation - self.compressed_channels = compressed_channels - self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, - 1) - self.content_encoder = nn.Conv2d( - self.compressed_channels, - self.up_kernel * self.up_kernel * self.up_group * - self.scale_factor * self.scale_factor, - self.encoder_kernel, - padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2), - dilation=self.encoder_dilation, - groups=1) - self.init_weights() - - def init_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - xavier_init(m, distribution='uniform') - normal_init(self.content_encoder, std=0.001) - - def kernel_normalizer(self, mask): - mask = F.pixel_shuffle(mask, self.scale_factor) - n, mask_c, h, w = mask.size() - mask_channel = int(mask_c / (self.up_kernel * self.up_kernel)) - mask = mask.view(n, mask_channel, -1, h, w) - - mask = F.softmax(mask, dim=2) - mask = mask.view(n, mask_c, h, w).contiguous() - - return mask - - def feature_reassemble(self, x, mask): - x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor) - return x - - def forward(self, x): - compressed_x = self.channel_compressor(x) - mask = self.content_encoder(compressed_x) - mask = self.kernel_normalizer(mask) - - x = self.feature_reassemble(x, mask) - return x diff --git a/mmdet/ops/carafe/grad_check.py b/mmdet/ops/carafe/grad_check.py deleted file mode 100644 index 9ddb2398342..00000000000 --- a/mmdet/ops/carafe/grad_check.py +++ /dev/null @@ -1,62 +0,0 @@ -import os.path as osp -import sys - -import mmcv -import torch -from torch.autograd import gradcheck - -sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -from mmdet.ops.carafe import CARAFE, CARAFENaive # noqa: E402, isort:skip -from mmdet.ops.carafe import carafe, carafe_naive # noqa: E402, isort:skip - -feat = torch.randn(2, 64, 3, 3, requires_grad=True, device='cuda:0').double() -mask = torch.randn( - 2, 100, 6, 6, requires_grad=True, device='cuda:0').sigmoid().double() - -print('Gradcheck for carafe...') -test = gradcheck(CARAFE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) -print(test) - -print('Gradcheck for carafe naive...') -test = gradcheck(CARAFENaive(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) -print(test) - -feat = torch.randn( - 2, 1024, 100, 100, requires_grad=True, device='cuda:0').float() -mask = torch.randn( - 2, 25, 200, 200, requires_grad=True, device='cuda:0').sigmoid().float() -loop_num = 500 - -time_forward = 0 -time_backward = 0 -bar = mmcv.ProgressBar(loop_num) -timer = mmcv.Timer() -for i in range(loop_num): - x = carafe(feat.clone(), mask.clone(), 5, 1, 2) - torch.cuda.synchronize() - time_forward += timer.since_last_check() - x.sum().backward(retain_graph=True) - torch.cuda.synchronize() - time_backward += timer.since_last_check() - 
bar.update() -forward_speed = (time_forward + 1e-3) * 1e3 / loop_num -backward_speed = (time_backward + 1e-3) * 1e3 / loop_num -print(f'\nCARAFE time forward: {forward_speed} ' - f'ms/iter | time backward: {backward_speed} ms/iter') - -time_naive_forward = 0 -time_naive_backward = 0 -bar = mmcv.ProgressBar(loop_num) -timer = mmcv.Timer() -for i in range(loop_num): - x = carafe_naive(feat.clone(), mask.clone(), 5, 1, 2) - torch.cuda.synchronize() - time_naive_forward += timer.since_last_check() - x.sum().backward(retain_graph=True) - torch.cuda.synchronize() - time_naive_backward += timer.since_last_check() - bar.update() -forward_speed = (time_naive_forward + 1e-3) * 1e3 / loop_num -backward_speed = (time_naive_backward + 1e-3) * 1e3 / loop_num -print('\nCARAFE naive time forward: ' - f'{forward_speed} ms/iter | time backward: {backward_speed} ms/iter') diff --git a/mmdet/ops/carafe/setup.py b/mmdet/ops/carafe/setup.py deleted file mode 100644 index 9b2a46d4e00..00000000000 --- a/mmdet/ops/carafe/setup.py +++ /dev/null @@ -1,36 +0,0 @@ -from setuptools import setup - -from torch.utils.cpp_extension import BuildExtension, CUDAExtension - -NVCC_ARGS = [ - '-D__CUDA_NO_HALF_OPERATORS__', - '-D__CUDA_NO_HALF_CONVERSIONS__', - '-D__CUDA_NO_HALF2_OPERATORS__', -] - -setup( - name='carafe', - ext_modules=[ - CUDAExtension( - 'carafe_ext', [ - 'src/cuda/carafe_cuda.cpp', 'src/cuda/carafe_cuda_kernel.cu', - 'src/carafe_ext.cpp' - ], - define_macros=[('WITH_CUDA', None)], - extra_compile_args={ - 'cxx': [], - 'nvcc': NVCC_ARGS - }), - CUDAExtension( - 'carafe_naive_ext', [ - 'src/cuda/carafe_naive_cuda.cpp', - 'src/cuda/carafe_naive_cuda_kernel.cu', - 'src/carafe_naive_ext.cpp' - ], - define_macros=[('WITH_CUDA', None)], - extra_compile_args={ - 'cxx': [], - 'nvcc': NVCC_ARGS - }) - ], - cmdclass={'build_ext': BuildExtension}) diff --git a/mmdet/ops/carafe/src/carafe_ext.cpp b/mmdet/ops/carafe/src/carafe_ext.cpp deleted file mode 100644 index 7998ac2cd9a..00000000000 --- a/mmdet/ops/carafe/src/carafe_ext.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include - -#include -#include - -#ifdef WITH_CUDA -int carafe_forward_cuda(at::Tensor features, at::Tensor rfeatures, - at::Tensor masks, at::Tensor rmasks, int kernel_size, - int group_size, int scale_factor, at::Tensor routput, - at::Tensor output); - -int carafe_backward_cuda(at::Tensor top_grad, at::Tensor rfeatures, - at::Tensor masks, int kernel_size, int group_size, - int scale_factor, at::Tensor rtop_grad, - at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad, - at::Tensor rmask_grad, at::Tensor bottom_grad, - at::Tensor mask_grad); -#endif - -int carafe_forward(at::Tensor features, at::Tensor rfeatures, - at::Tensor masks, at::Tensor rmasks, int kernel_size, - int group_size, int scale_factor, at::Tensor routput, - at::Tensor output) { - if (features.device().is_cuda()) { -#ifdef WITH_CUDA - return carafe_forward_cuda(features, rfeatures, masks, rmasks, kernel_size, - group_size, scale_factor, routput, output); -#else - AT_ERROR("carafe is not compiled with GPU support"); -#endif - } - AT_ERROR("carafe is not implemented on CPU"); -} - -int carafe_backward(at::Tensor top_grad, at::Tensor rfeatures, - at::Tensor masks, int kernel_size, int group_size, - int scale_factor, at::Tensor rtop_grad, - at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad, - at::Tensor rmask_grad, at::Tensor bottom_grad, - at::Tensor mask_grad) { - if (top_grad.device().is_cuda()) { -#ifdef WITH_CUDA - return carafe_backward_cuda(top_grad, rfeatures, masks, kernel_size, - 
group_size, scale_factor, rtop_grad, rbottom_grad_hs, rbottom_grad, - rmask_grad, bottom_grad, mask_grad); -#else - AT_ERROR("carafe is not compiled with GPU support"); -#endif - } - AT_ERROR("carafe is not implemented on CPU"); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("forward", &carafe_forward, "carafe forward"); - m.def("backward", &carafe_backward, "carafe backward"); -} diff --git a/mmdet/ops/carafe/src/carafe_naive_ext.cpp b/mmdet/ops/carafe/src/carafe_naive_ext.cpp deleted file mode 100644 index 357b8625df8..00000000000 --- a/mmdet/ops/carafe/src/carafe_naive_ext.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include -#include - -#include -#include - -#ifdef WITH_CUDA -int carafe_naive_forward_cuda(at::Tensor features, at::Tensor masks, - int kernel_size, int group_size, int scale_factor, - at::Tensor output); - -int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features, - at::Tensor masks, int kernel_size, - int group_size, int scale_factor, - at::Tensor bottom_grad, at::Tensor mask_grad); -#endif - -int carafe_naive_forward(at::Tensor features, at::Tensor masks, - int kernel_size, int group_size, int scale_factor, - at::Tensor output) { - if (features.device().is_cuda()) { -#ifdef WITH_CUDA - return carafe_naive_forward_cuda(features, masks, kernel_size, - group_size, scale_factor, output); -#else - AT_ERROR("carafe naive is not compiled with GPU support"); -#endif - } - AT_ERROR("carafe naive is not implemented on CPU"); -} - -int carafe_naive_backward(at::Tensor top_grad, at::Tensor features, - at::Tensor masks, int kernel_size, - int group_size, int scale_factor, - at::Tensor bottom_grad, at::Tensor mask_grad) { - if (top_grad.device().is_cuda()) { -#ifdef WITH_CUDA - return carafe_naive_backward_cuda(top_grad, features, masks, kernel_size, - group_size, scale_factor, bottom_grad, mask_grad); -#else - AT_ERROR("carafe naive is not compiled with GPU support"); -#endif - } - AT_ERROR("carafe naive is not implemented on CPU"); - -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("forward", &carafe_naive_forward, "carafe_naive forward"); - m.def("backward", &carafe_naive_backward, "carafe_naive backward"); -} diff --git a/mmdet/ops/carafe/src/cuda/carafe_cuda.cpp b/mmdet/ops/carafe/src/cuda/carafe_cuda.cpp deleted file mode 100644 index 59b536c027c..00000000000 --- a/mmdet/ops/carafe/src/cuda/carafe_cuda.cpp +++ /dev/null @@ -1,108 +0,0 @@ -#include -#include - -#include -#include - -int CARAFEForwardLaucher(const at::Tensor features, const at::Tensor masks, - const int kernel_size, const int group_size, - const int scale_factor, const int batch_size, - const int channels, const int input_height, - const int input_width, const int output_height, - const int output_width, const int mask_channels, - at::Tensor rfeatures, at::Tensor routput, - at::Tensor rmasks, at::Tensor output); - -int CARAFEBackwardLaucher(const at::Tensor top_grad, const at::Tensor rfeatures, - const at::Tensor masks, const int kernel_size, - const int group_size, const int scale_factor, - const int batch_size, const int channels, - const int input_height, const int input_width, - const int output_height, const int output_width, - const int mask_channels, at::Tensor rtop_grad, - at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad, - at::Tensor rmask_grad, at::Tensor bottom_grad, - at::Tensor mask_grad); - -#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") -#define CHECK_CONTIGUOUS(x) \ - TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 
-#define CHECK_INPUT(x) \ - CHECK_CUDA(x); \ - CHECK_CONTIGUOUS(x) - -int carafe_forward_cuda(at::Tensor features, at::Tensor rfeatures, - at::Tensor masks, at::Tensor rmasks, int kernel_size, - int group_size, int scale_factor, at::Tensor routput, - at::Tensor output) { - CHECK_INPUT(features); - CHECK_INPUT(rfeatures); - CHECK_INPUT(masks); - CHECK_INPUT(rmasks); - CHECK_INPUT(output); - CHECK_INPUT(routput); - at::DeviceGuard guard(features.device()); - - const int batch_size = output.size(0); - const int num_channels = output.size(1); - const int output_height = output.size(2); - const int output_width = output.size(3); - - const int input_height = features.size(2); - const int input_width = features.size(3); - - const int mask_channels = masks.size(1); - - rfeatures.resize_({batch_size, input_height, input_width, num_channels}); - routput.resize_({batch_size, output_height, output_width, num_channels}); - rmasks.resize_({batch_size, output_height, output_width, mask_channels}); - - CARAFEForwardLaucher(features, masks, kernel_size, group_size, scale_factor, - batch_size, num_channels, input_height, input_width, - output_height, output_width, mask_channels, rfeatures, - routput, rmasks, output); - - return 1; -} - -int carafe_backward_cuda(at::Tensor top_grad, at::Tensor rfeatures, - at::Tensor masks, int kernel_size, int group_size, - int scale_factor, at::Tensor rtop_grad, - at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad, - at::Tensor rmask_grad, at::Tensor bottom_grad, - at::Tensor mask_grad) { - CHECK_INPUT(top_grad); - CHECK_INPUT(rfeatures); - CHECK_INPUT(masks); - CHECK_INPUT(rtop_grad); - CHECK_INPUT(rbottom_grad_hs); - CHECK_INPUT(rbottom_grad); - CHECK_INPUT(rmask_grad); - CHECK_INPUT(bottom_grad); - CHECK_INPUT(mask_grad); - at::DeviceGuard guard(top_grad.device()); - - const int batch_size = top_grad.size(0); - const int num_channels = top_grad.size(1); - const int output_height = top_grad.size(2); - const int output_width = top_grad.size(3); - - const int input_height = bottom_grad.size(2); - const int input_width = bottom_grad.size(3); - - const int mask_channels = masks.size(1); - - rtop_grad.resize_({batch_size, output_height, output_width, num_channels}); - rbottom_grad.resize_({batch_size, input_height, input_width, num_channels}); - rbottom_grad_hs.resize_( - {batch_size, output_height, output_width, num_channels}); - rmask_grad.resize_({batch_size, output_height, output_width, mask_channels}); - - CARAFEBackwardLaucher(top_grad, rfeatures, masks, kernel_size, group_size, - scale_factor, batch_size, num_channels, input_height, - input_width, output_height, output_width, mask_channels, - rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad, - bottom_grad, mask_grad); - - return 1; -} diff --git a/mmdet/ops/carafe/src/cuda/carafe_cuda_kernel.cu b/mmdet/ops/carafe/src/cuda/carafe_cuda_kernel.cu deleted file mode 100644 index a9566cf6e8c..00000000000 --- a/mmdet/ops/carafe/src/cuda/carafe_cuda_kernel.cu +++ /dev/null @@ -1,489 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -using namespace at; - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -#define THREADS_PER_BLOCK 1024 // 32 * 32 -#define WARP_SIZE 32 -#define THREADS_PER_PIXEL 32 -#define MAX_SHARED_MEMORY 49152 -#define MAX_SHARED_SCALAR_T 6144 // 49152 / 8 = 6144 -#define MAXIMIZE_KERNEL_SIZE true -#define kTileDim 32 -#define kBlockRows 8 -#define FULL_MASK 0xffffffff - -inline int 
divideUP(const int x, const int y) { return (((x) + (y)-1) / (y)); } - -__device__ inline int Loc2Index(const int n, const int c, const int h, - const int w, const int channel_num, - const int height, const int width) { - int index = w + (h + (c + n * channel_num) * height) * width; - return index; -} -/* TODO: move this to a common place */ -template -__device__ inline scalar_t min(scalar_t a, scalar_t b) { - return a < b ? a : b; -} - -template -__device__ inline scalar_t max(scalar_t a, scalar_t b) { - return a > b ? a : b; -} - -template -__device__ __forceinline__ scalar_t WARP_SHFL_DOWN(scalar_t val, int offset) -{ - return __shfl_down_sync(FULL_MASK, val, offset); -} - -template<> -__device__ __forceinline__ c10::Half WARP_SHFL_DOWN(c10::Half val, int offset) -{ - return c10::Half(WARP_SHFL_DOWN(val.x, offset), c10::Half::from_bits_t{}); -} - - -template -__device__ __forceinline__ scalar_t warpReduceSum(scalar_t val) { - for (int offset = 16; offset > 0; offset /= 2) - // val += __shfl_down_sync(FULL_MASK, val, offset); - val += WARP_SHFL_DOWN(val, offset); - return val; -} - -// Splits the original matrix into submatrices with size 32 * 32. -// Each block transposes one submatrix by loading it into shared memory. -// Reference https://devblogs.nvidia.com/efficient-matrix-transpose-cuda-cc/ -template -__global__ void BatchTranspose2DCUDAKernel(const int N, const int H, - const int W, const int dh, - const int dw, - const scalar_t *__restrict__ X, - scalar_t *__restrict__ Y) { - __shared__ scalar_t tile[kTileDim][kTileDim + 1]; - const int n = blockIdx.x / (dh * dw); - const int k = blockIdx.x % (dh * dw); - const int r = k / dw; - const int c = k % dw; - const int offset = n * H * W; - int x = c * kTileDim + threadIdx.x; - int y = r * kTileDim + threadIdx.y; - if (x < W) { - for (int i = 0; threadIdx.y + i < kTileDim && y + i < H; i += kBlockRows) { - tile[threadIdx.y + i][threadIdx.x] = X[offset + (y + i) * W + x]; - } - } - __syncthreads(); - x = r * kTileDim + threadIdx.x; - y = c * kTileDim + threadIdx.y; - if (x < H) { - for (int i = 0; threadIdx.y + i < kTileDim && y + i < W; i += kBlockRows) { - Y[offset + (y + i) * H + x] = tile[threadIdx.x][threadIdx.y + i]; - } - } -} -template -__global__ void CARAFEForward( - const int num_kernels, const scalar_t *__restrict__ bottom_data, - const scalar_t *__restrict__ bottom_masks, const int kernel_size, - const int group_size, const int scale_factor, const int channels, - const int down_height, const int down_width, const int height, - const int width, const int mask_channels, scalar_t *__restrict__ top_data) { -#if MAXIMIZE_KERNEL_SIZE - __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2]; -#else - __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T]; -#endif - - int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index > num_kernels - 1) { - return; - } - const int pixel_id = threadIdx.x / THREADS_PER_PIXEL; - const int split_id = threadIdx.x % THREADS_PER_PIXEL; - index = index / THREADS_PER_PIXEL; - const int pw = index % width; - const int ph = (index / width) % height; - const int n = index / width / height; - - const int down_pw = pw / scale_factor; - const int down_ph = ph / scale_factor; - - const int start_w = down_pw - (kernel_size - 1) / 2; - const int end_w = down_pw + (kernel_size - 1) / 2 + 1; - const int start_h = down_ph - (kernel_size - 1) / 2; - const int end_h = down_ph + (kernel_size - 1) / 2 + 1; - for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) { - int mask_index = Loc2Index(n, ph, pw, 
c, height, width, mask_channels); - shared_mask[c * WARP_SIZE + pixel_id] = bottom_masks[mask_index]; - } - __syncthreads(); - - const int channels_per_group = ceilf(channels / (float)group_size); -#pragma unroll - for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) { - int mask_group = c / channels_per_group; - scalar_t output_val = 0; -#pragma unroll - for (int iy = start_h; iy < end_h; iy++) { -#pragma unroll - for (int ix = start_w; ix < end_w; ix++) { - if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) { - continue; - } - int mask_iy = iy - down_ph + (kernel_size - 1) / 2; - int mask_ix = ix - down_pw + (kernel_size - 1) / 2; - int mask_c = - (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; - int feat_index = - Loc2Index(n, iy, ix, c, down_height, down_width, channels); - - output_val += bottom_data[feat_index] * - shared_mask[mask_c * WARP_SIZE + pixel_id]; - } - } - - int top_index = Loc2Index(n, ph, pw, c, height, width, channels); - top_data[top_index] = output_val; - } -} - -int CARAFEForwardLaucher(const at::Tensor features, const at::Tensor masks, - const int kernel_size, const int group_size, - const int scale_factor, const int batch_size, - const int channels, const int input_height, - const int input_width, const int output_height, - const int output_width, const int mask_channels, - at::Tensor rfeatures, at::Tensor routput, - at::Tensor rmasks, at::Tensor output) { - // one warp per pixel - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.scalar_type(), "NCHW2NHWC_Feature", ([&] { - const scalar_t *bottom_data = features.data_ptr(); - scalar_t *top_data = rfeatures.data_ptr(); - const int dh = divideUP(channels, kTileDim); - const int dw = divideUP(input_height * input_width, kTileDim); - BatchTranspose2DCUDAKernel - <<>>( - batch_size, channels, input_height * input_width, dh, dw, - bottom_data, top_data); - })); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.scalar_type(), "NCHW2NHWC_Masks", ([&] { - const scalar_t *bottom_data = masks.data_ptr(); - scalar_t *top_data = rmasks.data_ptr(); - const int dh = divideUP(mask_channels, kTileDim); - const int dw = divideUP(output_height * output_width, kTileDim); - BatchTranspose2DCUDAKernel - <<>>( - batch_size, mask_channels, output_height * output_width, dh, dw, - bottom_data, top_data); - })); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.scalar_type(), "CARAFELaucherForward", ([&] { - const int num_kernels = - batch_size * output_height * output_width * THREADS_PER_PIXEL; - const scalar_t *bottom_data = rfeatures.data_ptr(); - const scalar_t *bottom_masks = rmasks.data_ptr(); - scalar_t *top_data = routput.data_ptr(); - - CARAFEForward - <<>>( - num_kernels, bottom_data, bottom_masks, kernel_size, group_size, - scale_factor, channels, input_height, input_width, - output_height, output_width, mask_channels, top_data); - })); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.scalar_type(), "NHWC2NCHW", ([&] { - const scalar_t *bottom_data = routput.data_ptr(); - scalar_t *top_data = output.data_ptr(); - const int dh = divideUP(output_height * output_width, kTileDim); - const int dw = divideUP(channels, kTileDim); - BatchTranspose2DCUDAKernel - <<>>( - batch_size, output_height * output_width, channels, dh, dw, - bottom_data, top_data); - })); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); - exit(-1); - } - - return 1; -} - 
-template -__global__ void CARAFEBackward_Feature( - const int num_kernels, const scalar_t *__restrict__ top_diff, - const scalar_t *__restrict__ bottom_masks, const int kernel_size, - const int group_size, const int scale_factor, const int channels, - const int down_height, const int down_width, const int height, - const int width, const int mask_channels, - scalar_t *__restrict__ bottom_diff) { -#if MAXIMIZE_KERNEL_SIZE - __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2]; -#else - __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T]; -#endif - - int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index > num_kernels - 1) { - return; - } - - const int pixel_id = threadIdx.x / THREADS_PER_PIXEL; - const int split_id = threadIdx.x % THREADS_PER_PIXEL; - // (n, c, ph, pw) is an element in the bottom_data - index = index / THREADS_PER_PIXEL; - const int pw = index % width; - const int ph = (index / width) % height; - const int n = index / width / height; - - const int start_w = pw - (kernel_size - 1) * scale_factor / 2; - const int end_w = pw + (kernel_size - 1) * scale_factor / 2 + 1; - const int start_h = ph - (kernel_size - 1) * scale_factor / 2; - const int end_h = ph + (kernel_size - 1) * scale_factor / 2 + 1; - for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) { - const int mask_w = (c % kernel_size) * scale_factor; - const int mask_h = (c / kernel_size % kernel_size) * scale_factor; - const int mask_x = start_w + mask_w; - const int mask_y = start_h + mask_h; - if (mask_y < 0 || mask_y > height - 1 || mask_x < 0 || mask_x > width - 1) { - shared_mask[c * WARP_SIZE + pixel_id] = 0; - continue; - } - const int mask_group = c / (kernel_size * kernel_size); - const int mask_c = (2 * mask_group + 1) * kernel_size * kernel_size - c - 1; - int mask_index = - Loc2Index(n, mask_c, mask_y, mask_x, mask_channels, height, width); - shared_mask[c * WARP_SIZE + pixel_id] = bottom_masks[mask_index]; - } - __syncthreads(); - const int channels_per_group = ceilf(channels / (float)group_size); -#pragma unroll - for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) { - int mask_group = c / channels_per_group; - int top_index = Loc2Index(n, ph, pw, c, height, width, channels); - scalar_t output_val = 0; -#pragma unroll - for (int iy = start_h; iy < end_h; iy += scale_factor) { -#pragma unroll - for (int ix = start_w; ix < end_w; ix += scale_factor) { - if (iy < 0 || iy > height - 1 || ix < 0 || ix > width - 1) { - continue; - } - int mask_iy = - (iy - ph + (kernel_size - 1) * scale_factor / 2) / scale_factor; - int mask_ix = - (ix - pw + (kernel_size - 1) * scale_factor / 2) / scale_factor; - int mask_c = - (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; - int feat_index = Loc2Index(n, iy, ix, c, height, width, channels); - output_val += - shared_mask[mask_c * WARP_SIZE + pixel_id] * top_diff[feat_index]; - } - } - bottom_diff[top_index] = output_val; - } -} - -template -__global__ void FeatureSum(const int num_kernels, - const scalar_t *__restrict__ input_data, - const int scale_factor, const int channels, - const int height, const int width, - scalar_t *__restrict__ output_data) { - int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index > num_kernels - 1) { - return; - } - const int split_id = threadIdx.x % THREADS_PER_PIXEL; - index = index / THREADS_PER_PIXEL; - const int pw = index % width; - const int ph = (index / width) % height; - const int n = index / width / height; - for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) { - scalar_t 
output_val = 0; - for (int iy = ph * scale_factor; iy < (ph + 1) * scale_factor; iy++) { - for (int ix = pw * scale_factor; ix < (pw + 1) * scale_factor; ix++) { - int input_id = Loc2Index(n, iy, ix, c, height * scale_factor, - width * scale_factor, channels); - output_val += input_data[input_id]; - } - } - const int output_id = Loc2Index(n, ph, pw, c, height, width, channels); - output_data[output_id] = output_val; - } -} - -template -__global__ void CARAFEBackward_Mask(const int num_kernels, - const scalar_t *__restrict__ top_diff, - const scalar_t *__restrict__ bottom_data, - const int kernel_size, const int group_size, - const int scale_factor, const int channels, - const int down_height, const int down_width, - const int height, const int width, - const int mask_channels, - scalar_t *__restrict__ mask_diff) { - int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index > num_kernels - 1) { - return; - } - - const int lane_id = index % WARP_SIZE; - index = index / WARP_SIZE; - const int mask_c = index % mask_channels; - // (n, c, ph, pw) is an element in the bottom_data - index = index / mask_channels; - const int pw = index % width; - const int ph = (index / width) % height; - const int n = index / width / height; - - const int down_pw = pw / scale_factor; - const int down_ph = ph / scale_factor; - - const int mask_group = mask_c / (kernel_size * kernel_size); - const int mask_loc = mask_c % (kernel_size * kernel_size); - - const int offset_x = mask_loc % kernel_size - (kernel_size - 1) / 2; - const int offset_y = - mask_loc / kernel_size % kernel_size - (kernel_size - 1) / 2; - - const int down_x = down_pw + offset_x; - const int down_y = down_ph + offset_y; - - scalar_t output_val = 0; - - if (down_y >= 0 && down_y <= down_height - 1 && down_x >= 0 && - down_x <= down_width - 1) { - const int channels_per_mask = ceilf(channels / (float)group_size); - const int start = channels_per_mask * mask_group; - const int end = min(channels_per_mask * (mask_group + 1), channels); - for (int c = start + lane_id; c < end; c += WARP_SIZE) { - int bottom_id = - Loc2Index(n, down_y, down_x, c, down_height, down_width, channels); - int top_id = Loc2Index(n, ph, pw, c, height, width, channels); - output_val += top_diff[top_id] * bottom_data[bottom_id]; - } - } - __syncwarp(); - output_val = warpReduceSum(output_val); - if (lane_id == 0) { - const int mask_id = - Loc2Index(n, ph, pw, mask_c, height, width, mask_channels); - mask_diff[mask_id] = output_val; - } -} - -int CARAFEBackwardLaucher(const at::Tensor top_grad, const at::Tensor rfeatures, - const at::Tensor masks, const int kernel_size, - const int group_size, const int scale_factor, - const int batch_size, const int channels, - const int input_height, const int input_width, - const int output_height, const int output_width, - const int mask_channels, at::Tensor rtop_grad, - at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad, - at::Tensor rmask_grad, at::Tensor bottom_grad, - at::Tensor mask_grad) { - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.scalar_type(), "NCHW2NHWC_Top_Grad", ([&] { - const scalar_t *bottom_data = top_grad.data_ptr(); - scalar_t *top_data = rtop_grad.data_ptr(); - const int dh = divideUP(channels, kTileDim); - const int dw = divideUP(output_height * output_width, kTileDim); - BatchTranspose2DCUDAKernel - <<>>( - batch_size, channels, output_height * output_width, dh, dw, - bottom_data, top_data); - })); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - 
top_grad.scalar_type(), "CARAFELaucherBackward_Feature", ([&] { - const int num_kernels = - batch_size * output_height * output_width * THREADS_PER_PIXEL; - const scalar_t *top_diff = rtop_grad.data_ptr(); - const scalar_t *bottom_masks = masks.data_ptr(); - scalar_t *bottom_diff = rbottom_grad_hs.data_ptr(); - - CARAFEBackward_Feature - <<>>( - num_kernels, top_diff, bottom_masks, kernel_size, group_size, - scale_factor, channels, input_height, input_width, - output_height, output_width, mask_channels, bottom_diff); - })); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.scalar_type(), "FeatureSum", ([&] { - const int num_kernels = - batch_size * input_height * input_width * THREADS_PER_PIXEL; - const scalar_t *bottom_diff_hs = rbottom_grad_hs.data_ptr(); - scalar_t *bottom_diff = rbottom_grad.data_ptr(); - - FeatureSum - <<>>( - num_kernels, bottom_diff_hs, scale_factor, channels, - input_height, input_width, bottom_diff); - })); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.scalar_type(), "NHWC2NCHW_Bottom_Grad", ([&] { - const scalar_t *bottom_data = rbottom_grad.data_ptr(); - scalar_t *top_data = bottom_grad.data_ptr(); - const int dh = divideUP(input_height * input_width, kTileDim); - const int dw = divideUP(channels, kTileDim); - BatchTranspose2DCUDAKernel - <<>>( - batch_size, input_height * input_width, channels, dh, dw, - bottom_data, top_data); - })); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.scalar_type(), "CARAFELaucherBackward_Mask", ([&] { - const int num_kernels = batch_size * output_height * output_width * - mask_channels * WARP_SIZE; - const scalar_t *top_diff = rtop_grad.data_ptr(); - const scalar_t *bottom_data = rfeatures.data_ptr(); - scalar_t *mask_diff = rmask_grad.data_ptr(); - - CARAFEBackward_Mask - <<>>( - num_kernels, top_diff, bottom_data, kernel_size, group_size, - scale_factor, channels, input_height, input_width, - output_height, output_width, mask_channels, mask_diff); - })); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.scalar_type(), "NHWC2NCHW_Mask_Grad", ([&] { - const scalar_t *bottom_data = rmask_grad.data_ptr(); - scalar_t *top_data = mask_grad.data_ptr(); - const int dh = divideUP(output_height * output_width, kTileDim); - const int dw = divideUP(mask_channels, kTileDim); - BatchTranspose2DCUDAKernel - <<>>( - batch_size, output_height * output_width, mask_channels, dh, dw, - bottom_data, top_data); - })); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); - exit(-1); - } - - return 1; -} diff --git a/mmdet/ops/carafe/src/cuda/carafe_naive_cuda.cpp b/mmdet/ops/carafe/src/cuda/carafe_naive_cuda.cpp deleted file mode 100644 index 394afd3ad06..00000000000 --- a/mmdet/ops/carafe/src/cuda/carafe_naive_cuda.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include -#include - -#include -#include - -int CARAFENAIVEForwardLaucher(const at::Tensor features, const at::Tensor masks, - const int kernel_size, const int group_size, - const int scale_factor, const int batch_size, - const int channels, const int height, - const int width, at::Tensor output); - -int CARAFENAIVEBackwardLaucher(const at::Tensor top_grad, - const at::Tensor features, - const at::Tensor masks, const int kernel_size, - const int group_size, const int scale_factor, - const int batch_size, const int channels, - const int height, const int width, - at::Tensor bottom_grad, at::Tensor mask_grad); - -#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") -#define 
CHECK_CONTIGUOUS(x) \ - TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") -#define CHECK_INPUT(x) \ - CHECK_CUDA(x); \ - CHECK_CONTIGUOUS(x) - -int carafe_naive_forward_cuda(at::Tensor features, at::Tensor masks, - int kernel_size, int group_size, int scale_factor, - at::Tensor output) { - CHECK_INPUT(features); - CHECK_INPUT(masks); - CHECK_INPUT(output); - at::DeviceGuard guard(features.device()); - - int batch_size = output.size(0); - int num_channels = output.size(1); - int data_height = output.size(2); - int data_width = output.size(3); - - CARAFENAIVEForwardLaucher(features, masks, kernel_size, group_size, - scale_factor, batch_size, num_channels, data_height, - data_width, output); - - return 1; -} - -int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features, - at::Tensor masks, int kernel_size, - int group_size, int scale_factor, - at::Tensor bottom_grad, at::Tensor mask_grad) { - CHECK_INPUT(top_grad); - CHECK_INPUT(features); - CHECK_INPUT(masks); - CHECK_INPUT(bottom_grad); - CHECK_INPUT(mask_grad); - at::DeviceGuard guard(top_grad.device()); - - int batch_size = top_grad.size(0); - int num_channels = top_grad.size(1); - int data_height = top_grad.size(2); - int data_width = top_grad.size(3); - - CARAFENAIVEBackwardLaucher(top_grad, features, masks, kernel_size, group_size, - scale_factor, batch_size, num_channels, - data_height, data_width, bottom_grad, mask_grad); - - return 1; -} diff --git a/mmdet/ops/carafe/src/cuda/carafe_naive_cuda_kernel.cu b/mmdet/ops/carafe/src/cuda/carafe_naive_cuda_kernel.cu deleted file mode 100644 index 9cf9855a71c..00000000000 --- a/mmdet/ops/carafe/src/cuda/carafe_naive_cuda_kernel.cu +++ /dev/null @@ -1,176 +0,0 @@ -#include -#include - -using namespace at; // temporal fix for pytorch<=0.4.1 (see #9848) - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -#define THREADS_PER_BLOCK 1024 - -inline int GET_BLOCKS(const int N) { - int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - int max_block_num = 65536; - return min(optimal_block_num, max_block_num); -} - -__device__ inline int Loc2Index(const int n, const int c, const int h, - const int w, const int channel_num, - const int height, const int width) { - int index = w + (h + (c + n * channel_num) * height) * width; - return index; -} -template -__global__ void CARAFENAIVEForward(const int nthreads, - const scalar_t *bottom_data, - const scalar_t *bottom_masks, - const int kernel_size, const int group_size, - const int scale_factor, const int channels, - const int height, const int width, - scalar_t *top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the bottom_data - int pw = index % width; - int ph = (index / width) % height; - int c = (index / width / height) % channels; - int n = index / width / height / channels; - - int mask_channels = kernel_size * kernel_size * group_size; - int mask_group = c / (channels / group_size); - - int down_pw = pw / scale_factor; - int down_ph = ph / scale_factor; - int down_width = width / scale_factor; - int down_height = height / scale_factor; - int start_w = down_pw - (kernel_size - 1) / 2; - int end_w = down_pw + (kernel_size - 1) / 2 + 1; - int start_h = down_ph - (kernel_size - 1) / 2; - int end_h = down_ph + (kernel_size - 1) / 2 + 1; - - scalar_t output_val = 0; - for (int iy = start_h; iy < end_h; iy++) { - for (int ix = start_w; ix < end_w; ix++) { - if (iy < 0 || iy > down_height - 1 
|| ix < 0 || ix > down_width - 1) { - continue; - } - int mask_iy = iy - down_ph + (kernel_size - 1) / 2; - int mask_ix = ix - down_pw + (kernel_size - 1) / 2; - int mask_c = - (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; - int feat_index = - Loc2Index(n, c, iy, ix, channels, down_height, down_width); - int mask_index = - Loc2Index(n, mask_c, ph, pw, mask_channels, height, width); - output_val += bottom_data[feat_index] * bottom_masks[mask_index]; - } - } - top_data[index] = output_val; - } -} - -int CARAFENAIVEForwardLaucher(const at::Tensor features, const at::Tensor masks, - const int kernel_size, const int group_size, - const int scale_factor, const int batch_size, - const int channels, const int height, - const int width, at::Tensor output) { - const int output_size = batch_size * channels * height * width; - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.scalar_type(), "CARAFENAIVELaucherForward", ([&] { - const scalar_t *bottom_data = features.data_ptr(); - const scalar_t *bottom_masks = masks.data_ptr(); - scalar_t *top_data = output.data_ptr(); - - CARAFENAIVEForward - <<>>( - output_size, bottom_data, bottom_masks, kernel_size, group_size, - scale_factor, channels, height, width, top_data); - })); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); - exit(-1); - } - - return 1; -} - -template -__global__ void CARAFENAIVEBackward( - const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_data, - const scalar_t *bottom_masks, const int kernel_size, const int group_size, - const int scale_factor, const int channels, const int height, - const int width, scalar_t *bottom_diff, scalar_t *mask_diff) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the bottom_data - int pw = index % width; - int ph = (index / width) % height; - int c = (index / width / height) % channels; - int n = index / width / height / channels; - - int mask_channels = kernel_size * kernel_size * group_size; - int mask_group = c / (channels / group_size); - - int down_pw = pw / scale_factor; - int down_ph = ph / scale_factor; - int down_width = width / scale_factor; - int down_height = height / scale_factor; - int start_w = down_pw - (kernel_size - 1) / 2; - int end_w = down_pw + (kernel_size - 1) / 2 + 1; - int start_h = down_ph - (kernel_size - 1) / 2; - int end_h = down_ph + (kernel_size - 1) / 2 + 1; - - for (int iy = start_h; iy < end_h; iy++) { - for (int ix = start_w; ix < end_w; ix++) { - if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) { - continue; - } - int mask_iy = iy - down_ph + (kernel_size - 1) / 2; - int mask_ix = ix - down_pw + (kernel_size - 1) / 2; - int mask_c = - (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; - int feat_index = - Loc2Index(n, c, iy, ix, channels, down_height, down_width); - int mask_index = - Loc2Index(n, mask_c, ph, pw, mask_channels, height, width); - atomicAdd(bottom_diff + feat_index, - bottom_masks[mask_index] * top_diff[index]); - atomicAdd(mask_diff + mask_index, - bottom_data[feat_index] * top_diff[index]); - } - } - } -} - -int CARAFENAIVEBackwardLaucher(const at::Tensor top_grad, - const at::Tensor features, - const at::Tensor masks, const int kernel_size, - const int group_size, const int scale_factor, - const int batch_size, const int channels, - const int height, const int width, - at::Tensor bottom_grad, at::Tensor mask_grad) { - const int output_size = batch_size * channels * 
height * width; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.scalar_type(), "CARAFENAIVELaucherBackward", ([&] { - const scalar_t *top_diff = top_grad.data_ptr(); - const scalar_t *bottom_data = features.data_ptr(); - const scalar_t *bottom_masks = masks.data_ptr(); - scalar_t *bottom_diff = bottom_grad.data_ptr(); - scalar_t *mask_diff = mask_grad.data_ptr(); - - CARAFENAIVEBackward - <<>>( - output_size, top_diff, bottom_data, bottom_masks, kernel_size, - group_size, scale_factor, channels, height, width, bottom_diff, - mask_diff); - })); - - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); - exit(-1); - } - - return 1; -} diff --git a/mmdet/ops/context_block.py b/mmdet/ops/context_block.py deleted file mode 100644 index 52b6f91c9b8..00000000000 --- a/mmdet/ops/context_block.py +++ /dev/null @@ -1,116 +0,0 @@ -import torch -from mmcv.cnn import constant_init, kaiming_init -from torch import nn - - -def last_zero_init(m): - if isinstance(m, nn.Sequential): - constant_init(m[-1], val=0) - else: - constant_init(m, val=0) - - -class ContextBlock(nn.Module): - """ContextBlock module in GCNet. - - See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' - (https://arxiv.org/abs/1904.11492) for details. - - Args: - in_channels (int): Channels of the input feature map. - ratio (float): Ratio of channels of transform bottleneck - pooling_type (str): Pooling method for context modeling - fusion_types (list[str]|tuple[str]): Fusion method for feature fusion, - options: 'channels_add', 'channel_mul' - """ - - def __init__(self, - in_channels, - ratio, - pooling_type='att', - fusion_types=('channel_add', )): - super(ContextBlock, self).__init__() - assert pooling_type in ['avg', 'att'] - assert isinstance(fusion_types, (list, tuple)) - valid_fusion_types = ['channel_add', 'channel_mul'] - assert all([f in valid_fusion_types for f in fusion_types]) - assert len(fusion_types) > 0, 'at least one fusion should be used' - self.in_channels = in_channels - self.ratio = ratio - self.planes = int(in_channels * ratio) - self.pooling_type = pooling_type - self.fusion_types = fusion_types - if pooling_type == 'att': - self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1) - self.softmax = nn.Softmax(dim=2) - else: - self.avg_pool = nn.AdaptiveAvgPool2d(1) - if 'channel_add' in fusion_types: - self.channel_add_conv = nn.Sequential( - nn.Conv2d(self.in_channels, self.planes, kernel_size=1), - nn.LayerNorm([self.planes, 1, 1]), - nn.ReLU(inplace=True), # yapf: disable - nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) - else: - self.channel_add_conv = None - if 'channel_mul' in fusion_types: - self.channel_mul_conv = nn.Sequential( - nn.Conv2d(self.in_channels, self.planes, kernel_size=1), - nn.LayerNorm([self.planes, 1, 1]), - nn.ReLU(inplace=True), # yapf: disable - nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) - else: - self.channel_mul_conv = None - self.reset_parameters() - - def reset_parameters(self): - if self.pooling_type == 'att': - kaiming_init(self.conv_mask, mode='fan_in') - self.conv_mask.inited = True - - if self.channel_add_conv is not None: - last_zero_init(self.channel_add_conv) - if self.channel_mul_conv is not None: - last_zero_init(self.channel_mul_conv) - - def spatial_pool(self, x): - batch, channel, height, width = x.size() - if self.pooling_type == 'att': - input_x = x - # [N, C, H * W] - input_x = input_x.view(batch, channel, height * width) - # [N, 
1, C, H * W] - input_x = input_x.unsqueeze(1) - # [N, 1, H, W] - context_mask = self.conv_mask(x) - # [N, 1, H * W] - context_mask = context_mask.view(batch, 1, height * width) - # [N, 1, H * W] - context_mask = self.softmax(context_mask) - # [N, 1, H * W, 1] - context_mask = context_mask.unsqueeze(-1) - # [N, 1, C, 1] - context = torch.matmul(input_x, context_mask) - # [N, C, 1, 1] - context = context.view(batch, channel, 1, 1) - else: - # [N, C, 1, 1] - context = self.avg_pool(x) - - return context - - def forward(self, x): - # [N, C, 1, 1] - context = self.spatial_pool(x) - - out = x - if self.channel_mul_conv is not None: - # [N, C, 1, 1] - channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) - out = out * channel_mul_term - if self.channel_add_conv is not None: - # [N, C, 1, 1] - channel_add_term = self.channel_add_conv(context) - out = out + channel_add_term - - return out diff --git a/mmdet/ops/conv_ws.py b/mmdet/ops/conv_ws.py deleted file mode 100644 index 43d496ef062..00000000000 --- a/mmdet/ops/conv_ws.py +++ /dev/null @@ -1,146 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from mmcv.cnn import CONV_LAYERS - - -def conv_ws_2d(input, - weight, - bias=None, - stride=1, - padding=0, - dilation=1, - groups=1, - eps=1e-5): - c_in = weight.size(0) - weight_flat = weight.view(c_in, -1) - mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) - std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) - weight = (weight - mean) / (std + eps) - return F.conv2d(input, weight, bias, stride, padding, dilation, groups) - - -@CONV_LAYERS.register_module(name='ConvWS') -class ConvWS2d(nn.Conv2d): - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True, - eps=1e-5): - super(ConvWS2d, self).__init__( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - bias=bias) - self.eps = eps - - def forward(self, x): - return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, - self.dilation, self.groups, self.eps) - - -@CONV_LAYERS.register_module(name='ConvAWS') -class ConvAWS2d(nn.Conv2d): - """AWS (Adaptive Weight Standardization) - - This is a variant of Weight Standardization - (https://arxiv.org/pdf/1903.10520.pdf) - It is used in DetectoRS to avoid NaN - (https://arxiv.org/pdf/2006.02334.pdf) - - Args: - in_channels (int): Number of channels in the input image - out_channels (int): Number of channels produced by the convolution - kernel_size (int or tuple): Size of the conv kernel - stride (int or tuple, optional): Stride of the convolution. Default: 1 - padding (int or tuple, optional): Zero-padding added to both sides of - the input. Default: 0 - dilation (int or tuple, optional): Spacing between kernel elements. - Default: 1 - groups (int, optional): Number of blocked connections from input - channels to output channels. Default: 1 - bias (bool, optional): If set True, adds a learnable bias to the - output. 
Default: True - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True): - super().__init__( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - bias=bias) - self.register_buffer('weight_gamma', - torch.ones(self.out_channels, 1, 1, 1)) - self.register_buffer('weight_beta', - torch.zeros(self.out_channels, 1, 1, 1)) - - def _get_weight(self, weight): - weight_flat = weight.view(weight.size(0), -1) - mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) - std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) - weight = (weight - mean) / std - weight = self.weight_gamma * weight + self.weight_beta - return weight - - def forward(self, x): - weight = self._get_weight(self.weight) - return F.conv2d(x, weight, self.bias, self.stride, self.padding, - self.dilation, self.groups) - - def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, - missing_keys, unexpected_keys, error_msgs): - """Override default load function. - - AWS overrides the function _load_from_state_dict to recover - weight_gamma and weight_beta if they are missing. If weight_gamma and - weight_beta are found in the checkpoint, this function will return - after super()._load_from_state_dict. Otherwise, it will compute the - mean and std of the pretrained weights and store them in weight_beta - and weight_gamma. - """ - - self.weight_gamma.data.fill_(-1) - local_missing_keys = [] - super()._load_from_state_dict(state_dict, prefix, local_metadata, - strict, local_missing_keys, - unexpected_keys, error_msgs) - if self.weight_gamma.data.mean() > 0: - for k in local_missing_keys: - missing_keys.append(k) - return - weight = self.weight.data - weight_flat = weight.view(weight.size(0), -1) - mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) - std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) - self.weight_beta.data.copy_(mean) - self.weight_gamma.data.copy_(std) - missing_gamma_beta = [ - k for k in local_missing_keys - if k.endswith('weight_gamma') or k.endswith('weight_beta') - ] - for k in missing_gamma_beta: - local_missing_keys.remove(k) - for k in local_missing_keys: - missing_keys.append(k) diff --git a/mmdet/ops/corner_pool/__init__.py b/mmdet/ops/corner_pool/__init__.py deleted file mode 100644 index a5457db99f0..00000000000 --- a/mmdet/ops/corner_pool/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .corner_pool import CornerPool - -__all__ = ['CornerPool'] diff --git a/mmdet/ops/corner_pool/corner_pool.py b/mmdet/ops/corner_pool/corner_pool.py deleted file mode 100644 index 00b6b774a4b..00000000000 --- a/mmdet/ops/corner_pool/corner_pool.py +++ /dev/null @@ -1,101 +0,0 @@ -from torch import nn -from torch.autograd import Function - -from . 
import corner_pool_ext - - -class TopPoolFunction(Function): - - @staticmethod - def forward(ctx, input): - output = corner_pool_ext.top_pool_forward(input) - ctx.save_for_backward(input) - return output - - @staticmethod - def backward(ctx, grad_output): - input = ctx.saved_variables[0] - output = corner_pool_ext.top_pool_backward(input, grad_output) - return output - - -class BottomPoolFunction(Function): - - @staticmethod - def forward(ctx, input): - output = corner_pool_ext.bottom_pool_forward(input) - ctx.save_for_backward(input) - return output - - @staticmethod - def backward(ctx, grad_output): - input = ctx.saved_variables[0] - output = corner_pool_ext.bottom_pool_backward(input, grad_output) - return output - - -class LeftPoolFunction(Function): - - @staticmethod - def forward(ctx, input): - output = corner_pool_ext.left_pool_forward(input) - ctx.save_for_backward(input) - return output - - @staticmethod - def backward(ctx, grad_output): - input = ctx.saved_variables[0] - output = corner_pool_ext.left_pool_backward(input, grad_output) - return output - - -class RightPoolFunction(Function): - - @staticmethod - def forward(ctx, input): - output = corner_pool_ext.right_pool_forward(input) - ctx.save_for_backward(input) - return output - - @staticmethod - def backward(ctx, grad_output): - input = ctx.saved_variables[0] - output = corner_pool_ext.right_pool_backward(input, grad_output) - return output - - -class CornerPool(nn.Module): - """Corner Pooling. - - Corner Pooling is a new type of pooling layer that helps a - convolutional network better localize corners of bounding boxes. - - Please refer to https://arxiv.org/abs/1808.01244 for more details. - Code is modified from https://github.com/princeton-vl/CornerNet-Lite. - - Args: - mode(str): Pooling orientation for the pooling layer - - - 'bottom': Bottom Pooling - - 'left': Left Pooling - - 'right': Right Pooling - - 'top': Top Pooling - - Returns: - Feature map after pooling. 
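    Example:
        A minimal usage sketch (assumes the compiled corner_pool_ext CUDA
        extension is importable and the input sits on a GPU):

        >>> import torch
        >>> pool = CornerPool('bottom')
        >>> x = torch.rand(2, 16, 24, 24).cuda()
        >>> y = pool(x)  # same shape as x; maxima are propagated along the pooled axis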
- """ - - pool_functions = { - 'bottom': BottomPoolFunction, - 'left': LeftPoolFunction, - 'right': RightPoolFunction, - 'top': TopPoolFunction, - } - - def __init__(self, mode): - super(CornerPool, self).__init__() - assert mode in self.pool_functions - self.corner_pool = self.pool_functions[mode] - - def forward(self, x): - return self.corner_pool.apply(x) diff --git a/mmdet/ops/corner_pool/src/corner_pool.cpp b/mmdet/ops/corner_pool/src/corner_pool.cpp deleted file mode 100644 index a1fde8078a8..00000000000 --- a/mmdet/ops/corner_pool/src/corner_pool.cpp +++ /dev/null @@ -1,268 +0,0 @@ -// Modified from -// https://github.com/princeton-vl/CornerNet-Lite/tree/master/core/models/py_utils/_cpools/src -#include - -#include - -at::Tensor bottom_pool_forward(at::Tensor input) { - // Initialize output - at::Tensor output = at::zeros_like(input); - - // Get height - int64_t height = input.size(2); - - output.copy_(input); - - for (int64_t ind = 1; ind < height; ind <<= 1) { - at::Tensor max_temp = at::slice(output, 2, ind, height); - at::Tensor cur_temp = at::slice(output, 2, ind, height).clone(); - at::Tensor next_temp = at::slice(output, 2, 0, height - ind).clone(); - at::max_out(max_temp, cur_temp, next_temp); - } - - return output; -} - -at::Tensor bottom_pool_backward(at::Tensor input, at::Tensor grad_output) { - auto output = at::zeros_like(input); - - int32_t batch = input.size(0); - int32_t channel = input.size(1); - int32_t height = input.size(2); - int32_t width = input.size(3); - - auto max_val = torch::zeros({batch, channel, width}, - at::device(at::kCUDA).dtype(at::kFloat)); - auto max_ind = torch::zeros({batch, channel, width}, - at::device(at::kCUDA).dtype(at::kLong)); - - auto input_temp = input.select(2, 0); - max_val.copy_(input_temp); - - max_ind.fill_(0); - - auto output_temp = output.select(2, 0); - auto grad_output_temp = grad_output.select(2, 0); - output_temp.copy_(grad_output_temp); - - auto un_max_ind = max_ind.unsqueeze(2); - auto gt_mask = torch::zeros({batch, channel, width}, - at::device(at::kCUDA).dtype(at::kBool)); - auto max_temp = torch::zeros({batch, channel, width}, - at::device(at::kCUDA).dtype(at::kFloat)); - for (int32_t ind = 0; ind < height - 1; ++ind) { - input_temp = input.select(2, ind + 1); - at::gt_out(gt_mask, input_temp, max_val); - - at::masked_select_out(max_temp, input_temp, gt_mask); - max_val.masked_scatter_(gt_mask, max_temp); - max_ind.masked_fill_(gt_mask, ind + 1); - - grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); - output.scatter_add_(2, un_max_ind, grad_output_temp); - } - - return output; -} - -at::Tensor left_pool_forward(at::Tensor input) { - // Initialize output - at::Tensor output = at::zeros_like(input); - - // Get width - int64_t width = input.size(3); - - output.copy_(input); - - for (int64_t ind = 1; ind < width; ind <<= 1) { - at::Tensor max_temp = at::slice(output, 3, 0, width - ind); - at::Tensor cur_temp = at::slice(output, 3, 0, width - ind).clone(); - at::Tensor next_temp = at::slice(output, 3, ind, width).clone(); - at::max_out(max_temp, cur_temp, next_temp); - } - - return output; -} - -at::Tensor left_pool_backward(at::Tensor input, at::Tensor grad_output) { - auto output = at::zeros_like(input); - - int32_t batch = input.size(0); - int32_t channel = input.size(1); - int32_t height = input.size(2); - int32_t width = input.size(3); - - auto max_val = torch::zeros({batch, channel, height}, - at::device(at::kCUDA).dtype(at::kFloat)); - auto max_ind = torch::zeros({batch, channel, height}, - 
at::device(at::kCUDA).dtype(at::kLong)); - - auto input_temp = input.select(3, width - 1); - max_val.copy_(input_temp); - - max_ind.fill_(width - 1); - - auto output_temp = output.select(3, width - 1); - auto grad_output_temp = grad_output.select(3, width - 1); - output_temp.copy_(grad_output_temp); - - auto un_max_ind = max_ind.unsqueeze(3); - auto gt_mask = torch::zeros({batch, channel, height}, - at::device(at::kCUDA).dtype(at::kBool)); - auto max_temp = torch::zeros({batch, channel, height}, - at::device(at::kCUDA).dtype(at::kFloat)); - for (int32_t ind = 1; ind < width; ++ind) { - input_temp = input.select(3, width - ind - 1); - at::gt_out(gt_mask, input_temp, max_val); - - at::masked_select_out(max_temp, input_temp, gt_mask); - max_val.masked_scatter_(gt_mask, max_temp); - max_ind.masked_fill_(gt_mask, width - ind - 1); - - grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3); - output.scatter_add_(3, un_max_ind, grad_output_temp); - } - - return output; -} - -at::Tensor right_pool_forward(at::Tensor input) { - // Initialize output - at::Tensor output = at::zeros_like(input); - - // Get width - int64_t width = input.size(3); - - output.copy_(input); - - for (int64_t ind = 1; ind < width; ind <<= 1) { - at::Tensor max_temp = at::slice(output, 3, ind, width); - at::Tensor cur_temp = at::slice(output, 3, ind, width).clone(); - at::Tensor next_temp = at::slice(output, 3, 0, width - ind).clone(); - at::max_out(max_temp, cur_temp, next_temp); - } - - return output; -} - -at::Tensor right_pool_backward(at::Tensor input, at::Tensor grad_output) { - at::Tensor output = at::zeros_like(input); - - int32_t batch = input.size(0); - int32_t channel = input.size(1); - int32_t height = input.size(2); - int32_t width = input.size(3); - - auto max_val = torch::zeros({batch, channel, height}, - at::device(at::kCUDA).dtype(at::kFloat)); - auto max_ind = torch::zeros({batch, channel, height}, - at::device(at::kCUDA).dtype(at::kLong)); - - auto input_temp = input.select(3, 0); - max_val.copy_(input_temp); - - max_ind.fill_(0); - - auto output_temp = output.select(3, 0); - auto grad_output_temp = grad_output.select(3, 0); - output_temp.copy_(grad_output_temp); - - auto un_max_ind = max_ind.unsqueeze(3); - auto gt_mask = torch::zeros({batch, channel, height}, - at::device(at::kCUDA).dtype(at::kBool)); - auto max_temp = torch::zeros({batch, channel, height}, - at::device(at::kCUDA).dtype(at::kFloat)); - for (int32_t ind = 0; ind < width - 1; ++ind) { - input_temp = input.select(3, ind + 1); - at::gt_out(gt_mask, input_temp, max_val); - - at::masked_select_out(max_temp, input_temp, gt_mask); - max_val.masked_scatter_(gt_mask, max_temp); - max_ind.masked_fill_(gt_mask, ind + 1); - - grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3); - output.scatter_add_(3, un_max_ind, grad_output_temp); - } - - return output; -} - -at::Tensor top_pool_forward(at::Tensor input) { - // Initialize output - at::Tensor output = at::zeros_like(input); - - // Get height - int64_t height = input.size(2); - - output.copy_(input); - - for (int64_t ind = 1; ind < height; ind <<= 1) { - at::Tensor max_temp = at::slice(output, 2, 0, height - ind); - at::Tensor cur_temp = at::slice(output, 2, 0, height - ind).clone(); - at::Tensor next_temp = at::slice(output, 2, ind, height).clone(); - at::max_out(max_temp, cur_temp, next_temp); - } - - return output; -} - -at::Tensor top_pool_backward(at::Tensor input, at::Tensor grad_output) { - auto output = at::zeros_like(input); - - int32_t batch = input.size(0); - 
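  // Backward bookkeeping: max_val holds the running maximum along the pooled
  // (height) dimension, max_ind the row index that produced it, and each
  // incoming gradient slice is scatter-added onto that winning row, so the
  // gradient flows only to the inputs selected by the forward running max.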
int32_t channel = input.size(1); - int32_t height = input.size(2); - int32_t width = input.size(3); - - auto max_val = torch::zeros({batch, channel, width}, - at::device(at::kCUDA).dtype(at::kFloat)); - auto max_ind = torch::zeros({batch, channel, width}, - at::device(at::kCUDA).dtype(at::kLong)); - - auto input_temp = input.select(2, height - 1); - max_val.copy_(input_temp); - - max_ind.fill_(height - 1); - - auto output_temp = output.select(2, height - 1); - auto grad_output_temp = grad_output.select(2, height - 1); - output_temp.copy_(grad_output_temp); - - auto un_max_ind = max_ind.unsqueeze(2); - auto gt_mask = torch::zeros({batch, channel, width}, - at::device(at::kCUDA).dtype(at::kBool)); - auto max_temp = torch::zeros({batch, channel, width}, - at::device(at::kCUDA).dtype(at::kFloat)); - for (int32_t ind = 1; ind < height; ++ind) { - input_temp = input.select(2, height - ind - 1); - at::gt_out(gt_mask, input_temp, max_val); - - at::masked_select_out(max_temp, input_temp, gt_mask); - max_val.masked_scatter_(gt_mask, max_temp); - max_ind.masked_fill_(gt_mask, height - ind - 1); - - grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2); - output.scatter_add_(2, un_max_ind, grad_output_temp); - } - - return output; -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("bottom_pool_forward", &bottom_pool_forward, "Bottom Pool Forward", - py::call_guard()); - m.def("bottom_pool_backward", &bottom_pool_backward, "Bottom Pool Backward", - py::call_guard()); - m.def("left_pool_forward", &left_pool_forward, "Left Pool Forward", - py::call_guard()); - m.def("left_pool_backward", &left_pool_backward, "Left Pool Backward", - py::call_guard()); - m.def("right_pool_forward", &right_pool_forward, "Right Pool Forward", - py::call_guard()); - m.def("right_pool_backward", &right_pool_backward, "Right Pool Backward", - py::call_guard()); - m.def("top_pool_forward", &top_pool_forward, "Top Pool Forward", - py::call_guard()); - m.def("top_pool_backward", &top_pool_backward, "Top Pool Backward", - py::call_guard()); -} diff --git a/mmdet/ops/dcn/__init__.py b/mmdet/ops/dcn/__init__.py deleted file mode 100644 index 79594c90b28..00000000000 --- a/mmdet/ops/dcn/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, - ModulatedDeformConvPack, deform_conv, - modulated_deform_conv) -from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, - ModulatedDeformRoIPoolingPack, deform_roi_pooling) - -__all__ = [ - 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', - 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', - 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', - 'deform_roi_pooling' -] diff --git a/mmdet/ops/dcn/deform_conv.py b/mmdet/ops/dcn/deform_conv.py deleted file mode 100644 index 00766988e4e..00000000000 --- a/mmdet/ops/dcn/deform_conv.py +++ /dev/null @@ -1,460 +0,0 @@ -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F -from mmcv.cnn import CONV_LAYERS -from mmcv.utils import print_log -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair, _single - -from . 
import deform_conv_ext - - -class DeformConvFunction(Function): - - @staticmethod - def forward(ctx, - input, - offset, - weight, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - im2col_step=64): - if input is not None and input.dim() != 4: - raise ValueError(f'Expected 4D tensor as input, got {input.dim()}' - 'D tensor instead.') - ctx.stride = _pair(stride) - ctx.padding = _pair(padding) - ctx.dilation = _pair(dilation) - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.im2col_step = im2col_step - - ctx.save_for_backward(input, offset, weight) - - output = input.new_empty( - DeformConvFunction._output_size(input, weight, ctx.padding, - ctx.dilation, ctx.stride)) - - ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones - - if not input.is_cuda: - raise NotImplementedError - else: - cur_im2col_step = min(ctx.im2col_step, input.shape[0]) - assert (input.shape[0] % - cur_im2col_step) == 0, 'im2col step must divide batchsize' - deform_conv_ext.deform_conv_forward( - input, weight, offset, output, ctx.bufs_[0], ctx.bufs_[1], - weight.size(3), weight.size(2), ctx.stride[1], ctx.stride[0], - ctx.padding[1], ctx.padding[0], ctx.dilation[1], - ctx.dilation[0], ctx.groups, ctx.deformable_groups, - cur_im2col_step) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, offset, weight = ctx.saved_tensors - - grad_input = grad_offset = grad_weight = None - - if not grad_output.is_cuda: - raise NotImplementedError - else: - cur_im2col_step = min(ctx.im2col_step, input.shape[0]) - assert (input.shape[0] % - cur_im2col_step) == 0, 'im2col step must divide batchsize' - - if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - deform_conv_ext.deform_conv_backward_input( - input, offset, grad_output, grad_input, - grad_offset, weight, ctx.bufs_[0], weight.size(3), - weight.size(2), ctx.stride[1], ctx.stride[0], - ctx.padding[1], ctx.padding[0], ctx.dilation[1], - ctx.dilation[0], ctx.groups, ctx.deformable_groups, - cur_im2col_step) - - if ctx.needs_input_grad[2]: - grad_weight = torch.zeros_like(weight) - deform_conv_ext.deform_conv_backward_parameters( - input, offset, grad_output, - grad_weight, ctx.bufs_[0], ctx.bufs_[1], weight.size(3), - weight.size(2), ctx.stride[1], ctx.stride[0], - ctx.padding[1], ctx.padding[0], ctx.dilation[1], - ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1, - cur_im2col_step) - - return (grad_input, grad_offset, grad_weight, None, None, None, None, - None) - - @staticmethod - def _output_size(input, weight, padding, dilation, stride): - channels = weight.size(0) - output_size = (input.size(0), channels) - for d in range(input.dim() - 2): - in_size = input.size(d + 2) - pad = padding[d] - kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 - stride_ = stride[d] - output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) - if not all(map(lambda s: s > 0, output_size)): - raise ValueError('convolution input is too small (output would be ' - f'{"x".join(map(str, output_size))})') - return output_size - - -class ModulatedDeformConvFunction(Function): - - @staticmethod - def forward(ctx, - input, - offset, - mask, - weight, - bias=None, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1): - ctx.stride = stride - ctx.padding = padding - ctx.dilation = dilation - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.with_bias = bias is not None - 
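        # The CUDA extension expects a tensor for every argument, so when bias
        # is disabled a one-element placeholder is created below and with_bias
        # tells the kernel to ignore it.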
if not ctx.with_bias: - bias = input.new_empty(1) # fake tensor - if not input.is_cuda: - raise NotImplementedError - if weight.requires_grad or mask.requires_grad or offset.requires_grad \ - or input.requires_grad: - ctx.save_for_backward(input, offset, mask, weight, bias) - output = input.new_empty( - ModulatedDeformConvFunction._infer_shape(ctx, input, weight)) - ctx._bufs = [input.new_empty(0), input.new_empty(0)] - deform_conv_ext.modulated_deform_conv_forward( - input, weight, bias, ctx._bufs[0], offset, mask, output, - ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride, - ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, - ctx.groups, ctx.deformable_groups, ctx.with_bias) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - if not grad_output.is_cuda: - raise NotImplementedError - input, offset, mask, weight, bias = ctx.saved_tensors - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - grad_mask = torch.zeros_like(mask) - grad_weight = torch.zeros_like(weight) - grad_bias = torch.zeros_like(bias) - deform_conv_ext.modulated_deform_conv_backward( - input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1], - grad_input, grad_weight, grad_bias, grad_offset, grad_mask, - grad_output, weight.shape[2], weight.shape[3], ctx.stride, - ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, - ctx.groups, ctx.deformable_groups, ctx.with_bias) - if not ctx.with_bias: - grad_bias = None - - return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, - None, None, None, None, None) - - @staticmethod - def _infer_shape(ctx, input, weight): - n = input.size(0) - channels_out = weight.size(0) - height, width = input.shape[2:4] - kernel_h, kernel_w = weight.shape[2:4] - # TODO: support different padding/stride/dilation in height and width - height_out = (height + 2 * ctx.padding - - (ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1 - width_out = (width + 2 * ctx.padding - - (ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1 - return n, channels_out, height_out, width_out - - -deform_conv = DeformConvFunction.apply -modulated_deform_conv = ModulatedDeformConvFunction.apply - - -class DeformConv(nn.Module): - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=False): - super(DeformConv, self).__init__() - - assert not bias - assert in_channels % groups == 0, \ - f'in_channels {in_channels} is not divisible by groups {groups}' - assert out_channels % groups == 0, \ - f'out_channels {out_channels} is not divisible ' \ - f'by groups {groups}' - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = _pair(stride) - self.padding = _pair(padding) - self.dilation = _pair(dilation) - self.groups = groups - self.deformable_groups = deformable_groups - # enable compatibility with nn.Conv2d - self.transposed = False - self.output_padding = _single(0) - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // self.groups, - *self.kernel_size)) - - self.reset_parameters() - - def reset_parameters(self): - n = self.in_channels - for k in self.kernel_size: - n *= k - stdv = 1. 
/ math.sqrt(n) - self.weight.data.uniform_(-stdv, stdv) - - def forward(self, x, offset): - # To fix an assert error in deform_conv_cuda.cpp:128 - # input image is smaller than kernel - input_pad = ( - x.size(2) < self.kernel_size[0] or x.size(3) < self.kernel_size[1]) - if input_pad: - pad_h = max(self.kernel_size[0] - x.size(2), 0) - pad_w = max(self.kernel_size[1] - x.size(3), 0) - x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() - offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', - 0).contiguous() - out = deform_conv(x, offset, self.weight, self.stride, self.padding, - self.dilation, self.groups, self.deformable_groups) - if input_pad: - out = out[:, :, :out.size(2) - pad_h, :out.size(3) - - pad_w].contiguous() - return out - - -@CONV_LAYERS.register_module(name='DCN') -class DeformConvPack(DeformConv): - """A Deformable Conv Encapsulation that acts as normal Conv layers. - - The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. - The spatial arrangement is like: - ``` - (x0, y0) (x1, y1) (x2, y2) - (x3, y3) (x4, y4) (x5, y5) - (x6, y6) (x7, y7) (x8, y8) - ``` - - Args: - in_channels (int): Same as nn.Conv2d. - out_channels (int): Same as nn.Conv2d. - kernel_size (int or tuple[int]): Same as nn.Conv2d. - stride (int or tuple[int]): Same as nn.Conv2d. - padding (int or tuple[int]): Same as nn.Conv2d. - dilation (int or tuple[int]): Same as nn.Conv2d. - groups (int): Same as nn.Conv2d. - bias (bool or str): If specified as `auto`, it will be decided by the - norm_cfg. Bias will be set as True if norm_cfg is None, otherwise - False. - """ - - _version = 2 - - def __init__(self, *args, **kwargs): - super(DeformConvPack, self).__init__(*args, **kwargs) - - self.conv_offset = nn.Conv2d( - self.in_channels, - self.deformable_groups * 2 * self.kernel_size[0] * - self.kernel_size[1], - kernel_size=self.kernel_size, - stride=_pair(self.stride), - padding=_pair(self.padding), - dilation=_pair(self.dilation), - bias=True) - self.init_offset() - - def init_offset(self): - self.conv_offset.weight.data.zero_() - self.conv_offset.bias.data.zero_() - - def forward(self, x): - offset = self.conv_offset(x) - return deform_conv(x, offset, self.weight, self.stride, self.padding, - self.dilation, self.groups, self.deformable_groups) - - def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, - missing_keys, unexpected_keys, error_msgs): - version = local_metadata.get('version', None) - - if version is None or version < 2: - # the key is different in early versions - # In version < 2, DeformConvPack loads previous benchmark models. 
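            # Those checkpoints keep the offset branch under '<module>_offset.*';
            # rename such keys to the current 'conv_offset.*' layout before
            # delegating to the parent loader.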
- if (prefix + 'conv_offset.weight' not in state_dict - and prefix[:-1] + '_offset.weight' in state_dict): - state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( - prefix[:-1] + '_offset.weight') - if (prefix + 'conv_offset.bias' not in state_dict - and prefix[:-1] + '_offset.bias' in state_dict): - state_dict[prefix + - 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + - '_offset.bias') - - if version is not None and version > 1: - print_log( - f'DeformConvPack {prefix.rstrip(".")} is upgraded to ' - 'version 2.', - logger='root') - - super()._load_from_state_dict(state_dict, prefix, local_metadata, - strict, missing_keys, unexpected_keys, - error_msgs) - - -class ModulatedDeformConv(nn.Module): - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=True): - super(ModulatedDeformConv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = stride - self.padding = padding - self.dilation = dilation - self.groups = groups - self.deformable_groups = deformable_groups - self.with_bias = bias - # enable compatibility with nn.Conv2d - self.transposed = False - self.output_padding = _single(0) - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, - *self.kernel_size)) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.register_parameter('bias', None) - self.init_weights() - - def init_weights(self): - n = self.in_channels - for k in self.kernel_size: - n *= k - stdv = 1. / math.sqrt(n) - self.weight.data.uniform_(-stdv, stdv) - if self.bias is not None: - self.bias.data.zero_() - - def forward(self, x, offset, mask): - return modulated_deform_conv(x, offset, mask, self.weight, self.bias, - self.stride, self.padding, self.dilation, - self.groups, self.deformable_groups) - - -@CONV_LAYERS.register_module(name='DCNv2') -class ModulatedDeformConvPack(ModulatedDeformConv): - """A ModulatedDeformable Conv Encapsulation that acts as normal Conv - layers. - - Args: - in_channels (int): Same as nn.Conv2d. - out_channels (int): Same as nn.Conv2d. - kernel_size (int or tuple[int]): Same as nn.Conv2d. - stride (int): Same as nn.Conv2d, while tuple is not supported. - padding (int): Same as nn.Conv2d, while tuple is not supported. - dilation (int): Same as nn.Conv2d, while tuple is not supported. - groups (int): Same as nn.Conv2d. - bias (bool or str): If specified as `auto`, it will be decided by the - norm_cfg. Bias will be set as True if norm_cfg is None, otherwise - False. 
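    Example:
        A minimal usage sketch (assumes a CUDA build of deform_conv_ext and a
        GPU-resident input):

        >>> import torch
        >>> dcn = ModulatedDeformConvPack(16, 32, kernel_size=3, padding=1).cuda()
        >>> x = torch.rand(2, 16, 28, 28).cuda()
        >>> y = dcn(x)  # offsets and masks come from the internal conv_offset; y: (2, 32, 28, 28)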
- """ - - _version = 2 - - def __init__(self, *args, **kwargs): - super(ModulatedDeformConvPack, self).__init__(*args, **kwargs) - - self.conv_offset = nn.Conv2d( - self.in_channels, - self.deformable_groups * 3 * self.kernel_size[0] * - self.kernel_size[1], - kernel_size=self.kernel_size, - stride=_pair(self.stride), - padding=_pair(self.padding), - dilation=_pair(self.dilation), - bias=True) - self.init_weights() - - def init_weights(self): - super(ModulatedDeformConvPack, self).init_weights() - if hasattr(self, 'conv_offset'): - self.conv_offset.weight.data.zero_() - self.conv_offset.bias.data.zero_() - - def forward(self, x): - out = self.conv_offset(x) - o1, o2, mask = torch.chunk(out, 3, dim=1) - offset = torch.cat((o1, o2), dim=1) - mask = torch.sigmoid(mask) - return modulated_deform_conv(x, offset, mask, self.weight, self.bias, - self.stride, self.padding, self.dilation, - self.groups, self.deformable_groups) - - def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, - missing_keys, unexpected_keys, error_msgs): - version = local_metadata.get('version', None) - - if version is None or version < 2: - # the key is different in early versions - # In version < 2, ModulatedDeformConvPack - # loads previous benchmark models. - if (prefix + 'conv_offset.weight' not in state_dict - and prefix[:-1] + '_offset.weight' in state_dict): - state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( - prefix[:-1] + '_offset.weight') - if (prefix + 'conv_offset.bias' not in state_dict - and prefix[:-1] + '_offset.bias' in state_dict): - state_dict[prefix + - 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + - '_offset.bias') - - if version is not None and version > 1: - print_log( - f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' - 'version 2.', - logger='root') - - super()._load_from_state_dict(state_dict, prefix, local_metadata, - strict, missing_keys, unexpected_keys, - error_msgs) diff --git a/mmdet/ops/dcn/deform_pool.py b/mmdet/ops/dcn/deform_pool.py deleted file mode 100644 index a0ccd60734d..00000000000 --- a/mmdet/ops/dcn/deform_pool.py +++ /dev/null @@ -1,258 +0,0 @@ -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from . 
import deform_pool_ext - - -class DeformRoIPoolingFunction(Function): - - @staticmethod - def forward(ctx, - data, - rois, - offset, - spatial_scale, - out_size, - out_channels, - no_trans, - group_size=1, - part_size=None, - sample_per_part=4, - trans_std=.0): - # TODO: support unsquare RoIs - out_h, out_w = _pair(out_size) - assert isinstance(out_h, int) and isinstance(out_w, int) - assert out_h == out_w - out_size = out_h # out_h and out_w must be equal - - ctx.spatial_scale = spatial_scale - ctx.out_size = out_size - ctx.out_channels = out_channels - ctx.no_trans = no_trans - ctx.group_size = group_size - ctx.part_size = out_size if part_size is None else part_size - ctx.sample_per_part = sample_per_part - ctx.trans_std = trans_std - - assert 0.0 <= ctx.trans_std <= 1.0 - if not data.is_cuda: - raise NotImplementedError - - n = rois.shape[0] - output = data.new_empty(n, out_channels, out_size, out_size) - output_count = data.new_empty(n, out_channels, out_size, out_size) - deform_pool_ext.deform_psroi_pooling_forward( - data, rois, offset, output, output_count, ctx.no_trans, - ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, - ctx.part_size, ctx.sample_per_part, ctx.trans_std) - - if data.requires_grad or rois.requires_grad or offset.requires_grad: - ctx.save_for_backward(data, rois, offset) - ctx.output_count = output_count - - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - if not grad_output.is_cuda: - raise NotImplementedError - - data, rois, offset = ctx.saved_tensors - output_count = ctx.output_count - grad_input = torch.zeros_like(data) - grad_rois = None - grad_offset = torch.zeros_like(offset) - - deform_pool_ext.deform_psroi_pooling_backward( - grad_output, data, rois, offset, output_count, grad_input, - grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, - ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, - ctx.trans_std) - return (grad_input, grad_rois, grad_offset, None, None, None, None, - None, None, None, None) - - -deform_roi_pooling = DeformRoIPoolingFunction.apply - - -class DeformRoIPooling(nn.Module): - - def __init__(self, - spatial_scale, - out_size, - out_channels, - no_trans, - group_size=1, - part_size=None, - sample_per_part=4, - trans_std=.0): - super(DeformRoIPooling, self).__init__() - self.spatial_scale = spatial_scale - self.out_size = _pair(out_size) - self.out_channels = out_channels - self.no_trans = no_trans - self.group_size = group_size - self.part_size = out_size if part_size is None else part_size - self.sample_per_part = sample_per_part - self.trans_std = trans_std - - def forward(self, data, rois, offset): - if self.no_trans: - offset = data.new_empty(0) - return deform_roi_pooling(data, rois, offset, self.spatial_scale, - self.out_size, self.out_channels, - self.no_trans, self.group_size, - self.part_size, self.sample_per_part, - self.trans_std) - - -class DeformRoIPoolingPack(DeformRoIPooling): - - def __init__(self, - spatial_scale, - out_size, - out_channels, - no_trans, - group_size=1, - part_size=None, - sample_per_part=4, - trans_std=.0, - num_offset_fcs=3, - deform_fc_channels=1024): - super(DeformRoIPoolingPack, - self).__init__(spatial_scale, out_size, out_channels, no_trans, - group_size, part_size, sample_per_part, trans_std) - - self.num_offset_fcs = num_offset_fcs - self.deform_fc_channels = deform_fc_channels - - if not no_trans: - seq = [] - ic = self.out_size[0] * self.out_size[1] * self.out_channels - for i in range(self.num_offset_fcs): - 
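                # Hidden layers of the offset branch use deform_fc_channels
                # units; the final layer emits a flattened 2 * out_h * out_w
                # offset map that forward() reshapes to (n, 2, out_h, out_w).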
if i < self.num_offset_fcs - 1: - oc = self.deform_fc_channels - else: - oc = self.out_size[0] * self.out_size[1] * 2 - seq.append(nn.Linear(ic, oc)) - ic = oc - if i < self.num_offset_fcs - 1: - seq.append(nn.ReLU(inplace=True)) - self.offset_fc = nn.Sequential(*seq) - self.offset_fc[-1].weight.data.zero_() - self.offset_fc[-1].bias.data.zero_() - - def forward(self, data, rois): - assert data.size(1) == self.out_channels - n = rois.shape[0] - if n == 0: - return data.new_empty(n, self.out_channels, self.out_size[0], - self.out_size[1]) - if self.no_trans: - offset = data.new_empty(0) - return deform_roi_pooling(data, rois, offset, self.spatial_scale, - self.out_size, self.out_channels, - self.no_trans, self.group_size, - self.part_size, self.sample_per_part, - self.trans_std) - else: - offset = data.new_empty(0) - x = deform_roi_pooling(data, rois, offset, self.spatial_scale, - self.out_size, self.out_channels, True, - self.group_size, self.part_size, - self.sample_per_part, self.trans_std) - offset = self.offset_fc(x.view(n, -1)) - offset = offset.view(n, 2, self.out_size[0], self.out_size[1]) - return deform_roi_pooling(data, rois, offset, self.spatial_scale, - self.out_size, self.out_channels, - self.no_trans, self.group_size, - self.part_size, self.sample_per_part, - self.trans_std) - - -class ModulatedDeformRoIPoolingPack(DeformRoIPooling): - - def __init__(self, - spatial_scale, - out_size, - out_channels, - no_trans, - group_size=1, - part_size=None, - sample_per_part=4, - trans_std=.0, - num_offset_fcs=3, - num_mask_fcs=2, - deform_fc_channels=1024): - super(ModulatedDeformRoIPoolingPack, - self).__init__(spatial_scale, out_size, out_channels, no_trans, - group_size, part_size, sample_per_part, trans_std) - - self.num_offset_fcs = num_offset_fcs - self.num_mask_fcs = num_mask_fcs - self.deform_fc_channels = deform_fc_channels - - if not no_trans: - offset_fc_seq = [] - ic = self.out_size[0] * self.out_size[1] * self.out_channels - for i in range(self.num_offset_fcs): - if i < self.num_offset_fcs - 1: - oc = self.deform_fc_channels - else: - oc = self.out_size[0] * self.out_size[1] * 2 - offset_fc_seq.append(nn.Linear(ic, oc)) - ic = oc - if i < self.num_offset_fcs - 1: - offset_fc_seq.append(nn.ReLU(inplace=True)) - self.offset_fc = nn.Sequential(*offset_fc_seq) - self.offset_fc[-1].weight.data.zero_() - self.offset_fc[-1].bias.data.zero_() - - mask_fc_seq = [] - ic = self.out_size[0] * self.out_size[1] * self.out_channels - for i in range(self.num_mask_fcs): - if i < self.num_mask_fcs - 1: - oc = self.deform_fc_channels - else: - oc = self.out_size[0] * self.out_size[1] - mask_fc_seq.append(nn.Linear(ic, oc)) - ic = oc - if i < self.num_mask_fcs - 1: - mask_fc_seq.append(nn.ReLU(inplace=True)) - else: - mask_fc_seq.append(nn.Sigmoid()) - self.mask_fc = nn.Sequential(*mask_fc_seq) - self.mask_fc[-2].weight.data.zero_() - self.mask_fc[-2].bias.data.zero_() - - def forward(self, data, rois): - assert data.size(1) == self.out_channels - n = rois.shape[0] - if n == 0: - return data.new_empty(n, self.out_channels, self.out_size[0], - self.out_size[1]) - if self.no_trans: - offset = data.new_empty(0) - return deform_roi_pooling(data, rois, offset, self.spatial_scale, - self.out_size, self.out_channels, - self.no_trans, self.group_size, - self.part_size, self.sample_per_part, - self.trans_std) - else: - offset = data.new_empty(0) - x = deform_roi_pooling(data, rois, offset, self.spatial_scale, - self.out_size, self.out_channels, True, - self.group_size, self.part_size, - 
self.sample_per_part, self.trans_std) - offset = self.offset_fc(x.view(n, -1)) - offset = offset.view(n, 2, self.out_size[0], self.out_size[1]) - mask = self.mask_fc(x.view(n, -1)) - mask = mask.view(n, 1, self.out_size[0], self.out_size[1]) - return deform_roi_pooling( - data, rois, offset, self.spatial_scale, self.out_size, - self.out_channels, self.no_trans, self.group_size, - self.part_size, self.sample_per_part, self.trans_std) * mask diff --git a/mmdet/ops/dcn/src/cuda/deform_conv_cuda.cpp b/mmdet/ops/dcn/src/cuda/deform_conv_cuda.cpp deleted file mode 100644 index a81045e18d7..00000000000 --- a/mmdet/ops/dcn/src/cuda/deform_conv_cuda.cpp +++ /dev/null @@ -1,686 +0,0 @@ -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c - -#include -#include - -#include -#include - -void deformable_im2col(const at::Tensor data_im, const at::Tensor data_offset, - const int channels, const int height, const int width, - const int ksize_h, const int ksize_w, const int pad_h, - const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int parallel_imgs, const int deformable_group, - at::Tensor data_col); - -void deformable_col2im(const at::Tensor data_col, const at::Tensor data_offset, - const int channels, const int height, const int width, - const int ksize_h, const int ksize_w, const int pad_h, - const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int parallel_imgs, const int deformable_group, - at::Tensor grad_im); - -void deformable_col2im_coord( - const at::Tensor data_col, const at::Tensor data_im, - const at::Tensor data_offset, const int channels, const int height, - const int width, const int ksize_h, const int ksize_w, const int pad_h, - const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int parallel_imgs, - const int deformable_group, at::Tensor grad_offset); - -void modulated_deformable_im2col_cuda( - const at::Tensor data_im, const at::Tensor data_offset, - const at::Tensor data_mask, const int batch_size, const int channels, - const int height_im, const int width_im, const int height_col, - const int width_col, const int kernel_h, const int kenerl_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int deformable_group, - at::Tensor data_col); - -void modulated_deformable_col2im_cuda( - const at::Tensor data_col, const at::Tensor data_offset, - const at::Tensor data_mask, const int batch_size, const int channels, - const int height_im, const int width_im, const int height_col, - const int width_col, const int kernel_h, const int kenerl_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int deformable_group, - at::Tensor grad_im); - -void modulated_deformable_col2im_coord_cuda( - const at::Tensor data_col, const at::Tensor data_im, - const at::Tensor data_offset, const at::Tensor data_mask, - const int batch_size, const int channels, const int height_im, - const int width_im, const int height_col, const int width_col, - const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, const int dilation_h, - const int dilation_w, const int deformable_group, at::Tensor grad_offset, - at::Tensor grad_mask); - -void 
shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, - at::Tensor weight, int kH, int kW, int dH, int dW, int padH, - int padW, int dilationH, int dilationW, int group, - int deformable_group) { - TORCH_CHECK(weight.ndimension() == 4, - "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " - "but got: %s", - weight.ndimension()); - - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - TORCH_CHECK(kW > 0 && kH > 0, - "kernel size should be greater than zero, but got kH: %d kW: %d", kH, - kW); - - TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), - "kernel size should be consistent with weight, ", - "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, - kW, weight.size(2), weight.size(3)); - - TORCH_CHECK(dW > 0 && dH > 0, - "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); - - TORCH_CHECK( - dilationW > 0 && dilationH > 0, - "dilation should be greater than 0, but got dilationH: %d dilationW: %d", - dilationH, dilationW); - - int ndim = input.ndimension(); - int dimf = 0; - int dimh = 1; - int dimw = 2; - - if (ndim == 4) { - dimf++; - dimh++; - dimw++; - } - - TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", - ndim); - - long nInputPlane = weight.size(1) * group; - long inputHeight = input.size(dimh); - long inputWidth = input.size(dimw); - long nOutputPlane = weight.size(0); - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - - TORCH_CHECK(nInputPlane % deformable_group == 0, - "input channels must divide deformable group size"); - - if (outputWidth < 1 || outputHeight < 1) - AT_ERROR( - "Given input size: (%ld x %ld x %ld). " - "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", - nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, - outputWidth); - - TORCH_CHECK(input.size(1) == nInputPlane, - "invalid number of input planes, expected: %d, but got: %d", - nInputPlane, input.size(1)); - - TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), - "input image is smaller than kernel"); - - TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), - "invalid spatial size of offset, expected height: %d width: %d, but " - "got height: %d width: %d", - outputHeight, outputWidth, offset.size(2), offset.size(3)); - - TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), - "invalid number of channels of offset"); - - if (gradOutput != NULL) { - TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane, - "invalid number of gradOutput planes, expected: %d, but got: %d", - nOutputPlane, gradOutput->size(dimf)); - - TORCH_CHECK((gradOutput->size(dimh) == outputHeight && - gradOutput->size(dimw) == outputWidth), - "invalid size of gradOutput, expected height: %d width: %d , but " - "got height: %d width: %d", - outputHeight, outputWidth, gradOutput->size(dimh), - gradOutput->size(dimw)); - } -} - -int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, - at::Tensor offset, at::Tensor output, - at::Tensor columns, at::Tensor ones, int kW, - int kH, int dW, int dH, int padW, int padH, - int dilationW, int dilationH, int group, - int deformable_group, int im2col_step) { - // todo: resize columns to include im2col: done - // todo: add im2col_step as input - // todo: add new output buffer and transpose it to output (or directly - // transpose output) todo: possibly change data indexing because of - // parallel_imgs - - shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, - dilationH, dilationW, group, deformable_group); - at::DeviceGuard guard(input.device()); - - input = input.contiguous(); - offset = offset.contiguous(); - weight = weight.contiguous(); - - int batch = 1; - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input.unsqueeze_(0); - offset.unsqueeze_(0); - } - - // todo: assert batchsize dividable by im2col_step - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, - outputHeight, outputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < outputHeight * outputWidth) { - ones = at::ones({outputHeight, outputWidth}, input.options()); - } - - input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, - inputHeight, inputWidth}); - offset = - offset.view({batchSize / im2col_step, im2col_step, - deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - at::Tensor output_buffer = - at::zeros({batchSize / im2col_step, nOutputPlane, - im2col_step * outputHeight, outputWidth}, - output.options()); - - output_buffer = output_buffer.view( - {output_buffer.size(0), group, output_buffer.size(1) / group, - output_buffer.size(2), output_buffer.size(3)}); - - for (int elt = 0; elt 
< batchSize / im2col_step; elt++) { - deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, - inputWidth, kH, kW, padH, padW, dH, dW, dilationH, - dilationW, im2col_step, deformable_group, columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, weight.size(0) / group, weight.size(1), - weight.size(2), weight.size(3)}); - - for (int g = 0; g < group; g++) { - output_buffer[elt][g] = output_buffer[elt][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output_buffer[elt][g]); - } - } - - output_buffer = output_buffer.view( - {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2), - output_buffer.size(3), output_buffer.size(4)}); - - output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, - im2col_step, outputHeight, outputWidth}); - output_buffer.transpose_(1, 2); - output.copy_(output_buffer); - output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - output = output.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - } - - return 1; -} - -int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, - at::Tensor gradOutput, at::Tensor gradInput, - at::Tensor gradOffset, at::Tensor weight, - at::Tensor columns, int kW, int kH, int dW, - int dH, int padW, int padH, int dilationW, - int dilationH, int group, - int deformable_group, int im2col_step) { - shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW, - dilationH, dilationW, group, deformable_group); - at::DeviceGuard guard(input.device()); - - input = input.contiguous(); - offset = offset.contiguous(); - gradOutput = gradOutput.contiguous(); - weight = weight.contiguous(); - - int batch = 1; - - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view({1, input.size(0), input.size(1), input.size(2)}); - offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - // change order of grad output - gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, - nOutputPlane, outputHeight, outputWidth}); - gradOutput.transpose_(1, 2); - - gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, - inputHeight, inputWidth}); - input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, - inputHeight, inputWidth}); - gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, - deformable_group * 2 * kH * kW, outputHeight, 
- outputWidth}); - offset = - offset.view({batchSize / im2col_step, im2col_step, - deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - // divide into groups - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, weight.size(0) / group, weight.size(1), - weight.size(2), weight.size(3)}); - gradOutput = gradOutput.view( - {gradOutput.size(0), group, gradOutput.size(1) / group, - gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); - - for (int g = 0; g < group; g++) { - columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), - gradOutput[elt][g].flatten(1), 0.0f, 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - gradOutput = gradOutput.view( - {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), - gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); - - deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane, - inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, - dilationH, dilationW, im2col_step, deformable_group, - gradOffset[elt]); - - deformable_col2im(columns, offset[elt], nInputPlane, inputHeight, - inputWidth, kH, kW, padH, padW, dH, dW, dilationH, - dilationW, im2col_step, deformable_group, gradInput[elt]); - } - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - gradOffset = gradOffset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - gradOffset = - gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); - } - - return 1; -} - -int deform_conv_backward_parameters_cuda( - at::Tensor input, at::Tensor offset, at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, - int padW, int padH, int dilationW, int dilationH, int group, - int deformable_group, float scale, int im2col_step) { - // todo: transpose and reshape outGrad - // todo: reshape columns - // todo: add im2col_step as input - - shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH, - padW, dilationH, dilationW, group, deformable_group); - at::DeviceGuard guard(input.device()); - - input = input.contiguous(); - offset = offset.contiguous(); - gradOutput = gradOutput.contiguous(); - - int batch = 1; - - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view( - at::IntList({1, input.size(0), input.size(1), input.size(2)})); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = gradWeight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight 
= - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, - nOutputPlane, outputHeight, outputWidth}); - gradOutput.transpose_(1, 2); - - at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); - gradOutputBuffer = - gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, - outputHeight, outputWidth}); - gradOutputBuffer = gradOutputBuffer.contiguous(); - gradOutputBuffer.copy_(gradOutput); - gradOutputBuffer = - gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, - im2col_step * outputHeight, outputWidth}); - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, - inputHeight, inputWidth}); - offset = - offset.view({batchSize / im2col_step, im2col_step, - deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, - inputWidth, kH, kW, padH, padW, dH, dW, dilationH, - dilationW, im2col_step, deformable_group, columns); - - // divide into group - gradOutputBuffer = gradOutputBuffer.view( - {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, - gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - gradWeight = - gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), - gradWeight.size(2), gradWeight.size(3)}); - - for (int g = 0; g < group; g++) { - gradWeight[g] = gradWeight[g] - .flatten(1) - .addmm_(gradOutputBuffer[elt][g].flatten(1), - columns[g].transpose(1, 0), 1.0, scale) - .view_as(gradWeight[g]); - } - gradOutputBuffer = gradOutputBuffer.view( - {gradOutputBuffer.size(0), - gradOutputBuffer.size(1) * gradOutputBuffer.size(2), - gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), - gradWeight.size(2), gradWeight.size(3), - gradWeight.size(4)}); - } - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - } - - return 1; -} - -void modulated_deform_conv_cuda_forward( - at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, - at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, - int kernel_h, int kernel_w, const int stride_h, const int stride_w, - const int pad_h, const int pad_w, const int dilation_h, - const int dilation_w, const int group, const int deformable_group, - const bool with_bias) { - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - at::DeviceGuard guard(input.device()); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int 
channels_out = weight.size(0); - const int channels_kernel = weight.size(1); - const int kernel_h_ = weight.size(2); - const int kernel_w_ = weight.size(3); - - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR("Input shape and kernel shape wont match: (%d x %d vs %d x %d).", - kernel_h_, kernel_w, kernel_h_, kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR("Input shape and kernel channels wont match: (%d vs %d).", - channels, channels_kernel * group); - - const int height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... - ones = at::ones({height_out, width_out}, input.options()); - } - - // resize output - output = output.view({batch, channels_out, height_out, width_out}).zero_(); - // resize temporary columns - columns = - at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, - input.options()); - - output = output.view({output.size(0), group, output.size(1) / group, - output.size(2), output.size(3)}); - - for (int b = 0; b < batch; b++) { - modulated_deformable_im2col_cuda( - input[b], offset[b], mask[b], 1, channels, height, width, height_out, - width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, deformable_group, columns); - - // divide into group - weight = weight.view({group, weight.size(0) / group, weight.size(1), - weight.size(2), weight.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - - for (int g = 0; g < group; g++) { - output[b][g] = output[b][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output[b][g]); - } - - weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), - weight.size(3), weight.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - } - - output = output.view({output.size(0), output.size(1) * output.size(2), - output.size(3), output.size(4)}); - - if (with_bias) { - output += bias.view({1, bias.size(0), 1, 1}); - } -} - -void modulated_deform_conv_cuda_backward( - at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, - at::Tensor offset, at::Tensor mask, at::Tensor columns, - at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, - at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, - int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, - int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, - const bool with_bias) { - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - at::DeviceGuard guard(input.device()); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int channels_kernel = weight.size(1); - const int kernel_h_ = weight.size(2); - const int kernel_w_ = weight.size(3); - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR("Input shape and kernel shape wont match: (%d x %d vs %d x %d).", - kernel_h_, kernel_w, kernel_h_, kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR("Input shape and kernel channels wont match: (%d vs %d).", - channels, channels_kernel * group); - - const int 
height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... - ones = at::ones({height_out, width_out}, input.options()); - } - - grad_input = grad_input.view({batch, channels, height, width}); - columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, - input.options()); - - grad_output = - grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, - grad_output.size(2), grad_output.size(3)}); - - for (int b = 0; b < batch; b++) { - // divide int group - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, weight.size(0) / group, weight.size(1), - weight.size(2), weight.size(3)}); - - for (int g = 0; g < group; g++) { - columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), - grad_output[b][g].flatten(1), 0.0f, 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), - weight.size(3), weight.size(4)}); - - // gradient w.r.t. input coordinate data - modulated_deformable_col2im_coord_cuda( - columns, input[b], offset[b], mask[b], 1, channels, height, width, - height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, - stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], - grad_mask[b]); - // gradient w.r.t. input data - modulated_deformable_col2im_cuda( - columns, offset[b], mask[b], 1, channels, height, width, height_out, - width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, deformable_group, grad_input[b]); - - // gradient w.r.t. weight, dWeight should accumulate across the batch and - // group - modulated_deformable_im2col_cuda( - input[b], offset[b], mask[b], 1, channels, height, width, height_out, - width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, deformable_group, columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - grad_weight = grad_weight.view({group, grad_weight.size(0) / group, - grad_weight.size(1), grad_weight.size(2), - grad_weight.size(3)}); - if (with_bias) - grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); - - for (int g = 0; g < group; g++) { - grad_weight[g] = - grad_weight[g] - .flatten(1) - .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) - .view_as(grad_weight[g]); - if (with_bias) { - grad_bias[g] = - grad_bias[g] - .view({-1, 1}) - .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) - .view(-1); - } - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), - grad_weight.size(2), grad_weight.size(3), - grad_weight.size(4)}); - if (with_bias) - grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); - } - grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), - grad_output.size(2), grad_output.size(3), - grad_output.size(4)}); -} diff --git a/mmdet/ops/dcn/src/cuda/deform_conv_cuda_kernel.cu b/mmdet/ops/dcn/src/cuda/deform_conv_cuda_kernel.cu deleted file mode 100644 index 98752dccf8c..00000000000 --- a/mmdet/ops/dcn/src/cuda/deform_conv_cuda_kernel.cu +++ /dev/null @@ -1,867 +0,0 @@ -/*! 
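The backward above makes three passes per sample: `modulated_deformable_col2im_coord_cuda` for the offset and mask gradients, `modulated_deformable_col2im_cuda` for the input gradient, and a fresh `modulated_deformable_im2col_cuda` whose columns feed the weight and bias gradients. The bias branch, written as an `addmm_` against a ones vector, is simply a spatial sum of the output gradient; a one-line sketch with illustrative names:

```python
def grad_bias_per_group(grad_out_g):
    # grad_out_g: (C_out / groups, H_out * W_out) for one sample and one group.
    # grad_bias[g].view(-1, 1).addmm_(grad_output[b][g].flatten(1), ones.view(-1, 1))
    # multiplies by a ones vector, i.e. it sums over output locations.
    return grad_out_g.sum(dim=1)
```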
- ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** - * - * COPYRIGHT - * - * All contributions by the University of California: - * Copyright (c) 2014-2017 The Regents of the University of California (Regents) - * All rights reserved. - * - * All other contributions: - * Copyright (c) 2014-2017, the respective contributors - * All rights reserved. - * - * Caffe uses a shared copyright model: each contributor holds copyright over - * their contributions to Caffe. The project versioning records all such - * contribution and copyright details. If a contributor wants to further mark - * their specific copyright on a particular contribution, they should indicate - * their copyright solely in the commit message of the change when it is - * committed. - * - * LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * CONTRIBUTION AGREEMENT - * - * By contributing to the BVLC/caffe repository through pull-request, comment, - * or otherwise, the contributor releases their content to the - * license and copyright terms herein. - * - ***************** END Caffe Copyright Notice and Disclaimer ******************** - * - * Copyright (c) 2018 Microsoft - * Licensed under The MIT License [see LICENSE for details] - * \file modulated_deformable_im2col.cuh - * \brief Function definitions of converting an image to - * column matrix based on kernel, padding, dilation, and offset. - * These functions are mainly used in deformable convolution operators. 
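The header above describes what the deleted kernels implement: the im2col transform of deformable convolution (arXiv:1703.06211), where every kernel tap samples the input at a learned fractional offset via bilinear interpolation. Below is a single-channel pure-Python reference of that transform, written from the kernel code that follows; it is illustrative only (no batching, no deformable groups) and every name is chosen here:

```python
import math
import torch

def bilinear(im, h, w):
    # Zero outside the image, as in deformable_im2col_bilinear.
    H, W = im.shape
    h0, w0 = math.floor(h), math.floor(w)
    lh, lw = h - h0, w - w0

    def at(y, x):
        return im[y, x].item() if 0 <= y < H and 0 <= x < W else 0.0

    return ((1 - lh) * (1 - lw) * at(h0, w0) + (1 - lh) * lw * at(h0, w0 + 1)
            + lh * (1 - lw) * at(h0 + 1, w0) + lh * lw * at(h0 + 1, w0 + 1))

def deform_im2col_1ch(im, offsets, k, stride=1, pad=0, dil=1):
    """im: (H, W) tensor; offsets: (2 * k * k, H_out, W_out) tensor holding
    a (dh, dw) pair per kernel tap and output location."""
    H, W = im.shape
    H_out = (H + 2 * pad - (dil * (k - 1) + 1)) // stride + 1
    W_out = (W + 2 * pad - (dil * (k - 1) + 1)) // stride + 1
    cols = torch.zeros(k * k, H_out * W_out)
    for hc in range(H_out):
        for wc in range(W_out):
            h_in, w_in = hc * stride - pad, wc * stride - pad
            for i in range(k):
                for j in range(k):
                    dh = offsets[2 * (i * k + j), hc, wc].item()
                    dw = offsets[2 * (i * k + j) + 1, hc, wc].item()
                    h, w = h_in + i * dil + dh, w_in + j * dil + dw
                    val = bilinear(im, h, w) if (-1 < h < H and -1 < w < W) else 0.0
                    cols[i * k + j, hc * W_out + wc] = val
    return cols
```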
- * \ref: https://arxiv.org/abs/1703.06211 - * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng - */ - -// modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu - -#include -#include -#include -#include -#include -#include - -using namespace at; - -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ - i += blockDim.x * gridDim.x) - -const int CUDA_NUM_THREADS = 1024; -const int kMaxGridNum = 65535; - -inline int GET_BLOCKS(const int N) -{ - return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS); -} - -template -__device__ scalar_t deformable_im2col_bilinear(const scalar_t *bottom_data, const int data_width, - const int height, const int width, scalar_t h, scalar_t w) -{ - - int h_low = floor(h); - int w_low = floor(w); - int h_high = h_low + 1; - int w_high = w_low + 1; - - scalar_t lh = h - h_low; - scalar_t lw = w - w_low; - scalar_t hh = 1 - lh, hw = 1 - lw; - - scalar_t v1 = 0; - if (h_low >= 0 && w_low >= 0) - v1 = bottom_data[h_low * data_width + w_low]; - scalar_t v2 = 0; - if (h_low >= 0 && w_high <= width - 1) - v2 = bottom_data[h_low * data_width + w_high]; - scalar_t v3 = 0; - if (h_high <= height - 1 && w_low >= 0) - v3 = bottom_data[h_high * data_width + w_low]; - scalar_t v4 = 0; - if (h_high <= height - 1 && w_high <= width - 1) - v4 = bottom_data[h_high * data_width + w_high]; - - scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -template -__device__ scalar_t get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w, - const int h, const int w, const int height, const int width) -{ - - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) - { - //empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - if (h == argmax_h_low && w == argmax_w_low) - weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); - if (h == argmax_h_low && w == argmax_w_high) - weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); - if (h == argmax_h_high && w == argmax_w_low) - weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); - if (h == argmax_h_high && w == argmax_w_high) - weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); - return weight; -} - -template -__device__ scalar_t get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w, - const int height, const int width, const scalar_t *im_data, - const int data_width, const int bp_dir) -{ - - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) - { - //empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - - if (bp_dir == 0) - { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - 
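`get_coordinate_weight` above (continued in the next chunk) is the derivative of a bilinear sample with respect to the sampling coordinate, which is what propagates the loss into the learned offsets. In closed form it is a difference of the interpolated rows or columns; a sketch with names chosen here:

```python
def bilinear_coordinate_grad(v00, v01, v10, v11, lh, lw):
    """d(value)/dh and d(value)/dw for value = bilinear sample of the four
    corner pixels v00 (top-left), v01 (top-right), v10 (bottom-left),
    v11 (bottom-right), with fractional offsets lh, lw from the top-left.
    Matches what get_coordinate_weight accumulates for bp_dir 0 and 1."""
    d_dh = (1 - lw) * (v10 - v00) + lw * (v11 - v01)
    d_dw = (1 - lh) * (v01 - v00) + lh * (v11 - v10)
    return d_dh, d_dw
```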
weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; - } - else if (bp_dir == 1) - { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; - } - - return weight; -} - -template -__global__ void deformable_im2col_gpu_kernel(const int n, const scalar_t *data_im, const scalar_t *data_offset, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int channel_per_deformable_group, - const int batch_size, const int num_channels, const int deformable_group, - const int height_col, const int width_col, - scalar_t *data_col) -{ - CUDA_KERNEL_LOOP(index, n) - { - // index index of output matrix - const int w_col = index % width_col; - const int h_col = (index / width_col) % height_col; - const int b_col = (index / width_col / height_col) % batch_size; - const int c_im = (index / width_col / height_col) / batch_size; - const int c_col = c_im * kernel_h * kernel_w; - - // compute deformable group index - const int deformable_group_index = c_im / channel_per_deformable_group; - - const int h_in = h_col * stride_h - pad_h; - const int w_in = w_col * stride_w - pad_w; - scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; - //const scalar_t* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in; - const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; - const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; - - for (int i = 0; i < kernel_h; ++i) - { - for (int j = 0; j < kernel_w; ++j) - { - const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; - const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - scalar_t val = static_cast(0); - const scalar_t h_im = h_in + i * dilation_h + offset_h; - const scalar_t w_im = w_in + j * dilation_w + offset_w; - if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) - { - //const scalar_t map_h = i * dilation_h + offset_h; - //const scalar_t map_w = j * dilation_w + offset_w; - //const int cur_height = height - h_in; - //const int cur_width = width - w_in; - //val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w); - val = deformable_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im); - } - *data_col_ptr = val; - data_col_ptr += batch_size * height_col * width_col; - } - } - } -} - -void deformable_im2col( - const at::Tensor data_im, const at::Tensor data_offset, const int channels, - const int height, const int width, const int ksize_h, const int ksize_w, 
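`deformable_im2col_gpu_kernel` above assigns one thread per (input channel, batch element, output location) tuple and recovers those indices from a single flat loop index; the sampled value for kernel tap (i, j) then lands in column row `c_im * kH * kW + i * kW + j`. A small sketch of that bookkeeping (pure Python, illustrative names):

```python
def decode_thread_index(index, width_col, height_col, batch_size):
    """Invert the flat index layout used by deformable_im2col_gpu_kernel."""
    w_col = index % width_col
    h_col = (index // width_col) % height_col
    b_col = (index // (width_col * height_col)) % batch_size
    c_im = index // (width_col * height_col * batch_size)
    return c_im, b_col, h_col, w_col

def column_position(c_im, i, j, b_col, h_col, w_col, k_h, k_w,
                    batch_size, height_col, width_col):
    """Flat offset written for kernel tap (i, j); folds together the kernel's
    initial data_col_ptr and its per-tap stride of batch_size * height_col * width_col."""
    c_col = c_im * k_h * k_w + i * k_w + j
    return ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col
```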
- const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int parallel_imgs, - const int deformable_group, at::Tensor data_col) -{ - // num_axes should be smaller than block size - // todo: check parallel_imgs is correctly passed in - int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = channels * height_col * width_col * parallel_imgs; - int channel_per_deformable_group = channels / deformable_group; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_im.scalar_type(), "deformable_im2col_gpu", ([&] { - const scalar_t *data_im_ = data_im.data_ptr(); - const scalar_t *data_offset_ = data_offset.data_ptr(); - scalar_t *data_col_ = data_col.data_ptr(); - - deformable_im2col_gpu_kernel<<>>( - num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w, - pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, - channel_per_deformable_group, parallel_imgs, channels, deformable_group, - height_col, width_col, data_col_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - printf("error in deformable_im2col: %s\n", cudaGetErrorString(err)); - } -} - -template -__global__ void deformable_col2im_gpu_kernel( - const int n, const scalar_t *data_col, const scalar_t *data_offset, - const int channels, const int height, const int width, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, const int deformable_group, - const int height_col, const int width_col, - scalar_t *grad_im) -{ - CUDA_KERNEL_LOOP(index, n) - { - const int j = (index / width_col / height_col / batch_size) % kernel_w; - const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h; - const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h; - // compute the start and end of the output - - const int deformable_group_index = c / channel_per_deformable_group; - - int w_out = index % width_col; - int h_out = (index / width_col) % height_col; - int b = (index / width_col / height_col) % batch_size; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - - const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * - 2 * kernel_h * kernel_w * height_col * width_col; - const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; - const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; - const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; - - const scalar_t cur_top_grad = data_col[index]; - const int cur_h = (int)cur_inv_h_data; - const int cur_w = (int)cur_inv_w_data; - for (int dy = -2; dy <= 2; dy++) - { - for (int dx = -2; dx <= 2; dx++) - { - if (cur_h + dy >= 0 && cur_h + dy < height && - cur_w + dx >= 0 && cur_w + dx < width && - abs(cur_inv_h_data - (cur_h + dy)) < 1 && - abs(cur_inv_w_data - (cur_w + dx)) < 1) - { - int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; - scalar_t 
weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); - atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); - } - } - } - } -} - -void deformable_col2im( - const at::Tensor data_col, const at::Tensor data_offset, const int channels, - const int height, const int width, const int ksize_h, - const int ksize_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int parallel_imgs, const int deformable_group, - at::Tensor grad_im) -{ - - // todo: make sure parallel_imgs is passed in correctly - int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; - int channel_per_deformable_group = channels / deformable_group; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "deformable_col2im_gpu", ([&] { - const scalar_t *data_col_ = data_col.data_ptr(); - const scalar_t *data_offset_ = data_offset.data_ptr(); - scalar_t *grad_im_ = grad_im.data_ptr(); - - deformable_col2im_gpu_kernel<<>>( - num_kernels, data_col_, data_offset_, channels, height, width, ksize_h, - ksize_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, channel_per_deformable_group, - parallel_imgs, deformable_group, height_col, width_col, grad_im_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); - } -} - -template -__global__ void deformable_col2im_coord_gpu_kernel(const int n, const scalar_t *data_col, - const scalar_t *data_im, const scalar_t *data_offset, - const int channels, const int height, const int width, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, const int offset_channels, const int deformable_group, - const int height_col, const int width_col, scalar_t *grad_offset) -{ - CUDA_KERNEL_LOOP(index, n) - { - scalar_t val = 0; - int w = index % width_col; - int h = (index / width_col) % height_col; - int c = (index / width_col / height_col) % offset_channels; - int b = (index / width_col / height_col) / offset_channels; - // compute the start and end of the output - - const int deformable_group_index = c / (2 * kernel_h * kernel_w); - const int col_step = kernel_h * kernel_w; - int cnt = 0; - const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * - batch_size * width_col * height_col; - const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * - channel_per_deformable_group / kernel_h / kernel_w * height * width; - const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * - kernel_h * kernel_w * height_col * width_col; - - const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; - - for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) - { - const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; - const int bp_dir = offset_c % 2; - - int j = (col_pos / width_col / height_col / batch_size) % kernel_w; - int i = (col_pos / width_col / height_col / batch_size / kernel_w) % 
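`deformable_col2im_gpu_kernel` above routes each column gradient back to the input plane: the fractional sampling location is recomputed from the stored offset, and the gradient is split over the integer neighbours with the bilinear weights, accumulated via `atomicAdd`. A CPU analogue of that scatter for a single channel (illustrative names):

```python
def scatter_bilinear_grad(grad_im, h, w, g):
    """grad_im: (H, W) tensor; (h, w): fractional sampling location; g: incoming gradient."""
    H, W = grad_im.shape
    ch, cw = int(h), int(w)                      # truncation, as in the kernel
    for dy in range(-2, 3):
        for dx in range(-2, 3):
            y, x = ch + dy, cw + dx
            if 0 <= y < H and 0 <= x < W and abs(h - y) < 1 and abs(w - x) < 1:
                grad_im[y, x] += (1 - abs(h - y)) * (1 - abs(w - x)) * g
```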
kernel_h; - int w_out = col_pos % width_col; - int h_out = (col_pos / width_col) % height_col; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); - const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - scalar_t inv_h = h_in + i * dilation_h + offset_h; - scalar_t inv_w = w_in + j * dilation_w + offset_w; - if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) - { - inv_h = inv_w = -2; - } - const scalar_t weight = get_coordinate_weight( - inv_h, inv_w, - height, width, data_im_ptr + cnt * height * width, width, bp_dir); - val += weight * data_col_ptr[col_pos]; - cnt += 1; - } - - grad_offset[index] = val; - } -} - -void deformable_col2im_coord( - const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, - const int channels, const int height, const int width, const int ksize_h, - const int ksize_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - const int parallel_imgs, const int deformable_group, at::Tensor grad_offset) -{ - - int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs; - int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { - const scalar_t *data_col_ = data_col.data_ptr(); - const scalar_t *data_im_ = data_im.data_ptr(); - const scalar_t *data_offset_ = data_offset.data_ptr(); - scalar_t *grad_offset_ = grad_offset.data_ptr(); - - deformable_col2im_coord_gpu_kernel<<>>( - num_kernels, data_col_, data_im_, data_offset_, channels, height, width, - ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, channel_per_deformable_group, - parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group, - height_col, width_col, grad_offset_); - })); -} - -template -__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width, - const int height, const int width, scalar_t h, scalar_t w) -{ - int h_low = floor(h); - int w_low = floor(w); - int h_high = h_low + 1; - int w_high = w_low + 1; - - scalar_t lh = h - h_low; - scalar_t lw = w - w_low; - scalar_t hh = 1 - lh, hw = 1 - lw; - - scalar_t v1 = 0; - if (h_low >= 0 && w_low >= 0) - v1 = bottom_data[h_low * data_width + w_low]; - scalar_t v2 = 0; - if (h_low >= 0 && w_high <= width - 1) - v2 = bottom_data[h_low * data_width + w_high]; - scalar_t v3 = 0; - if (h_high <= height - 1 && w_low >= 0) - v3 = bottom_data[h_high * data_width + w_low]; - scalar_t v4 = 0; - if (h_high <= height - 1 && w_high <= width - 1) - v4 = bottom_data[h_high * data_width + w_high]; - - scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -template -__device__ scalar_t dmcn_get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w, - const int h, const int w, const int height, const int width) -{ - if (argmax_h <= -1 || argmax_h >= 
height || argmax_w <= -1 || argmax_w >= width) - { - //empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - if (h == argmax_h_low && w == argmax_w_low) - weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); - if (h == argmax_h_low && w == argmax_w_high) - weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); - if (h == argmax_h_high && w == argmax_w_low) - weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); - if (h == argmax_h_high && w == argmax_w_high) - weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); - return weight; -} - -template -__device__ scalar_t dmcn_get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w, - const int height, const int width, const scalar_t *im_data, - const int data_width, const int bp_dir) -{ - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) - { - //empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - - if (bp_dir == 0) - { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; - } - else if (bp_dir == 1) - { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; - } - - return weight; -} - -template -__global__ void modulated_deformable_im2col_gpu_kernel(const int n, - const scalar_t *data_im, const scalar_t *data_offset, const scalar_t *data_mask, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, const int num_channels, const int deformable_group, - const int height_col, const int width_col, - scalar_t *data_col) -{ - CUDA_KERNEL_LOOP(index, n) - { - // index index of output matrix - const int w_col = index % width_col; - const int h_col = (index / width_col) % height_col; - const int b_col = (index / width_col / height_col) % batch_size; - const int c_im = (index / width_col / height_col) / batch_size; - const int c_col = c_im * kernel_h * kernel_w; - - // compute deformable group index - const int deformable_group_index = c_im / channel_per_deformable_group; - - const int h_in = h_col * stride_h - pad_h; - const int w_in = w_col 
* stride_w - pad_w; - - scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; - //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in; - const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; - const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; - - const scalar_t *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; - - for (int i = 0; i < kernel_h; ++i) - { - for (int j = 0; j < kernel_w; ++j) - { - const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; - const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; - const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - scalar_t val = static_cast(0); - const scalar_t h_im = h_in + i * dilation_h + offset_h; - const scalar_t w_im = w_in + j * dilation_w + offset_w; - //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { - if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) - { - //const float map_h = i * dilation_h + offset_h; - //const float map_w = j * dilation_w + offset_w; - //const int cur_height = height - h_in; - //const int cur_width = width - w_in; - //val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w); - val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im); - } - *data_col_ptr = val * mask; - data_col_ptr += batch_size * height_col * width_col; - //data_col_ptr += height_col * width_col; - } - } - } -} - -template -__global__ void modulated_deformable_col2im_gpu_kernel(const int n, - const scalar_t *data_col, const scalar_t *data_offset, const scalar_t *data_mask, - const int channels, const int height, const int width, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, const int deformable_group, - const int height_col, const int width_col, - scalar_t *grad_im) -{ - CUDA_KERNEL_LOOP(index, n) - { - const int j = (index / width_col / height_col / batch_size) % kernel_w; - const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h; - const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h; - // compute the start and end of the output - - const int deformable_group_index = c / channel_per_deformable_group; - - int w_out = index % width_col; - int h_out = (index / width_col) % height_col; - int b = (index / width_col / height_col) % batch_size; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - - const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; - const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; - const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; - const int 
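The modulated (DCNv2) im2col kernel above differs from the plain deformable one in a single place: each bilinearly sampled value is scaled by the learned, per-tap modulation scalar before it is written to the column buffer. Reduced to one line (illustrative names):

```python
def modulated_column_entry(bilinear_sample, mask_value):
    # modulated_deformable_im2col_gpu_kernel stores val * mask where the
    # unmodulated kernel stores val; everything else is identical.
    return bilinear_sample * mask_value
```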
data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; - const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; - const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; - - const scalar_t cur_top_grad = data_col[index] * mask; - const int cur_h = (int)cur_inv_h_data; - const int cur_w = (int)cur_inv_w_data; - for (int dy = -2; dy <= 2; dy++) - { - for (int dx = -2; dx <= 2; dx++) - { - if (cur_h + dy >= 0 && cur_h + dy < height && - cur_w + dx >= 0 && cur_w + dx < width && - abs(cur_inv_h_data - (cur_h + dy)) < 1 && - abs(cur_inv_w_data - (cur_w + dx)) < 1) - { - int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; - scalar_t weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); - atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); - } - } - } - } -} - -template -__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n, - const scalar_t *data_col, const scalar_t *data_im, - const scalar_t *data_offset, const scalar_t *data_mask, - const int channels, const int height, const int width, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, const int offset_channels, const int deformable_group, - const int height_col, const int width_col, - scalar_t *grad_offset, scalar_t *grad_mask) -{ - CUDA_KERNEL_LOOP(index, n) - { - scalar_t val = 0, mval = 0; - int w = index % width_col; - int h = (index / width_col) % height_col; - int c = (index / width_col / height_col) % offset_channels; - int b = (index / width_col / height_col) / offset_channels; - // compute the start and end of the output - - const int deformable_group_index = c / (2 * kernel_h * kernel_w); - const int col_step = kernel_h * kernel_w; - int cnt = 0; - const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col; - const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width; - const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; - const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; - - const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; - - for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) - { - const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; - const int bp_dir = offset_c % 2; - - int j = (col_pos / width_col / height_col / batch_size) % kernel_w; - int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; - int w_out = col_pos % width_col; - int h_out = (col_pos / width_col) % height_col; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + 
h_out) * width_col + w_out); - const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); - const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - scalar_t inv_h = h_in + i * dilation_h + offset_h; - scalar_t inv_w = w_in + j * dilation_w + offset_w; - if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) - { - inv_h = inv_w = -2; - } - else - { - mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w); - } - const scalar_t weight = dmcn_get_coordinate_weight( - inv_h, inv_w, - height, width, data_im_ptr + cnt * height * width, width, bp_dir); - val += weight * data_col_ptr[col_pos] * mask; - cnt += 1; - } - // KERNEL_ASSIGN(grad_offset[index], offset_req, val); - grad_offset[index] = val; - if (offset_c % 2 == 0) - // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval); - grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval; - } -} - -void modulated_deformable_im2col_cuda( - const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, - const int batch_size, const int channels, const int height_im, const int width_im, - const int height_col, const int width_col, const int kernel_h, const int kenerl_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int deformable_group, at::Tensor data_col) -{ - // num_axes should be smaller than block size - const int channel_per_deformable_group = channels / deformable_group; - const int num_kernels = channels * batch_size * height_col * width_col; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { - const scalar_t *data_im_ = data_im.data_ptr(); - const scalar_t *data_offset_ = data_offset.data_ptr(); - const scalar_t *data_mask_ = data_mask.data_ptr(); - scalar_t *data_col_ = data_col.data_ptr(); - - modulated_deformable_im2col_gpu_kernel<<>>( - num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w, - pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, - batch_size, channels, deformable_group, height_col, width_col, data_col_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err)); - } -} - -void modulated_deformable_col2im_cuda( - const at::Tensor data_col, const at::Tensor data_offset, const at::Tensor data_mask, - const int batch_size, const int channels, const int height_im, const int width_im, - const int height_col, const int width_col, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int deformable_group, at::Tensor grad_im) -{ - - const int channel_per_deformable_group = channels / deformable_group; - const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - 
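The coordinate kernel above also produces the mask gradient: for every tap, `grad_mask` collects the unmodulated bilinear sample times the incoming column gradient, while `grad_offset` additionally carries the mask and the coordinate derivative of the bilinear sample. A per-tap sketch (names chosen here):

```python
def dcnv2_tap_grads(col_grad, sampled_value, d_dh, d_dw, mask):
    """col_grad: gradient flowing into this column entry; sampled_value: the
    bilinear sample at the offset location; d_dh, d_dw: its coordinate
    derivatives; mask: the modulation scalar for this tap."""
    grad_offset_h = col_grad * mask * d_dh
    grad_offset_w = col_grad * mask * d_dw
    grad_mask = col_grad * sampled_value
    return grad_offset_h, grad_offset_w, grad_mask
```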
data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { - const scalar_t *data_col_ = data_col.data_ptr(); - const scalar_t *data_offset_ = data_offset.data_ptr(); - const scalar_t *data_mask_ = data_mask.data_ptr(); - scalar_t *grad_im_ = grad_im.data_ptr(); - - modulated_deformable_col2im_gpu_kernel<<>>( - num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im, - kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, channel_per_deformable_group, - batch_size, deformable_group, height_col, width_col, grad_im_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err)); - } -} - -void modulated_deformable_col2im_coord_cuda( - const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, - const int batch_size, const int channels, const int height_im, const int width_im, - const int height_col, const int width_col, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int deformable_group, - at::Tensor grad_offset, at::Tensor grad_mask) -{ - const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group; - const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { - const scalar_t *data_col_ = data_col.data_ptr(); - const scalar_t *data_im_ = data_im.data_ptr(); - const scalar_t *data_offset_ = data_offset.data_ptr(); - const scalar_t *data_mask_ = data_mask.data_ptr(); - scalar_t *grad_offset_ = grad_offset.data_ptr(); - scalar_t *grad_mask_ = grad_mask.data_ptr(); - - modulated_deformable_col2im_coord_gpu_kernel<<>>( - num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im, - kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, channel_per_deformable_group, - batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col, - grad_offset_, grad_mask_); - })); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err)); - } -} diff --git a/mmdet/ops/dcn/src/cuda/deform_pool_cuda.cpp b/mmdet/ops/dcn/src/cuda/deform_pool_cuda.cpp deleted file mode 100644 index 3c09f998029..00000000000 --- a/mmdet/ops/dcn/src/cuda/deform_pool_cuda.cpp +++ /dev/null @@ -1,82 +0,0 @@ -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c - -// based on -// author: Charles Shang -// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu - -#include -#include - -#include -#include - -void DeformablePSROIPoolForward( - const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, - at::Tensor out, at::Tensor top_count, const int batch, const int channels, - const int height, const int width, const int num_bbox, - const int channels_trans, const int no_trans, const float spatial_scale, - const int output_dim, const int group_size, const int pooled_size, - const int part_size, const int sample_per_part, const float trans_std); - -void DeformablePSROIPoolBackwardAcc( - const at::Tensor 
out_grad, const at::Tensor data, const at::Tensor bbox, - const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, - at::Tensor trans_grad, const int batch, const int channels, - const int height, const int width, const int num_bbox, - const int channels_trans, const int no_trans, const float spatial_scale, - const int output_dim, const int group_size, const int pooled_size, - const int part_size, const int sample_per_part, const float trans_std); - -void deform_psroi_pooling_cuda_forward( - at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, - at::Tensor top_count, const int no_trans, const float spatial_scale, - const int output_dim, const int group_size, const int pooled_size, - const int part_size, const int sample_per_part, const float trans_std) { - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - at::DeviceGuard guard(input.device()); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - const int channels_trans = no_trans ? 2 : trans.size(1); - - const int num_bbox = bbox.size(0); - if (num_bbox != out.size(0)) - AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", - out.size(0), num_bbox); - - DeformablePSROIPoolForward( - input, bbox, trans, out, top_count, batch, channels, height, width, - num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, - pooled_size, part_size, sample_per_part, trans_std); -} - -void deform_psroi_pooling_cuda_backward( - at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, - at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, - const int no_trans, const float spatial_scale, const int output_dim, - const int group_size, const int pooled_size, const int part_size, - const int sample_per_part, const float trans_std) { - TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - at::DeviceGuard guard(input.device()); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - const int channels_trans = no_trans ? 2 : trans.size(1); - - const int num_bbox = bbox.size(0); - if (num_bbox != out_grad.size(0)) - AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", - out_grad.size(0), num_bbox); - - DeformablePSROIPoolBackwardAcc( - out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, - channels, height, width, num_bbox, channels_trans, no_trans, - spatial_scale, output_dim, group_size, pooled_size, part_size, - sample_per_part, trans_std); -} diff --git a/mmdet/ops/dcn/src/cuda/deform_pool_cuda_kernel.cu b/mmdet/ops/dcn/src/cuda/deform_pool_cuda_kernel.cu deleted file mode 100644 index 18e3a048d3f..00000000000 --- a/mmdet/ops/dcn/src/cuda/deform_pool_cuda_kernel.cu +++ /dev/null @@ -1,364 +0,0 @@ -/*! 
- * Copyright (c) 2017 Microsoft - * Licensed under The MIT License [see LICENSE for details] - * \file deformable_psroi_pooling.cu - * \brief - * \author Yi Li, Guodong Zhang, Jifeng Dai -*/ -/***************** Adapted by Charles Shang *********************/ -// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/deform_psroi_pooling_cuda.cu - -#include -#include -#include -#include -#include - -using namespace at; - -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ - i < (n); \ - i += blockDim.x * gridDim.x) - -const int CUDA_NUM_THREADS = 1024; -inline int GET_BLOCKS(const int N) -{ - return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; -} - -template -__device__ scalar_t bilinear_interp( - const scalar_t *data, - const scalar_t x, - const scalar_t y, - const int width, - const int height) -{ - int x1 = floor(x); - int x2 = ceil(x); - int y1 = floor(y); - int y2 = ceil(y); - scalar_t dist_x = (scalar_t)(x - x1); - scalar_t dist_y = (scalar_t)(y - y1); - scalar_t value11 = data[y1 * width + x1]; - scalar_t value12 = data[y2 * width + x1]; - scalar_t value21 = data[y1 * width + x2]; - scalar_t value22 = data[y2 * width + x2]; - scalar_t value = (1 - dist_x) * (1 - dist_y) * value11 + (1 - dist_x) * dist_y * value12 + dist_x * (1 - dist_y) * value21 + dist_x * dist_y * value22; - return value; -} - -template -__global__ void DeformablePSROIPoolForwardKernel( - const int count, - const scalar_t *bottom_data, - const scalar_t spatial_scale, - const int channels, - const int height, const int width, - const int pooled_height, const int pooled_width, - const scalar_t *bottom_rois, const scalar_t *bottom_trans, - const int no_trans, - const scalar_t trans_std, - const int sample_per_part, - const int output_dim, - const int group_size, - const int part_size, - const int num_classes, - const int channels_each_class, - scalar_t *top_data, - scalar_t *top_count) -{ - CUDA_KERNEL_LOOP(index, count) - { - // The output is in order (n, ctop, ph, pw) - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int ctop = (index / pooled_width / pooled_height) % output_dim; - int n = index / pooled_width / pooled_height / output_dim; - - // [start, end) interval for spatial sampling - const scalar_t *offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - scalar_t roi_start_w = (scalar_t)(round(offset_bottom_rois[1])) * spatial_scale - 0.5; - scalar_t roi_start_h = (scalar_t)(round(offset_bottom_rois[2])) * spatial_scale - 0.5; - scalar_t roi_end_w = (scalar_t)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5; - scalar_t roi_end_h = (scalar_t)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5; - - // Force too small ROIs to be 1x1 - scalar_t roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0 - scalar_t roi_height = max(roi_end_h - roi_start_h, 0.1); - - // Compute w and h at bottom - scalar_t bin_size_h = roi_height / (scalar_t)(pooled_height); - scalar_t bin_size_w = roi_width / (scalar_t)(pooled_width); - - scalar_t sub_bin_size_h = bin_size_h / (scalar_t)(sample_per_part); - scalar_t sub_bin_size_w = bin_size_w / (scalar_t)(sample_per_part); - - int part_h = floor((scalar_t)(ph) / pooled_height * part_size); - int part_w = floor((scalar_t)(pw) / pooled_width * part_size); - int class_id = ctop / channels_each_class; - scalar_t trans_x = no_trans ? 
(scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std; - scalar_t trans_y = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std; - - scalar_t wstart = (scalar_t)(pw)*bin_size_w + roi_start_w; - wstart += trans_x * roi_width; - scalar_t hstart = (scalar_t)(ph)*bin_size_h + roi_start_h; - hstart += trans_y * roi_height; - - scalar_t sum = 0; - int count = 0; - int gw = floor((scalar_t)(pw)*group_size / pooled_width); - int gh = floor((scalar_t)(ph)*group_size / pooled_height); - gw = min(max(gw, 0), group_size - 1); - gh = min(max(gh, 0), group_size - 1); - - const scalar_t *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width; - for (int ih = 0; ih < sample_per_part; ih++) - { - for (int iw = 0; iw < sample_per_part; iw++) - { - scalar_t w = wstart + iw * sub_bin_size_w; - scalar_t h = hstart + ih * sub_bin_size_h; - // bilinear interpolation - if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5) - { - continue; - } - w = min(max(w, 0.), width - 1.); - h = min(max(h, 0.), height - 1.); - int c = (ctop * group_size + gh) * group_size + gw; - scalar_t val = bilinear_interp(offset_bottom_data + c * height * width, w, h, width, height); - sum += val; - count++; - } - } - top_data[index] = count == 0 ? (scalar_t)(0) : sum / count; - top_count[index] = count; - } -} - -template -__global__ void DeformablePSROIPoolBackwardAccKernel( - const int count, - const scalar_t *top_diff, - const scalar_t *top_count, - const int num_rois, - const scalar_t spatial_scale, - const int channels, - const int height, const int width, - const int pooled_height, const int pooled_width, - const int output_dim, - scalar_t *bottom_data_diff, scalar_t *bottom_trans_diff, - const scalar_t *bottom_data, - const scalar_t *bottom_rois, - const scalar_t *bottom_trans, - const int no_trans, - const scalar_t trans_std, - const int sample_per_part, - const int group_size, - const int part_size, - const int num_classes, - const int channels_each_class) -{ - CUDA_KERNEL_LOOP(index, count) - { - // The output is in order (n, ctop, ph, pw) - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int ctop = (index / pooled_width / pooled_height) % output_dim; - int n = index / pooled_width / pooled_height / output_dim; - - // [start, end) interval for spatial sampling - const scalar_t *offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - scalar_t roi_start_w = (scalar_t)(round(offset_bottom_rois[1])) * spatial_scale - 0.5; - scalar_t roi_start_h = (scalar_t)(round(offset_bottom_rois[2])) * spatial_scale - 0.5; - scalar_t roi_end_w = (scalar_t)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5; - scalar_t roi_end_h = (scalar_t)(round(offset_bottom_rois[4]) + 1.) 
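`DeformablePSROIPoolForwardKernel` above scales the RoI into feature-map coordinates, shifts each pooling bin by a learned normalized offset (`trans`, scaled by `trans_std`), and averages a `sample_per_part x sample_per_part` grid of bilinear samples inside the shifted bin. A pure-Python sketch of one output bin, simplified to a single pre-selected channel (the real kernel also picks the position-sensitive channel via `group_size` and a per-class `trans` slice); all names here are illustrative:

```python
import math

def bilin(feat, y, x):
    # feat: (H, W) torch.Tensor; (y, x) already clamped inside the map.
    H, W = feat.shape
    y0, x0 = math.floor(y), math.floor(x)
    y1, x1 = min(y0 + 1, H - 1), min(x0 + 1, W - 1)
    dy, dx = y - y0, x - x0
    return ((1 - dy) * (1 - dx) * feat[y0, x0] + (1 - dy) * dx * feat[y0, x1]
            + dy * (1 - dx) * feat[y1, x0] + dy * dx * feat[y1, x1]).item()

def deform_psroi_bin(feat, roi, ph, pw, pooled_size, part_size,
                     sample_per_part, trans, trans_std, spatial_scale):
    """feat: (H, W) map for this bin's channel; roi: (x1, y1, x2, y2) in image
    coordinates; trans: (2, part_size, part_size) learned normalized offsets."""
    H, W = feat.shape
    x1, y1, x2, y2 = [round(float(v)) for v in roi]
    roi_start_w = x1 * spatial_scale - 0.5
    roi_start_h = y1 * spatial_scale - 0.5
    roi_w = max((x2 + 1.0) * spatial_scale - 0.5 - roi_start_w, 0.1)   # avoid 0-sized RoIs
    roi_h = max((y2 + 1.0) * spatial_scale - 0.5 - roi_start_h, 0.1)
    bin_w, bin_h = roi_w / pooled_size, roi_h / pooled_size
    part_h = math.floor(ph / pooled_size * part_size)
    part_w = math.floor(pw / pooled_size * part_size)
    dx = float(trans[0, part_h, part_w]) * trans_std
    dy = float(trans[1, part_h, part_w]) * trans_std
    wstart = pw * bin_w + roi_start_w + dx * roi_w
    hstart = ph * bin_h + roi_start_h + dy * roi_h
    total, count = 0.0, 0
    for ih in range(sample_per_part):
        for iw in range(sample_per_part):
            w = wstart + iw * bin_w / sample_per_part
            h = hstart + ih * bin_h / sample_per_part
            if -0.5 <= w <= W - 0.5 and -0.5 <= h <= H - 0.5:
                total += bilin(feat, min(max(h, 0.0), H - 1.0), min(max(w, 0.0), W - 1.0))
                count += 1
    return total / count if count else 0.0
```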
* spatial_scale - 0.5; - - // Force too small ROIs to be 1x1 - scalar_t roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0 - scalar_t roi_height = max(roi_end_h - roi_start_h, 0.1); - - // Compute w and h at bottom - scalar_t bin_size_h = roi_height / (scalar_t)(pooled_height); - scalar_t bin_size_w = roi_width / (scalar_t)(pooled_width); - - scalar_t sub_bin_size_h = bin_size_h / (scalar_t)(sample_per_part); - scalar_t sub_bin_size_w = bin_size_w / (scalar_t)(sample_per_part); - - int part_h = floor((scalar_t)(ph) / pooled_height * part_size); - int part_w = floor((scalar_t)(pw) / pooled_width * part_size); - int class_id = ctop / channels_each_class; - scalar_t trans_x = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std; - scalar_t trans_y = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std; - - scalar_t wstart = (scalar_t)(pw)*bin_size_w + roi_start_w; - wstart += trans_x * roi_width; - scalar_t hstart = (scalar_t)(ph)*bin_size_h + roi_start_h; - hstart += trans_y * roi_height; - - if (top_count[index] <= 0) - { - continue; - } - scalar_t diff_val = top_diff[index] / top_count[index]; - const scalar_t *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width; - scalar_t *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width; - int gw = floor((scalar_t)(pw)*group_size / pooled_width); - int gh = floor((scalar_t)(ph)*group_size / pooled_height); - gw = min(max(gw, 0), group_size - 1); - gh = min(max(gh, 0), group_size - 1); - - for (int ih = 0; ih < sample_per_part; ih++) - { - for (int iw = 0; iw < sample_per_part; iw++) - { - scalar_t w = wstart + iw * sub_bin_size_w; - scalar_t h = hstart + ih * sub_bin_size_h; - // bilinear interpolation - if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5) - { - continue; - } - w = min(max(w, 0.), width - 1.); - h = min(max(h, 0.), height - 1.); - int c = (ctop * group_size + gh) * group_size + gw; - // backward on feature - int x0 = floor(w); - int x1 = ceil(w); - int y0 = floor(h); - int y1 = ceil(h); - scalar_t dist_x = w - x0, dist_y = h - y0; - scalar_t q00 = (1 - dist_x) * (1 - dist_y); - scalar_t q01 = (1 - dist_x) * dist_y; - scalar_t q10 = dist_x * (1 - dist_y); - scalar_t q11 = dist_x * dist_y; - int bottom_index_base = c * height * width; - atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val); - atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val); - atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val); - atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val); - - if (no_trans) - { - continue; - } - scalar_t U00 = offset_bottom_data[bottom_index_base + y0 * width + x0]; - scalar_t U01 = offset_bottom_data[bottom_index_base + y1 * width + x0]; - scalar_t U10 = offset_bottom_data[bottom_index_base + y0 * width + x1]; - scalar_t U11 = offset_bottom_data[bottom_index_base + y1 * width + x1]; - scalar_t diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val; - diff_x *= roi_width; - scalar_t diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val; - diff_y *= roi_height; - - atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + 
part_h) * part_size + part_w, diff_x); - atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y); - } - } - } -} - -void DeformablePSROIPoolForward(const at::Tensor data, - const at::Tensor bbox, - const at::Tensor trans, - at::Tensor out, - at::Tensor top_count, - const int batch, - const int channels, - const int height, - const int width, - const int num_bbox, - const int channels_trans, - const int no_trans, - const float spatial_scale, - const int output_dim, - const int group_size, - const int pooled_size, - const int part_size, - const int sample_per_part, - const float trans_std) -{ - const int pooled_height = pooled_size; - const int pooled_width = pooled_size; - const int count = num_bbox * output_dim * pooled_height * pooled_width; - const int num_classes = no_trans ? 1 : channels_trans / 2; - const int channels_each_class = no_trans ? output_dim : output_dim / num_classes; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data.scalar_type(), "deformable_psroi_pool_forward", ([&] { - const scalar_t *bottom_data = data.data_ptr(); - const scalar_t *bottom_rois = bbox.data_ptr(); - const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr(); - scalar_t *top_data = out.data_ptr(); - scalar_t *top_count_data = top_count.data_ptr(); - - DeformablePSROIPoolForwardKernel<<>>( - count, bottom_data, (scalar_t)spatial_scale, channels, height, width, pooled_height, pooled_width, - bottom_rois, bottom_trans, no_trans, (scalar_t)trans_std, sample_per_part, output_dim, - group_size, part_size, num_classes, channels_each_class, top_data, top_count_data); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err)); - } -} - -void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad, - const at::Tensor data, - const at::Tensor bbox, - const at::Tensor trans, - const at::Tensor top_count, - at::Tensor in_grad, - at::Tensor trans_grad, - const int batch, - const int channels, - const int height, - const int width, - const int num_bbox, - const int channels_trans, - const int no_trans, - const float spatial_scale, - const int output_dim, - const int group_size, - const int pooled_size, - const int part_size, - const int sample_per_part, - const float trans_std) -{ - // LOG(INFO) << "DeformablePSROIPoolBackward"; - const int num_rois = num_bbox; - const int pooled_height = pooled_size; - const int pooled_width = pooled_size; - const int count = num_bbox * output_dim * pooled_height * pooled_width; - const int num_classes = no_trans ? 1 : channels_trans / 2; - const int channels_each_class = no_trans ? output_dim : output_dim / num_classes; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - out_grad.scalar_type(), "deformable_psroi_pool_backward_acc", ([&] { - const scalar_t *top_diff = out_grad.data_ptr(); - const scalar_t *bottom_data = data.data_ptr(); - const scalar_t *bottom_rois = bbox.data_ptr(); - const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr(); - scalar_t *bottom_data_diff = in_grad.data_ptr(); - scalar_t *bottom_trans_diff = no_trans ? 
NULL : trans_grad.data_ptr(); - const scalar_t *top_count_data = top_count.data_ptr(); - - DeformablePSROIPoolBackwardAccKernel<<>>( - count, top_diff, top_count_data, num_rois, (scalar_t)spatial_scale, channels, height, width, - pooled_height, pooled_width, output_dim, bottom_data_diff, bottom_trans_diff, - bottom_data, bottom_rois, bottom_trans, no_trans, (scalar_t)trans_std, sample_per_part, - group_size, part_size, num_classes, channels_each_class); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err)); - } -} diff --git a/mmdet/ops/dcn/src/deform_conv_ext.cpp b/mmdet/ops/dcn/src/deform_conv_ext.cpp deleted file mode 100644 index fac60162b69..00000000000 --- a/mmdet/ops/dcn/src/deform_conv_ext.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c - -#include -#include - -#include -#include - -#ifdef WITH_CUDA -int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, - at::Tensor offset, at::Tensor output, - at::Tensor columns, at::Tensor ones, int kW, - int kH, int dW, int dH, int padW, int padH, - int dilationW, int dilationH, int group, - int deformable_group, int im2col_step); - -int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, - at::Tensor gradOutput, at::Tensor gradInput, - at::Tensor gradOffset, at::Tensor weight, - at::Tensor columns, int kW, int kH, int dW, - int dH, int padW, int padH, int dilationW, - int dilationH, int group, - int deformable_group, int im2col_step); - -int deform_conv_backward_parameters_cuda( - at::Tensor input, at::Tensor offset, at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, - int padW, int padH, int dilationW, int dilationH, int group, - int deformable_group, float scale, int im2col_step); - -void modulated_deform_conv_cuda_forward( - at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, - at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, - int kernel_h, int kernel_w, const int stride_h, const int stride_w, - const int pad_h, const int pad_w, const int dilation_h, - const int dilation_w, const int group, const int deformable_group, - const bool with_bias); - -void modulated_deform_conv_cuda_backward( - at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, - at::Tensor offset, at::Tensor mask, at::Tensor columns, - at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, - at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, - int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, - int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, - const bool with_bias); -#endif - -int deform_conv_forward(at::Tensor input, at::Tensor weight, - at::Tensor offset, at::Tensor output, - at::Tensor columns, at::Tensor ones, int kW, - int kH, int dW, int dH, int padW, int padH, - int dilationW, int dilationH, int group, - int deformable_group, int im2col_step) { - if (input.device().is_cuda()) { -#ifdef WITH_CUDA - return deform_conv_forward_cuda(input, weight, offset, output, columns, - ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, - deformable_group, im2col_step); -#else - AT_ERROR("deform conv is not compiled with GPU support"); -#endif - } - AT_ERROR("deform conv is not implemented on 
CPU"); -} - -int deform_conv_backward_input(at::Tensor input, at::Tensor offset, - at::Tensor gradOutput, at::Tensor gradInput, - at::Tensor gradOffset, at::Tensor weight, - at::Tensor columns, int kW, int kH, int dW, - int dH, int padW, int padH, int dilationW, - int dilationH, int group, - int deformable_group, int im2col_step) { - if (input.device().is_cuda()) { -#ifdef WITH_CUDA - return deform_conv_backward_input_cuda(input, offset, gradOutput, - gradInput, gradOffset, weight, columns, kW, kH, dW, dH, padW, padH, - dilationW, dilationH, group, deformable_group, im2col_step); -#else - AT_ERROR("deform conv is not compiled with GPU support"); -#endif - } - AT_ERROR("deform conv is not implemented on CPU"); -} - -int deform_conv_backward_parameters( - at::Tensor input, at::Tensor offset, at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, - int padW, int padH, int dilationW, int dilationH, int group, - int deformable_group, float scale, int im2col_step) { - if (input.device().is_cuda()) { -#ifdef WITH_CUDA - return deform_conv_backward_parameters_cuda(input, offset, gradOutput, - gradWeight, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, - dilationH, group, deformable_group, scale, im2col_step); -#else - AT_ERROR("deform conv is not compiled with GPU support"); -#endif - } - AT_ERROR("deform conv is not implemented on CPU"); -} - -void modulated_deform_conv_forward( - at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, - at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, - int kernel_h, int kernel_w, const int stride_h, const int stride_w, - const int pad_h, const int pad_w, const int dilation_h, - const int dilation_w, const int group, const int deformable_group, - const bool with_bias) { - if (input.device().is_cuda()) { -#ifdef WITH_CUDA - return modulated_deform_conv_cuda_forward(input, weight, bias, ones, - offset, mask, output, columns, kernel_h, kernel_w, stride_h, - stride_w, pad_h, pad_w, dilation_h, dilation_w, group, - deformable_group, with_bias); -#else - AT_ERROR("modulated deform conv is not compiled with GPU support"); -#endif - } - AT_ERROR("modulated deform conv is not implemented on CPU"); -} - -void modulated_deform_conv_backward( - at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, - at::Tensor offset, at::Tensor mask, at::Tensor columns, - at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, - at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, - int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, - int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, - const bool with_bias) { - if (input.device().is_cuda()) { -#ifdef WITH_CUDA - return modulated_deform_conv_cuda_backward(input, weight, bias, ones, - offset, mask, columns, grad_input, grad_weight, grad_bias, grad_offset, - grad_mask, grad_output, kernel_h, kernel_w, stride_h, stride_w, - pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, - with_bias); -#else - AT_ERROR("modulated deform conv is not compiled with GPU support"); -#endif - } - AT_ERROR("modulated deform conv is not implemented on CPU"); -} - - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("deform_conv_forward", &deform_conv_forward, - "deform forward"); - m.def("deform_conv_backward_input", &deform_conv_backward_input, - "deform_conv_backward_input"); - m.def("deform_conv_backward_parameters", - 
&deform_conv_backward_parameters, - "deform_conv_backward_parameters"); - m.def("modulated_deform_conv_forward", - &modulated_deform_conv_forward, - "modulated deform conv forward"); - m.def("modulated_deform_conv_backward", - &modulated_deform_conv_backward, - "modulated deform conv backward"); -} diff --git a/mmdet/ops/dcn/src/deform_pool_ext.cpp b/mmdet/ops/dcn/src/deform_pool_ext.cpp deleted file mode 100644 index 877064828d5..00000000000 --- a/mmdet/ops/dcn/src/deform_pool_ext.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c - -// based on -// author: Charles Shang -// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu - -#include -#include - -#include -#include - -#ifdef WITH_CUDA -void deform_psroi_pooling_cuda_forward( - at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, - at::Tensor top_count, const int no_trans, const float spatial_scale, - const int output_dim, const int group_size, const int pooled_size, - const int part_size, const int sample_per_part, const float trans_std); - -void deform_psroi_pooling_cuda_backward( - at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, - at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, - const int no_trans, const float spatial_scale, const int output_dim, - const int group_size, const int pooled_size, const int part_size, - const int sample_per_part, const float trans_std); -#endif - -void deform_psroi_pooling_forward( - at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, - at::Tensor top_count, const int no_trans, const float spatial_scale, - const int output_dim, const int group_size, const int pooled_size, - const int part_size, const int sample_per_part, const float trans_std) { - if (input.device().is_cuda()) { -#ifdef WITH_CUDA - return deform_psroi_pooling_cuda_forward(input, bbox, trans, out, top_count, - no_trans, spatial_scale, output_dim, group_size, pooled_size, - part_size, sample_per_part, trans_std); -#else - AT_ERROR("deform psroi pooling is not compiled with GPU support"); -#endif - } - AT_ERROR("deform psroi pooling is not implemented on CPU"); -} - -void deform_psroi_pooling_backward( - at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, - at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, - const int no_trans, const float spatial_scale, const int output_dim, - const int group_size, const int pooled_size, const int part_size, - const int sample_per_part, const float trans_std) { - if (input.device().is_cuda()) { -#ifdef WITH_CUDA - return deform_psroi_pooling_cuda_backward(out_grad, input, bbox, trans, - top_count, input_grad, trans_grad, no_trans, spatial_scale, - output_dim, group_size, pooled_size, part_size, sample_per_part, - trans_std); -#else - AT_ERROR("deform psroi pooling is not compiled with GPU support"); -#endif - } - AT_ERROR("deform psroi pooling is not implemented on CPU"); -} - - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, - "deform psroi pooling forward"); - m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, - "deform psroi pooling backward"); -} diff --git a/mmdet/ops/generalized_attention.py b/mmdet/ops/generalized_attention.py deleted file mode 100644 index 94a2e370f2e..00000000000 --- a/mmdet/ops/generalized_attention.py +++ /dev/null @@ 
-1,384 +0,0 @@ -import math - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -from mmcv.cnn import kaiming_init - - -class GeneralizedAttention(nn.Module): - """GeneralizedAttention module. - - See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks' - (https://arxiv.org/abs/1711.07971) for details. - - Args: - in_channels (int): Channels of the input feature map. - spatial_range (int): The spatial range. - -1 indicates no spatial range constraint. - num_heads (int): The head number of empirical_attention module. - position_embedding_dim (int): The position embedding dimension. - position_magnitude (int): A multiplier acting on coord difference. - kv_stride (int): The feature stride acting on key/value feature map. - q_stride (int): The feature stride acting on query feature map. - attention_type (str): A binary indicator string for indicating which - items in generalized empirical_attention module are used. - '1000' indicates 'query and key content' (appr - appr) item, - '0100' indicates 'query content and relative position' - (appr - position) item, - '0010' indicates 'key content only' (bias - appr) item, - '0001' indicates 'relative position only' (bias - position) item. - """ - - def __init__(self, - in_channels, - spatial_range=-1, - num_heads=9, - position_embedding_dim=-1, - position_magnitude=1, - kv_stride=2, - q_stride=1, - attention_type='1111'): - - super(GeneralizedAttention, self).__init__() - - # hard range means local range for non-local operation - self.position_embedding_dim = ( - position_embedding_dim - if position_embedding_dim > 0 else in_channels) - - self.position_magnitude = position_magnitude - self.num_heads = num_heads - self.in_channels = in_channels - self.spatial_range = spatial_range - self.kv_stride = kv_stride - self.q_stride = q_stride - self.attention_type = [bool(int(_)) for _ in attention_type] - self.qk_embed_dim = in_channels // num_heads - out_c = self.qk_embed_dim * num_heads - - if self.attention_type[0] or self.attention_type[1]: - self.query_conv = nn.Conv2d( - in_channels=in_channels, - out_channels=out_c, - kernel_size=1, - bias=False) - self.query_conv.kaiming_init = True - - if self.attention_type[0] or self.attention_type[2]: - self.key_conv = nn.Conv2d( - in_channels=in_channels, - out_channels=out_c, - kernel_size=1, - bias=False) - self.key_conv.kaiming_init = True - - self.v_dim = in_channels // num_heads - self.value_conv = nn.Conv2d( - in_channels=in_channels, - out_channels=self.v_dim * num_heads, - kernel_size=1, - bias=False) - self.value_conv.kaiming_init = True - - if self.attention_type[1] or self.attention_type[3]: - self.appr_geom_fc_x = nn.Linear( - self.position_embedding_dim // 2, out_c, bias=False) - self.appr_geom_fc_x.kaiming_init = True - - self.appr_geom_fc_y = nn.Linear( - self.position_embedding_dim // 2, out_c, bias=False) - self.appr_geom_fc_y.kaiming_init = True - - if self.attention_type[2]: - stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) - appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv - self.appr_bias = nn.Parameter(appr_bias_value) - - if self.attention_type[3]: - stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) - geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv - self.geom_bias = nn.Parameter(geom_bias_value) - - self.proj_conv = nn.Conv2d( - in_channels=self.v_dim * num_heads, - out_channels=in_channels, - kernel_size=1, - bias=True) - self.proj_conv.kaiming_init = True - self.gamma = nn.Parameter(torch.zeros(1)) - - if 
self.spatial_range >= 0: - # only works when non local is after 3*3 conv - if in_channels == 256: - max_len = 84 - elif in_channels == 512: - max_len = 42 - - max_len_kv = int((max_len - 1.0) / self.kv_stride + 1) - local_constraint_map = np.ones( - (max_len, max_len, max_len_kv, max_len_kv), dtype=np.int) - for iy in range(max_len): - for ix in range(max_len): - local_constraint_map[ - iy, ix, - max((iy - self.spatial_range) // - self.kv_stride, 0):min((iy + self.spatial_range + - 1) // self.kv_stride + - 1, max_len), - max((ix - self.spatial_range) // - self.kv_stride, 0):min((ix + self.spatial_range + - 1) // self.kv_stride + - 1, max_len)] = 0 - - self.local_constraint_map = nn.Parameter( - torch.from_numpy(local_constraint_map).byte(), - requires_grad=False) - - if self.q_stride > 1: - self.q_downsample = nn.AvgPool2d( - kernel_size=1, stride=self.q_stride) - else: - self.q_downsample = None - - if self.kv_stride > 1: - self.kv_downsample = nn.AvgPool2d( - kernel_size=1, stride=self.kv_stride) - else: - self.kv_downsample = None - - self.init_weights() - - def get_position_embedding(self, - h, - w, - h_kv, - w_kv, - q_stride, - kv_stride, - device, - feat_dim, - wave_length=1000): - h_idxs = torch.linspace(0, h - 1, h).cuda(device) - h_idxs = h_idxs.view((h, 1)) * q_stride - - w_idxs = torch.linspace(0, w - 1, w).cuda(device) - w_idxs = w_idxs.view((w, 1)) * q_stride - - h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).cuda(device) - h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride - - w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).cuda(device) - w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride - - # (h, h_kv, 1) - h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0) - h_diff *= self.position_magnitude - - # (w, w_kv, 1) - w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0) - w_diff *= self.position_magnitude - - feat_range = torch.arange(0, feat_dim / 4).cuda(device) - - dim_mat = torch.Tensor([wave_length]).cuda(device) - dim_mat = dim_mat**((4. 
/ feat_dim) * feat_range) - dim_mat = dim_mat.view((1, 1, -1)) - - embedding_x = torch.cat( - ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) - - embedding_y = torch.cat( - ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) - - return embedding_x, embedding_y - - def forward(self, x_input): - num_heads = self.num_heads - - # use empirical_attention - if self.q_downsample is not None: - x_q = self.q_downsample(x_input) - else: - x_q = x_input - n, _, h, w = x_q.shape - - if self.kv_downsample is not None: - x_kv = self.kv_downsample(x_input) - else: - x_kv = x_input - _, _, h_kv, w_kv = x_kv.shape - - if self.attention_type[0] or self.attention_type[1]: - proj_query = self.query_conv(x_q).view( - (n, num_heads, self.qk_embed_dim, h * w)) - proj_query = proj_query.permute(0, 1, 3, 2) - - if self.attention_type[0] or self.attention_type[2]: - proj_key = self.key_conv(x_kv).view( - (n, num_heads, self.qk_embed_dim, h_kv * w_kv)) - - if self.attention_type[1] or self.attention_type[3]: - position_embed_x, position_embed_y = self.get_position_embedding( - h, w, h_kv, w_kv, self.q_stride, self.kv_stride, - x_input.device, self.position_embedding_dim) - # (n, num_heads, w, w_kv, dim) - position_feat_x = self.appr_geom_fc_x(position_embed_x).\ - view(1, w, w_kv, num_heads, self.qk_embed_dim).\ - permute(0, 3, 1, 2, 4).\ - repeat(n, 1, 1, 1, 1) - - # (n, num_heads, h, h_kv, dim) - position_feat_y = self.appr_geom_fc_y(position_embed_y).\ - view(1, h, h_kv, num_heads, self.qk_embed_dim).\ - permute(0, 3, 1, 2, 4).\ - repeat(n, 1, 1, 1, 1) - - position_feat_x /= math.sqrt(2) - position_feat_y /= math.sqrt(2) - - # accelerate for saliency only - if (np.sum(self.attention_type) == 1) and self.attention_type[2]: - appr_bias = self.appr_bias.\ - view(1, num_heads, 1, self.qk_embed_dim).\ - repeat(n, 1, 1, 1) - - energy = torch.matmul(appr_bias, proj_key).\ - view(n, num_heads, 1, h_kv * w_kv) - - h = 1 - w = 1 - else: - # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for - if not self.attention_type[0]: - energy = torch.zeros( - n, - num_heads, - h, - w, - h_kv, - w_kv, - dtype=x_input.dtype, - device=x_input.device) - - # attention_type[0]: appr - appr - # attention_type[1]: appr - position - # attention_type[2]: bias - appr - # attention_type[3]: bias - position - if self.attention_type[0] or self.attention_type[2]: - if self.attention_type[0] and self.attention_type[2]: - appr_bias = self.appr_bias.\ - view(1, num_heads, 1, self.qk_embed_dim) - energy = torch.matmul(proj_query + appr_bias, proj_key).\ - view(n, num_heads, h, w, h_kv, w_kv) - - elif self.attention_type[0]: - energy = torch.matmul(proj_query, proj_key).\ - view(n, num_heads, h, w, h_kv, w_kv) - - elif self.attention_type[2]: - appr_bias = self.appr_bias.\ - view(1, num_heads, 1, self.qk_embed_dim).\ - repeat(n, 1, 1, 1) - - energy += torch.matmul(appr_bias, proj_key).\ - view(n, num_heads, 1, 1, h_kv, w_kv) - - if self.attention_type[1] or self.attention_type[3]: - if self.attention_type[1] and self.attention_type[3]: - geom_bias = self.geom_bias.\ - view(1, num_heads, 1, self.qk_embed_dim) - - proj_query_reshape = (proj_query + geom_bias).\ - view(n, num_heads, h, w, self.qk_embed_dim) - - energy_x = torch.matmul( - proj_query_reshape.permute(0, 1, 3, 2, 4), - position_feat_x.permute(0, 1, 2, 4, 3)) - energy_x = energy_x.\ - permute(0, 1, 3, 2, 4).unsqueeze(4) - - energy_y = torch.matmul( - proj_query_reshape, - position_feat_y.permute(0, 1, 2, 4, 3)) - energy_y = energy_y.unsqueeze(5) - - energy += 
energy_x + energy_y - - elif self.attention_type[1]: - proj_query_reshape = proj_query.\ - view(n, num_heads, h, w, self.qk_embed_dim) - proj_query_reshape = proj_query_reshape.\ - permute(0, 1, 3, 2, 4) - position_feat_x_reshape = position_feat_x.\ - permute(0, 1, 2, 4, 3) - position_feat_y_reshape = position_feat_y.\ - permute(0, 1, 2, 4, 3) - - energy_x = torch.matmul(proj_query_reshape, - position_feat_x_reshape) - energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) - - energy_y = torch.matmul(proj_query_reshape, - position_feat_y_reshape) - energy_y = energy_y.unsqueeze(5) - - energy += energy_x + energy_y - - elif self.attention_type[3]: - geom_bias = self.geom_bias.\ - view(1, num_heads, self.qk_embed_dim, 1).\ - repeat(n, 1, 1, 1) - - position_feat_x_reshape = position_feat_x.\ - view(n, num_heads, w*w_kv, self.qk_embed_dim) - - position_feat_y_reshape = position_feat_y.\ - view(n, num_heads, h * h_kv, self.qk_embed_dim) - - energy_x = torch.matmul(position_feat_x_reshape, geom_bias) - energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv) - - energy_y = torch.matmul(position_feat_y_reshape, geom_bias) - energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1) - - energy += energy_x + energy_y - - energy = energy.view(n, num_heads, h * w, h_kv * w_kv) - - if self.spatial_range >= 0: - cur_local_constraint_map = \ - self.local_constraint_map[:h, :w, :h_kv, :w_kv].\ - contiguous().\ - view(1, 1, h*w, h_kv*w_kv) - - energy = energy.masked_fill_(cur_local_constraint_map, - float('-inf')) - - attention = F.softmax(energy, 3) - - proj_value = self.value_conv(x_kv) - proj_value_reshape = proj_value.\ - view((n, num_heads, self.v_dim, h_kv * w_kv)).\ - permute(0, 1, 3, 2) - - out = torch.matmul(attention, proj_value_reshape).\ - permute(0, 1, 3, 2).\ - contiguous().\ - view(n, self.v_dim * self.num_heads, h, w) - - out = self.proj_conv(out) - out = self.gamma * out + x_input - return out - - def init_weights(self): - for m in self.modules(): - if hasattr(m, 'kaiming_init') and m.kaiming_init: - kaiming_init( - m, - mode='fan_in', - nonlinearity='leaky_relu', - bias=0, - distribution='uniform', - a=1) diff --git a/mmdet/ops/masked_conv/__init__.py b/mmdet/ops/masked_conv/__init__.py deleted file mode 100644 index f537ace080f..00000000000 --- a/mmdet/ops/masked_conv/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .masked_conv import MaskedConv2d, masked_conv2d - -__all__ = ['masked_conv2d', 'MaskedConv2d'] diff --git a/mmdet/ops/masked_conv/masked_conv.py b/mmdet/ops/masked_conv/masked_conv.py deleted file mode 100644 index d29793286c0..00000000000 --- a/mmdet/ops/masked_conv/masked_conv.py +++ /dev/null @@ -1,89 +0,0 @@ -import math - -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from . 
import masked_conv2d_ext - - -class MaskedConv2dFunction(Function): - - @staticmethod - def forward(ctx, features, mask, weight, bias, padding=0, stride=1): - assert mask.dim() == 3 and mask.size(0) == 1 - assert features.dim() == 4 and features.size(0) == 1 - assert features.size()[2:] == mask.size()[1:] - pad_h, pad_w = _pair(padding) - stride_h, stride_w = _pair(stride) - if stride_h != 1 or stride_w != 1: - raise ValueError( - 'Stride could not only be 1 in masked_conv2d currently.') - if not features.is_cuda: - raise NotImplementedError - - out_channel, in_channel, kernel_h, kernel_w = weight.size() - - batch_size = features.size(0) - out_h = int( - math.floor((features.size(2) + 2 * pad_h - - (kernel_h - 1) - 1) / stride_h + 1)) - out_w = int( - math.floor((features.size(3) + 2 * pad_w - - (kernel_h - 1) - 1) / stride_w + 1)) - mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False) - output = features.new_zeros(batch_size, out_channel, out_h, out_w) - if mask_inds.numel() > 0: - mask_h_idx = mask_inds[:, 0].contiguous() - mask_w_idx = mask_inds[:, 1].contiguous() - data_col = features.new_zeros(in_channel * kernel_h * kernel_w, - mask_inds.size(0)) - masked_conv2d_ext.masked_im2col_forward(features, mask_h_idx, - mask_w_idx, kernel_h, - kernel_w, pad_h, pad_w, - data_col) - - masked_output = torch.addmm(1, bias[:, None], 1, - weight.view(out_channel, -1), data_col) - masked_conv2d_ext.masked_col2im_forward(masked_output, mask_h_idx, - mask_w_idx, out_h, out_w, - out_channel, output) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - return (None, ) * 5 - - -masked_conv2d = MaskedConv2dFunction.apply - - -class MaskedConv2d(nn.Conv2d): - """A MaskedConv2d which inherits the official Conv2d. - - The masked forward doesn't implement the backward function and only - supports the stride parameter to be 1 currently. 
- """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True): - super(MaskedConv2d, - self).__init__(in_channels, out_channels, kernel_size, stride, - padding, dilation, groups, bias) - - def forward(self, input, mask=None): - if mask is None: # fallback to the normal Conv2d - return super(MaskedConv2d, self).forward(input) - else: - return masked_conv2d(input, mask, self.weight, self.bias, - self.padding) diff --git a/mmdet/ops/masked_conv/src/cuda/masked_conv2d_cuda.cpp b/mmdet/ops/masked_conv/src/cuda/masked_conv2d_cuda.cpp deleted file mode 100644 index 84bd7c27913..00000000000 --- a/mmdet/ops/masked_conv/src/cuda/masked_conv2d_cuda.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include - -#include -#include - -int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, - const int width, const int channels, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, const int mask_cnt, - at::Tensor col); - -int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, - const int width, const int channels, - const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, const int mask_cnt, - at::Tensor im); - -#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") -#define CHECK_CONTIGUOUS(x) \ - TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") -#define CHECK_INPUT(x) \ - CHECK_CUDA(x); \ - CHECK_CONTIGUOUS(x) - -int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, const int kernel_h, - const int kernel_w, const int pad_h, - const int pad_w, at::Tensor col) { - CHECK_INPUT(im); - CHECK_INPUT(mask_h_idx); - CHECK_INPUT(mask_w_idx); - CHECK_INPUT(col); - // im: (n, ic, h, w), kernel size (kh, kw) - // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) - at::DeviceGuard guard(im.device()); - - int channels = im.size(1); - int height = im.size(2); - int width = im.size(3); - int mask_cnt = mask_h_idx.size(0); - - MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, - pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, - col); - - return 1; -} - -int masked_col2im_forward_cuda(const at::Tensor col, - const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, int height, - int width, int channels, at::Tensor im) { - CHECK_INPUT(col); - CHECK_INPUT(mask_h_idx); - CHECK_INPUT(mask_w_idx); - CHECK_INPUT(im); - // im: (n, ic, h, w), kernel size (kh, kw) - // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) - at::DeviceGuard guard(col.device()); - - int mask_cnt = mask_h_idx.size(0); - - MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, - mask_w_idx, mask_cnt, im); - - return 1; -} diff --git a/mmdet/ops/masked_conv/src/cuda/masked_conv2d_kernel.cu b/mmdet/ops/masked_conv/src/cuda/masked_conv2d_kernel.cu deleted file mode 100644 index b8323592f52..00000000000 --- a/mmdet/ops/masked_conv/src/cuda/masked_conv2d_kernel.cu +++ /dev/null @@ -1,114 +0,0 @@ -#include -#include -#include - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -#define THREADS_PER_BLOCK 1024 - -inline int GET_BLOCKS(const int N) { - int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - int max_block_num = 65000; - return min(optimal_block_num, max_block_num); -} - -template -__global__ void 
MaskedIm2colForward(const int n, const scalar_t *data_im, - const int height, const int width, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int64_t *mask_h_idx, - const int64_t *mask_w_idx, - const int mask_cnt, scalar_t *data_col) { - // mask_cnt * channels - CUDA_1D_KERNEL_LOOP(index, n) { - const int m_index = index % mask_cnt; - const int h_col = mask_h_idx[m_index]; - const int w_col = mask_w_idx[m_index]; - const int c_im = index / mask_cnt; - const int c_col = c_im * kernel_h * kernel_w; - const int h_offset = h_col - pad_h; - const int w_offset = w_col - pad_w; - scalar_t *data_col_ptr = data_col + c_col * mask_cnt + m_index; - for (int i = 0; i < kernel_h; ++i) { - int h_im = h_offset + i; - for (int j = 0; j < kernel_w; ++j) { - int w_im = w_offset + j; - if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { - *data_col_ptr = - (scalar_t)data_im[(c_im * height + h_im) * width + w_im]; - } else { - *data_col_ptr = 0.0; - } - data_col_ptr += mask_cnt; - } - } - } -} - -int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height, - const int width, const int channels, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, const int mask_cnt, - at::Tensor top_data) { - const int output_size = mask_cnt * channels; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - bottom_data.scalar_type(), "MaskedIm2colLaucherForward", ([&] { - const scalar_t *bottom_data_ = bottom_data.data_ptr(); - const int64_t *mask_h_idx_ = mask_h_idx.data_ptr(); - const int64_t *mask_w_idx_ = mask_w_idx.data_ptr(); - scalar_t *top_data_ = top_data.data_ptr(); - MaskedIm2colForward - <<>>( - output_size, bottom_data_, height, width, kernel_h, kernel_w, - pad_h, pad_w, mask_h_idx_, mask_w_idx_, mask_cnt, top_data_); - })); - THCudaCheck(cudaGetLastError()); - return 1; -} - -template -__global__ void MaskedCol2imForward(const int n, const scalar_t *data_col, - const int height, const int width, - const int channels, - const int64_t *mask_h_idx, - const int64_t *mask_w_idx, - const int mask_cnt, scalar_t *data_im) { - CUDA_1D_KERNEL_LOOP(index, n) { - const int m_index = index % mask_cnt; - const int h_im = mask_h_idx[m_index]; - const int w_im = mask_w_idx[m_index]; - const int c_im = index / mask_cnt; - // compute the start and end of the output - data_im[(c_im * height + h_im) * width + w_im] = data_col[index]; - } -} - -int MaskedCol2imForwardLaucher(const at::Tensor bottom_data, const int height, - const int width, const int channels, - const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, const int mask_cnt, - at::Tensor top_data) { - const int output_size = mask_cnt * channels; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - bottom_data.scalar_type(), "MaskedCol2imLaucherForward", ([&] { - const scalar_t *bottom_data_ = bottom_data.data_ptr(); - const int64_t *mask_h_idx_ = mask_h_idx.data_ptr(); - const int64_t *mask_w_idx_ = mask_w_idx.data_ptr(); - scalar_t *top_data_ = top_data.data_ptr(); - - MaskedCol2imForward - <<>>( - output_size, bottom_data_, height, width, channels, mask_h_idx_, - mask_w_idx_, mask_cnt, top_data_); - })); - THCudaCheck(cudaGetLastError()); - return 1; -} diff --git a/mmdet/ops/masked_conv/src/masked_conv2d_ext.cpp b/mmdet/ops/masked_conv/src/masked_conv2d_ext.cpp deleted file mode 100644 index 39058ad7755..00000000000 --- a/mmdet/ops/masked_conv/src/masked_conv2d_ext.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include - -#include -#include - 
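The masked_im2col/masked_col2im kernels above only evaluate the convolution at spatial positions where the mask is non-zero; every other output position stays zero. For stride 1 (the only stride the removed MaskedConv2d supports), the result at masked positions matches an ordinary convolution, so a dense PyTorch expression can serve as a reference. A minimal sketch, not the optimized path, with made-up tensor shapes:

import torch
import torch.nn.functional as F

# Dense reference for the removed MaskedConv2d (stride 1, zero padding):
# run a full convolution, then keep outputs only where the mask is set.
# The CUDA path instead gathers the masked positions with masked_im2col,
# multiplies by the flattened weight, and scatters back with masked_col2im.
x = torch.randn(1, 16, 32, 32)        # (N, C_in, H, W); shapes are illustrative
mask = torch.rand(1, 32, 32) > 0.5    # (1, H, W) boolean mask
weight = torch.randn(8, 16, 3, 3)     # (C_out, C_in, kH, kW)
bias = torch.randn(8)

dense = F.conv2d(x, weight, bias, stride=1, padding=1)
masked_out = dense * mask[:, None].to(dense.dtype)   # zeros where mask == 0
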
-#ifdef WITH_CUDA -int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, const int kernel_h, - const int kernel_w, const int pad_h, - const int pad_w, at::Tensor col); - -int masked_col2im_forward_cuda(const at::Tensor col, - const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, int height, - int width, int channels, at::Tensor im); -#endif - -int masked_im2col_forward(const at::Tensor im, const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, const int kernel_h, - const int kernel_w, const int pad_h, - const int pad_w, at::Tensor col) { - if (im.device().is_cuda()) { -#ifdef WITH_CUDA - return masked_im2col_forward_cuda(im, mask_h_idx, mask_w_idx, kernel_h, - kernel_w, pad_h, pad_w, col); -#else - AT_ERROR("masked_im2col is not compiled with GPU support"); -#endif - } - AT_ERROR("masked_im2col is not implemented on CPU"); -} - -int masked_col2im_forward(const at::Tensor col, - const at::Tensor mask_h_idx, - const at::Tensor mask_w_idx, int height, - int width, int channels, at::Tensor im) { - if (col.device().is_cuda()) { -#ifdef WITH_CUDA - return masked_col2im_forward_cuda(col, mask_h_idx, mask_w_idx, height, - width, channels, im); -#else - AT_ERROR("masked_col2im is not compiled with GPU support"); -#endif - } - AT_ERROR("masked_col2im is not implemented on CPU"); -} - - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("masked_im2col_forward", &masked_im2col_forward, - "masked_im2col forward"); - m.def("masked_col2im_forward", &masked_col2im_forward, - "masked_col2im forward"); -} diff --git a/mmdet/ops/merge_cells.py b/mmdet/ops/merge_cells.py deleted file mode 100644 index e1d588404b2..00000000000 --- a/mmdet/ops/merge_cells.py +++ /dev/null @@ -1,147 +0,0 @@ -from abc import abstractmethod - -import torch -import torch.nn as nn -import torch.nn.functional as F -from mmcv.cnn import ConvModule - - -class BaseMergeCell(nn.Module): - """The basic class for cells used in NAS-FPN and NAS-FCOS. - - BaseMergeCell takes 2 inputs. After applying concolution - on them, they are resized to the target size. Then, - they go through binary_op, which depends on the type of cell. - If with_out_conv is True, the result of output will go through - another convolution layer. - - Args: - in_channels (int): number of input channels in out_conv layer. - out_channels (int): number of output channels in out_conv layer. - with_out_conv (bool): Whether to use out_conv layer - out_conv_cfg (dict): Config dict for convolution layer, which should - contain "groups", "kernel_size", "padding", "bias" to build - out_conv layer. - out_norm_cfg (dict): Config dict for normalization layer in out_conv. - out_conv_order (tuple): The order of conv/norm/activation layers in - out_conv. - with_input1_conv (bool): Whether to use convolution on input1. - with_input2_conv (bool): Whether to use convolution on input2. - input_conv_cfg (dict): Config dict for building input1_conv layer and - input2_conv layer, which is expected to contain the type of - convolution. - Default: None, which means using conv2d. - input_norm_cfg (dict): Config dict for normalization layer in - input1_conv and input2_conv layer. Default: None. - upsample_mode (str): Interpolation method used to resize the output - of input1_conv and input2_conv to target size. Currently, we - support ['nearest', 'bilinear']. Default: 'nearest'. 
- """ - - def __init__(self, - fused_channels=256, - out_channels=256, - with_out_conv=True, - out_conv_cfg=dict( - groups=1, kernel_size=3, padding=1, bias=True), - out_norm_cfg=None, - out_conv_order=('act', 'conv', 'norm'), - with_input1_conv=False, - with_input2_conv=False, - input_conv_cfg=None, - input_norm_cfg=None, - upsample_mode='nearest'): - super(BaseMergeCell, self).__init__() - assert upsample_mode in ['nearest', 'bilinear'] - self.with_out_conv = with_out_conv - self.with_input1_conv = with_input1_conv - self.with_input2_conv = with_input2_conv - self.upsample_mode = upsample_mode - - if self.with_out_conv: - self.out_conv = ConvModule( - fused_channels, - out_channels, - **out_conv_cfg, - norm_cfg=out_norm_cfg, - order=out_conv_order) - - self.input1_conv = self._build_input_conv( - out_channels, input_conv_cfg, - input_norm_cfg) if with_input1_conv else nn.Sequential() - self.input2_conv = self._build_input_conv( - out_channels, input_conv_cfg, - input_norm_cfg) if with_input2_conv else nn.Sequential() - - def _build_input_conv(self, channel, conv_cfg, norm_cfg): - return ConvModule( - channel, - channel, - 3, - padding=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - bias=True) - - @abstractmethod - def _binary_op(self, x1, x2): - pass - - def _resize(self, x, size): - if x.shape[-2:] == size: - return x - elif x.shape[-2:] < size: - return F.interpolate(x, size=size, mode=self.upsample_mode) - else: - assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0 - kernel_size = x.shape[-1] // size[-1] - x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size) - return x - - def forward(self, x1, x2, out_size=None): - assert x1.shape[:2] == x2.shape[:2] - assert out_size is None or len(out_size) == 2 - if out_size is None: # resize to larger one - out_size = max(x1.size()[2:], x2.size()[2:]) - - x1 = self.input1_conv(x1) - x2 = self.input2_conv(x2) - - x1 = self._resize(x1, out_size) - x2 = self._resize(x2, out_size) - - x = self._binary_op(x1, x2) - if self.with_out_conv: - x = self.out_conv(x) - return x - - -class SumCell(BaseMergeCell): - - def __init__(self, in_channels, out_channels, **kwargs): - super(SumCell, self).__init__(in_channels, out_channels, **kwargs) - - def _binary_op(self, x1, x2): - return x1 + x2 - - -class ConcatCell(BaseMergeCell): - - def __init__(self, in_channels, out_channels, **kwargs): - super(ConcatCell, self).__init__(in_channels * 2, out_channels, - **kwargs) - - def _binary_op(self, x1, x2): - ret = torch.cat([x1, x2], dim=1) - return ret - - -class GlobalPoolingCell(BaseMergeCell): - - def __init__(self, in_channels=None, out_channels=None, **kwargs): - super().__init__(in_channels, out_channels, **kwargs) - self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) - - def _binary_op(self, x1, x2): - x2_att = self.global_pool(x2).sigmoid() - return x2 + x2_att * x1 diff --git a/mmdet/ops/nms/__init__.py b/mmdet/ops/nms/__init__.py deleted file mode 100644 index 4440a5056b4..00000000000 --- a/mmdet/ops/nms/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .nms_wrapper import batched_nms, nms, nms_match, soft_nms - -__all__ = ['nms', 'soft_nms', 'batched_nms', 'nms_match'] diff --git a/mmdet/ops/nms/nms_wrapper.py b/mmdet/ops/nms/nms_wrapper.py deleted file mode 100644 index 867693b3a9d..00000000000 --- a/mmdet/ops/nms/nms_wrapper.py +++ /dev/null @@ -1,190 +0,0 @@ -import numpy as np -import torch - -from . import nms_ext - - -def nms(dets, iou_thr, device_id=None): - """Dispatch to either CPU or GPU NMS implementations. 
- - The input can be either a torch tensor or numpy array. GPU NMS will be used - if the input is a gpu tensor or device_id is specified, otherwise CPU NMS - will be used. The returned type will always be the same as inputs. - - Arguments: - dets (torch.Tensor or np.ndarray): bboxes with scores. - iou_thr (float): IoU threshold for NMS. - device_id (int, optional): when `dets` is a numpy array, if `device_id` - is None, then cpu nms is used, otherwise gpu_nms will be used. - - Returns: - tuple: kept bboxes and indice, which is always the same data type as - the input. - - Example: - >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], - >>> [49.3, 32.9, 51.0, 35.3, 0.9], - >>> [49.2, 31.8, 51.0, 35.4, 0.5], - >>> [35.1, 11.5, 39.1, 15.7, 0.5], - >>> [35.6, 11.8, 39.3, 14.2, 0.5], - >>> [35.3, 11.5, 39.9, 14.5, 0.4], - >>> [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32) - >>> iou_thr = 0.6 - >>> suppressed, inds = nms(dets, iou_thr) - >>> assert len(inds) == len(suppressed) == 3 - """ - # convert dets (tensor or numpy array) to tensor - if isinstance(dets, torch.Tensor): - is_numpy = False - dets_th = dets - elif isinstance(dets, np.ndarray): - is_numpy = True - device = 'cpu' if device_id is None else f'cuda:{device_id}' - dets_th = torch.from_numpy(dets).to(device) - else: - raise TypeError('dets must be either a Tensor or numpy array, ' - f'but got {type(dets)}') - - # execute cpu or cuda nms - if dets_th.shape[0] == 0: - inds = dets_th.new_zeros(0, dtype=torch.long) - else: - if dets_th.is_cuda: - inds = nms_ext.nms(dets_th, iou_thr) - else: - inds = nms_ext.nms(dets_th, iou_thr) - - if is_numpy: - inds = inds.cpu().numpy() - return dets[inds, :], inds - - -def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): - """Dispatch to only CPU Soft NMS implementations. - - The input can be either a torch tensor or numpy array. - The returned type will always be the same as inputs. - - Arguments: - dets (torch.Tensor or np.ndarray): bboxes with scores. - iou_thr (float): IoU threshold for Soft NMS. - method (str): either 'linear' or 'gaussian' - sigma (float): hyperparameter for gaussian method - min_score (float): score filter threshold - - Returns: - tuple: new det bboxes and indice, which is always the same - data type as the input. 
- - Example: - >>> dets = np.array([[4., 3., 5., 3., 0.9], - >>> [4., 3., 5., 4., 0.9], - >>> [3., 1., 3., 1., 0.5], - >>> [3., 1., 3., 1., 0.5], - >>> [3., 1., 3., 1., 0.4], - >>> [3., 1., 3., 1., 0.0]], dtype=np.float32) - >>> iou_thr = 0.6 - >>> new_dets, inds = soft_nms(dets, iou_thr, sigma=0.5) - >>> assert len(inds) == len(new_dets) == 5 - """ - # convert dets (tensor or numpy array) to tensor - if isinstance(dets, torch.Tensor): - is_tensor = True - dets_t = dets.detach().cpu() - elif isinstance(dets, np.ndarray): - is_tensor = False - dets_t = torch.from_numpy(dets) - else: - raise TypeError('dets must be either a Tensor or numpy array, ' - f'but got {type(dets)}') - - method_codes = {'linear': 1, 'gaussian': 2} - if method not in method_codes: - raise ValueError(f'Invalid method for SoftNMS: {method}') - results = nms_ext.soft_nms(dets_t, iou_thr, method_codes[method], sigma, - min_score) - - new_dets = results[:, :5] - inds = results[:, 5] - - if is_tensor: - return new_dets.to( - device=dets.device, dtype=dets.dtype), inds.to( - device=dets.device, dtype=torch.long) - else: - return new_dets.numpy().astype(dets.dtype), inds.numpy().astype( - np.int64) - - -def batched_nms(bboxes, scores, inds, nms_cfg, class_agnostic=False): - """Performs non-maximum suppression in a batched fashion. - - Modified from https://github.com/pytorch/vision/blob - /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. - In order to perform NMS independently per class, we add an offset to all - the boxes. The offset is dependent only on the class idx, and is large - enough so that boxes from different classes do not overlap. - - Arguments: - bboxes (torch.Tensor): bboxes in shape (N, 4). - scores (torch.Tensor): scores in shape (N, ). - inds (torch.Tensor): each index value correspond to a bbox cluster, - and NMS will not be applied between elements of different inds, - shape (N, ). - nms_cfg (dict): specify nms type and class_agnostic as well as other - parameters like iou_thr. - class_agnostic (bool): if true, nms is class agnostic, - i.e. IoU thresholding happens over all bboxes, - regardless of the predicted class - - Returns: - tuple: kept bboxes and indice. - """ - nms_cfg_ = nms_cfg.copy() - class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) - if class_agnostic: - bboxes_for_nms = bboxes - else: - max_coordinate = bboxes.max() - offsets = inds.to(bboxes) * (max_coordinate + 1) - bboxes_for_nms = bboxes + offsets[:, None] - nms_type = nms_cfg_.pop('type', 'nms') - nms_op = eval(nms_type) - dets, keep = nms_op( - torch.cat([bboxes_for_nms, scores[:, None]], -1), **nms_cfg_) - bboxes = bboxes[keep] - scores = dets[:, -1] - return torch.cat([bboxes, scores[:, None]], -1), keep - - -def nms_match(dets, thresh): - """Matched dets into different groups by NMS. - - NMS match is Similar to NMS but when a bbox is suppressed, nms match will - record the indice of supporessed bbox and form a group with the indice of - kept bbox. In each group, indice is sorted as score order. - - Arguments: - dets (torch.Tensor | np.ndarray): Det bboxes with scores, shape (N, 5). - iou_thr (float): IoU thresh for NMS. - - Returns: - List[Tensor | ndarray]: The outer list corresponds different matched - group, the inner Tensor corresponds the indices for a group in - score order. 
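The class-offset trick in the removed batched_nms above is the part worth spelling out: every box is shifted by class_idx * (max_coordinate + 1), so boxes of different classes can never overlap, and one class-agnostic NMS pass then behaves like per-class NMS. A minimal standalone sketch, using torchvision.ops.nms as a stand-in for the removed op (names and numbers are illustrative):

import torch
from torchvision.ops import nms  # stand-in for the removed mmdet nms

def batched_nms_sketch(boxes, scores, idxs, iou_threshold=0.5):
    # Offset each box by class index * (max coordinate + 1) so that boxes
    # of different classes land in disjoint regions, then run a single
    # class-agnostic NMS over the shifted boxes.
    if boxes.numel() == 0:
        return boxes.new_zeros((0, ), dtype=torch.long)
    max_coordinate = boxes.max()
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    return nms(boxes + offsets[:, None], scores, iou_threshold)

# Two identical boxes with different class labels are both kept.
boxes = torch.tensor([[0., 0., 10., 10.], [0., 0., 10., 10.]])
scores = torch.tensor([0.9, 0.8])
idxs = torch.tensor([0, 1])
assert batched_nms_sketch(boxes, scores, idxs).numel() == 2
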
- """ - if dets.shape[0] == 0: - matched = [] - else: - assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \ - f'but get {dets.shape}' - if isinstance(dets, torch.Tensor): - dets_t = dets.detach().cpu() - else: - dets_t = torch.from_numpy(dets) - matched = nms_ext.nms_match(dets_t, thresh) - - if isinstance(dets, torch.Tensor): - return [dets.new_tensor(m, dtype=torch.long) for m in matched] - else: - return [np.array(m, dtype=np.int) for m in matched] diff --git a/mmdet/ops/nms/src/cpu/nms_cpu.cpp b/mmdet/ops/nms/src/cpu/nms_cpu.cpp deleted file mode 100644 index 230657e9774..00000000000 --- a/mmdet/ops/nms/src/cpu/nms_cpu.cpp +++ /dev/null @@ -1,293 +0,0 @@ -// Soft-NMS is added by MMDetection. -// Modified from -// https://github.com/bharatsingh430/soft-nms/blob/master/lib/nms/cpu_nms.pyx. -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -#include - -template -at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { - AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor"); - - if (dets.numel() == 0) { - return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); - } - - auto x1_t = dets.select(1, 0).contiguous(); - auto y1_t = dets.select(1, 1).contiguous(); - auto x2_t = dets.select(1, 2).contiguous(); - auto y2_t = dets.select(1, 3).contiguous(); - auto scores = dets.select(1, 4).contiguous(); - - at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t); - - auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); - - auto ndets = dets.size(0); - at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); - at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); - - auto suppressed = suppressed_t.data_ptr(); - auto keep = keep_t.data_ptr(); - auto order = order_t.data_ptr(); - auto x1 = x1_t.data_ptr(); - auto y1 = y1_t.data_ptr(); - auto x2 = x2_t.data_ptr(); - auto y2 = y2_t.data_ptr(); - auto areas = areas_t.data_ptr(); - - int64_t num_to_keep = 0; - - for (int64_t _i = 0; _i < ndets; _i++) { - auto i = order[_i]; - if (suppressed[i] == 1) continue; - keep[num_to_keep++] = i; - auto ix1 = x1[i]; - auto iy1 = y1[i]; - auto ix2 = x2[i]; - auto iy2 = y2[i]; - auto iarea = areas[i]; - - for (int64_t _j = _i + 1; _j < ndets; _j++) { - auto j = order[_j]; - if (suppressed[j] == 1) continue; - auto xx1 = std::max(ix1, x1[j]); - auto yy1 = std::max(iy1, y1[j]); - auto xx2 = std::min(ix2, x2[j]); - auto yy2 = std::min(iy2, y2[j]); - - auto w = std::max(static_cast(0), xx2 - xx1); - auto h = std::max(static_cast(0), yy2 - yy1); - auto inter = w * h; - auto ovr = inter / (iarea + areas[j] - inter); - if (ovr > threshold) suppressed[j] = 1; - } - } - return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); -} - -at::Tensor nms_cpu(const at::Tensor& dets, const float threshold) { - at::Tensor result; - AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { - result = nms_cpu_kernel(dets, threshold); - }); - return result; -} - -template -at::Tensor soft_nms_cpu_kernel(const at::Tensor& dets, const float threshold, - const unsigned char method, const float sigma, - const float min_score) { - AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor"); - - if (dets.numel() == 0) { - return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); - } - - auto x1_t = dets.select(1, 0).contiguous(); - auto y1_t = dets.select(1, 1).contiguous(); - auto x2_t = dets.select(1, 2).contiguous(); - auto y2_t = dets.select(1, 3).contiguous(); - auto 
scores_t = dets.select(1, 4).contiguous(); - - at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t); - - auto ndets = dets.size(0); - auto x1 = x1_t.data_ptr(); - auto y1 = y1_t.data_ptr(); - auto x2 = x2_t.data_ptr(); - auto y2 = y2_t.data_ptr(); - auto scores = scores_t.data_ptr(); - auto areas = areas_t.data_ptr(); - - int64_t pos = 0; - at::Tensor inds_t = at::arange(ndets, dets.options()); - auto inds = inds_t.data_ptr(); - - for (int64_t i = 0; i < ndets; i++) { - auto max_score = scores[i]; - auto max_pos = i; - - auto ix1 = x1[i]; - auto iy1 = y1[i]; - auto ix2 = x2[i]; - auto iy2 = y2[i]; - auto iscore = scores[i]; - auto iarea = areas[i]; - auto iind = inds[i]; - - pos = i + 1; - // get max box - while (pos < ndets) { - if (max_score < scores[pos]) { - max_score = scores[pos]; - max_pos = pos; - } - pos = pos + 1; - } - // add max box as a detection - x1[i] = x1[max_pos]; - y1[i] = y1[max_pos]; - x2[i] = x2[max_pos]; - y2[i] = y2[max_pos]; - scores[i] = scores[max_pos]; - areas[i] = areas[max_pos]; - inds[i] = inds[max_pos]; - - // swap ith box with position of max box - x1[max_pos] = ix1; - y1[max_pos] = iy1; - x2[max_pos] = ix2; - y2[max_pos] = iy2; - scores[max_pos] = iscore; - areas[max_pos] = iarea; - inds[max_pos] = iind; - - ix1 = x1[i]; - iy1 = y1[i]; - ix2 = x2[i]; - iy2 = y2[i]; - iscore = scores[i]; - iarea = areas[i]; - - pos = i + 1; - // NMS iterations, note that N changes if detection boxes fall below - // threshold - while (pos < ndets) { - auto xx1 = std::max(ix1, x1[pos]); - auto yy1 = std::max(iy1, y1[pos]); - auto xx2 = std::min(ix2, x2[pos]); - auto yy2 = std::min(iy2, y2[pos]); - - auto w = std::max(static_cast(0), xx2 - xx1); - auto h = std::max(static_cast(0), yy2 - yy1); - auto inter = w * h; - auto ovr = inter / (iarea + areas[pos] - inter); - - scalar_t weight = 1.; - if (method == 1) { - if (ovr > threshold) weight = 1 - ovr; - } else if (method == 2) { - weight = std::exp(-(ovr * ovr) / sigma); - } else { - // original NMS - if (ovr > threshold) { - weight = 0; - } else { - weight = 1; - } - } - scores[pos] = weight * scores[pos]; - // if box score falls below threshold, discard the box by - // swapping with last box update N - if (scores[pos] < min_score) { - x1[pos] = x1[ndets - 1]; - y1[pos] = y1[ndets - 1]; - x2[pos] = x2[ndets - 1]; - y2[pos] = y2[ndets - 1]; - scores[pos] = scores[ndets - 1]; - areas[pos] = areas[ndets - 1]; - inds[pos] = inds[ndets - 1]; - ndets = ndets - 1; - pos = pos - 1; - } - pos = pos + 1; - } - } - at::Tensor result = at::zeros({6, ndets}, dets.options()); - result[0] = x1_t.slice(0, 0, ndets); - result[1] = y1_t.slice(0, 0, ndets); - result[2] = x2_t.slice(0, 0, ndets); - result[3] = y2_t.slice(0, 0, ndets); - result[4] = scores_t.slice(0, 0, ndets); - result[5] = inds_t.slice(0, 0, ndets); - - result = result.t().contiguous(); - return result; -} - -at::Tensor soft_nms_cpu(const at::Tensor& dets, const float threshold, - const unsigned char method, const float sigma, - const float min_score) { - at::Tensor result; - AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "soft_nms", [&] { - result = soft_nms_cpu_kernel(dets, threshold, method, sigma, - min_score); - }); - return result; -} - - -template -std::vector > nms_match_cpu_kernel(const at::Tensor& dets, - const float threshold) { - AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); - - auto x1_t = dets.select(1, 0).contiguous(); - auto y1_t = dets.select(1, 1).contiguous(); - auto x2_t = dets.select(1, 2).contiguous(); - auto y2_t = dets.select(1, 
3).contiguous(); - auto scores = dets.select(1, 4).contiguous(); - - at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t); - - auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); - - auto ndets = dets.size(0); - at::Tensor suppressed_t = - at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); - - auto suppressed = suppressed_t.data_ptr(); - auto order = order_t.data_ptr(); - auto x1 = x1_t.data_ptr(); - auto y1 = y1_t.data_ptr(); - auto x2 = x2_t.data_ptr(); - auto y2 = y2_t.data_ptr(); - auto areas = areas_t.data_ptr(); - - std::vector keep; - std::vector > matched; - - for (int64_t _i = 0; _i < ndets; _i++) { - auto i = order[_i]; - if (suppressed[i] == 1) continue; - keep.push_back(i); - std::vector v_i; - auto ix1 = x1[i]; - auto iy1 = y1[i]; - auto ix2 = x2[i]; - auto iy2 = y2[i]; - auto iarea = areas[i]; - - for (int64_t _j = _i + 1; _j < ndets; _j++) { - auto j = order[_j]; - if (suppressed[j] == 1) continue; - auto xx1 = std::max(ix1, x1[j]); - auto yy1 = std::max(iy1, y1[j]); - auto xx2 = std::min(ix2, x2[j]); - auto yy2 = std::min(iy2, y2[j]); - - auto w = std::max(static_cast(0), xx2 - xx1); - auto h = std::max(static_cast(0), yy2 - yy1); - auto inter = w * h; - auto ovr = inter / (iarea + areas[j] - inter); - if (ovr >= threshold) { - suppressed[j] = 1; - v_i.push_back(j); - } - } - matched.push_back(v_i); - } - for (size_t i = 0; i < keep.size(); i++) - matched[i].insert(matched[i].begin(), keep[i]); - return matched; -} - -std::vector > nms_match_cpu(const at::Tensor& dets, - const float threshold) { - std::vector > result; - // result = nms_match_cpu_kernel(dets, threshold); - AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_match", [&] { - result = nms_match_cpu_kernel(dets, threshold); - }); - return result; -} diff --git a/mmdet/ops/nms/src/cuda/nms_cuda.cpp b/mmdet/ops/nms/src/cuda/nms_cuda.cpp deleted file mode 100644 index d46b8166904..00000000000 --- a/mmdet/ops/nms/src/cuda/nms_cuda.cpp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -#include - -#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") - -at::Tensor nms_cuda_forward(const at::Tensor boxes, float nms_overlap_thresh); - -at::Tensor nms_cuda(const at::Tensor& dets, const float threshold) { - CHECK_CUDA(dets); - if (dets.numel() == 0) - return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); - return nms_cuda_forward(dets, threshold); -} diff --git a/mmdet/ops/nms/src/cuda/nms_kernel.cu b/mmdet/ops/nms/src/cuda/nms_kernel.cu deleted file mode 100644 index bb6d18abcfa..00000000000 --- a/mmdet/ops/nms/src/cuda/nms_kernel.cu +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
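The CPU soft-NMS kernel a little further above decays the scores of overlapping boxes instead of dropping them outright; the method code (1 = linear, 2 = gaussian, anything else = plain hard NMS) only changes the decay function, and boxes whose decayed score falls below min_score are removed. The score-update rule in isolation, as a small sketch mirroring the C++ branches:

import math

def soft_nms_weight(iou, method='linear', iou_thr=0.3, sigma=0.5):
    # Decay factor applied to the score of a box that overlaps the current
    # top-scoring box by `iou` (mirrors the removed soft_nms_cpu_kernel).
    if method == 'linear':
        return 1.0 - iou if iou > iou_thr else 1.0
    if method == 'gaussian':
        return math.exp(-(iou * iou) / sigma)
    return 0.0 if iou > iou_thr else 1.0   # plain NMS: hard suppression

# A box overlapping the kept box with IoU 0.6 keeps 40% of its score under
# the linear rule and about 49% under the gaussian rule with sigma = 0.5.
assert abs(soft_nms_weight(0.6, 'linear') - 0.4) < 1e-6
assert abs(soft_nms_weight(0.6, 'gaussian') - math.exp(-0.72)) < 1e-6
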
-#include -#include -#include - -#include -#include - -#include -#include - -int const threadsPerBlock = sizeof(unsigned long long) * 8; - -__device__ inline float devIoU(float const * const a, float const * const b) { - float left = max(a[0], b[0]), right = min(a[2], b[2]); - float top = max(a[1], b[1]), bottom = min(a[3], b[3]); - float width = max(right - left, 0.f), height = max(bottom - top, 0.f); - float interS = width * height; - float Sa = (a[2] - a[0]) * (a[3] - a[1]); - float Sb = (b[2] - b[0]) * (b[3] - b[1]); - return interS / (Sa + Sb - interS); -} - -__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, - const float *dev_boxes, unsigned long long *dev_mask) { - const int row_start = blockIdx.y; - const int col_start = blockIdx.x; - - // if (row_start > col_start) return; - - const int row_size = - min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); - const int col_size = - min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); - - __shared__ float block_boxes[threadsPerBlock * 5]; - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 5 + 0] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; - block_boxes[threadIdx.x * 5 + 1] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; - block_boxes[threadIdx.x * 5 + 2] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; - block_boxes[threadIdx.x * 5 + 3] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; - block_boxes[threadIdx.x * 5 + 4] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; - } - __syncthreads(); - - if (threadIdx.x < row_size) { - const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; - const float *cur_box = dev_boxes + cur_box_idx * 5; - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { - t |= 1ULL << i; - } - } - const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); - dev_mask[cur_box_idx * col_blocks + col_start] = t; - } -} - -// boxes is a N x 5 tensor -at::Tensor nms_cuda_forward(const at::Tensor boxes, float nms_overlap_thresh) { - - // Ensure CUDA uses the input tensor device. 
- at::DeviceGuard guard(boxes.device()); - - using scalar_t = float; - AT_ASSERTM(boxes.device().is_cuda(), "boxes must be a CUDA tensor"); - auto scores = boxes.select(1, 4); - auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); - auto boxes_sorted = boxes.index_select(0, order_t); - - int boxes_num = boxes.size(0); - - const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); - - scalar_t* boxes_dev = boxes_sorted.data_ptr(); - - THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState - - unsigned long long* mask_dev = NULL; - //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, - // boxes_num * col_blocks * sizeof(unsigned long long))); - - mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); - - dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), - THCCeilDiv(boxes_num, threadsPerBlock)); - dim3 threads(threadsPerBlock); - nms_kernel<<>>(boxes_num, - nms_overlap_thresh, - boxes_dev, - mask_dev); - - std::vector mask_host(boxes_num * col_blocks); - THCudaCheck(cudaMemcpyAsync( - &mask_host[0], - mask_dev, - sizeof(unsigned long long) * boxes_num * col_blocks, - cudaMemcpyDeviceToHost, - at::cuda::getCurrentCUDAStream() - )); - - std::vector remv(col_blocks); - memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); - - at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); - int64_t* keep_out = keep.data_ptr(); - - int num_to_keep = 0; - for (int i = 0; i < boxes_num; i++) { - int nblock = i / threadsPerBlock; - int inblock = i % threadsPerBlock; - - if (!(remv[nblock] & (1ULL << inblock))) { - keep_out[num_to_keep++] = i; - unsigned long long *p = &mask_host[0] + i * col_blocks; - for (int j = nblock; j < col_blocks; j++) { - remv[j] |= p[j]; - } - } - } - - THCudaFree(state, mask_dev); - // TODO improve this part - return order_t.index({ - keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( - order_t.device(), keep.scalar_type())}); -} diff --git a/mmdet/ops/nms/src/nms_ext.cpp b/mmdet/ops/nms/src/nms_ext.cpp deleted file mode 100644 index 2a4402eeb17..00000000000 --- a/mmdet/ops/nms/src/nms_ext.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Modified from https://github.com/bharatsingh430/soft-nms/blob/master/lib/nms/cpu_nms.pyx, Soft-NMS is added -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
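The nms_kernel above records, for every box in descending score order, a bit mask over the later boxes it overlaps beyond the threshold, packed 64 boxes per unsigned long long; the host then walks the boxes in order and keeps one only if no previously kept box has flagged it. A commented transcription of that host-side reduction in plain Python (the mask is taken as a list of rows of 64-bit integers; names are illustrative):

def reduce_nms_mask(mask, n_boxes, threads_per_block=64):
    # mask[i][j] has bit k set iff box i suppresses box j * threads_per_block + k
    # (boxes are assumed already sorted by descending score).
    col_blocks = (n_boxes + threads_per_block - 1) // threads_per_block
    remv = [0] * col_blocks          # accumulated suppression bits
    keep = []
    for i in range(n_boxes):
        nblock, inblock = divmod(i, threads_per_block)
        if not ((remv[nblock] >> inblock) & 1):   # box i still alive
            keep.append(i)
            for j in range(nblock, col_blocks):   # mark what box i suppresses
                remv[j] |= mask[i][j]
    return keep

# Toy check: box 0 suppresses box 1, so only box 0 survives.
assert reduce_nms_mask([[1 << 1], [0]], n_boxes=2) == [0]
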
-#include
-
-at::Tensor nms_cpu(const at::Tensor& dets, const float threshold);
-
-at::Tensor soft_nms_cpu(const at::Tensor& dets, const float threshold,
-                        const unsigned char method, const float sigma, const
-                        float min_score);
-
-std::vector<std::vector<int> > nms_match_cpu(const at::Tensor& dets, const float threshold);
-
-
-#ifdef WITH_CUDA
-at::Tensor nms_cuda(const at::Tensor& dets, const float threshold);
-#endif
-
-at::Tensor nms(const at::Tensor& dets, const float threshold){
-  if (dets.device().is_cuda()) {
-#ifdef WITH_CUDA
-    return nms_cuda(dets, threshold);
-#else
-    AT_ERROR("nms is not compiled with GPU support");
-#endif
-  }
-  return nms_cpu(dets, threshold);
-}
-
-at::Tensor soft_nms(const at::Tensor& dets, const float threshold,
-                    const unsigned char method, const float sigma, const
-                    float min_score) {
-  if (dets.device().is_cuda()) {
-    AT_ERROR("soft_nms is not implemented on GPU");
-  }
-  return soft_nms_cpu(dets, threshold, method, sigma, min_score);
-}
-
-std::vector<std::vector<int> > nms_match(const at::Tensor& dets, const float threshold) {
-  if (dets.type().is_cuda()) {
-    AT_ERROR("nms_match is not implemented on GPU");
-  }
-  return nms_match_cpu(dets, threshold);
-}
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("nms", &nms, "non-maximum suppression");
-  m.def("soft_nms", &soft_nms, "soft non-maximum suppression");
-  m.def("nms_match", &nms_match, "non-maximum suppression match");
-}
diff --git a/mmdet/ops/non_local.py b/mmdet/ops/non_local.py
deleted file mode 100644
index 3630eb2f73a..00000000000
--- a/mmdet/ops/non_local.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import torch
-import torch.nn as nn
-from mmcv.cnn import ConvModule, constant_init, normal_init
-
-
-class NonLocal2D(nn.Module):
-    """Non-local module.
-
-    See https://arxiv.org/abs/1711.07971 for details.
-
-    Args:
-        in_channels (int): Channels of the input feature map.
-        reduction (int): Channel reduction ratio.
-        use_scale (bool): Whether to scale pairwise_weight by 1/inter_channels.
-        conv_cfg (dict): The config dict for convolution layers.
-            (only applicable to conv_out)
-        norm_cfg (dict): The config dict for normalization layers.
-            (only applicable to conv_out)
-        mode (str): Options are `embedded_gaussian` and `dot_product`.
-    """
-
-    def __init__(self,
-                 in_channels,
-                 reduction=2,
-                 use_scale=True,
-                 conv_cfg=None,
-                 norm_cfg=None,
-                 mode='embedded_gaussian'):
-        super(NonLocal2D, self).__init__()
-        self.in_channels = in_channels
-        self.reduction = reduction
-        self.use_scale = use_scale
-        self.inter_channels = in_channels // reduction
-        self.mode = mode
-        assert mode in ['embedded_gaussian', 'dot_product']
-
-        # g, theta, phi are actually `nn.Conv2d`. Here we use ConvModule for
-        # potential usage.
- self.g = ConvModule( - self.in_channels, self.inter_channels, kernel_size=1, act_cfg=None) - self.theta = ConvModule( - self.in_channels, self.inter_channels, kernel_size=1, act_cfg=None) - self.phi = ConvModule( - self.in_channels, self.inter_channels, kernel_size=1, act_cfg=None) - self.conv_out = ConvModule( - self.inter_channels, - self.in_channels, - kernel_size=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=None) - - self.init_weights() - - def init_weights(self, std=0.01, zeros_init=True): - for m in [self.g, self.theta, self.phi]: - normal_init(m.conv, std=std) - if zeros_init: - constant_init(self.conv_out.conv, 0) - else: - normal_init(self.conv_out.conv, std=std) - - def embedded_gaussian(self, theta_x, phi_x): - # pairwise_weight: [N, HxW, HxW] - pairwise_weight = torch.matmul(theta_x, phi_x) - if self.use_scale: - # theta_x.shape[-1] is `self.inter_channels` - pairwise_weight /= theta_x.shape[-1]**0.5 - pairwise_weight = pairwise_weight.softmax(dim=-1) - return pairwise_weight - - def dot_product(self, theta_x, phi_x): - # pairwise_weight: [N, HxW, HxW] - pairwise_weight = torch.matmul(theta_x, phi_x) - pairwise_weight /= pairwise_weight.shape[-1] - return pairwise_weight - - def forward(self, x): - n, _, h, w = x.shape - - # g_x: [N, HxW, C] - g_x = self.g(x).view(n, self.inter_channels, -1) - g_x = g_x.permute(0, 2, 1) - - # theta_x: [N, HxW, C] - theta_x = self.theta(x).view(n, self.inter_channels, -1) - theta_x = theta_x.permute(0, 2, 1) - - # phi_x: [N, C, HxW] - phi_x = self.phi(x).view(n, self.inter_channels, -1) - - pairwise_func = getattr(self, self.mode) - # pairwise_weight: [N, HxW, HxW] - pairwise_weight = pairwise_func(theta_x, phi_x) - - # y: [N, HxW, C] - y = torch.matmul(pairwise_weight, g_x) - # y: [N, C, H, W] - y = y.permute(0, 2, 1).reshape(n, self.inter_channels, h, w) - - output = x + self.conv_out(y) - - return output diff --git a/mmdet/ops/plugin.py b/mmdet/ops/plugin.py deleted file mode 100644 index 5189d71e5a8..00000000000 --- a/mmdet/ops/plugin.py +++ /dev/null @@ -1,44 +0,0 @@ -from mmcv.cnn import ConvModule - -from .context_block import ContextBlock -from .generalized_attention import GeneralizedAttention -from .non_local import NonLocal2D - -plugin_cfg = { - # format: layer_type: (abbreviation, module) - 'ContextBlock': ('context_block', ContextBlock), - 'GeneralizedAttention': ('gen_attention_block', GeneralizedAttention), - 'NonLocal2D': ('nonlocal_block', NonLocal2D), - 'ConvModule': ('conv_block', ConvModule), -} - - -def build_plugin_layer(cfg, postfix='', **kwargs): - """Build plugin layer. - - Args: - cfg (None or dict): cfg should contain: - type (str): identify plugin layer type. - layer args: args needed to instantiate a plugin layer. - postfix (int, str): appended into norm abbreviation to - create named layer. 
- - Returns: - name (str): abbreviation + postfix - layer (nn.Module): created plugin layer - """ - assert isinstance(cfg, dict) and 'type' in cfg - cfg_ = cfg.copy() - - layer_type = cfg_.pop('type') - if layer_type not in plugin_cfg: - raise KeyError(f'Unrecognized plugin type {layer_type}') - else: - abbr, plugin_layer = plugin_cfg[layer_type] - - assert isinstance(postfix, (int, str)) - name = abbr + str(postfix) - - layer = plugin_layer(**kwargs, **cfg_) - - return name, layer diff --git a/mmdet/ops/point_sample.py b/mmdet/ops/point_sample.py deleted file mode 100644 index f739bca4466..00000000000 --- a/mmdet/ops/point_sample.py +++ /dev/null @@ -1,218 +0,0 @@ -# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.nn.modules.utils import _pair - - -def normalize(grid): - """Normalize input grid from [-1, 1] to [0, 1] - - Args: - grid (Tensor): The grid to be normalize, range [-1, 1]. - - Returns: - Tensor: Normalized grid, range [0, 1]. - """ - - return (grid + 1.0) / 2.0 - - -def denormalize(grid): - """Denormalize input grid from range [0, 1] to [-1, 1] - Args: - grid (Tensor): The grid to be denormalize, range [0, 1]. - - Returns: - Tensor: Denormalized grid, range [-1, 1]. - """ - - return grid * 2.0 - 1.0 - - -def generate_grid(num_grid, size, device): - """Generate regular square grid of points in [0, 1] x [0, 1] coordinate - space. - - Args: - num_grid (int): The number of grids to sample, one for each region. - size (tuple(int, int)): The side size of the regular grid. - device (torch.device): Desired device of returned tensor. - - Returns: - (torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that - contains coordinates for the regular grids. - """ - - affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device) - grid = F.affine_grid( - affine_trans, torch.Size((1, 1, *size)), align_corners=False) - grid = normalize(grid) - return grid.view(1, -1, 2).expand(num_grid, -1, -1) - - -def rel_roi_point_to_abs_img_point(rois, rel_roi_points): - """Convert roi based relative point coordinates to image based absolute - point coordinates. - - Args: - rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) - rel_roi_points (Tensor): Point coordinates inside RoI, relative to - RoI, location, range (0, 1), shape (N, P, 2) - - Returns: - Tensor: Image based absolute point coordinates, shape (N, P, 2) - """ - - with torch.no_grad(): - assert rel_roi_points.size(0) == rois.size(0) - assert rois.dim() == 2 - assert rel_roi_points.dim() == 3 - assert rel_roi_points.size(2) == 2 - # remove batch idx - if rois.size(1) == 5: - rois = rois[:, 1:] - abs_img_points = rel_roi_points.clone() - abs_img_points[:, :, 0] = abs_img_points[:, :, 0] * ( - rois[:, None, 2] - rois[:, None, 0]) - abs_img_points[:, :, 1] = abs_img_points[:, :, 1] * ( - rois[:, None, 3] - rois[:, None, 1]) - abs_img_points[:, :, 0] += rois[:, None, 0] - abs_img_points[:, :, 1] += rois[:, None, 1] - return abs_img_points - - -def abs_img_point_to_rel_img_point(abs_img_points, - img_shape, - spatial_scale=1.): - """Convert image based absolute point coordinates to image based relative - coordinates for sampling. - - Args: - abs_img_points (Tensor): Image based absolute point coordinates, - shape (N, P, 2) - img_shape (tuple): (height, width) of image or feature map. - spatial_scale (float): Scale points by this factor. Default: 1. 
- - Returns: - Tensor: Image based relative point coordinates for sampling, - shape (N, P, 2) - """ - - assert isinstance(img_shape, tuple) and len(img_shape) == 2 - h, w = img_shape - scale = torch.tensor([w, h], - dtype=torch.float, - device=abs_img_points.device) - scale = scale.view(1, 1, 2) - rel_img_points = abs_img_points / scale * spatial_scale - - return rel_img_points - - -def rel_roi_point_to_rel_img_point(rois, - rel_roi_points, - img_shape, - spatial_scale=1.): - """Convert roi based relative point coordinates to image based absolute - point coordinates. - - Args: - rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) - rel_roi_points (Tensor): Point coordinates inside RoI, relative to - RoI, location, range (0, 1), shape (N, P, 2) - img_shape (tuple): (height, width) of image or feature map. - spatial_scale (float): Scale points by this factor. Default: 1. - - Returns: - Tensor: Image based relative point coordinates for sampling, - shape (N, P, 2) - """ - - abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) - rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img_shape, - spatial_scale) - - return rel_img_point - - -def point_sample(input, points, align_corners=False, **kwargs): - """A wrapper around :function:`grid_sample` to support 3D point_coords - tensors Unlike :function:`torch.nn.functional.grid_sample` it assumes - point_coords to lie inside [0, 1] x [0, 1] square. - - Args: - input (Tensor): Feature map, shape (N, C, H, W). - points (Tensor): Image based absolute point coordinates (normalized), - range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2). - align_corners (bool): Whether align_corners. Default: False - - Returns: - Tensor: Features of `point` on `input`, shape (N, C, P) or - (N, C, Hgrid, Wgrid). - """ - - add_dim = False - if points.dim() == 3: - add_dim = True - points = points.unsqueeze(2) - output = F.grid_sample( - input, denormalize(points), align_corners=align_corners, **kwargs) - if add_dim: - output = output.squeeze(3) - return output - - -class SimpleRoIAlign(nn.Module): - - def __init__(self, out_size, spatial_scale, aligned=True): - """Simple RoI align in PointRend, faster than standard RoIAlign. - - Args: - out_size (tuple[int]): h, w - spatial_scale (float): scale the input boxes by this number - aligned (bool): if False, use the legacy implementation in - MMDetection, align_corners=True will be used in F.grid_sample. - If True, align the results more perfectly. 
- """ - - super(SimpleRoIAlign, self).__init__() - self.out_size = _pair(out_size) - self.spatial_scale = float(spatial_scale) - # to be consistent with other RoI ops - self.use_torchvision = False - self.aligned = aligned - - def forward(self, features, rois): - - num_imgs = features.size(0) - num_rois = rois.size(0) - rel_roi_points = generate_grid( - num_rois, self.out_size, device=rois.device) - - point_feats = [] - for batch_ind in range(num_imgs): - # unravel batch dim - feat = features[batch_ind].unsqueeze(0) - inds = (rois[:, 0].long() == batch_ind) - if inds.any(): - rel_img_points = rel_roi_point_to_rel_img_point( - rois[inds], rel_roi_points[inds], feat.shape[2:], - self.spatial_scale).unsqueeze(0) - point_feat = point_sample( - feat, rel_img_points, align_corners=not self.aligned) - point_feat = point_feat.squeeze(0).transpose(0, 1) - point_feats.append(point_feat) - - channels = features.size(1) - roi_feats = torch.cat(point_feats, dim=0) - roi_feats = roi_feats.reshape(num_rois, channels, *self.out_size) - - return roi_feats - - def __repr__(self): - format_str = self.__class__.__name__ - format_str += '(out_size={}, spatial_scale={}'.format( - self.out_size, self.spatial_scale) - return format_str diff --git a/mmdet/ops/roi_align/__init__.py b/mmdet/ops/roi_align/__init__.py deleted file mode 100644 index 6da98298fa5..00000000000 --- a/mmdet/ops/roi_align/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .roi_align import RoIAlign, roi_align - -__all__ = ['roi_align', 'RoIAlign'] diff --git a/mmdet/ops/roi_align/gradcheck.py b/mmdet/ops/roi_align/gradcheck.py deleted file mode 100644 index 136456b398b..00000000000 --- a/mmdet/ops/roi_align/gradcheck.py +++ /dev/null @@ -1,30 +0,0 @@ -import os.path as osp -import sys - -import numpy as np -import torch -from torch.autograd import gradcheck - -sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -from roi_align import RoIAlign # noqa: E402, isort:skip - -feat_size = 15 -spatial_scale = 1.0 / 8 -img_size = feat_size / spatial_scale -num_imgs = 2 -num_rois = 20 - -batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) -rois = np.random.rand(num_rois, 4) * img_size * 0.5 -rois[:, 2:] += img_size * 0.5 -rois = np.hstack((batch_ind, rois)) - -feat = torch.randn( - num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') -rois = torch.from_numpy(rois).float().cuda() -inputs = (feat, rois) -print('Gradcheck for roi align...') -test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) -print(test) -test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) -print(test) diff --git a/mmdet/ops/roi_align/roi_align.py b/mmdet/ops/roi_align/roi_align.py deleted file mode 100644 index 27be883b424..00000000000 --- a/mmdet/ops/roi_align/roi_align.py +++ /dev/null @@ -1,154 +0,0 @@ -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from . 
import roi_align_ext - - -class RoIAlignFunction(Function): - - @staticmethod - def forward(ctx, - features, - rois, - out_size, - spatial_scale, - sample_num=0, - aligned=True): - out_h, out_w = _pair(out_size) - assert isinstance(out_h, int) and isinstance(out_w, int) - ctx.spatial_scale = spatial_scale - ctx.sample_num = sample_num - ctx.save_for_backward(rois) - ctx.feature_size = features.size() - ctx.aligned = aligned - - if aligned: - output = roi_align_ext.forward_v2(features, rois, spatial_scale, - out_h, out_w, sample_num, - aligned) - elif features.is_cuda: - (batch_size, num_channels, data_height, - data_width) = features.size() - num_rois = rois.size(0) - - output = features.new_zeros(num_rois, num_channels, out_h, out_w) - roi_align_ext.forward_v1(features, rois, out_h, out_w, - spatial_scale, sample_num, output) - else: - raise NotImplementedError - - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - feature_size = ctx.feature_size - spatial_scale = ctx.spatial_scale - sample_num = ctx.sample_num - rois = ctx.saved_tensors[0] - aligned = ctx.aligned - assert feature_size is not None - - batch_size, num_channels, data_height, data_width = feature_size - out_w = grad_output.size(3) - out_h = grad_output.size(2) - - grad_input = grad_rois = None - if not aligned: - if ctx.needs_input_grad[0]: - grad_input = rois.new_zeros(batch_size, num_channels, - data_height, data_width) - roi_align_ext.backward_v1(grad_output.contiguous(), rois, - out_h, out_w, spatial_scale, - sample_num, grad_input) - else: - grad_input = roi_align_ext.backward_v2(grad_output, rois, - spatial_scale, out_h, out_w, - batch_size, num_channels, - data_height, data_width, - sample_num, aligned) - - return grad_input, grad_rois, None, None, None, None - - -roi_align = RoIAlignFunction.apply - - -class RoIAlign(nn.Module): - - def __init__(self, - out_size, - spatial_scale, - sample_num=0, - use_torchvision=False, - aligned=True): - """ - Args: - out_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sample_num (int): number of inputs samples to take for each - output sample. 2 to take samples densely for current models. - use_torchvision (bool): whether to use roi_align from torchvision - aligned (bool): if False, use the legacy implementation in - MMDetection. If True, align the results more perfectly. - - Note: - The implementation of RoIAlign when aligned=True is modified from - https://github.com/facebookresearch/detectron2/ - - The meaning of aligned=True: - - Given a continuous coordinate c, its two neighboring pixel - indices (in our pixel model) are computed by floor(c - 0.5) and - ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete - indices [0] and [1] (which are sampled from the underlying signal - at continuous coordinates 0.5 and 1.5). But the original roi_align - (aligned=False) does not subtract the 0.5 when computing - neighboring pixel indices and therefore it uses pixels with a - slightly incorrect alignment (relative to our pixel model) when - performing bilinear interpolation. - - With `aligned=True`, - we first appropriately scale the ROI and then shift it by -0.5 - prior to calling roi_align. This produces the correct neighbors; - - The difference does not make a difference to the model's - performance if ROIAlign is used together with conv layers. 
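The docstring above explains the half-pixel convention behind aligned=True. The use_torchvision branch of this module calls torchvision's roi_align without that flag, but newer torchvision releases also expose an aligned keyword implementing the same convention. The snippet below is a hedged usage sketch, not part of this diff: the aligned kwarg assumes a reasonably recent torchvision, and the tensor sizes are made up. It contrasts the legacy and aligned modes on the same ROI.

import torch
from torchvision.ops import roi_align  # `aligned` kwarg assumes a recent torchvision

feat = torch.randn(1, 8, 32, 32)
# rois as (batch_idx, x1, y1, x2, y2), the same 5-column layout used above
rois = torch.tensor([[0., 4., 4., 20., 20.]])

legacy = roi_align(feat, rois, output_size=7, spatial_scale=0.5,
                   sampling_ratio=2, aligned=False)
exact = roi_align(feat, rois, output_size=7, spatial_scale=0.5,
                  sampling_ratio=2, aligned=True)
print(legacy.shape, (legacy - exact).abs().max())  # same shape, slightly different values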
- """ - super(RoIAlign, self).__init__() - self.out_size = _pair(out_size) - self.spatial_scale = float(spatial_scale) - self.aligned = aligned - self.sample_num = int(sample_num) - self.use_torchvision = use_torchvision - assert not (use_torchvision and - aligned), 'Torchvision does not support aligned RoIAlgin' - - def forward(self, features, rois): - """ - Args: - features: NCHW images - rois: Bx5 boxes. First column is the index into N. The other 4 - columns are xyxy. - """ - assert rois.dim() == 2 and rois.size(1) == 5 - - if self.use_torchvision: - from torchvision.ops import roi_align as tv_roi_align - return tv_roi_align(features, rois, self.out_size, - self.spatial_scale, self.sample_num) - else: - return roi_align(features, rois, self.out_size, self.spatial_scale, - self.sample_num, self.aligned) - - def __repr__(self): - indent_str = '\n ' - format_str = self.__class__.__name__ - format_str += f'({indent_str}out_size={self.out_size},' - format_str += f'{indent_str}spatial_scale={self.spatial_scale},' - format_str += f'{indent_str}sample_num={self.sample_num},' - format_str += f'{indent_str}use_torchvision={self.use_torchvision},' - format_str += f'{indent_str}aligned={self.aligned})' - return format_str diff --git a/mmdet/ops/roi_align/src/cpu/roi_align_v2.cpp b/mmdet/ops/roi_align/src/cpu/roi_align_v2.cpp deleted file mode 100644 index 9e01fe17da0..00000000000 --- a/mmdet/ops/roi_align/src/cpu/roi_align_v2.cpp +++ /dev/null @@ -1,404 +0,0 @@ -// Modified from -// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlign -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include - -// implementation taken from Caffe2 -template -struct PreCalc { - int pos1; - int pos2; - int pos3; - int pos4; - T w1; - T w2; - T w3; - T w4; -}; - -template -void pre_calc_for_bilinear_interpolate( - const int height, const int width, const int pooled_height, - const int pooled_width, const int iy_upper, const int ix_upper, - T roi_start_h, T roi_start_w, T bin_size_h, T bin_size_w, - int roi_bin_grid_h, int roi_bin_grid_w, std::vector>& pre_calc) { - int pre_calc_index = 0; - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - for (int iy = 0; iy < iy_upper; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < ix_upper; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T x = xx; - T y = yy; - // deal with: inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - PreCalc pc; - pc.pos1 = 0; - pc.pos2 = 0; - pc.pos3 = 0; - pc.pos4 = 0; - pc.w1 = 0; - pc.w2 = 0; - pc.w3 = 0; - pc.w4 = 0; - pre_calc[pre_calc_index] = pc; - pre_calc_index += 1; - continue; - } - - if (y <= 0) { - y = 0; - } - if (x <= 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - // save weights and indices - PreCalc pc; - pc.pos1 = y_low * width + x_low; - pc.pos2 = y_low * width + x_high; - pc.pos3 = y_high * width + x_low; - pc.pos4 = y_high * width + x_high; - pc.w1 = w1; - pc.w2 = w2; - pc.w3 = w3; - pc.w4 = w4; - pre_calc[pre_calc_index] = pc; - - pre_calc_index += 1; - } - } - } - } -} - -template -void ROIAlignForward(const int nthreads, const T* input, const T& spatial_scale, - const int channels, const int height, const int width, - const int pooled_height, const int pooled_width, - const int sampling_ratio, const T* rois, T* output, - bool aligned) { - int n_rois = nthreads / channels / pooled_width / pooled_height; - // (n, c, ph, pw) is an element in the pooled output - // can be parallelized using omp - // #pragma omp parallel for num_threads(32) - for (int n = 0; n < n_rois; n++) { - int index_n = n * channels * pooled_width * pooled_height; - - const T* offset_rois = rois + n * 5; - int roi_batch_ind = offset_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? (T)0.5 : (T)0.0; - T roi_start_w = offset_rois[1] * spatial_scale - offset; - T roi_start_h = offset_rois[2] * spatial_scale - offset; - T roi_end_w = offset_rois[3] * spatial_scale - offset; - T roi_end_h = offset_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (aligned) { - AT_ASSERTM(roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlign cannot have non-negative size!"); - } else { // for backward-compatibility only - roi_width = std::max(roi_width, (T)1.); - roi_height = std::max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - // When the grid is empty, output zeros == 0/1, instead of NaN. - const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 - - // we want to precalculate indices and weights shared by all channels, - // this is the key point of optimization - std::vector> pre_calc(roi_bin_grid_h * roi_bin_grid_w * - pooled_width * pooled_height); - pre_calc_for_bilinear_interpolate( - height, width, pooled_height, pooled_width, roi_bin_grid_h, - roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w, - roi_bin_grid_h, roi_bin_grid_w, pre_calc); - - for (int c = 0; c < channels; c++) { - int index_n_c = index_n + c * pooled_width * pooled_height; - const T* offset_input = - input + (roi_batch_ind * channels + c) * height * width; - int pre_calc_index = 0; - - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - int index = index_n_c + ph * pooled_width + pw; - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - PreCalc pc = pre_calc[pre_calc_index]; - output_val += pc.w1 * offset_input[pc.pos1] + - pc.w2 * offset_input[pc.pos2] + - pc.w3 * offset_input[pc.pos3] + - pc.w4 * offset_input[pc.pos4]; - - pre_calc_index += 1; - } - } - output_val /= count; - - output[index] = output_val; - } // for pw - } // for ph - } // for c - } // for n -} - -template -void bilinear_interpolate_gradient(const int height, const int width, T y, T x, - T& w1, T& w2, T& w3, T& w4, int& x_low, - int& x_high, int& y_low, int& y_high, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -inline void add(T* address, const T& val) { - *address += val; -} - -template -void ROIAlignBackward(const int nthreads, const T* grad_output, - const T& spatial_scale, const int channels, - const int height, const int width, - const int pooled_height, const int pooled_width, - const int sampling_ratio, T* grad_input, const T* rois, - const int n_stride, const int c_stride, - const int h_stride, const int w_stride, bool aligned) { - for (int index = 0; index < nthreads; index++) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_rois = rois + n * 5; - int roi_batch_ind = offset_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? 
(T)0.5 : (T)0.0; - T roi_start_w = offset_rois[1] * spatial_scale - offset; - T roi_start_h = offset_rois[2] * spatial_scale - offset; - T roi_end_w = offset_rois[3] * spatial_scale - offset; - T roi_end_h = offset_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (aligned) { - AT_ASSERTM(roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlign do not have non-negative size!"); - } else { // for backward-compatibility only - roi_width = std::max(roi_width, (T)1.); - roi_height = std::max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_grad_input = - grad_input + ((roi_batch_ind * channels + c) * height * width); - - int output_offset = n * n_stride + c * c_stride; - const T* offset_grad_output = grad_output + output_offset; - const T grad_output_this_bin = - offset_grad_output[ph * h_stride + pw * w_stride]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - const T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, - x_low, x_high, y_low, y_high, index); - - T g1 = grad_output_this_bin * w1 / count; - T g2 = grad_output_this_bin * w2 / count; - T g3 = grad_output_this_bin * w3 / count; - T g4 = grad_output_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - // atomic add is not needed for now since it is single threaded - add(offset_grad_input + y_low * width + x_low, static_cast(g1)); - add(offset_grad_input + y_low * width + x_high, static_cast(g2)); - add(offset_grad_input + y_high * width + x_low, static_cast(g3)); - add(offset_grad_input + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // for -} // ROIAlignBackward - -at::Tensor ROIAlignForwardV2CPULaucher(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, bool aligned) { - AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlignForwardV2CPULaucher"; - at::checkAllSameType(c, {input_t, rois_t}); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - at::Tensor output = at::zeros( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - - auto output_size = num_rois * pooled_height * pooled_width * channels; - - if (output.numel() == 0) return output; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "ROIAlign_forward", [&] { - 
ROIAlignForward( - output_size, input.contiguous().data_ptr(), spatial_scale, - channels, height, width, pooled_height, pooled_width, sampling_ratio, - rois.contiguous().data_ptr(), output.data_ptr(), aligned); - }); - return output; -} - -at::Tensor ROIAlignBackwardV2CPULaucher( - const at::Tensor& grad, const at::Tensor& rois, const float spatial_scale, - const int pooled_height, const int pooled_width, const int batch_size, - const int channels, const int height, const int width, - const int sampling_ratio, bool aligned) { - AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlignBackwardV2CPULaucher"; - at::checkAllSameType(c, {grad_t, rois_t}); - - at::Tensor grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - // handle possibly empty gradients - if (grad.numel() == 0) { - return grad_input; - } - - // get stride values to ensure indexing into gradients is correct. - int n_stride = grad.stride(0); - int c_stride = grad.stride(1); - int h_stride = grad.stride(2); - int w_stride = grad.stride(3); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "ROIAlign_backward", [&] { - ROIAlignBackward( - grad.numel(), grad.contiguous().data_ptr(), spatial_scale, - channels, height, width, pooled_height, pooled_width, sampling_ratio, - grad_input.data_ptr(), rois.contiguous().data_ptr(), - n_stride, c_stride, h_stride, w_stride, aligned); - }); - return grad_input; -} diff --git a/mmdet/ops/roi_align/src/cuda/roi_align_kernel.cu b/mmdet/ops/roi_align/src/cuda/roi_align_kernel.cu deleted file mode 100644 index 7afa33229d8..00000000000 --- a/mmdet/ops/roi_align/src/cuda/roi_align_kernel.cu +++ /dev/null @@ -1,283 +0,0 @@ -#include -#include -#include - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -#define THREADS_PER_BLOCK 1024 - -inline int GET_BLOCKS(const int N) { - int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - int max_block_num = 65000; - return min(optimal_block_num, max_block_num); -} - -template -__device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data, - const int height, const int width, - scalar_t y, scalar_t x) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - return 0; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (scalar_t)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (scalar_t)x_low; - } else { - x_high = x_low + 1; - } - - scalar_t ly = y - y_low; - scalar_t lx = x - x_low; - scalar_t hy = 1. - ly; - scalar_t hx = 1. 
- lx; - // do bilinear interpolation - scalar_t lt = bottom_data[y_low * width + x_low]; - scalar_t rt = bottom_data[y_low * width + x_high]; - scalar_t lb = bottom_data[y_high * width + x_low]; - scalar_t rb = bottom_data[y_high * width + x_high]; - scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - scalar_t val = (w1 * lt + w2 * rt + w3 * lb + w4 * rb); - - return val; -} - -template -__global__ void ROIAlignForwardV1( - const int nthreads, const scalar_t *bottom_data, - const scalar_t *bottom_rois, const scalar_t spatial_scale, - const int sample_num, const int channels, const int height, const int width, - const int pooled_height, const int pooled_width, scalar_t *top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the aligned output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const scalar_t *offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale; - scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale; - scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale; - scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale; - - // Force malformed ROIs to be 1x1 - scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.); - scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.); - - scalar_t bin_size_h = roi_height / pooled_height; - scalar_t bin_size_w = roi_width / pooled_width; - - const scalar_t *offset_bottom_data = - bottom_data + (roi_batch_ind * channels + c) * height * width; - - int sample_num_h = (sample_num > 0) - ? sample_num - : ceil(roi_height / pooled_height); // e.g., = 2 - int sample_num_w = - (sample_num > 0) ? 
sample_num : ceil(roi_width / pooled_width); - - scalar_t output_val = 0; - for (int iy = 0; iy < sample_num_h; iy++) { - const scalar_t y = roi_start_h + ph * bin_size_h + - (scalar_t)(iy + scalar_t(.5f)) * bin_size_h / - (scalar_t)(sample_num_h); - for (int ix = 0; ix < sample_num_w; ix++) { - const scalar_t x = roi_start_w + pw * bin_size_w + - (scalar_t)(ix + scalar_t(.5f)) * bin_size_w / - (scalar_t)(sample_num_w); - scalar_t val = bilinear_interpolate(offset_bottom_data, - height, width, y, x); - output_val += val; - } - } - output_val /= (sample_num_h * sample_num_w); - top_data[index] = output_val; - } -} - -int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, - const float spatial_scale, const int sample_num, - const int channels, const int height, - const int width, const int num_rois, - const int pooled_height, const int pooled_width, - at::Tensor output) { - const int output_size = num_rois * pooled_height * pooled_width * channels; - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.scalar_type(), "ROIAlignLaucherForward", ([&] { - const scalar_t *bottom_data = features.data_ptr(); - const scalar_t *rois_data = rois.data_ptr(); - scalar_t *top_data = output.data_ptr(); - - ROIAlignForwardV1 - <<>>( - output_size, bottom_data, rois_data, scalar_t(spatial_scale), - sample_num, channels, height, width, pooled_height, - pooled_width, top_data); - })); - THCudaCheck(cudaGetLastError()); - return 1; -} - -template -__device__ void bilinear_interpolate_gradient(const int height, const int width, - scalar_t y, scalar_t x, - scalar_t &w1, scalar_t &w2, - scalar_t &w3, scalar_t &w4, - int &x_low, int &x_high, - int &y_low, int &y_high) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (scalar_t)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (scalar_t)x_low; - } else { - x_high = x_low + 1; - } - - scalar_t ly = y - y_low; - scalar_t lx = x - x_low; - scalar_t hy = 1. - ly; - scalar_t hx = 1. 
- lx; - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -__global__ void ROIAlignBackwardV1( - const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois, - const scalar_t spatial_scale, const int sample_num, const int channels, - const int height, const int width, const int pooled_height, - const int pooled_width, scalar_t *bottom_diff) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the aligned output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const scalar_t *offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale; - scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale; - scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale; - scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale; - - // Force malformed ROIs to be 1x1 - scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.); - scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.); - - scalar_t bin_size_h = roi_height / pooled_height; - scalar_t bin_size_w = roi_width / pooled_width; - - scalar_t *offset_bottom_diff = - bottom_diff + (roi_batch_ind * channels + c) * height * width; - int offset_top = (n * channels + c) * pooled_height * pooled_width + - ph * pooled_width + pw; - scalar_t offset_top_diff = top_diff[offset_top]; - - int sample_num_h = (sample_num > 0) - ? sample_num - : ceil(roi_height / pooled_height); // e.g., = 2 - int sample_num_w = - (sample_num > 0) ? sample_num : ceil(roi_width / pooled_width); - - const scalar_t count = (scalar_t)(sample_num_h * sample_num_w); - - for (int iy = 0; iy < sample_num_h; iy++) { - const scalar_t y = - roi_start_h + ph * bin_size_h + - (scalar_t)(iy + .5f) * bin_size_h / (scalar_t)(sample_num_h); - for (int ix = 0; ix < sample_num_w; ix++) { - const scalar_t x = - roi_start_w + pw * bin_size_w + - (scalar_t)(ix + .5f) * bin_size_w / (scalar_t)(sample_num_w); - scalar_t w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); - scalar_t g1 = offset_top_diff * w1 / count; - scalar_t g2 = offset_top_diff * w2 / count; - scalar_t g3 = offset_top_diff * w3 / count; - scalar_t g4 = offset_top_diff * w4 / count; - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd(offset_bottom_diff + y_low * width + x_low, g1); - atomicAdd(offset_bottom_diff + y_low * width + x_high, g2); - atomicAdd(offset_bottom_diff + y_high * width + x_low, g3); - atomicAdd(offset_bottom_diff + y_high * width + x_high, g4); - } - } - } - } -} - -int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, - const float spatial_scale, const int sample_num, - const int channels, const int height, - const int width, const int num_rois, - const int pooled_height, const int pooled_width, - at::Tensor bottom_grad) { - const int output_size = num_rois * pooled_height * pooled_width * channels; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.scalar_type(), "ROIAlignLaucherBackward", ([&] { - const scalar_t *top_diff = top_grad.data_ptr(); - const scalar_t *rois_data = rois.data_ptr(); - scalar_t *bottom_diff = bottom_grad.data_ptr(); - if (sizeof(scalar_t) == sizeof(double)) { - fprintf(stderr, 
"double is not supported\n"); - exit(-1); - } - - ROIAlignBackwardV1 - <<>>( - output_size, top_diff, rois_data, spatial_scale, sample_num, - channels, height, width, pooled_height, pooled_width, - bottom_diff); - })); - THCudaCheck(cudaGetLastError()); - return 1; -} diff --git a/mmdet/ops/roi_align/src/cuda/roi_align_kernel_v2.cu b/mmdet/ops/roi_align/src/cuda/roi_align_kernel_v2.cu deleted file mode 100644 index 0189323cd1e..00000000000 --- a/mmdet/ops/roi_align/src/cuda/roi_align_kernel_v2.cu +++ /dev/null @@ -1,348 +0,0 @@ -// Modified from -// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlign -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -#include -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -template -__device__ T bilinear_interpolate(const T* bottom_data, const int height, - const int width, T y, T x, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - return 0; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - // do bilinear interpolation - T v1 = bottom_data[y_low * width + x_low]; - T v2 = bottom_data[y_low * width + x_high]; - T v3 = bottom_data[y_high * width + x_low]; - T v4 = bottom_data[y_high * width + x_high]; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - return val; -} - -template -__global__ void RoIAlignForwardV2( - const int nthreads, const T* bottom_data, const T spatial_scale, - const int channels, const int height, const int width, - const int pooled_height, const int pooled_width, const int sampling_ratio, - const T* bottom_rois, T* top_data, bool aligned) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? 
(T)0.5 : (T)0.0; - T roi_start_w = offset_bottom_rois[1] * spatial_scale - offset; - T roi_start_h = offset_bottom_rois[2] * spatial_scale - offset; - T roi_end_w = offset_bottom_rois[3] * spatial_scale - offset; - T roi_end_h = offset_bottom_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (!aligned) { // for backward-compatibility only - roi_width = max(roi_width, (T)1.); - roi_height = max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - const T* offset_bottom_data = - bottom_data + (roi_batch_ind * channels + c) * height * width; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - // When the grid is empty, output zeros. - const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T val = bilinear_interpolate(offset_bottom_data, height, width, y, x, - index); - output_val += val; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - -template -__device__ void bilinear_interpolate_gradient( - const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4, - int& x_low, int& x_high, int& y_low, int& y_high, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - - // reference in forward - // T v1 = bottom_data[y_low * width + x_low]; - // T v2 = bottom_data[y_low * width + x_high]; - // T v3 = bottom_data[y_high * width + x_low]; - // T v4 = bottom_data[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -__global__ void RoIAlignBackwardFeatureV2( - const int nthreads, const T* top_diff, const int num_rois, - const T spatial_scale, const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, - const int sampling_ratio, T* bottom_diff, const T* bottom_rois, - bool aligned) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? (T)0.5 : (T)0.0; - T roi_start_w = offset_bottom_rois[1] * spatial_scale - offset; - T roi_start_h = offset_bottom_rois[2] * spatial_scale - offset; - T roi_end_w = offset_bottom_rois[3] * spatial_scale - offset; - T roi_end_h = offset_bottom_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (!aligned) { // for backward-compatibility only - roi_width = max(roi_width, (T)1.); - roi_height = max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_bottom_diff = - bottom_diff + (roi_batch_ind * channels + c) * height * width; - - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T* offset_top_diff = top_diff + top_offset; - const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, - x_low, x_high, y_low, y_high, index); - - T g1 = top_diff_this_bin * w1 / count; - T g2 = top_diff_this_bin * w2 / count; - T g3 = top_diff_this_bin * w3 / count; - T g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd(offset_bottom_diff + y_low * width + x_low, - static_cast(g1)); - atomicAdd(offset_bottom_diff + y_low * width + x_high, - static_cast(g2)); - atomicAdd(offset_bottom_diff + y_high * width + x_low, - static_cast(g3)); - atomicAdd(offset_bottom_diff + y_high * width + x_high, - static_cast(g4)); - } // if - } // ix - } // iy - } // CUDA_1D_KERNEL_LOOP -} // RoIAlignBackward - -at::Tensor ROIAlignForwardV2Laucher(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, bool aligned) { - AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_forward_cuda"; - at::checkAllSameGPU(c, {input_t, rois_t}); - at::checkAllSameType(c, {input_t, rois_t}); - at::cuda::CUDAGuard device_guard(input.device()); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, - input.options()); - auto output_size = num_rois * pooled_height * pooled_width * channels; - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast(output_size), static_cast(512)), static_cast(4096))); - dim3 block(512); - - if (output.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return output; - } - - AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_forward", [&] { - RoIAlignForwardV2<<>>( - output_size, input.contiguous().data_ptr(), spatial_scale, - channels, height, width, pooled_height, pooled_width, sampling_ratio, - rois.contiguous().data_ptr(), output.data_ptr(), aligned); - }); - cudaDeviceSynchronize(); - AT_CUDA_CHECK(cudaGetLastError()); - return output; -} - -// TODO remove the dependency on input and use instead its sizes -> save memory -at::Tensor ROIAlignBackwardV2Laucher( - const at::Tensor& grad, const at::Tensor& rois, const float spatial_scale, - const int pooled_height, const int pooled_width, const int batch_size, - const int channels, const int height, const int width, - const int sampling_ratio, bool aligned) { - AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_backward_cuda"; - at::checkAllSameGPU(c, {grad_t, rois_t}); - at::checkAllSameType(c, {grad_t, rois_t}); - at::cuda::CUDAGuard device_guard(grad.device()); - - auto num_rois = rois.size(0); - auto 
grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast(grad.numel()), static_cast(512)), static_cast(4096))); - dim3 block(512); - - // handle possibly empty gradients - if (grad.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; - } - - AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "ROIAlign_backward", [&] { - RoIAlignBackwardFeatureV2<<>>( - grad.numel(), grad.contiguous().data_ptr(), num_rois, - spatial_scale, channels, height, width, pooled_height, pooled_width, - sampling_ratio, grad_input.data_ptr(), - rois.contiguous().data_ptr(), aligned); - }); - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; -} diff --git a/mmdet/ops/roi_align/src/roi_align_ext.cpp b/mmdet/ops/roi_align/src/roi_align_ext.cpp deleted file mode 100644 index 18add01bba2..00000000000 --- a/mmdet/ops/roi_align/src/roi_align_ext.cpp +++ /dev/null @@ -1,168 +0,0 @@ -#include -#include - -#include -#include - -#ifdef WITH_CUDA -int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, - const float spatial_scale, const int sample_num, - const int channels, const int height, - const int width, const int num_rois, - const int pooled_height, const int pooled_width, - at::Tensor output); - -int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, - const float spatial_scale, const int sample_num, - const int channels, const int height, - const int width, const int num_rois, - const int pooled_height, const int pooled_width, - at::Tensor bottom_grad); - -at::Tensor ROIAlignForwardV2Laucher(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, bool aligned); - -at::Tensor ROIAlignBackwardV2Laucher( - const at::Tensor& grad, const at::Tensor& rois, const float spatial_scale, - const int pooled_height, const int pooled_width, const int batch_size, - const int channels, const int height, const int width, - const int sampling_ratio, bool aligned); -#endif - -at::Tensor ROIAlignForwardV2CPULaucher(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, bool aligned); - -at::Tensor ROIAlignBackwardV2CPULaucher( - const at::Tensor& grad, const at::Tensor& rois, const float spatial_scale, - const int pooled_height, const int pooled_width, const int batch_size, - const int channels, const int height, const int width, - const int sampling_ratio, bool aligned); - -#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") -#define CHECK_CONTIGUOUS(x) \ - TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") -#define CHECK_INPUT(x) \ - CHECK_CUDA(x); \ - CHECK_CONTIGUOUS(x) - -int ROIAlign_forwardV1(at::Tensor features, at::Tensor rois, int pooled_height, - int pooled_width, float spatial_scale, int sample_num, - at::Tensor output) { - if (features.device().is_cuda()) { -#ifdef WITH_CUDA - CHECK_INPUT(features); - CHECK_INPUT(rois); - CHECK_INPUT(output); - at::DeviceGuard guard(features.device()); - - // Number of ROIs - int num_rois = rois.size(0); - int size_rois = rois.size(1); - - if (size_rois != 5) { - printf("wrong roi size\n"); - return 0; - } - - int num_channels = features.size(1); - int data_height = features.size(2); - int data_width = features.size(3); - - 
ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, - num_channels, data_height, data_width, num_rois, - pooled_height, pooled_width, output); - - return 1; -#else - AT_ERROR("ROIAlign is not compiled with GPU support"); -#endif - } - AT_ERROR("ROIAlign is not implemented on CPU"); -} - -int ROIAlign_backwardV1(at::Tensor top_grad, at::Tensor rois, int pooled_height, - int pooled_width, float spatial_scale, int sample_num, - at::Tensor bottom_grad) { - if (top_grad.device().is_cuda()) { -#ifdef WITH_CUDA - CHECK_INPUT(top_grad); - CHECK_INPUT(rois); - CHECK_INPUT(bottom_grad); - at::DeviceGuard guard(top_grad.device()); - - // Number of ROIs - int num_rois = rois.size(0); - int size_rois = rois.size(1); - if (size_rois != 5) { - printf("wrong roi size\n"); - return 0; - } - - int num_channels = bottom_grad.size(1); - int data_height = bottom_grad.size(2); - int data_width = bottom_grad.size(3); - - ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, - num_channels, data_height, data_width, num_rois, - pooled_height, pooled_width, bottom_grad); - - return 1; -#else - AT_ERROR("ROIAlign is not compiled with GPU support"); -#endif - } - AT_ERROR("ROIAlign is not implemented on CPU"); -} - -// Interface for Python -inline at::Tensor ROIAlign_forwardV2(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, bool aligned) { - if (input.device().is_cuda()) { -#ifdef WITH_CUDA - return ROIAlignForwardV2Laucher(input, rois, spatial_scale, pooled_height, - pooled_width, sampling_ratio, aligned); -#else - AT_ERROR("ROIAlignV2 is not compiled with GPU support"); -#endif - } - return ROIAlignForwardV2CPULaucher(input, rois, spatial_scale, pooled_height, - pooled_width, sampling_ratio, aligned); -} - -inline at::Tensor ROIAlign_backwardV2( - const at::Tensor& grad, const at::Tensor& rois, const float spatial_scale, - const int pooled_height, const int pooled_width, const int batch_size, - const int channels, const int height, const int width, - const int sampling_ratio, bool aligned) { - if (grad.device().is_cuda()) { -#ifdef WITH_CUDA - return ROIAlignBackwardV2Laucher(grad, rois, spatial_scale, pooled_height, - pooled_width, batch_size, channels, height, - width, sampling_ratio, aligned); -#else - AT_ERROR("ROIAlignV2 is not compiled with GPU support"); -#endif - } - return ROIAlignBackwardV2CPULaucher(grad, rois, spatial_scale, pooled_height, - pooled_width, batch_size, channels, - height, width, sampling_ratio, aligned); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("forward_v1", &ROIAlign_forwardV1, "Roi_Align V1 forward"); - m.def("backward_v1", &ROIAlign_backwardV1, "Roi_Align V1 backward"); - m.def("forward_v2", &ROIAlign_forwardV2, "Roi_Align V2 forward"); - m.def("backward_v2", &ROIAlign_backwardV2, "Roi_Align V2 backward"); -} diff --git a/mmdet/ops/roi_pool/__init__.py b/mmdet/ops/roi_pool/__init__.py deleted file mode 100644 index 9f0474e5939..00000000000 --- a/mmdet/ops/roi_pool/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .roi_pool import RoIPool, roi_pool - -__all__ = ['roi_pool', 'RoIPool'] diff --git a/mmdet/ops/roi_pool/gradcheck.py b/mmdet/ops/roi_pool/gradcheck.py deleted file mode 100644 index d11af790241..00000000000 --- a/mmdet/ops/roi_pool/gradcheck.py +++ /dev/null @@ -1,16 +0,0 @@ -import os.path as osp -import sys - -import torch -from torch.autograd import gradcheck - -sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 
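The deleted gradcheck scripts here and under roi_align/ numerically verify the custom autograd Functions against finite differences. An equivalent check can still be run against the torchvision op that replaces them; the sketch below is an editorial illustration only, with arbitrary sizes, using double precision on CPU to keep gradcheck's finite differences stable.

import torch
from torch.autograd import gradcheck
from torchvision.ops import RoIAlign

feat = torch.randn(2, 4, 15, 15, dtype=torch.double, requires_grad=True)
rois = torch.tensor([[0., 1., 1., 10., 10.],
                     [1., 2., 3., 12., 14.]], dtype=torch.double)
op = RoIAlign(output_size=3, spatial_scale=1.0 / 8, sampling_ratio=2)
print('Gradcheck for roi align...')
print(gradcheck(op, (feat, rois), eps=1e-5, atol=1e-3))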
-from roi_pool import RoIPool # noqa: E402, isort:skip - -feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() -rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], - [1, 67, 40, 110, 120]]).cuda() -inputs = (feat, rois) -print('Gradcheck for roi pooling...') -test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) -print(test) diff --git a/mmdet/ops/roi_pool/roi_pool.py b/mmdet/ops/roi_pool/roi_pool.py deleted file mode 100644 index 13c2708b333..00000000000 --- a/mmdet/ops/roi_pool/roi_pool.py +++ /dev/null @@ -1,75 +0,0 @@ -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from . import roi_pool_ext - - -class RoIPoolFunction(Function): - - @staticmethod - def forward(ctx, features, rois, out_size, spatial_scale): - assert features.is_cuda - out_h, out_w = _pair(out_size) - assert isinstance(out_h, int) and isinstance(out_w, int) - ctx.save_for_backward(rois) - num_channels = features.size(1) - num_rois = rois.size(0) - out_size = (num_rois, num_channels, out_h, out_w) - output = features.new_zeros(out_size) - argmax = features.new_zeros(out_size, dtype=torch.int) - roi_pool_ext.forward(features, rois, out_h, out_w, spatial_scale, - output, argmax) - ctx.spatial_scale = spatial_scale - ctx.feature_size = features.size() - ctx.argmax = argmax - - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - assert grad_output.is_cuda - spatial_scale = ctx.spatial_scale - feature_size = ctx.feature_size - argmax = ctx.argmax - rois = ctx.saved_tensors[0] - assert feature_size is not None - - grad_input = grad_rois = None - if ctx.needs_input_grad[0]: - grad_input = grad_output.new_zeros(feature_size) - roi_pool_ext.backward(grad_output.contiguous(), rois, argmax, - spatial_scale, grad_input) - - return grad_input, grad_rois, None, None - - -roi_pool = RoIPoolFunction.apply - - -class RoIPool(nn.Module): - - def __init__(self, out_size, spatial_scale, use_torchvision=False): - super(RoIPool, self).__init__() - - self.out_size = _pair(out_size) - self.spatial_scale = float(spatial_scale) - self.use_torchvision = use_torchvision - - def forward(self, features, rois): - if self.use_torchvision: - from torchvision.ops import roi_pool as tv_roi_pool - return tv_roi_pool(features, rois, self.out_size, - self.spatial_scale) - else: - return roi_pool(features, rois, self.out_size, self.spatial_scale) - - def __repr__(self): - format_str = self.__class__.__name__ - format_str += f'(out_size={self.out_size}, ' - format_str += f'spatial_scale={self.spatial_scale}, ' - format_str += f'use_torchvision={self.use_torchvision})' - return format_str diff --git a/mmdet/ops/roi_pool/src/cuda/roi_pool_kernel.cu b/mmdet/ops/roi_pool/src/cuda/roi_pool_kernel.cu deleted file mode 100644 index 88fab97fbb4..00000000000 --- a/mmdet/ops/roi_pool/src/cuda/roi_pool_kernel.cu +++ /dev/null @@ -1,151 +0,0 @@ -#include -#include -#include - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -#define THREADS_PER_BLOCK 1024 - -inline int GET_BLOCKS(const int N) { - int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - int max_block_num = 65000; - return min(optimal_block_num, max_block_num); -} - -template -__global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, - const scalar_t *rois, - const scalar_t spatial_scale, 
const int channels, - const int height, const int width, - const int pooled_h, const int pooled_w, - scalar_t *top_data, int *argmax_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_w; - int ph = (index / pooled_w) % pooled_h; - int c = (index / pooled_w / pooled_h) % channels; - int n = index / pooled_w / pooled_h / channels; - - const scalar_t *offset_rois = rois + n * 5; - int roi_batch_ind = offset_rois[0]; - // calculate the roi region on feature maps - scalar_t roi_x1 = offset_rois[1] * spatial_scale; - scalar_t roi_y1 = offset_rois[2] * spatial_scale; - scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale; - scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale; - - // force malformed rois to be 1x1 - scalar_t roi_w = roi_x2 - roi_x1; - scalar_t roi_h = roi_y2 - roi_y1; - if (roi_w <= 0 || roi_h <= 0) continue; - - scalar_t bin_size_w = roi_w / static_cast(pooled_w); - scalar_t bin_size_h = roi_h / static_cast(pooled_h); - - // the corresponding bin region - int bin_x1 = floor(static_cast(pw) * bin_size_w + roi_x1); - int bin_y1 = floor(static_cast(ph) * bin_size_h + roi_y1); - int bin_x2 = ceil(static_cast(pw + 1) * bin_size_w + roi_x1); - int bin_y2 = ceil(static_cast(ph + 1) * bin_size_h + roi_y1); - - // add roi offsets and clip to input boundaries - bin_x1 = min(max(bin_x1, 0), width); - bin_y1 = min(max(bin_y1, 0), height); - bin_x2 = min(max(bin_x2, 0), width); - bin_y2 = min(max(bin_y2, 0), height); - bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1); - - // If nothing is pooled, argmax = -1 causes nothing to be backprop'd - int max_idx = -1; - bottom_data += (roi_batch_ind * channels + c) * height * width; - - // Define an empty pooling region to be zero - scalar_t max_val = is_empty ? 
static_cast(0) - : bottom_data[bin_y1 * width + bin_x1] - 1; - - for (int h = bin_y1; h < bin_y2; ++h) { - for (int w = bin_x1; w < bin_x2; ++w) { - int offset = h * width + w; - if (bottom_data[offset] > max_val) { - max_val = bottom_data[offset]; - max_idx = offset; - } - } - } - top_data[index] = max_val; - if (argmax_data != NULL) argmax_data[index] = max_idx; - } -} - -int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, - const float spatial_scale, const int channels, - const int height, const int width, const int num_rois, - const int pooled_h, const int pooled_w, - at::Tensor output, at::Tensor argmax) { - const int output_size = num_rois * channels * pooled_h * pooled_w; - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.scalar_type(), "ROIPoolLaucherForward", ([&] { - const scalar_t *bottom_data = features.data_ptr(); - const scalar_t *rois_data = rois.data_ptr(); - scalar_t *top_data = output.data_ptr(); - int *argmax_data = argmax.data_ptr(); - - ROIPoolForward<<>>( - output_size, bottom_data, rois_data, scalar_t(spatial_scale), - channels, height, width, pooled_h, pooled_w, top_data, argmax_data); - })); - THCudaCheck(cudaGetLastError()); - return 1; -} -template -__global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff, - const scalar_t *rois, const int *argmax_data, - const scalar_t spatial_scale, - const int channels, const int height, - const int width, const int pooled_h, - const int pooled_w, scalar_t *bottom_diff) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int pw = index % pooled_w; - int ph = (index / pooled_w) % pooled_h; - int c = (index / pooled_w / pooled_h) % channels; - int n = index / pooled_w / pooled_h / channels; - int roi_batch_ind = rois[n * 5]; - int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w + - ph * pooled_w + pw]; - if (bottom_index != -1) { - atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width + - bottom_index, - top_diff[index]); - } - } -} -int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, - const at::Tensor argmax, const float spatial_scale, - const int batch_size, const int channels, - const int height, const int width, - const int num_rois, const int pooled_h, - const int pooled_w, at::Tensor bottom_grad) { - const int output_size = num_rois * pooled_h * pooled_w * channels; - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.scalar_type(), "ROIPoolLaucherBackward", ([&] { - const scalar_t *top_diff = top_grad.data_ptr(); - const scalar_t *rois_data = rois.data_ptr(); - const int *argmax_data = argmax.data_ptr(); - scalar_t *bottom_diff = bottom_grad.data_ptr(); - if (sizeof(scalar_t) == sizeof(double)) { - fprintf(stderr, "double is not supported\n"); - exit(-1); - } - ROIPoolBackward<<>>( - output_size, top_diff, rois_data, argmax_data, - scalar_t(spatial_scale), channels, height, width, pooled_h, - pooled_w, bottom_diff); - })); - THCudaCheck(cudaGetLastError()); - return 1; -} diff --git a/mmdet/ops/roi_pool/src/roi_pool_ext.cpp b/mmdet/ops/roi_pool/src/roi_pool_ext.cpp deleted file mode 100644 index 27d6b8a5d07..00000000000 --- a/mmdet/ops/roi_pool/src/roi_pool_ext.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include - -#include -#include - -#ifdef WITH_CUDA -int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, - const float spatial_scale, const int channels, - const int height, const int width, const int num_rois, - const int pooled_h, const int pooled_w, - at::Tensor output, at::Tensor argmax); - -int 
ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, - const at::Tensor argmax, const float spatial_scale, - const int batch_size, const int channels, - const int height, const int width, - const int num_rois, const int pooled_h, - const int pooled_w, at::Tensor bottom_grad); -#endif - -#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") -#define CHECK_CONTIGUOUS(x) \ - TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") -#define CHECK_INPUT(x) \ - CHECK_CUDA(x); \ - CHECK_CONTIGUOUS(x) - -int roi_pooling_forward(at::Tensor features, at::Tensor rois, - int pooled_height, int pooled_width, - float spatial_scale, at::Tensor output, - at::Tensor argmax) { - if (features.device().is_cuda()) { -#ifdef WITH_CUDA - CHECK_INPUT(features); - CHECK_INPUT(rois); - CHECK_INPUT(output); - CHECK_INPUT(argmax); - at::DeviceGuard guard(features.device()); - - // Number of ROIs - int num_rois = rois.size(0); - int size_rois = rois.size(1); - - if (size_rois != 5) { - printf("wrong roi size\n"); - return 0; - } - - int channels = features.size(1); - int height = features.size(2); - int width = features.size(3); - - ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, - num_rois, pooled_height, pooled_width, output, argmax); - - return 1; -#else - AT_ERROR("roi_pool is not compiled with GPU support"); -#endif - } - AT_ERROR("roi_pool is not implemented on CPU"); -} - -int roi_pooling_backward(at::Tensor top_grad, at::Tensor rois, - at::Tensor argmax, float spatial_scale, - at::Tensor bottom_grad) { - if (top_grad.device().is_cuda()) { -#ifdef WITH_CUDA - CHECK_INPUT(top_grad); - CHECK_INPUT(rois); - CHECK_INPUT(argmax); - CHECK_INPUT(bottom_grad); - at::DeviceGuard guard(top_grad.device()); - - int pooled_height = top_grad.size(2); - int pooled_width = top_grad.size(3); - int num_rois = rois.size(0); - int size_rois = rois.size(1); - - if (size_rois != 5) { - printf("wrong roi size\n"); - return 0; - } - int batch_size = bottom_grad.size(0); - int channels = bottom_grad.size(1); - int height = bottom_grad.size(2); - int width = bottom_grad.size(3); - - ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, - channels, height, width, num_rois, pooled_height, - pooled_width, bottom_grad); - - return 1; -#else - AT_ERROR("roi_pool is not compiled with GPU support"); -#endif - } - AT_ERROR("roi_pool is not implemented on CPU"); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("forward", &roi_pooling_forward, "Roi_Pooling forward"); - m.def("backward", &roi_pooling_backward, "Roi_Pooling backward"); -} diff --git a/mmdet/ops/saconv.py b/mmdet/ops/saconv.py deleted file mode 100644 index fb35be67e0e..00000000000 --- a/mmdet/ops/saconv.py +++ /dev/null @@ -1,126 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from mmcv.cnn import CONV_LAYERS, constant_init - -from .conv_ws import ConvAWS2d -from .dcn import deform_conv - - -@CONV_LAYERS.register_module(name='SAC') -class SAConv2d(ConvAWS2d): - """SAC (Switchable Atrous Convolution) - - This is an implementation of SAC in DetectoRS - (https://arxiv.org/pdf/2006.02334.pdf). - - Args: - in_channels (int): Number of channels in the input image - out_channels (int): Number of channels produced by the convolution - kernel_size (int or tuple): Size of the convolving kernel - stride (int or tuple, optional): Stride of the convolution. Default: 1 - padding (int or tuple, optional): Zero-padding added to both sides of - the input. 
Default: 0 - padding_mode (string, optional): ``'zeros'``, ``'reflect'``, - ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` - dilation (int or tuple, optional): Spacing between kernel elements. - Default: 1 - groups (int, optional): Number of blocked connections from input - channels to output channels. Default: 1 - bias (bool, optional): If ``True``, adds a learnable bias to the - output. Default: ``True`` - use_deform: If ``True``, replace convolution with deformable - convolution. Default: ``False``. - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True, - use_deform=False): - super().__init__( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - bias=bias) - self.use_deform = use_deform - self.switch = nn.Conv2d( - self.in_channels, 1, kernel_size=1, stride=stride, bias=True) - self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size())) - self.pre_context = nn.Conv2d( - self.in_channels, self.in_channels, kernel_size=1, bias=True) - self.post_context = nn.Conv2d( - self.out_channels, self.out_channels, kernel_size=1, bias=True) - if self.use_deform: - self.offset_s = nn.Conv2d( - self.in_channels, - 18, - kernel_size=3, - padding=1, - stride=stride, - bias=True) - self.offset_l = nn.Conv2d( - self.in_channels, - 18, - kernel_size=3, - padding=1, - stride=stride, - bias=True) - self.init_weights() - - def init_weights(self): - constant_init(self.switch, 0, bias=1) - self.weight_diff.data.zero_() - constant_init(self.pre_context, 0) - constant_init(self.post_context, 0) - if self.use_deform: - constant_init(self.offset_s, 0) - constant_init(self.offset_l, 0) - - def forward(self, x): - # pre-context - avg_x = F.adaptive_avg_pool2d(x, output_size=1) - avg_x = self.pre_context(avg_x) - avg_x = avg_x.expand_as(x) - x = x + avg_x - # switch - avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect') - avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0) - switch = self.switch(avg_x) - # sac - weight = self._get_weight(self.weight) - if self.use_deform: - offset = self.offset_s(avg_x) - out_s = deform_conv(x, offset, weight, self.stride, self.padding, - self.dilation, self.groups, 1) - else: - out_s = super().conv2d_forward(x, weight) - ori_p = self.padding - ori_d = self.dilation - self.padding = tuple(3 * p for p in self.padding) - self.dilation = tuple(3 * d for d in self.dilation) - weight = weight + self.weight_diff - if self.use_deform: - offset = self.offset_l(avg_x) - out_l = deform_conv(x, offset, weight, self.stride, self.padding, - self.dilation, self.groups, 1) - else: - out_l = super().conv2d_forward(x, weight) - out = switch * out_s + (1 - switch) * out_l - self.padding = ori_p - self.dilation = ori_d - # post-context - avg_x = F.adaptive_avg_pool2d(out, output_size=1) - avg_x = self.post_context(avg_x) - avg_x = avg_x.expand_as(out) - out = out + avg_x - return out diff --git a/mmdet/ops/sigmoid_focal_loss/__init__.py b/mmdet/ops/sigmoid_focal_loss/__init__.py deleted file mode 100644 index 218032945b2..00000000000 --- a/mmdet/ops/sigmoid_focal_loss/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss - -__all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] diff --git a/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py b/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py deleted file mode 100644 index 0715af38e1e..00000000000 --- 
a/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py +++ /dev/null @@ -1,54 +0,0 @@ -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from . import sigmoid_focal_loss_ext - - -class SigmoidFocalLossFunction(Function): - - @staticmethod - def forward(ctx, input, target, gamma=2.0, alpha=0.25): - ctx.save_for_backward(input, target) - num_classes = input.shape[1] - ctx.num_classes = num_classes - ctx.gamma = gamma - ctx.alpha = alpha - - loss = sigmoid_focal_loss_ext.forward(input, target, num_classes, - gamma, alpha) - return loss - - @staticmethod - @once_differentiable - def backward(ctx, d_loss): - input, target = ctx.saved_tensors - num_classes = ctx.num_classes - gamma = ctx.gamma - alpha = ctx.alpha - d_loss = d_loss.contiguous() - d_input = sigmoid_focal_loss_ext.backward(input, target, d_loss, - num_classes, gamma, alpha) - return d_input, None, None, None, None - - -sigmoid_focal_loss = SigmoidFocalLossFunction.apply - - -# TODO: remove this module -class SigmoidFocalLoss(nn.Module): - - def __init__(self, gamma, alpha): - super(SigmoidFocalLoss, self).__init__() - self.gamma = gamma - self.alpha = alpha - - def forward(self, logits, targets): - assert logits.is_cuda - loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) - return loss.sum() - - def __repr__(self): - tmpstr = self.__class__.__name__ - tmpstr += f'(gamma={self.gamma}, alpha={self.alpha})' - return tmpstr diff --git a/mmdet/ops/sigmoid_focal_loss/src/cuda/sigmoid_focal_loss_cuda.cu b/mmdet/ops/sigmoid_focal_loss/src/cuda/sigmoid_focal_loss_cuda.cu deleted file mode 100644 index 012d01c26b1..00000000000 --- a/mmdet/ops/sigmoid_focal_loss/src/cuda/sigmoid_focal_loss_cuda.cu +++ /dev/null @@ -1,175 +0,0 @@ -// modified from -// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu - -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -// This file is modified from -// https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu -// Cheng-Yang Fu -// cyfu@cs.unc.edu -#include -#include - -#include -#include -#include - -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -template -__global__ void SigmoidFocalLossForward(const int nthreads, - const scalar_t *logits, - const int64_t *targets, - const int num_classes, - const float gamma, const float alpha, - const int num, scalar_t *losses) { - CUDA_1D_KERNEL_LOOP(i, nthreads) { - int n = i / num_classes; - int d = i % num_classes; // current class[0~79]; - int t = targets[n]; // target class [0~79]; - - // Decide it is positive or negative case. - scalar_t c1 = (t == d); - scalar_t c2 = (t >= 0 & t != d); - - scalar_t zn = (1.0 - alpha); - scalar_t zp = (alpha); - - // p = 1. / 1. + expf(-x); p = sigmoid(x) - scalar_t p = 1. / (1. + expf(-logits[i])); - - // (1-p)**gamma * log(p) where - scalar_t term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); - - // p**gamma * log(1-p) - scalar_t term2 = - powf(p, gamma) * - (-1. * logits[i] * (logits[i] >= 0) - - logf(1. + expf(logits[i] - 2. 
* logits[i] * (logits[i] >= 0)))); - - losses[i] = 0.0; - losses[i] += -c1 * term1 * zp; - losses[i] += -c2 * term2 * zn; - - } // CUDA_1D_KERNEL_LOOP -} // SigmoidFocalLossForward - -template -__global__ void SigmoidFocalLossBackward( - const int nthreads, const scalar_t *logits, const int64_t *targets, - const scalar_t *d_losses, const int num_classes, const float gamma, - const float alpha, const int num, scalar_t *d_logits) { - CUDA_1D_KERNEL_LOOP(i, nthreads) { - int n = i / num_classes; - int d = i % num_classes; // current class[0~79]; - int t = targets[n]; // target class [0~79], 80 is background; - - // Decide it is positive or negative case. - scalar_t c1 = (t == d); - scalar_t c2 = (t >= 0 & t != d); - - scalar_t zn = (1.0 - alpha); - scalar_t zp = (alpha); - // p = 1. / 1. + expf(-x); p = sigmoid(x) - scalar_t p = 1. / (1. + expf(-logits[i])); - - // (1-p)**g * (1 - p - g*p*log(p) - scalar_t term1 = - powf((1. - p), gamma) * (1. - p - (p * gamma * logf(max(p, FLT_MIN)))); - - // (p**g) * (g*(1-p)*log(1-p) - p) - scalar_t term2 = - powf(p, gamma) * - ((-1. * logits[i] * (logits[i] >= 0) - - logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) * - (1. - p) * gamma - - p); - d_logits[i] = 0.0; - d_logits[i] += -c1 * term1 * zp; - d_logits[i] += -c2 * term2 * zn; - d_logits[i] = d_logits[i] * d_losses[i]; - - } // CUDA_1D_KERNEL_LOOP -} // SigmoidFocalLossBackward - -at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, - const at::Tensor &targets, - const int num_classes, - const float gamma, const float alpha) { - AT_ASSERTM(logits.device().is_cuda(), "logits must be a CUDA tensor"); - AT_ASSERTM(targets.device().is_cuda(), "targets must be a CUDA tensor"); - AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); - AT_ASSERTM(targets.max().item() <= (long)num_classes, - "target label should smaller or equal than num classes"); - - const int num_samples = logits.size(0); - - auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); - auto losses_size = num_samples * logits.size(1); - - dim3 grid( - std::min(THCCeilDiv((int64_t)losses_size, (int64_t)512), (int64_t)4096)); - dim3 block(512); - - if (losses.numel() == 0) { - THCudaCheck(cudaGetLastError()); - return losses; - } - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - logits.scalar_type(), "SigmoidFocalLoss_forward", [&] { - SigmoidFocalLossForward - <<>>( - losses_size, logits.contiguous().data_ptr(), - targets.contiguous().data_ptr(), num_classes, gamma, - alpha, num_samples, losses.data_ptr()); - }); - THCudaCheck(cudaGetLastError()); - return losses; -} - -at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, - const at::Tensor &targets, - const at::Tensor &d_losses, - const int num_classes, - const float gamma, - const float alpha) { - AT_ASSERTM(logits.device().is_cuda(), "logits must be a CUDA tensor"); - AT_ASSERTM(targets.device().is_cuda(), "targets must be a CUDA tensor"); - AT_ASSERTM(d_losses.device().is_cuda(), "d_losses must be a CUDA tensor"); - - AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); - - const int num_samples = logits.size(0); - AT_ASSERTM(logits.size(1) == num_classes, - "logits.size(1) should be num_classes"); - - auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); - auto d_logits_size = num_samples * logits.size(1); - - dim3 grid(std::min(THCCeilDiv((int64_t)d_logits_size, (int64_t)512), - (int64_t)4096)); - dim3 block(512); - - if (d_logits.numel() == 0) { - THCudaCheck(cudaGetLastError()); - return d_logits; - } 
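The focal-loss kernels being dropped here likewise have a counterpart in mmcv-full. The snippet below is only a sketch: it assumes `sigmoid_focal_loss` from `mmcv.ops`, with the same `gamma`/`alpha` semantics as the CUDA code above, is the replacement, and its exact signature should be verified against the installed mmcv version.

```python
import torch
from mmcv.ops import sigmoid_focal_loss

logits = torch.randn(8, 80, device='cuda', requires_grad=True)  # (N, num_classes)
targets = torch.randint(0, 80, (8, ), device='cuda')            # int64 class indices

# gamma/alpha defaults match the deleted SigmoidFocalLoss wrapper.
loss = sigmoid_focal_loss(logits, targets, gamma=2.0, alpha=0.25)
loss.backward()
```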
- - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - logits.scalar_type(), "SigmoidFocalLoss_backward", [&] { - SigmoidFocalLossBackward - <<>>( - d_logits_size, logits.contiguous().data_ptr(), - targets.contiguous().data_ptr(), - d_losses.contiguous().data_ptr(), num_classes, gamma, - alpha, num_samples, d_logits.data_ptr()); - }); - - THCudaCheck(cudaGetLastError()); - return d_logits; -} diff --git a/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_ext.cpp b/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_ext.cpp deleted file mode 100644 index 3d66f3f8ff8..00000000000 --- a/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_ext.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// modify from -// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h -#include - -#ifdef WITH_CUDA -at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, - const at::Tensor &targets, - const int num_classes, - const float gamma, const float alpha); - -at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, - const at::Tensor &targets, - const at::Tensor &d_losses, - const int num_classes, - const float gamma, const float alpha); -#endif - -// Interface for Python -at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, - const at::Tensor &targets, - const int num_classes, const float gamma, - const float alpha) { - if (logits.device().is_cuda()) { -#ifdef WITH_CUDA - at::DeviceGuard guard(logits.device()); - return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, - alpha); -#else - AT_ERROR("SigmoidFocalLoss is not compiled with GPU support"); -#endif - } - AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); -} - -at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, - const at::Tensor &targets, - const at::Tensor &d_losses, - const int num_classes, const float gamma, - const float alpha) { - if (logits.device().is_cuda()) { -#ifdef WITH_CUDA - at::DeviceGuard guard(logits.device()); - return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, - num_classes, gamma, alpha); -#else - AT_ERROR("SigmoidFocalLoss is not compiled with GPU support"); -#endif - } - AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("forward", &SigmoidFocalLoss_forward, - "SigmoidFocalLoss forward"); - m.def("backward", &SigmoidFocalLoss_backward, - "SigmoidFocalLoss backward"); -} diff --git a/mmdet/ops/utils/__init__.py b/mmdet/ops/utils/__init__.py deleted file mode 100644 index 0244c0f5470..00000000000 --- a/mmdet/ops/utils/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# from . 
import compiling_info -from .compiling_info import get_compiler_version, get_compiling_cuda_version - -# get_compiler_version = compiling_info.get_compiler_version -# get_compiling_cuda_version = compiling_info.get_compiling_cuda_version - -__all__ = ['get_compiler_version', 'get_compiling_cuda_version'] diff --git a/mmdet/ops/utils/src/compiling_info.cpp b/mmdet/ops/utils/src/compiling_info.cpp deleted file mode 100644 index a671805aaf0..00000000000 --- a/mmdet/ops/utils/src/compiling_info.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// modified from -// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp -#include - -#ifdef WITH_CUDA -#include -int get_cudart_version() { return CUDART_VERSION; } -#endif - -std::string get_compiling_cuda_version() { -#ifdef WITH_CUDA - std::ostringstream oss; - - // copied from - // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 - auto printCudaStyleVersion = [&](int v) { - oss << (v / 1000) << "." << (v / 10 % 100); - if (v % 10 != 0) { - oss << "." << (v % 10); - } - }; - printCudaStyleVersion(get_cudart_version()); - return oss.str(); -#else - return std::string("not available"); -#endif -} - -// similar to -// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp -std::string get_compiler_version() { - std::ostringstream ss; -#if defined(__GNUC__) -#ifndef __clang__ - { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } -#endif -#endif - -#if defined(__clang_major__) - { - ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." - << __clang_patchlevel__; - } -#endif - -#if defined(_MSC_VER) - { ss << "MSVC " << _MSC_FULL_VER; } -#endif - return ss.str(); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); - m.def("get_compiling_cuda_version", &get_compiling_cuda_version, - "get_compiling_cuda_version"); -} diff --git a/mmdet/ops/wrappers.py b/mmdet/ops/wrappers.py deleted file mode 100644 index 10109936fc0..00000000000 --- a/mmdet/ops/wrappers.py +++ /dev/null @@ -1,100 +0,0 @@ -r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py # noqa: E501 - -Wrap some nn modules to support empty tensor -input. Currently, these wrappers are mainly used in mask heads like -fcn_mask_head and maskiou_heads since mask heads are trained on only positive -RoIs. -""" -import math - -import torch -import torch.nn as nn -from mmcv.cnn import CONV_LAYERS -from torch.nn.modules.utils import _pair - - -class NewEmptyTensorOp(torch.autograd.Function): - - @staticmethod - def forward(ctx, x, new_shape): - ctx.shape = x.shape - return x.new_empty(new_shape) - - @staticmethod - def backward(ctx, grad): - shape = ctx.shape - return NewEmptyTensorOp.apply(grad, shape), None - - -@CONV_LAYERS.register_module(name='Conv', force=True) -class Conv2d(nn.Conv2d): - - def forward(self, x): - if x.numel() == 0 and torch.__version__ <= '1.4': - out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, - self.padding, self.stride, self.dilation): - o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 - out_shape.append(o) - empty = NewEmptyTensorOp.apply(x, out_shape) - if self.training: - # produce dummy gradient to avoid DDP warning. 
- dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + dummy - else: - return empty - - return super().forward(x) - - -class ConvTranspose2d(nn.ConvTranspose2d): - - def forward(self, x): - if x.numel() == 0 and torch.__version__ <= '1.4.0': - out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size, - self.padding, self.stride, - self.dilation, self.output_padding): - out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) - empty = NewEmptyTensorOp.apply(x, out_shape) - if self.training: - # produce dummy gradient to avoid DDP warning. - dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + dummy - else: - return empty - - return super(ConvTranspose2d, self).forward(x) - - -class MaxPool2d(nn.MaxPool2d): - - def forward(self, x): - if x.numel() == 0 and torch.__version__ <= '1.4': - out_shape = list(x.shape[:2]) - for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size), - _pair(self.padding), _pair(self.stride), - _pair(self.dilation)): - o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 - o = math.ceil(o) if self.ceil_mode else math.floor(o) - out_shape.append(o) - empty = NewEmptyTensorOp.apply(x, out_shape) - return empty - - return super().forward(x) - - -class Linear(torch.nn.Linear): - - def forward(self, x): - if x.numel() == 0: - out_shape = [x.shape[0], self.out_features] - empty = NewEmptyTensorOp.apply(x, out_shape) - if self.training: - # produce dummy gradient to avoid DDP warning. - dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + dummy - else: - return empty - - return super().forward(x) diff --git a/mmdet/utils/collect_env.py b/mmdet/utils/collect_env.py index 053f2403b1d..7bd71a87274 100644 --- a/mmdet/utils/collect_env.py +++ b/mmdet/utils/collect_env.py @@ -53,7 +53,7 @@ def collect_env(): env_info['MMCV'] = mmcv.__version__ env_info['MMDetection'] = mmdet.__version__ - from mmdet.ops import get_compiler_version, get_compiling_cuda_version + from mmcv.ops import get_compiler_version, get_compiling_cuda_version env_info['MMDetection Compiler'] = get_compiler_version() env_info['MMDetection CUDA Compiler'] = get_compiling_cuda_version() return env_info diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 2a66ea699f1..8581152d49c 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,5 +1,5 @@ matplotlib -mmcv==0.6.2 +mmcv>=1.0.2 numpy # need older pillow until torchvision is fixed Pillow<=6.2.2 diff --git a/setup.py b/setup.py index 3724a29b3e7..f67b36beee2 100755 --- a/setup.py +++ b/setup.py @@ -70,7 +70,8 @@ def write_version_py(): sha = get_hash() with open('mmdet/VERSION', 'r') as f: SHORT_VERSION = f.read().strip() - VERSION_INFO = ', '.join(SHORT_VERSION.split('.')) + VERSION_INFO = ', '.join( + [x if x.isdigit() else f'"{x}"' for x in SHORT_VERSION.split('.')]) VERSION = SHORT_VERSION + '+' + sha version_file_str = content.format(time.asctime(), VERSION, SHORT_VERSION, @@ -199,7 +200,6 @@ def gen_packages_items(): keywords='computer vision, object detection', url='https://github.com/open-mmlab/mmdetection', packages=find_packages(exclude=('configs', 'tools', 'demo')), - package_data={'mmdet.ops': ['*/*.so']}, classifiers=[ 'Development Status :: 4 - Beta', 'License :: OSI Approved :: Apache Software License', @@ -219,83 +219,6 @@ def gen_packages_items(): 'build': parse_requirements('requirements/build.txt'), 'optional': parse_requirements('requirements/optional.txt'), }, - ext_modules=[ - 
make_cuda_ext( - name='compiling_info', - module='mmdet.ops.utils', - sources=['src/compiling_info.cpp']), - make_cuda_ext( - name='nms_ext', - module='mmdet.ops.nms', - sources=['src/nms_ext.cpp', 'src/cpu/nms_cpu.cpp'], - sources_cuda=[ - 'src/cuda/nms_cuda.cpp', 'src/cuda/nms_kernel.cu' - ]), - make_cuda_ext( - name='roi_align_ext', - module='mmdet.ops.roi_align', - sources=[ - 'src/roi_align_ext.cpp', - 'src/cpu/roi_align_v2.cpp', - ], - sources_cuda=[ - 'src/cuda/roi_align_kernel.cu', - 'src/cuda/roi_align_kernel_v2.cu' - ]), - make_cuda_ext( - name='roi_pool_ext', - module='mmdet.ops.roi_pool', - sources=['src/roi_pool_ext.cpp'], - sources_cuda=['src/cuda/roi_pool_kernel.cu']), - make_cuda_ext( - name='deform_conv_ext', - module='mmdet.ops.dcn', - sources=['src/deform_conv_ext.cpp'], - sources_cuda=[ - 'src/cuda/deform_conv_cuda.cpp', - 'src/cuda/deform_conv_cuda_kernel.cu' - ]), - make_cuda_ext( - name='deform_pool_ext', - module='mmdet.ops.dcn', - sources=['src/deform_pool_ext.cpp'], - sources_cuda=[ - 'src/cuda/deform_pool_cuda.cpp', - 'src/cuda/deform_pool_cuda_kernel.cu' - ]), - make_cuda_ext( - name='sigmoid_focal_loss_ext', - module='mmdet.ops.sigmoid_focal_loss', - sources=['src/sigmoid_focal_loss_ext.cpp'], - sources_cuda=['src/cuda/sigmoid_focal_loss_cuda.cu']), - make_cuda_ext( - name='masked_conv2d_ext', - module='mmdet.ops.masked_conv', - sources=['src/masked_conv2d_ext.cpp'], - sources_cuda=[ - 'src/cuda/masked_conv2d_cuda.cpp', - 'src/cuda/masked_conv2d_kernel.cu' - ]), - make_cuda_ext( - name='carafe_ext', - module='mmdet.ops.carafe', - sources=['src/carafe_ext.cpp'], - sources_cuda=[ - 'src/cuda/carafe_cuda.cpp', - 'src/cuda/carafe_cuda_kernel.cu' - ]), - make_cuda_ext( - name='carafe_naive_ext', - module='mmdet.ops.carafe', - sources=['src/carafe_naive_ext.cpp'], - sources_cuda=[ - 'src/cuda/carafe_naive_cuda.cpp', - 'src/cuda/carafe_naive_cuda_kernel.cu' - ]), - make_cuda_ext( - name='corner_pool_ext', - module='mmdet.ops.corner_pool', - sources=['src/corner_pool.cpp']), - ], + ext_modules=[], cmdclass={'build_ext': BuildExtension}, zip_safe=False) diff --git a/tests/test_config.py b/tests/test_config.py index 91cbe81b3c2..bbcf4493206 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -274,8 +274,8 @@ def _check_roi_extractor(config, roi_extractor, prev_roi_extractor=None): assert (len(config.featmap_strides) == len(roi_extractor.roi_layers)) assert (config.out_channels == roi_extractor.out_channels) from torch.nn.modules.utils import _pair - assert (_pair( - config.roi_layer.out_size) == roi_extractor.roi_layers[0].out_size) + assert (_pair(config.roi_layer.output_size) == + roi_extractor.roi_layers[0].output_size) if 'use_torchvision' in config.roi_layer: assert (config.roi_layer.use_torchvision == diff --git a/tests/test_models/test_backbones.py b/tests/test_models/test_backbones.py index feb08858dcb..d8bf69e6596 100644 --- a/tests/test_models/test_backbones.py +++ b/tests/test_models/test_backbones.py @@ -1,5 +1,6 @@ import pytest import torch +from mmcv.ops import DeformConv2dPack from torch.nn.modules import AvgPool2d, GroupNorm from torch.nn.modules.batchnorm import _BatchNorm @@ -9,7 +10,6 @@ from mmdet.models.backbones.resnet import BasicBlock, Bottleneck from mmdet.models.backbones.resnext import Bottleneck as BottleneckX from mmdet.models.utils import ResLayer -from mmdet.ops import DeformConvPack def is_block(modules): @@ -52,7 +52,7 @@ def test_resnet_basic_block(): with pytest.raises(AssertionError): # Not implemented yet. 
- dcn = dict(type='DCN', deformable_groups=1, fallback_on_stride=False) + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) BasicBlock(64, 64, dcn=dcn) with pytest.raises(AssertionError): @@ -146,11 +146,11 @@ def test_resnet_bottleneck(): assert block.conv2.stride == (1, 1) # Test Bottleneck DCN - dcn = dict(type='DCN', deformable_groups=1, fallback_on_stride=False) + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) with pytest.raises(AssertionError): Bottleneck(64, 64, dcn=dcn, conv_cfg=dict(type='Conv')) block = Bottleneck(64, 64, dcn=dcn) - assert isinstance(block.conv2, DeformConvPack) + assert isinstance(block.conv2, DeformConv2dPack) # Test Bottleneck forward block = Bottleneck(64, 16) @@ -198,7 +198,7 @@ def test_resnet_bottleneck(): attention_type='0010', kv_stride=2), position='after_conv2'), - dict(cfg=dict(type='NonLocal2D'), position='after_conv2'), + dict(cfg=dict(type='NonLocal2d'), position='after_conv2'), dict( cfg=dict(type='ContextBlock', ratio=1. / 16), position='after_conv3') @@ -301,7 +301,7 @@ def test_resnet_backbone(): with pytest.raises(AssertionError): # len(stage_with_dcn) == num_stages - dcn = dict(type='DCN', deformable_groups=1, fallback_on_stride=False) + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) ResNet(50, dcn=dcn, stage_with_dcn=(True, )) with pytest.raises(AssertionError): @@ -468,7 +468,7 @@ def test_resnet_backbone(): kv_stride=2), stages=(False, True, True, True), position='after_conv2'), - dict(cfg=dict(type='NonLocal2D'), position='after_conv2'), + dict(cfg=dict(type='NonLocal2d'), position='after_conv2'), dict( cfg=dict(type='ContextBlock', ratio=1. / 16), stages=(False, True, True, False), @@ -632,7 +632,7 @@ def test_renext_bottleneck(): assert block.conv2.out_channels == 128 # Test ResNeXt Bottleneck with DCN - dcn = dict(type='DCN', deformable_groups=1, fallback_on_stride=False) + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) with pytest.raises(AssertionError): # conv_cfg must be None if dcn is not None BottleneckX( @@ -743,7 +743,7 @@ def test_res2net_bottle2neck(): assert block.scales == 4 # Test Res2Net Bottle2neck with DCN - dcn = dict(type='DCN', deformable_groups=1, fallback_on_stride=False) + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) with pytest.raises(AssertionError): # conv_cfg must be None if dcn is not None Bottle2neck( diff --git a/tests/test_models/test_pisa_heads.py b/tests/test_models/test_pisa_heads.py index c085147c998..6b1d42db49c 100644 --- a/tests/test_models/test_pisa_heads.py +++ b/tests/test_models/test_pisa_heads.py @@ -174,7 +174,7 @@ def test_pisa_roi_head_loss(): bbox_roi_extractor = dict( type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=1, featmap_strides=[1]) diff --git a/tests/test_models/test_roi_extractor.py b/tests/test_models/test_roi_extractor.py index a523edb4875..22743f2d3be 100644 --- a/tests/test_models/test_roi_extractor.py +++ b/tests/test_models/test_roi_extractor.py @@ -7,7 +7,7 @@ def test_groie(): # test with pre/post cfg = dict( - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32], pre_cfg=dict( @@ -42,7 +42,7 @@ def test_groie(): # test w.o. 
pre/post cfg = dict( - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=256, featmap_strides=[4, 8, 16, 32]) @@ -63,7 +63,7 @@ def test_groie(): # test w.o. pre/post concat cfg = dict( aggregation='concat', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=256 * 4, featmap_strides=[4, 8, 16, 32]) @@ -85,7 +85,7 @@ def test_groie(): with pytest.raises(AssertionError): cfg = dict( aggregation='not support', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=1024, featmap_strides=[4, 8, 16, 32]) _ = GenericRoIExtractor(**cfg) @@ -93,7 +93,7 @@ def test_groie(): # test concat channels number cfg = dict( aggregation='concat', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), out_channels=256 * 5, # 256*5 != 256*4 featmap_strides=[4, 8, 16, 32]) diff --git a/tests/test_ops/test_corner_pool.py b/tests/test_ops/test_corner_pool.py deleted file mode 100644 index cb84acf0d79..00000000000 --- a/tests/test_ops/test_corner_pool.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -CommandLine: - pytest tests/test_corner_pool.py -""" -import pytest -import torch - -from mmdet.ops import CornerPool - - -def test_corner_pool_device_and_dtypes_cpu(): - """ - CommandLine: - xdoctest -m tests/test_corner_pool.py \ - test_corner_pool_device_and_dtypes_cpu - """ - with pytest.raises(AssertionError): - # pool mode must in ['bottom', 'left', 'right', 'top'] - pool = CornerPool('corner') - - lr_tensor = torch.tensor([[[[0, 0, 0, 0, 0], [2, 1, 3, 0, 2], - [5, 4, 1, 1, 6], [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]]]]) - tb_tensor = torch.tensor([[[[0, 3, 1, 0, 0], [0, 1, 1, 0, 0], - [0, 3, 4, 0, 0], [0, 2, 2, 0, 0], - [0, 0, 2, 0, 0]]]]) - # Left Pool - left_answer = torch.tensor([[[[0, 0, 0, 0, 0], [3, 3, 3, 2, 2], - [6, 6, 6, 6, 6], [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]]]]) - pool = CornerPool('left') - left_tensor = pool(lr_tensor) - assert left_tensor.type() == lr_tensor.type() - assert torch.equal(left_tensor, left_answer) - # Right Pool - right_answer = torch.tensor([[[[0, 0, 0, 0, 0], [2, 2, 3, 3, 3], - [5, 5, 5, 5, 6], [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]]]]) - pool = CornerPool('right') - right_tensor = pool(lr_tensor) - assert right_tensor.type() == lr_tensor.type() - assert torch.equal(right_tensor, right_answer) - # Top Pool - top_answer = torch.tensor([[[[0, 3, 4, 0, 0], [0, 3, 4, 0, 0], - [0, 3, 4, 0, 0], [0, 2, 2, 0, 0], - [0, 0, 2, 0, 0]]]]) - pool = CornerPool('top') - top_tensor = pool(tb_tensor) - assert top_tensor.type() == tb_tensor.type() - assert torch.equal(top_tensor, top_answer) - # Bottom Pool - bottom_answer = torch.tensor([[[[0, 3, 1, 0, 0], [0, 3, 1, 0, 0], - [0, 3, 4, 0, 0], [0, 3, 4, 0, 0], - [0, 3, 4, 0, 0]]]]) - pool = CornerPool('bottom') - bottom_tensor = pool(tb_tensor) - assert bottom_tensor.type() == tb_tensor.type() - assert torch.equal(bottom_tensor, bottom_answer) diff --git a/tests/test_ops/test_merge_cells.py b/tests/test_ops/test_merge_cells.py deleted file mode 100644 index 25e76ee7a20..00000000000 --- a/tests/test_ops/test_merge_cells.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -CommandLine: - pytest tests/test_merge_cells.py -""" -import torch -import torch.nn.functional as F - -from mmdet.ops.merge_cells import (BaseMergeCell, ConcatCell, - 
GlobalPoolingCell, SumCell) - - -def test_sum_cell(): - inputs_x = torch.randn([2, 256, 32, 32]) - inputs_y = torch.randn([2, 256, 16, 16]) - sum_cell = SumCell(256, 256) - output = sum_cell(inputs_x, inputs_y, out_size=inputs_x.shape[-2:]) - assert output.size() == inputs_x.size() - output = sum_cell(inputs_x, inputs_y, out_size=inputs_y.shape[-2:]) - assert output.size() == inputs_y.size() - output = sum_cell(inputs_x, inputs_y) - assert output.size() == inputs_x.size() - - -def test_concat_cell(): - inputs_x = torch.randn([2, 256, 32, 32]) - inputs_y = torch.randn([2, 256, 16, 16]) - concat_cell = ConcatCell(256, 256) - output = concat_cell(inputs_x, inputs_y, out_size=inputs_x.shape[-2:]) - assert output.size() == inputs_x.size() - output = concat_cell(inputs_x, inputs_y, out_size=inputs_y.shape[-2:]) - assert output.size() == inputs_y.size() - output = concat_cell(inputs_x, inputs_y) - assert output.size() == inputs_x.size() - - -def test_global_pool_cell(): - inputs_x = torch.randn([2, 256, 32, 32]) - inputs_y = torch.randn([2, 256, 32, 32]) - gp_cell = GlobalPoolingCell(with_out_conv=False) - gp_cell_out = gp_cell(inputs_x, inputs_y, out_size=inputs_x.shape[-2:]) - assert (gp_cell_out.size() == inputs_x.size()) - gp_cell = GlobalPoolingCell(256, 256) - gp_cell_out = gp_cell(inputs_x, inputs_y, out_size=inputs_x.shape[-2:]) - assert (gp_cell_out.size() == inputs_x.size()) - - -def test_resize_methods(): - inputs_x = torch.randn([2, 256, 128, 128]) - target_resize_sizes = [(128, 128), (256, 256)] - resize_methods_list = ['nearest', 'bilinear'] - - for method in resize_methods_list: - merge_cell = BaseMergeCell(upsample_mode=method) - for target_size in target_resize_sizes: - merge_cell_out = merge_cell._resize(inputs_x, target_size) - gt_out = F.interpolate(inputs_x, size=target_size, mode=method) - assert merge_cell_out.equal(gt_out) - - target_size = (64, 64) # resize to a smaller size - merge_cell = BaseMergeCell() - merge_cell_out = merge_cell._resize(inputs_x, target_size) - kernel_size = inputs_x.shape[-1] // target_size[-1] - gt_out = F.max_pool2d( - inputs_x, kernel_size=kernel_size, stride=kernel_size) - assert (merge_cell_out == gt_out).all() diff --git a/tests/test_ops/test_nms.py b/tests/test_ops/test_nms.py deleted file mode 100644 index a9063149309..00000000000 --- a/tests/test_ops/test_nms.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -CommandLine: - pytest tests/test_nms.py -""" -import numpy as np -import pytest -import torch - -from mmdet.ops.nms.nms_wrapper import nms, nms_match - - -def test_nms_device_and_dtypes_cpu(): - """ - CommandLine: - xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_cpu - """ - iou_thr = 0.6 - base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.1], - [49.3, 32.9, 51.0, 35.3, 0.05], - [35.3, 11.5, 39.9, 14.5, 0.9], - [35.2, 11.7, 39.7, 15.7, 0.3]]) - - base_expected_suppressed = np.array([[35.3, 11.5, 39.9, 14.5, 0.9], - [49.1, 32.4, 51.0, 35.9, 0.1]]) - # CPU can handle float32 and float64 - dets = base_dets.astype(np.float32) - expected_suppressed = base_expected_suppressed.astype(np.float32) - suppressed, inds = nms(dets, iou_thr) - assert dets.dtype == suppressed.dtype - assert np.array_equal(suppressed, expected_suppressed) - - dets = torch.FloatTensor(base_dets) - expected_suppressed = torch.FloatTensor(base_expected_suppressed) - suppressed, inds = nms(dets, iou_thr) - assert dets.dtype == suppressed.dtype - assert torch.equal(suppressed, expected_suppressed) - - dets = base_dets.astype(np.float64) - expected_suppressed = 
base_expected_suppressed.astype(np.float64) - suppressed, inds = nms(dets, iou_thr) - assert dets.dtype == suppressed.dtype - assert np.array_equal(suppressed, expected_suppressed) - - dets = torch.DoubleTensor(base_dets) - expected_suppressed = torch.DoubleTensor(base_expected_suppressed) - suppressed, inds = nms(dets, iou_thr) - assert dets.dtype == suppressed.dtype - assert torch.equal(suppressed, expected_suppressed) - - -def test_nms_device_and_dtypes_gpu(): - """ - CommandLine: - xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_gpu - """ - if not torch.cuda.is_available(): - import pytest - pytest.skip('test requires GPU and torch+cuda') - - iou_thr = 0.6 - base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.1], - [49.3, 32.9, 51.0, 35.3, 0.05], - [35.3, 11.5, 39.9, 14.5, 0.9], - [35.2, 11.7, 39.7, 15.7, 0.3]]) - - base_expected_suppressed = np.array([[35.3, 11.5, 39.9, 14.5, 0.9], - [49.1, 32.4, 51.0, 35.9, 0.1]]) - - for device_id in range(torch.cuda.device_count()): - print(f'Run NMS on device_id = {device_id!r}') - # GPU can handle float32 but not float64 - dets = base_dets.astype(np.float32) - expected_suppressed = base_expected_suppressed.astype(np.float32) - suppressed, inds = nms(dets, iou_thr, device_id) - assert dets.dtype == suppressed.dtype - assert np.array_equal(suppressed, expected_suppressed) - - dets = torch.FloatTensor(base_dets).to(device_id) - expected_suppressed = torch.FloatTensor(base_expected_suppressed).to( - device_id) - suppressed, inds = nms(dets, iou_thr) - assert dets.dtype == suppressed.dtype - assert torch.equal(suppressed, expected_suppressed) - - -def test_nms_match(): - iou_thr = 0.6 - # empty input - empty_dets = np.array([]) - assert len(nms_match(empty_dets, iou_thr)) == 0 - - # non empty ndarray input - np_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], - [49.3, 32.9, 51.0, 35.3, 0.9], - [35.3, 11.5, 39.9, 14.5, 0.4], - [35.2, 11.7, 39.7, 15.7, 0.3]]) - np_groups = nms_match(np_dets, iou_thr) - assert isinstance(np_groups[0], np.ndarray) - assert len(np_groups) == 2 - nms_keep_inds = nms(np_dets, iou_thr)[1] - assert set([g[0].item() for g in np_groups]) == set(nms_keep_inds.tolist()) - - # non empty tensor input - tensor_dets = torch.from_numpy(np_dets) - tensor_groups = nms_match(tensor_dets, iou_thr) - assert isinstance(tensor_groups[0], torch.Tensor) - for i in range(len(tensor_groups)): - assert np.equal(tensor_groups[i].numpy(), np_groups[i]).all() - - # input of wrong shape - wrong_dets = np.zeros((2, 3)) - with pytest.raises(AssertionError): - nms_match(wrong_dets, iou_thr) diff --git a/tests/test_ops/test_soft_nms.py b/tests/test_ops/test_soft_nms.py deleted file mode 100644 index 58503eaaac2..00000000000 --- a/tests/test_ops/test_soft_nms.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -CommandLine: - pytest tests/test_soft_nms.py -""" -import numpy as np -import torch - -from mmdet.ops.nms.nms_wrapper import soft_nms - - -def test_soft_nms_device_and_dtypes_cpu(): - """ - CommandLine: - xdoctest -m tests/test_soft_nms.py test_soft_nms_device_and_dtypes_cpu - """ - iou_thr = 0.7 - base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], - [49.3, 32.9, 51.0, 35.3, 0.9], - [35.3, 11.5, 39.9, 14.5, 0.4], - [35.2, 11.7, 39.7, 15.7, 0.3]]) - - # CPU can handle float32 and float64 - dets = base_dets.astype(np.float32) - new_dets, inds = soft_nms(dets, iou_thr) - assert dets.dtype == new_dets.dtype - assert len(inds) == len(new_dets) == 4 - - dets = torch.FloatTensor(base_dets) - new_dets, inds = soft_nms(dets, iou_thr) - assert dets.dtype == 
new_dets.dtype - assert len(inds) == len(new_dets) == 4 - - dets = base_dets.astype(np.float64) - new_dets, inds = soft_nms(dets, iou_thr) - assert dets.dtype == new_dets.dtype - assert len(inds) == len(new_dets) == 4 - - dets = torch.DoubleTensor(base_dets) - new_dets, inds = soft_nms(dets, iou_thr) - assert dets.dtype == new_dets.dtype - assert len(inds) == len(new_dets) == 4 diff --git a/tests/test_ops/test_wrappers.py b/tests/test_ops/test_wrappers.py deleted file mode 100644 index 1ae38f70478..00000000000 --- a/tests/test_ops/test_wrappers.py +++ /dev/null @@ -1,198 +0,0 @@ -from collections import OrderedDict -from itertools import product -from unittest.mock import patch - -import torch -import torch.nn as nn - -from mmdet.ops import Conv2d, ConvTranspose2d, Linear, MaxPool2d - -torch.__version__ = '1.1' # force test - - -def test_conv2d(): - """ - CommandLine: - xdoctest -m tests/test_wrappers.py test_conv2d - """ - - test_cases = OrderedDict([('in_w', [10, 20]), ('in_h', [10, 20]), - ('in_channel', [1, 3]), ('out_channel', [1, 3]), - ('kernel_size', [3, 5]), ('stride', [1, 2]), - ('padding', [0, 1]), ('dilation', [1, 2])]) - - # train mode - for in_h, in_w, in_cha, out_cha, k, s, p, d in product( - *list(test_cases.values())): - # wrapper op with 0-dim input - x_empty = torch.randn(0, in_cha, in_h, in_w) - torch.manual_seed(0) - wrapper = Conv2d(in_cha, out_cha, k, stride=s, padding=p, dilation=d) - wrapper_out = wrapper(x_empty) - - # torch op with 3-dim input as shape reference - x_normal = torch.randn(3, in_cha, in_h, in_w).requires_grad_(True) - torch.manual_seed(0) - ref = nn.Conv2d(in_cha, out_cha, k, stride=s, padding=p, dilation=d) - ref_out = ref(x_normal) - - assert wrapper_out.shape[0] == 0 - assert wrapper_out.shape[1:] == ref_out.shape[1:] - - wrapper_out.sum().backward() - assert wrapper.weight.grad is not None - assert wrapper.weight.grad.shape == wrapper.weight.shape - - assert torch.equal(wrapper(x_normal), ref_out) - - # eval mode - x_empty = torch.randn(0, in_cha, in_h, in_w) - wrapper = Conv2d(in_cha, out_cha, k, stride=s, padding=p, dilation=d) - wrapper.eval() - wrapper(x_empty) - - -def test_conv_transposed_2d(): - test_cases = OrderedDict([('in_w', [10, 20]), ('in_h', [10, 20]), - ('in_channel', [1, 3]), ('out_channel', [1, 3]), - ('kernel_size', [3, 5]), ('stride', [1, 2]), - ('padding', [0, 1]), ('dilation', [1, 2])]) - - for in_h, in_w, in_cha, out_cha, k, s, p, d in product( - *list(test_cases.values())): - # wrapper op with 0-dim input - x_empty = torch.randn(0, in_cha, in_h, in_w, requires_grad=True) - # out padding must be smaller than either stride or dilation - op = min(s, d) - 1 - torch.manual_seed(0) - wrapper = ConvTranspose2d( - in_cha, - out_cha, - k, - stride=s, - padding=p, - dilation=d, - output_padding=op) - wrapper_out = wrapper(x_empty) - - # torch op with 3-dim input as shape reference - x_normal = torch.randn(3, in_cha, in_h, in_w) - torch.manual_seed(0) - ref = nn.ConvTranspose2d( - in_cha, - out_cha, - k, - stride=s, - padding=p, - dilation=d, - output_padding=op) - ref_out = ref(x_normal) - - assert wrapper_out.shape[0] == 0 - assert wrapper_out.shape[1:] == ref_out.shape[1:] - - wrapper_out.sum().backward() - assert wrapper.weight.grad is not None - assert wrapper.weight.grad.shape == wrapper.weight.shape - - assert torch.equal(wrapper(x_normal), ref_out) - - # eval mode - x_empty = torch.randn(0, in_cha, in_h, in_w) - wrapper = ConvTranspose2d( - in_cha, out_cha, k, stride=s, padding=p, dilation=d, output_padding=op) - 
wrapper.eval() - wrapper(x_empty) - - -def test_max_pool_2d(): - test_cases = OrderedDict([('in_w', [10, 20]), ('in_h', [10, 20]), - ('in_channel', [1, 3]), ('out_channel', [1, 3]), - ('kernel_size', [3, 5]), ('stride', [1, 2]), - ('padding', [0, 1]), ('dilation', [1, 2])]) - - for in_h, in_w, in_cha, out_cha, k, s, p, d in product( - *list(test_cases.values())): - # wrapper op with 0-dim input - x_empty = torch.randn(0, in_cha, in_h, in_w, requires_grad=True) - wrapper = MaxPool2d(k, stride=s, padding=p, dilation=d) - wrapper_out = wrapper(x_empty) - - # torch op with 3-dim input as shape reference - x_normal = torch.randn(3, in_cha, in_h, in_w) - ref = nn.MaxPool2d(k, stride=s, padding=p, dilation=d) - ref_out = ref(x_normal) - - assert wrapper_out.shape[0] == 0 - assert wrapper_out.shape[1:] == ref_out.shape[1:] - - assert torch.equal(wrapper(x_normal), ref_out) - - -def test_linear(): - test_cases = OrderedDict([ - ('in_w', [10, 20]), - ('in_h', [10, 20]), - ('in_feature', [1, 3]), - ('out_feature', [1, 3]), - ]) - - for in_h, in_w, in_feature, out_feature in product( - *list(test_cases.values())): - # wrapper op with 0-dim input - x_empty = torch.randn(0, in_feature, requires_grad=True) - torch.manual_seed(0) - wrapper = Linear(in_feature, out_feature) - wrapper_out = wrapper(x_empty) - - # torch op with 3-dim input as shape reference - x_normal = torch.randn(3, in_feature) - torch.manual_seed(0) - ref = nn.Linear(in_feature, out_feature) - ref_out = ref(x_normal) - - assert wrapper_out.shape[0] == 0 - assert wrapper_out.shape[1:] == ref_out.shape[1:] - - wrapper_out.sum().backward() - assert wrapper.weight.grad is not None - assert wrapper.weight.grad.shape == wrapper.weight.shape - - assert torch.equal(wrapper(x_normal), ref_out) - - # eval mode - x_empty = torch.randn(0, in_feature) - wrapper = Linear(in_feature, out_feature) - wrapper.eval() - wrapper(x_empty) - - -def test_nn_op_forward_called(): - torch.__version__ = '1.4.1' - - for m in ['Conv2d', 'ConvTranspose2d', 'MaxPool2d']: - with patch(f'torch.nn.{m}.forward') as nn_module_forward: - # randn input - x_empty = torch.randn(0, 3, 10, 10) - wrapper = eval(m)(3, 2, 1) - wrapper(x_empty) - nn_module_forward.assert_called_with(x_empty) - - # non-randn input - x_normal = torch.randn(1, 3, 10, 10) - wrapper = eval(m)(3, 2, 1) - wrapper(x_normal) - nn_module_forward.assert_called_with(x_normal) - - with patch('torch.nn.Linear.forward') as nn_module_forward: - # randn input - x_empty = torch.randn(0, 3) - wrapper = Linear(3, 3) - wrapper(x_empty) - nn_module_forward.assert_not_called() - - # non-randn input - x_normal = torch.randn(1, 3) - wrapper = Linear(3, 3) - wrapper(x_normal) - nn_module_forward.assert_called_with(x_normal) diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py index 0425b280a64..4a251be3451 100644 --- a/tools/pytorch2onnx.py +++ b/tools/pytorch2onnx.py @@ -4,12 +4,12 @@ import mmcv import onnx import torch +from mmcv.ops import RoIAlign, RoIPool from mmcv.runner import load_checkpoint from onnx import optimizer from torch.onnx import OperatorExportTypes from mmdet.models import build_detector -from mmdet.ops import RoIAlign, RoIPool def export_onnx_model(model, inputs, passes):
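Taken together, these hunks finish moving every compiled op out of mmdet and onto mmcv-full: the extensions and their tests are deleted, `setup.py` no longer builds `ext_modules`, and callers such as `mmdet/utils/collect_env.py` and `tools/pytorch2onnx.py` import from `mmcv.ops` instead. A quick smoke test of the new import paths, assuming mmcv-full is installed per the updated requirements:

```python
# The compiled-op helpers that used to live in mmdet.ops now come from mmcv.ops,
# mirroring the collect_env.py and pytorch2onnx.py changes above.
from mmcv.ops import (RoIAlign, RoIPool,  # noqa: F401
                      get_compiler_version, get_compiling_cuda_version)

print(get_compiler_version())        # e.g. 'GCC 7.5'
print(get_compiling_cuda_version())  # e.g. '10.1', or 'not available' on CPU-only builds
```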