From 7b9098042eb9914839b3016dfd03e52f113181e0 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 2 Sep 2018 22:40:43 +0800 Subject: [PATCH 01/81] set up the codebase skeleton (WIP) --- .gitignore | 3 + compile.sh | 22 ++ mmdet/__init__.py | 1 + mmdet/core/__init__.py | 6 + mmdet/core/anchor_generator.py | 80 +++++ mmdet/core/bbox_ops/__init__.py | 12 + mmdet/core/bbox_ops/geometry.py | 63 ++++ mmdet/core/bbox_ops/sampling.py | 255 ++++++++++++++ mmdet/core/bbox_ops/transforms.py | 128 +++++++ mmdet/core/eval/__init__.py | 13 + mmdet/core/eval/bbox_overlaps.py | 49 +++ mmdet/core/eval/class_names.py | 103 ++++++ mmdet/core/eval/mean_ap.py | 372 ++++++++++++++++++++ mmdet/core/eval/recall.py | 185 ++++++++++ mmdet/core/hooks.py | 246 +++++++++++++ mmdet/core/mask_ops/__init__.py | 10 + mmdet/core/mask_ops/segms.py | 271 ++++++++++++++ mmdet/core/mask_ops/utils.py | 35 ++ mmdet/core/post_processing/__init__.py | 8 + mmdet/core/post_processing/bbox_nms.py | 54 +++ mmdet/core/post_processing/merge_augs.py | 96 +++++ mmdet/core/targets/__init__.py | 5 + mmdet/core/targets/anchor_target.py | 2 + mmdet/core/targets/bbox_target.py | 2 + mmdet/core/targets/mask_target.py | 2 + mmdet/datasets/__init__.py | 4 + mmdet/datasets/coco.py | 288 +++++++++++++++ mmdet/datasets/collate.py | 57 +++ mmdet/datasets/sampler.py | 134 +++++++ mmdet/datasets/transforms.py | 208 +++++++++++ mmdet/datasets/utils/__init__.py | 2 + mmdet/datasets/utils/data_container.py | 80 +++++ mmdet/datasets/utils/misc.py | 62 ++++ mmdet/models/__init__.py | 0 mmdet/models/backbones/__init__.py | 1 + mmdet/models/backbones/resnet.py | 325 +++++++++++++++++ mmdet/models/bbox_heads/__init__.py | 3 + mmdet/models/bbox_heads/bbox_head.py | 123 +++++++ mmdet/models/builder.py | 47 +++ mmdet/models/common/__init__.py | 4 + mmdet/models/common/conv_module.py | 95 +++++ mmdet/models/common/norm.py | 17 + mmdet/models/detectors/__init__.py | 0 mmdet/models/detectors/rpn.py | 100 ++++++ mmdet/models/detectors/two_stage.py | 329 +++++++++++++++++ mmdet/models/mask_heads/__init__.py | 3 + mmdet/models/mask_heads/fcn_mask_head.py | 175 +++++++++ mmdet/models/misc.py | 9 + mmdet/models/necks/__init__.py | 3 + mmdet/models/necks/fpn.py | 125 +++++++ mmdet/models/roi_extractors/__init__.py | 3 + mmdet/models/roi_extractors/single_level.py | 73 ++++ mmdet/models/rpn_heads/__init__.py | 3 + mmdet/models/rpn_heads/rpn_head.py | 237 +++++++++++++ mmdet/models/weight_init.py | 39 ++ mmdet/nn/__init__.py | 1 + mmdet/nn/parallel/__init__.py | 7 + mmdet/nn/parallel/_functions.py | 74 ++++ mmdet/nn/parallel/data_parallel.py | 9 + mmdet/nn/parallel/distributed.py | 9 + mmdet/nn/parallel/scatter_gather.py | 48 +++ mmdet/ops/__init__.py | 3 + mmdet/ops/nms/.gitignore | 1 + mmdet/ops/nms/Makefile | 8 + mmdet/ops/nms/__init__.py | 1 + mmdet/ops/nms/cpu_nms.pyx | 68 ++++ mmdet/ops/nms/cpu_soft_nms.pyx | 123 +++++++ mmdet/ops/nms/gpu_nms.hpp | 3 + mmdet/ops/nms/gpu_nms.pyx | 43 +++ mmdet/ops/nms/nms_kernel.cu | 188 ++++++++++ mmdet/ops/nms/nms_wrapper.py | 46 +++ mmdet/ops/nms/setup.py | 91 +++++ mmdet/ops/roi_align/__init__.py | 2 + mmdet/ops/roi_align/functions/__init__.py | 0 mmdet/ops/roi_align/functions/roi_align.py | 61 ++++ mmdet/ops/roi_align/gradcheck.py | 29 ++ mmdet/ops/roi_align/modules/__init__.py | 0 mmdet/ops/roi_align/modules/roi_align.py | 16 + mmdet/ops/roi_align/setup.py | 12 + mmdet/ops/roi_align/src/roi_align_cuda.cpp | 85 +++++ mmdet/ops/roi_align/src/roi_align_kernel.cu | 319 +++++++++++++++++ mmdet/ops/roi_pool/__init__.py | 2 + 
mmdet/ops/roi_pool/functions/__init__.py | 0 mmdet/ops/roi_pool/functions/roi_pool.py | 56 +++ mmdet/ops/roi_pool/gradcheck.py | 15 + mmdet/ops/roi_pool/modules/__init__.py | 0 mmdet/ops/roi_pool/modules/roi_pool.py | 14 + mmdet/ops/roi_pool/setup.py | 12 + mmdet/ops/roi_pool/src/roi_pool_cuda.cpp | 86 +++++ mmdet/ops/roi_pool/src/roi_pool_kernel.cu | 193 ++++++++++ mmdet/version.py | 1 + setup.py | 40 +++ 92 files changed, 6238 insertions(+) create mode 100755 compile.sh create mode 100644 mmdet/__init__.py create mode 100644 mmdet/core/__init__.py create mode 100644 mmdet/core/anchor_generator.py create mode 100644 mmdet/core/bbox_ops/__init__.py create mode 100644 mmdet/core/bbox_ops/geometry.py create mode 100644 mmdet/core/bbox_ops/sampling.py create mode 100644 mmdet/core/bbox_ops/transforms.py create mode 100644 mmdet/core/eval/__init__.py create mode 100644 mmdet/core/eval/bbox_overlaps.py create mode 100644 mmdet/core/eval/class_names.py create mode 100644 mmdet/core/eval/mean_ap.py create mode 100644 mmdet/core/eval/recall.py create mode 100644 mmdet/core/hooks.py create mode 100644 mmdet/core/mask_ops/__init__.py create mode 100644 mmdet/core/mask_ops/segms.py create mode 100644 mmdet/core/mask_ops/utils.py create mode 100644 mmdet/core/post_processing/__init__.py create mode 100644 mmdet/core/post_processing/bbox_nms.py create mode 100644 mmdet/core/post_processing/merge_augs.py create mode 100644 mmdet/core/targets/__init__.py create mode 100644 mmdet/core/targets/anchor_target.py create mode 100644 mmdet/core/targets/bbox_target.py create mode 100644 mmdet/core/targets/mask_target.py create mode 100644 mmdet/datasets/__init__.py create mode 100644 mmdet/datasets/coco.py create mode 100644 mmdet/datasets/collate.py create mode 100644 mmdet/datasets/sampler.py create mode 100644 mmdet/datasets/transforms.py create mode 100644 mmdet/datasets/utils/__init__.py create mode 100644 mmdet/datasets/utils/data_container.py create mode 100644 mmdet/datasets/utils/misc.py create mode 100644 mmdet/models/__init__.py create mode 100644 mmdet/models/backbones/__init__.py create mode 100644 mmdet/models/backbones/resnet.py create mode 100644 mmdet/models/bbox_heads/__init__.py create mode 100644 mmdet/models/bbox_heads/bbox_head.py create mode 100644 mmdet/models/builder.py create mode 100644 mmdet/models/common/__init__.py create mode 100644 mmdet/models/common/conv_module.py create mode 100644 mmdet/models/common/norm.py create mode 100644 mmdet/models/detectors/__init__.py create mode 100644 mmdet/models/detectors/rpn.py create mode 100644 mmdet/models/detectors/two_stage.py create mode 100644 mmdet/models/mask_heads/__init__.py create mode 100644 mmdet/models/mask_heads/fcn_mask_head.py create mode 100644 mmdet/models/misc.py create mode 100644 mmdet/models/necks/__init__.py create mode 100644 mmdet/models/necks/fpn.py create mode 100644 mmdet/models/roi_extractors/__init__.py create mode 100644 mmdet/models/roi_extractors/single_level.py create mode 100644 mmdet/models/rpn_heads/__init__.py create mode 100644 mmdet/models/rpn_heads/rpn_head.py create mode 100644 mmdet/models/weight_init.py create mode 100644 mmdet/nn/__init__.py create mode 100644 mmdet/nn/parallel/__init__.py create mode 100644 mmdet/nn/parallel/_functions.py create mode 100644 mmdet/nn/parallel/data_parallel.py create mode 100644 mmdet/nn/parallel/distributed.py create mode 100644 mmdet/nn/parallel/scatter_gather.py create mode 100644 mmdet/ops/__init__.py create mode 100644 mmdet/ops/nms/.gitignore create mode 100644 
mmdet/ops/nms/Makefile create mode 100644 mmdet/ops/nms/__init__.py create mode 100644 mmdet/ops/nms/cpu_nms.pyx create mode 100644 mmdet/ops/nms/cpu_soft_nms.pyx create mode 100644 mmdet/ops/nms/gpu_nms.hpp create mode 100644 mmdet/ops/nms/gpu_nms.pyx create mode 100644 mmdet/ops/nms/nms_kernel.cu create mode 100644 mmdet/ops/nms/nms_wrapper.py create mode 100644 mmdet/ops/nms/setup.py create mode 100644 mmdet/ops/roi_align/__init__.py create mode 100644 mmdet/ops/roi_align/functions/__init__.py create mode 100644 mmdet/ops/roi_align/functions/roi_align.py create mode 100644 mmdet/ops/roi_align/gradcheck.py create mode 100644 mmdet/ops/roi_align/modules/__init__.py create mode 100644 mmdet/ops/roi_align/modules/roi_align.py create mode 100644 mmdet/ops/roi_align/setup.py create mode 100644 mmdet/ops/roi_align/src/roi_align_cuda.cpp create mode 100644 mmdet/ops/roi_align/src/roi_align_kernel.cu create mode 100644 mmdet/ops/roi_pool/__init__.py create mode 100644 mmdet/ops/roi_pool/functions/__init__.py create mode 100644 mmdet/ops/roi_pool/functions/roi_pool.py create mode 100644 mmdet/ops/roi_pool/gradcheck.py create mode 100644 mmdet/ops/roi_pool/modules/__init__.py create mode 100644 mmdet/ops/roi_pool/modules/roi_pool.py create mode 100644 mmdet/ops/roi_pool/setup.py create mode 100644 mmdet/ops/roi_pool/src/roi_pool_cuda.cpp create mode 100644 mmdet/ops/roi_pool/src/roi_pool_kernel.cu create mode 100644 mmdet/version.py create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index 894a44cc066..ffbae97a51e 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,6 @@ venv.bak/ # mypy .mypy_cache/ + +# cython generated cpp +mmdet/ops/nms/*.cpp \ No newline at end of file diff --git a/compile.sh b/compile.sh new file mode 100755 index 00000000000..8bf418054a2 --- /dev/null +++ b/compile.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +PYTHON=${PYTHON:-"python"} + +echo "Building roi align op..." +cd mmdet/ops/roi_align +if [ -d "build" ]; then + rm -r build +fi +$PYTHON setup.py build_ext --inplace + +echo "Building roi pool op..." +cd ../roi_pool +if [ -d "build" ]; then + rm -r build +fi +$PYTHON setup.py build_ext --inplace + +echo "Building nms op..." 
+cd ../nms +make clean +make PYTHON=${PYTHON} diff --git a/mmdet/__init__.py b/mmdet/__init__.py new file mode 100644 index 00000000000..58f3ace6c03 --- /dev/null +++ b/mmdet/__init__.py @@ -0,0 +1 @@ +from .version import __version__ diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py new file mode 100644 index 00000000000..7992d8deb3b --- /dev/null +++ b/mmdet/core/__init__.py @@ -0,0 +1,6 @@ +from .anchor_generator import * +from .bbox_ops import * +from .mask_ops import * +from .eval import * +from .nn import * +from .targets import * diff --git a/mmdet/core/anchor_generator.py b/mmdet/core/anchor_generator.py new file mode 100644 index 00000000000..e7a1fa256fb --- /dev/null +++ b/mmdet/core/anchor_generator.py @@ -0,0 +1,80 @@ +import torch + + +class AnchorGenerator(object): + + def __init__(self, base_size, scales, ratios, scale_major=True): + self.base_size = base_size + self.scales = torch.Tensor(scales) + self.ratios = torch.Tensor(ratios) + self.scale_major = scale_major + self.base_anchors = self.gen_base_anchors() + + @property + def num_base_anchors(self): + return self.base_anchors.size(0) + + def gen_base_anchors(self): + base_anchor = torch.Tensor( + [0, 0, self.base_size - 1, self.base_size - 1]) + + w = base_anchor[2] - base_anchor[0] + 1 + h = base_anchor[3] - base_anchor[1] + 1 + x_ctr = base_anchor[0] + 0.5 * (w - 1) + y_ctr = base_anchor[1] + 0.5 * (h - 1) + + h_ratios = torch.sqrt(self.ratios) + w_ratios = 1 / h_ratios + if self.scale_major: + ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) + hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) + else: + ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) + hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) + + base_anchors = torch.stack( + [ + x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), + x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) + ], + dim=-1).round() + + return base_anchors + + def _meshgrid(self, x, y, row_major=True): + xx = x.repeat(len(y)) + yy = y.view(-1, 1).repeat(1, len(x)).view(-1) + if row_major: + return xx, yy + else: + return yy, xx + + def grid_anchors(self, featmap_size, stride=16, device='cuda'): + feat_h, feat_w = featmap_size + shift_x = torch.arange(0, feat_w, device=device) * stride + shift_y = torch.arange(0, feat_h, device=device) * stride + shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) + shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) + # first feat_w elements correspond to the first row of shifts + # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get + # shifted anchors (K, A, 4), reshape to (K*A, 4) + base_anchors = self.base_anchors.to(device) + all_anchors = base_anchors[None, :, :] + shifts[:, None, :] + all_anchors = all_anchors.view(-1, 4) + # first A rows correspond to A anchors of (0, 0) in feature map, + # then (0, 1), (0, 2), ... 
+        return all_anchors
+
+    def valid_flags(self, featmap_size, valid_size, device='cuda'):
+        feat_h, feat_w = featmap_size
+        valid_h, valid_w = valid_size
+        assert valid_h <= feat_h and valid_w <= feat_w
+        valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
+        valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
+        valid_x[:valid_w] = 1
+        valid_y[:valid_h] = 1
+        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
+        valid = valid_xx & valid_yy
+        valid = valid[:, None].expand(
+            valid.size(0), self.num_base_anchors).contiguous().view(-1)
+        return valid
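For orientation, a minimal usage sketch of the AnchorGenerator added above (not part of the patch; the import path and expected shapes follow from the file itself). With 3 scales and 3 ratios there are 9 base anchors, so a 2x2 feature map at stride 16 yields 36 shifted anchors:

    # illustrative only; assumes the modules from this patch are importable
    from mmdet.core.anchor_generator import AnchorGenerator

    gen = AnchorGenerator(base_size=16, scales=[8, 16, 32],
                          ratios=[0.5, 1.0, 2.0])
    print(gen.num_base_anchors)            # 9
    anchors = gen.grid_anchors((2, 2), stride=16, device='cpu')
    print(anchors.shape)                   # torch.Size([36, 4])
    flags = gen.valid_flags((2, 2), (2, 1), device='cpu')
    print(flags.shape, int(flags.sum()))   # torch.Size([36]), 18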
diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox_ops/__init__.py new file mode 100644 index 00000000000..4bf9aeb74a5 --- /dev/null +++ b/mmdet/core/bbox_ops/__init__.py @@ -0,0 +1,12 @@
+from .geometry import bbox_overlaps
+from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps,
+                       bbox_sampling, sample_positives, sample_negatives)
+from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip,
+                         bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox)
+
+__all__ = [
+    'bbox_overlaps', 'random_choice', 'bbox_assign',
+    'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives',
+    'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip',
+    'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox'
+]
diff --git a/mmdet/core/bbox_ops/geometry.py b/mmdet/core/bbox_ops/geometry.py new file mode 100644 index 00000000000..a852a06fb0c --- /dev/null +++ b/mmdet/core/bbox_ops/geometry.py @@ -0,0 +1,63 @@
+import torch
+
+
+def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
+    """Calculate overlaps between two sets of bboxes.
+
+    If ``is_aligned`` is ``False``, then calculate the ious between each bbox
+    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
+    bboxes1 and bboxes2.
+
+    Args:
+        bboxes1 (Tensor): shape (m, 4)
+        bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
+            must be equal.
+        mode (str): "iou" (intersection over union) or "iof" (intersection
+            over foreground).
+
+    Returns:
+        ious (Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
+    """
+
+    assert mode in ['iou', 'iof']
+
+    rows = bboxes1.size(0)
+    cols = bboxes2.size(0)
+    if is_aligned:
+        assert rows == cols
+
+    if rows * cols == 0:
+        return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
+
+    if is_aligned:
+        lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [rows, 2]
+        rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [rows, 2]
+
+        wh = (rb - lt + 1).clamp(min=0)  # [rows, 2]
+        overlap = wh[:, 0] * wh[:, 1]
+        area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
+            bboxes1[:, 3] - bboxes1[:, 1] + 1)
+
+        if mode == 'iou':
+            area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
+                bboxes2[:, 3] - bboxes2[:, 1] + 1)
+            ious = overlap / (area1 + area2 - overlap)
+        else:
+            ious = overlap / area1
+    else:
+        lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]
+        rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]
+
+        wh = (rb - lt + 1).clamp(min=0)  # [rows, cols, 2]
+        overlap = wh[:, :, 0] * wh[:, :, 1]
+        area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
+            bboxes1[:, 3] - bboxes1[:, 1] + 1)
+
+        if mode == 'iou':
+            area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
+                bboxes2[:, 3] - bboxes2[:, 1] + 1)
+            ious = overlap / (area1[:, None] + area2 - overlap)
+        else:
+            ious = overlap / (area1[:, None])
+
+    return ious
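A tiny worked check of the convention used above (not part of the patch): coordinates are inclusive pixel indices, hence the "+ 1" in width/height, so a 10x10 box is [0, 0, 9, 9]. Two 10x10 boxes with a 5x5 overlap give IoU 25 / (100 + 100 - 25):

    # illustrative only
    import torch
    from mmdet.core.bbox_ops.geometry import bbox_overlaps

    b1 = torch.Tensor([[0., 0., 9., 9.]])      # area 100
    b2 = torch.Tensor([[5., 5., 14., 14.]])    # area 100, 5x5 intersection
    print(bbox_overlaps(b1, b2))               # ~0.1429
    print(bbox_overlaps(b1, b2, mode='iof'))   # 25 / 100 = 0.25
    print(bbox_overlaps(b1, b2, is_aligned=True))  # elementwise, ~0.1429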
diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox_ops/sampling.py new file mode 100644 index 00000000000..9825e3bd15e --- /dev/null +++ b/mmdet/core/bbox_ops/sampling.py @@ -0,0 +1,255 @@
+import numpy as np
+import torch
+
+from .geometry import bbox_overlaps
+
+
+def random_choice(gallery, num):
+    assert len(gallery) >= num
+    if isinstance(gallery, list):
+        gallery = np.array(gallery)
+    cands = np.arange(len(gallery))
+    np.random.shuffle(cands)
+    rand_inds = cands[:num]
+    if not isinstance(gallery, np.ndarray):
+        rand_inds = torch.from_numpy(rand_inds).long()
+        if gallery.is_cuda:
+            rand_inds = rand_inds.cuda(gallery.get_device())
+    return gallery[rand_inds]
+
+
+def bbox_assign(proposals,
+                gt_bboxes,
+                gt_crowd_bboxes=None,
+                gt_labels=None,
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=.0,
+                crowd_thr=-1):
+    """Assign a corresponding gt bbox or background to each proposal/anchor.
+
+    This function assigns a gt bbox to every proposal; each proposal will be
+    assigned -1, 0, or a positive number. -1 means don't care, 0 means
+    negative sample, and a positive number is the index (1-based) of the
+    assigned gt. If gt_crowd_bboxes is not None, proposals whose iof
+    (intersection over foreground) with any crowd bbox exceeds crowd_thr
+    will be ignored.
+
+    Args:
+        proposals (Tensor): proposals or RPN anchors, shape (n, 4)
+        gt_bboxes (Tensor): shape (k, 4)
+        gt_crowd_bboxes (Tensor): shape (m, 4)
+        gt_labels (Tensor, optional): shape (k, )
+        pos_iou_thr (float): iou threshold for positive bboxes
+        neg_iou_thr (float or tuple): iou threshold for negative bboxes
+        min_pos_iou (float): minimum iou for a bbox to be considered as a
+            positive bbox; for RPN it is usually set to 0, for Fast R-CNN
+            it is usually set to pos_iou_thr
+        crowd_thr (float): ignore proposals whose iof (intersection over
+            foreground) with crowd bboxes exceeds crowd_thr
+
+    Returns:
+        tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
+    """
+
+    # calculate overlaps between the proposals and the gt boxes
+    overlaps = bbox_overlaps(proposals, gt_bboxes)
+    if overlaps.numel() == 0:
+        raise ValueError('No gt bbox or proposals')
+
+    # ignore proposals according to crowd bboxes
+    if (crowd_thr > 0) and (gt_crowd_bboxes is
+                            not None) and (gt_crowd_bboxes.numel() > 0):
+        crowd_overlaps = bbox_overlaps(proposals, gt_crowd_bboxes, mode='iof')
+        crowd_max_overlaps, _ = crowd_overlaps.max(dim=1)
+        crowd_bboxes_inds = torch.nonzero(
+            crowd_max_overlaps > crowd_thr).long()
+        if crowd_bboxes_inds.numel() > 0:
+            overlaps[crowd_bboxes_inds, :] = -1
+
+    return bbox_assign_via_overlaps(overlaps, gt_labels, pos_iou_thr,
+                                    neg_iou_thr, min_pos_iou)
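A minimal sketch of the assignment semantics (not part of the patch): one gt box and three anchors, where only the exact match clears pos_iou_thr, so the result is the 1-based gt index for anchor 0 and background (0) for the rest:

    # illustrative only
    import torch
    from mmdet.core.bbox_ops import bbox_assign

    anchors = torch.Tensor([[0, 0, 9, 9], [0, 0, 4, 4], [50, 50, 59, 59]])
    gt = torch.Tensor([[0, 0, 9, 9]])
    assigned, argmax, max_ious = bbox_assign(
        anchors, gt, pos_iou_thr=0.5, neg_iou_thr=0.3)
    print(assigned)  # tensor([1, 0, 0])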
+
+
+def bbox_assign_via_overlaps(overlaps,
+                             gt_labels=None,
+                             pos_iou_thr=0.5,
+                             neg_iou_thr=0.5,
+                             min_pos_iou=.0):
+    """Assign a corresponding gt bbox or background to each proposal/anchor.
+
+    This function assigns a gt bbox to every proposal; each proposal will be
+    assigned -1, 0, or a positive number. -1 means don't care, 0 means
+    negative sample, and a positive number is the index (1-based) of the
+    assigned gt. The assignment is done in the following steps, and the
+    order matters:
+
+    1. assign every anchor to -1
+    2. assign proposals whose iou with all gts < neg_iou_thr to 0
+    3. for each anchor, if the iou with its nearest gt >= pos_iou_thr,
+       assign it to that bbox
+    4. for each gt bbox, assign its nearest proposals (may be more than one)
+       to itself
+
+    Args:
+        overlaps (Tensor): overlaps between n proposals and k gt_bboxes,
+            shape (n, k)
+        gt_labels (Tensor, optional): shape (k, )
+        pos_iou_thr (float): iou threshold for positive bboxes
+        neg_iou_thr (float or tuple): iou threshold for negative bboxes
+        min_pos_iou (float): minimum iou for a bbox to be considered as a
+            positive bbox; for RPN it is usually set to 0, for Fast R-CNN
+            it is usually set to pos_iou_thr
+
+    Returns:
+        tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
+    """
+    num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
+    # 1. assign -1 by default
+    assigned_gt_inds = overlaps.new(num_bboxes).long().fill_(-1)
+
+    if overlaps.numel() == 0:
+        raise ValueError('No gt bbox or proposals')
+
+    assert overlaps.size() == (num_bboxes, num_gts)
+    # for each anchor, which gt best overlaps with it
+    # for each anchor, the max iou of all gts
+    max_overlaps, argmax_overlaps = overlaps.max(dim=1)
+    # for each gt, which anchor best overlaps with it
+    # for each gt, the max iou of all proposals
+    gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=0)
+
+    # 2. assign negative: below neg_iou_thr
+    if isinstance(neg_iou_thr, float):
+        assigned_gt_inds[(max_overlaps >= 0)
+                         & (max_overlaps < neg_iou_thr)] = 0
+    elif isinstance(neg_iou_thr, tuple):
+        assert len(neg_iou_thr) == 2
+        assigned_gt_inds[(max_overlaps >= neg_iou_thr[0])
+                         & (max_overlaps < neg_iou_thr[1])] = 0
+
+    # 3. assign positive: above positive IoU threshold
+    pos_inds = max_overlaps >= pos_iou_thr
+    assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1
+
+    # 4. assign fg: for each gt, proposals with highest IoU
+    for i in range(num_gts):
+        if gt_max_overlaps[i] >= min_pos_iou:
+            assigned_gt_inds[overlaps[:, i] == gt_max_overlaps[i]] = i + 1
+
+    if gt_labels is None:
+        return assigned_gt_inds, argmax_overlaps, max_overlaps
+    else:
+        assigned_labels = assigned_gt_inds.new(num_bboxes).fill_(0)
+        pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
+        if pos_inds.numel() > 0:
+            assigned_labels[pos_inds] = gt_labels[assigned_gt_inds[pos_inds] -
+                                                  1]
+        return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps
+
+
+def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
+    """Balance sampling for positive bboxes/anchors.
+
+    1. calculate the average positive num for each gt: num_per_gt
+    2. sample at most num_per_gt positives for each gt
+    3. randomly sample from the remaining anchors if there are not enough fg
+    """
+    pos_inds = torch.nonzero(assigned_gt_inds > 0)
+    if pos_inds.numel() != 0:
+        pos_inds = pos_inds.squeeze(1)
+    if pos_inds.numel() <= num_expected:
+        return pos_inds
+    elif not balance_sampling:
+        return random_choice(pos_inds, num_expected)
+    else:
+        unique_gt_inds = torch.unique(assigned_gt_inds[pos_inds].cpu())
+        num_gts = len(unique_gt_inds)
+        num_per_gt = int(round(num_expected / float(num_gts)) + 1)
+        sampled_inds = []
+        for i in unique_gt_inds:
+            inds = torch.nonzero(assigned_gt_inds == i.item())
+            if inds.numel() != 0:
+                inds = inds.squeeze(1)
+            else:
+                continue
+            if len(inds) > num_per_gt:
+                inds = random_choice(inds, num_per_gt)
+            sampled_inds.append(inds)
+        sampled_inds = torch.cat(sampled_inds)
+        if len(sampled_inds) < num_expected:
+            num_extra = num_expected - len(sampled_inds)
+            extra_inds = np.array(
+                list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
+            if len(extra_inds) > num_extra:
+                extra_inds = random_choice(extra_inds, num_extra)
+            extra_inds = torch.from_numpy(extra_inds).to(
+                assigned_gt_inds.device).long()
+            sampled_inds = torch.cat([sampled_inds, extra_inds])
+        elif len(sampled_inds) > num_expected:
+            sampled_inds = random_choice(sampled_inds, num_expected)
+        return sampled_inds
+
+
+def sample_negatives(assigned_gt_inds,
+                     num_expected,
+                     max_overlaps=None,
+                     balance_thr=0,
+                     hard_fraction=0.5):
+    """Balance sampling for negative bboxes/anchors.
+
+    Negative samples are split into 2 sets: hard (balance_thr <= iou <
+    neg_iou_thr) and easy (iou < balance_thr), and a roughly equal number
+    of bg samples are drawn from each set.
+ """ + neg_inds = torch.nonzero(assigned_gt_inds == 0) + if neg_inds.numel() != 0: + neg_inds = neg_inds.squeeze(1) + if len(neg_inds) <= num_expected: + return neg_inds + elif balance_thr <= 0: + # uniform sampling among all negative samples + return random_choice(neg_inds, num_expected) + else: + assert max_overlaps is not None + max_overlaps = max_overlaps.cpu().numpy() + # balance sampling for negative samples + neg_set = set(neg_inds.cpu().numpy()) + easy_set = set( + np.where( + np.logical_and(max_overlaps >= 0, + max_overlaps < balance_thr))[0]) + hard_set = set(np.where(max_overlaps >= balance_thr)[0]) + easy_neg_inds = list(easy_set & neg_set) + hard_neg_inds = list(hard_set & neg_set) + + num_expected_hard = int(num_expected * hard_fraction) + if len(hard_neg_inds) > num_expected_hard: + sampled_hard_inds = random_choice(hard_neg_inds, num_expected_hard) + else: + sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int) + num_expected_easy = num_expected - len(sampled_hard_inds) + if len(easy_neg_inds) > num_expected_easy: + sampled_easy_inds = random_choice(easy_neg_inds, num_expected_easy) + else: + sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int) + sampled_inds = np.concatenate((sampled_easy_inds, sampled_hard_inds)) + if len(sampled_inds) < num_expected: + num_extra = num_expected - len(sampled_inds) + extra_inds = np.array(list(neg_set - set(sampled_inds))) + if len(extra_inds) > num_extra: + extra_inds = random_choice(extra_inds, num_extra) + sampled_inds = np.concatenate((sampled_inds, extra_inds)) + sampled_inds = torch.from_numpy(sampled_inds).long().to( + assigned_gt_inds.device) + return sampled_inds + + +def bbox_sampling(assigned_gt_inds, + num_expected, + pos_fraction, + neg_pos_ub, + pos_balance_sampling=True, + max_overlaps=None, + neg_balance_thr=0, + neg_hard_fraction=0.5): + num_expected_pos = int(num_expected * pos_fraction) + pos_inds = sample_positives(assigned_gt_inds, num_expected_pos, + pos_balance_sampling) + num_sampled_pos = pos_inds.numel() + num_neg_max = int( + neg_pos_ub * + num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub) + num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos) + neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg, + max_overlaps, neg_balance_thr, + neg_hard_fraction) + return pos_inds, neg_inds diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox_ops/transforms.py new file mode 100644 index 00000000000..6f83a1dc56e --- /dev/null +++ b/mmdet/core/bbox_ops/transforms.py @@ -0,0 +1,128 @@ +import mmcv +import numpy as np +import torch + + +def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): + assert proposals.size() == gt.size() + + proposals = proposals.float() + gt = gt.float() + px = (proposals[..., 0] + proposals[..., 2]) * 0.5 + py = (proposals[..., 1] + proposals[..., 3]) * 0.5 + pw = proposals[..., 2] - proposals[..., 0] + 1.0 + ph = proposals[..., 3] - proposals[..., 1] + 1.0 + + gx = (gt[..., 0] + gt[..., 2]) * 0.5 + gy = (gt[..., 1] + gt[..., 3]) * 0.5 + gw = gt[..., 2] - gt[..., 0] + 1.0 + gh = gt[..., 3] - gt[..., 1] + 1.0 + + dx = (gx - px) / pw + dy = (gy - py) / ph + dw = torch.log(gw / pw) + dh = torch.log(gh / ph) + deltas = torch.stack([dx, dy, dw, dh], dim=-1) + + means = deltas.new_tensor(means).unsqueeze(0) + stds = deltas.new_tensor(stds).unsqueeze(0) + deltas = deltas.sub_(means).div_(stds) + + return deltas + + +def bbox_transform_inv(rois, + deltas, + means=[0, 0, 0, 0], + stds=[1, 1, 1, 1], + max_shape=None, + wh_ratio_clip=16 
diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox_ops/transforms.py new file mode 100644 index 00000000000..6f83a1dc56e --- /dev/null +++ b/mmdet/core/bbox_ops/transforms.py @@ -0,0 +1,128 @@
+import mmcv
+import numpy as np
+import torch
+
+
+def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
+    assert proposals.size() == gt.size()
+
+    proposals = proposals.float()
+    gt = gt.float()
+    px = (proposals[..., 0] + proposals[..., 2]) * 0.5
+    py = (proposals[..., 1] + proposals[..., 3]) * 0.5
+    pw = proposals[..., 2] - proposals[..., 0] + 1.0
+    ph = proposals[..., 3] - proposals[..., 1] + 1.0
+
+    gx = (gt[..., 0] + gt[..., 2]) * 0.5
+    gy = (gt[..., 1] + gt[..., 3]) * 0.5
+    gw = gt[..., 2] - gt[..., 0] + 1.0
+    gh = gt[..., 3] - gt[..., 1] + 1.0
+
+    dx = (gx - px) / pw
+    dy = (gy - py) / ph
+    dw = torch.log(gw / pw)
+    dh = torch.log(gh / ph)
+    deltas = torch.stack([dx, dy, dw, dh], dim=-1)
+
+    means = deltas.new_tensor(means).unsqueeze(0)
+    stds = deltas.new_tensor(stds).unsqueeze(0)
+    deltas = deltas.sub_(means).div_(stds)
+
+    return deltas
+
+
+def bbox_transform_inv(rois,
+                       deltas,
+                       means=[0, 0, 0, 0],
+                       stds=[1, 1, 1, 1],
+                       max_shape=None,
+                       wh_ratio_clip=16 / 1000):
+    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
+    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
+    denorm_deltas = deltas * stds + means
+    dx = denorm_deltas[:, 0::4]
+    dy = denorm_deltas[:, 1::4]
+    dw = denorm_deltas[:, 2::4]
+    dh = denorm_deltas[:, 3::4]
+    max_ratio = np.abs(np.log(wh_ratio_clip))
+    dw = dw.clamp(min=-max_ratio, max=max_ratio)
+    dh = dh.clamp(min=-max_ratio, max=max_ratio)
+    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
+    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
+    pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw)
+    ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh)
+    gw = pw * dw.exp()
+    gh = ph * dh.exp()
+    gx = torch.addcmul(px, 1, pw, dx)  # gx = px + pw * dx
+    gy = torch.addcmul(py, 1, ph, dy)  # gy = py + ph * dy
+    x1 = gx - gw * 0.5 + 0.5
+    y1 = gy - gh * 0.5 + 0.5
+    x2 = gx + gw * 0.5 - 0.5
+    y2 = gy + gh * 0.5 - 0.5
+    if max_shape is not None:
+        x1 = x1.clamp(min=0, max=max_shape[1] - 1)
+        y1 = y1.clamp(min=0, max=max_shape[0] - 1)
+        x2 = x2.clamp(min=0, max=max_shape[1] - 1)
+        y2 = y2.clamp(min=0, max=max_shape[0] - 1)
+    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
+    return bboxes
+
+
+def bbox_flip(bboxes, img_shape):
+    """Flip bboxes horizontally.
+
+    Args:
+        bboxes (Tensor or ndarray): shape (..., 4*k)
+        img_shape (tuple): image shape
+    """
+    if isinstance(bboxes, torch.Tensor):
+        assert bboxes.shape[-1] % 4 == 0
+        flipped = bboxes.clone()
+        flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4] - 1
+        flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4] - 1
+        return flipped
+    elif isinstance(bboxes, np.ndarray):
+        return mmcv.bbox_flip(bboxes, img_shape)
+
+
+def bbox_mapping(bboxes, img_shape, flip):
+    """Map bboxes from the original image scale to testing scale."""
+    new_bboxes = bboxes * img_shape[-1]
+    if flip:
+        new_bboxes = bbox_flip(new_bboxes, img_shape)
+    return new_bboxes
+
+
+def bbox_mapping_back(bboxes, img_shape, flip):
+    """Map bboxes from testing scale to original image scale."""
+    new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes
+    new_bboxes = new_bboxes / img_shape[-1]
+    return new_bboxes
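A quick round-trip sketch of the delta encoding above (not part of the patch): encoding a gt box against a proposal and then decoding recovers the gt box, up to float rounding:

    # illustrative only
    import torch
    from mmdet.core.bbox_ops import bbox_transform, bbox_transform_inv

    proposals = torch.Tensor([[10., 10., 29., 29.]])
    gt = torch.Tensor([[12., 8., 35., 31.]])
    deltas = bbox_transform(proposals, gt)
    decoded = bbox_transform_inv(proposals, deltas)
    print(decoded)  # ~tensor([[12., 8., 35., 31.]])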
+
+
+def bbox2roi(bbox_list):
+    """Convert a list of bboxes to roi format.
+
+    Args:
+        bbox_list (list[Tensor]): a list of bboxes corresponding to a list
+            of images
+
+    Returns:
+        Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
+    """
+    rois_list = []
+    for img_id, bboxes in enumerate(bbox_list):
+        if bboxes.size(0) > 0:
+            img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)
+            rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1)
+        else:
+            rois = bboxes.new_zeros((0, 5))
+        rois_list.append(rois)
+    rois = torch.cat(rois_list, 0)
+    return rois
+
+
+def roi2bbox(rois):
+    bbox_list = []
+    img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)
+    for img_id in img_ids:
+        inds = (rois[:, 0] == img_id.item())
+        bbox = rois[inds, 1:]
+        bbox_list.append(bbox)
+    return bbox_list
diff --git a/mmdet/core/eval/__init__.py b/mmdet/core/eval/__init__.py new file mode 100644 index 00000000000..fe4893a0af6 --- /dev/null +++ b/mmdet/core/eval/__init__.py @@ -0,0 +1,13 @@
+from .class_names import (voc_classes, imagenet_det_classes,
+                          imagenet_vid_classes, coco_classes, dataset_aliases,
+                          get_classes)
+from .mean_ap import average_precision, eval_map, print_map_summary
+from .recall import (eval_recalls, print_recall_summary, plot_num_recall,
+                     plot_iou_recall)
+
+__all__ = [
+    'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
+    'coco_classes', 'dataset_aliases', 'get_classes', 'average_precision',
+    'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
+    'plot_num_recall', 'plot_iou_recall'
+]
diff --git a/mmdet/core/eval/bbox_overlaps.py b/mmdet/core/eval/bbox_overlaps.py new file mode 100644 index 00000000000..ad4c70523fd --- /dev/null +++ b/mmdet/core/eval/bbox_overlaps.py @@ -0,0 +1,49 @@
+import numpy as np
+
+
+def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
+    """Calculate the ious between each bbox of bboxes1 and bboxes2.
+ + Args: + bboxes1(ndarray): shape (n, 4) + bboxes2(ndarray): shape (k, 4) + mode(str): iou (intersection over union) or iof (intersection + over foreground) + + Returns: + ious(ndarray): shape (n, k) + """ + + assert mode in ['iou', 'iof'] + + bboxes1 = bboxes1.astype(np.float32) + bboxes2 = bboxes2.astype(np.float32) + rows = bboxes1.shape[0] + cols = bboxes2.shape[0] + ious = np.zeros((rows, cols), dtype=np.float32) + if rows * cols == 0: + return ious + exchange = False + if bboxes1.shape[0] > bboxes2.shape[0]: + bboxes1, bboxes2 = bboxes2, bboxes1 + ious = np.zeros((cols, rows), dtype=np.float32) + exchange = True + area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( + bboxes1[:, 3] - bboxes1[:, 1] + 1) + area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( + bboxes2[:, 3] - bboxes2[:, 1] + 1) + for i in range(bboxes1.shape[0]): + x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) + y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) + x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) + y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) + overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( + y_end - y_start + 1, 0) + if mode == 'iou': + union = area1[i] + area2 - overlap + else: + union = area1[i] if not exchange else area2 + ious[i, :] = overlap / union + if exchange: + ious = ious.T + return ious diff --git a/mmdet/core/eval/class_names.py b/mmdet/core/eval/class_names.py new file mode 100644 index 00000000000..b68e9135dca --- /dev/null +++ b/mmdet/core/eval/class_names.py @@ -0,0 +1,103 @@ +import mmcv + + +def voc_classes(): + return [ + 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', + 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', + 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' + ] + + +def imagenet_det_classes(): + return [ + 'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo', + 'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam', + 'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap', + 'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder', + 'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito', + 'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle', + 'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker', + 'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew', + 'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper', + 'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly', + 'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig', + 'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog', + 'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart', + 'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger', + 'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim', + 'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse', + 'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle', + 'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard', + 'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can', + 'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace', + 'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume', + 'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza', + 'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine', + 'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse', + 'rabbit', 'racket', 'ray', 
+        'red_panda', 'refrigerator',
+        'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler',
+        'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver',
+        'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile',
+        'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula',
+        'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer',
+        'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine',
+        'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie',
+        'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet',
+        'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin',
+        'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft',
+        'whale', 'wine_bottle', 'zebra'
+    ]
+
+
+def imagenet_vid_classes():
+    return [
+        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
+        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
+        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
+        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
+        'watercraft', 'whale', 'zebra'
+    ]
+
+
+def coco_classes():
+    return [
+        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
+        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
+        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
+        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
+        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
+        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
+        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
+        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
+        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
+        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
+    ]
+
+
+dataset_aliases = {
+    'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],
+    'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],
+    'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],
+    'coco': ['coco', 'mscoco', 'ms_coco']
+}
+
+
+def get_classes(dataset):
+    """Get class names of a dataset."""
+    alias2name = {}
+    for name, aliases in dataset_aliases.items():
+        for alias in aliases:
+            alias2name[alias] = name
+
+    if mmcv.is_str(dataset):
+        if dataset in alias2name:
+            labels = eval(alias2name[dataset] + '_classes()')
+        else:
+            raise ValueError('Unrecognized dataset: {}'.format(dataset))
+    else:
+        raise TypeError('dataset must be a str, but got {}'.format(
+            type(dataset)))
+    return labels
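A minimal lookup sketch (not part of the patch): aliases resolve to the canonical dataset name before the corresponding *_classes() getter is evaluated:

    # illustrative only
    from mmdet.core.eval.class_names import get_classes

    print(len(get_classes('coco')))        # 80
    print(get_classes('pascal_voc')[:3])   # ['aeroplane', 'bicycle', 'bird']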
diff --git a/mmdet/core/eval/mean_ap.py b/mmdet/core/eval/mean_ap.py new file mode 100644 index 00000000000..9a33f764040 --- /dev/null +++ b/mmdet/core/eval/mean_ap.py @@ -0,0 +1,372 @@
+import numpy as np
+from terminaltables import AsciiTable
+
+from .bbox_overlaps import bbox_overlaps
+from .class_names import get_classes
+
+
+def average_precision(recalls, precisions, mode='area'):
+    """Calculate average precision (for single or multiple scales).
+
+    Args:
+        recalls (ndarray): shape (num_scales, num_dets) or (num_dets, )
+        precisions (ndarray): shape (num_scales, num_dets) or (num_dets, )
+        mode (str): 'area' or '11points', 'area' means calculating the area
+            under the precision-recall curve, '11points' means calculating
+            the average precision of recalls at [0, 0.1, ..., 1]
+
+    Returns:
+        float or ndarray: calculated average precision
+    """
+    no_scale = False
+    if recalls.ndim == 1:
+        no_scale = True
+        recalls = recalls[np.newaxis, :]
+        precisions = precisions[np.newaxis, :]
+    assert recalls.shape == precisions.shape and recalls.ndim == 2
+    num_scales = recalls.shape[0]
+    ap = np.zeros(num_scales, dtype=np.float32)
+    if mode == 'area':
+        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
+        ones = np.ones((num_scales, 1), dtype=recalls.dtype)
+        mrec = np.hstack((zeros, recalls, ones))
+        mpre = np.hstack((zeros, precisions, zeros))
+        for i in range(mpre.shape[1] - 1, 0, -1):
+            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])
+        for i in range(num_scales):
+            ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]
+            ap[i] = np.sum(
+                (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])
+    elif mode == '11points':
+        for i in range(num_scales):
+            for thr in np.arange(0, 1 + 1e-3, 0.1):
+                precs = precisions[i, recalls[i, :] >= thr]
+                prec = precs.max() if precs.size > 0 else 0
+                ap[i] += prec
+            ap /= 11
+    else:
+        raise ValueError(
+            'Unrecognized mode, only "area" and "11points" are supported')
+    if no_scale:
+        ap = ap[0]
+    return ap
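A small worked check of the 'area' mode above (not part of the patch): with 4 gts and detections ranked TP, TP, FP, TP, the interpolated PR curve integrates to 0.6875:

    # illustrative only
    import numpy as np
    from mmdet.core.eval.mean_ap import average_precision

    recalls = np.array([0.25, 0.5, 0.5, 0.75])
    precisions = np.array([1.0, 1.0, 2. / 3, 0.75])
    # area under the interpolated PR curve:
    # 0.25 * 1 + 0.25 * 1 + 0.25 * 0.75 = 0.6875
    print(average_precision(recalls, precisions))  # ~0.6875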
+
+
+def tpfp_imagenet(det_bboxes,
+                  gt_bboxes,
+                  gt_ignore,
+                  default_iou_thr,
+                  area_ranges=None):
+    """Check if detected bboxes are true positive or false positive.
+
+    Args:
+        det_bboxes (ndarray): the detected bboxes of this image
+        gt_bboxes (ndarray): ground truth bboxes of this image
+        gt_ignore (ndarray): indicate if gts are ignored for evaluation or not
+        default_iou_thr (float): the iou threshold for medium and large bboxes
+        area_ranges (list or None): gt bbox area ranges
+
+    Returns:
+        tuple: two arrays (tp, fp) whose elements are 0 and 1
+    """
+    num_dets = det_bboxes.shape[0]
+    num_gts = gt_bboxes.shape[0]
+    if area_ranges is None:
+        area_ranges = [(None, None)]
+    num_scales = len(area_ranges)
+    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp
+    # of a certain scale.
+    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
+    fp = np.zeros((num_scales, num_dets), dtype=np.float32)
+    if gt_bboxes.shape[0] == 0:
+        if area_ranges == [(None, None)]:
+            fp[...] = 1
+        else:
+            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * (
+                det_bboxes[:, 3] - det_bboxes[:, 1] + 1)
+            for i, (min_area, max_area) in enumerate(area_ranges):
+                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
+        return tp, fp
+    ious = bbox_overlaps(det_bboxes, gt_bboxes - 1)
+    gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1
+    gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1
+    iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)),
+                          default_iou_thr)
+    # sort all detections by scores in descending order
+    sort_inds = np.argsort(-det_bboxes[:, -1])
+    for k, (min_area, max_area) in enumerate(area_ranges):
+        gt_covered = np.zeros(num_gts, dtype=bool)
+        # if no area range is specified, gt_area_ignore is all False
+        if min_area is None:
+            gt_area_ignore = np.zeros_like(gt_ignore, dtype=bool)
+        else:
+            gt_areas = gt_w * gt_h
+            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
+        for i in sort_inds:
+            max_iou = -1
+            matched_gt = -1
+            # find best overlapped available gt
+            for j in range(num_gts):
+                # different from PASCAL VOC: allow finding other gts if the
+                # best overlapped ones are already matched by other det bboxes
+                if gt_covered[j]:
+                    continue
+                elif ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou:
+                    max_iou = ious[i, j]
+                    matched_gt = j
+            # there are 4 cases for a det bbox:
+            # 1. this det bbox matches a gt, tp = 1, fp = 0
+            # 2. this det bbox matches an ignored gt, tp = 0, fp = 0
+            # 3. this det bbox matches no gt and is within the area range,
+            #    tp = 0, fp = 1
+            # 4. this det bbox matches no gt and is beyond the area range,
+            #    tp = 0, fp = 0
+            if matched_gt >= 0:
+                gt_covered[matched_gt] = 1
+                if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]):
+                    tp[k, i] = 1
+            elif min_area is None:
+                fp[k, i] = 1
+            else:
+                bbox = det_bboxes[i, :4]
+                area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)
+                if area >= min_area and area < max_area:
+                    fp[k, i] = 1
+    return tp, fp
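A sketch of the size-adaptive threshold above (not part of the patch): a 10x10 gt only needs IoU >= 100/400 = 0.25 to match, so a loose detection that would fail the default 0.5 threshold still counts as a true positive:

    # illustrative only
    import numpy as np
    from mmdet.core.eval.mean_ap import tpfp_imagenet

    det_bboxes = np.array([[0., 0., 17., 17., 0.9]])  # [x1, y1, x2, y2, score]
    gt_bboxes = np.array([[1., 1., 10., 10.]])
    gt_ignore = np.zeros(1, dtype=np.int32)
    tp, fp = tpfp_imagenet(det_bboxes, gt_bboxes, gt_ignore, 0.5)
    print(tp, fp)  # [[1.]] [[0.]]: IoU ~0.31 passes the adaptive 0.25 threshold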
+
+
+def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=None):
+    """Check if detected bboxes are true positive or false positive.
+
+    Args:
+        det_bboxes (ndarray): the detected bboxes of this image
+        gt_bboxes (ndarray): ground truth bboxes of this image
+        gt_ignore (ndarray): indicate if gts are ignored for evaluation or not
+        iou_thr (float): the iou threshold
+        area_ranges (list or None): gt bbox area ranges
+
+    Returns:
+        tuple: (tp, fp), two arrays whose elements are 0 and 1
+    """
+    num_dets = det_bboxes.shape[0]
+    num_gts = gt_bboxes.shape[0]
+    if area_ranges is None:
+        area_ranges = [(None, None)]
+    num_scales = len(area_ranges)
+    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp of
+    # a certain scale
+    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
+    fp = np.zeros((num_scales, num_dets), dtype=np.float32)
+    # if there is no gt bbox in this image, then all det bboxes
+    # within the area range are false positives
+    if gt_bboxes.shape[0] == 0:
+        if area_ranges == [(None, None)]:
+            fp[...] = 1
+        else:
+            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * (
+                det_bboxes[:, 3] - det_bboxes[:, 1] + 1)
+            for i, (min_area, max_area) in enumerate(area_ranges):
+                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
+        return tp, fp
+    ious = bbox_overlaps(det_bboxes, gt_bboxes)
+    ious_max = ious.max(axis=1)
+    ious_argmax = ious.argmax(axis=1)
+    sort_inds = np.argsort(-det_bboxes[:, -1])
+    for k, (min_area, max_area) in enumerate(area_ranges):
+        gt_covered = np.zeros(num_gts, dtype=bool)
+        # if no area range is specified, gt_area_ignore is all False
+        if min_area is None:
+            gt_area_ignore = np.zeros_like(gt_ignore, dtype=bool)
+        else:
+            gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * (
+                gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1)
+            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
+        for i in sort_inds:
+            if ious_max[i] >= iou_thr:
+                matched_gt = ious_argmax[i]
+                if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]):
+                    if not gt_covered[matched_gt]:
+                        gt_covered[matched_gt] = True
+                        tp[k, i] = 1
+                    else:
+                        fp[k, i] = 1
+                # otherwise ignore this detected bbox, tp = 0, fp = 0
+            elif min_area is None:
+                fp[k, i] = 1
+            else:
+                bbox = det_bboxes[i, :4]
+                area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)
+                if area >= min_area and area < max_area:
+                    fp[k, i] = 1
+    return tp, fp
+
+
+def get_cls_results(det_results, gt_bboxes, gt_labels, gt_ignore, class_id):
+    """Get det results and gt information of a certain class."""
+    cls_dets = [det[class_id]
+                for det in det_results]  # det bboxes of this class
+    cls_gts = []  # gt bboxes of this class
+    cls_gt_ignore = []
+    for j in range(len(gt_bboxes)):
+        gt_bbox = gt_bboxes[j]
+        cls_inds = (gt_labels[j] == class_id + 1)
+        cls_gt = gt_bbox[cls_inds, :] if gt_bbox.shape[0] > 0 else gt_bbox
+        cls_gts.append(cls_gt)
+        if gt_ignore is None:
+            cls_gt_ignore.append(np.zeros(cls_gt.shape[0], dtype=np.int32))
+        else:
+            cls_gt_ignore.append(gt_ignore[j][cls_inds])
+    return cls_dets, cls_gts, cls_gt_ignore
+
+
+def eval_map(det_results,
+             gt_bboxes,
+             gt_labels,
+             gt_ignore=None,
+             scale_ranges=None,
+             iou_thr=0.5,
+             dataset=None,
+             print_summary=True):
+    """Evaluate mAP of a dataset.
+
+    Args:
+        det_results (list): a list of lists, [[cls1_det, cls2_det, ...], ...]
+        gt_bboxes (list): ground truth bboxes of each image, a list of K*4
+            arrays
+        gt_labels (list): ground truth labels of each image, a list of K
+            arrays
+        gt_ignore (list): gt ignore indicators of each image, a list of K
+            arrays
+        scale_ranges (list, optional): [(min1, max1), (min2, max2), ...]
+        iou_thr (float): IoU threshold
+        dataset (None or str): dataset name, there are minor differences in
+            metrics for different datasets, e.g. "voc07", "imagenet_det", etc.
+ print_summary(bool): whether to print the mAP summary + + Returns: + tuple: (mAP, [dict, dict, ...]) + """ + assert len(det_results) == len(gt_bboxes) == len(gt_labels) + if gt_ignore is not None: + assert len(gt_ignore) == len(gt_labels) + for i in range(len(gt_ignore)): + assert len(gt_labels[i]) == len(gt_ignore[i]) + area_ranges = ([(rg[0]**2, rg[1]**2) for rg in scale_ranges] + if scale_ranges is not None else None) + num_scales = len(scale_ranges) if scale_ranges is not None else 1 + eval_results = [] + num_classes = len(det_results[0]) # positive class num + gt_labels = [ + label if label.ndim == 1 else label[:, 0] for label in gt_labels + ] + for i in range(num_classes): + # get gt and det bboxes of this class + cls_dets, cls_gts, cls_gt_ignore = get_cls_results( + det_results, gt_bboxes, gt_labels, gt_ignore, i) + # calculate tp and fp for each image + tpfp_func = (tpfp_imagenet + if dataset in ['det', 'vid'] else tpfp_default) + tpfp = [ + tpfp_func(cls_dets[j], cls_gts[j], cls_gt_ignore[j], iou_thr, + area_ranges) for j in range(len(cls_dets)) + ] + tp, fp = tuple(zip(*tpfp)) + # calculate gt number of each scale, gts ignored or beyond scale are not counted + num_gts = np.zeros(num_scales, dtype=int) + for j, bbox in enumerate(cls_gts): + if area_ranges is None: + num_gts[0] += np.sum(np.logical_not(cls_gt_ignore[j])) + else: + gt_areas = (bbox[:, 2] - bbox[:, 0] + 1) * ( + bbox[:, 3] - bbox[:, 1] + 1) + for k, (min_area, max_area) in enumerate(area_ranges): + num_gts[k] += np.sum( + np.logical_not(cls_gt_ignore[j]) & + (gt_areas >= min_area) & (gt_areas < max_area)) + # sort all det bboxes by score, also sort tp and fp + cls_dets = np.vstack(cls_dets) + num_dets = cls_dets.shape[0] + sort_inds = np.argsort(-cls_dets[:, -1]) + tp = np.hstack(tp)[:, sort_inds] + fp = np.hstack(fp)[:, sort_inds] + # calculate recall and precision with tp and fp + tp = np.cumsum(tp, axis=1) + fp = np.cumsum(fp, axis=1) + eps = np.finfo(np.float32).eps + recalls = tp / np.maximum(num_gts[:, np.newaxis], eps) + precisions = tp / np.maximum((tp + fp), eps) + # calculate AP + if scale_ranges is None: + recalls = recalls[0, :] + precisions = precisions[0, :] + num_gts = num_gts.item() + mode = 'area' if dataset != 'voc07' else '11points' + ap = average_precision(recalls, precisions, mode) + eval_results.append({ + 'num_gts': num_gts, + 'num_dets': num_dets, + 'recall': recalls, + 'precision': precisions, + 'ap': ap + }) + if scale_ranges is not None: + # shape (num_classes, num_scales) + all_ap = np.vstack([cls_result['ap'] for cls_result in eval_results]) + all_num_gts = np.vstack( + [cls_result['num_gts'] for cls_result in eval_results]) + mean_ap = [ + all_ap[all_num_gts[:, i] > 0, i].mean() + if np.any(all_num_gts[:, i] > 0) else 0.0 + for i in range(num_scales) + ] + else: + aps = [] + for cls_result in eval_results: + if cls_result['num_gts'] > 0: + aps.append(cls_result['ap']) + mean_ap = np.array(aps).mean().item() if aps else 0.0 + if print_summary: + print_map_summary(mean_ap, eval_results, dataset) + + return mean_ap, eval_results + + +def print_map_summary(mean_ap, results, dataset=None): + """Print mAP and results of each class. + + Args: + mean_ap(float): calculated from `eval_map` + results(list): calculated from `eval_map` + dataset(None or str or list): dataset name. 
+ """ + num_scales = len(results[0]['ap']) if isinstance(results[0]['ap'], + np.ndarray) else 1 + num_classes = len(results) + + recalls = np.zeros((num_scales, num_classes), dtype=np.float32) + precisions = np.zeros((num_scales, num_classes), dtype=np.float32) + aps = np.zeros((num_scales, num_classes), dtype=np.float32) + num_gts = np.zeros((num_scales, num_classes), dtype=int) + for i, cls_result in enumerate(results): + if cls_result['recall'].size > 0: + recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1] + precisions[:, i] = np.array( + cls_result['precision'], ndmin=2)[:, -1] + aps[:, i] = cls_result['ap'] + num_gts[:, i] = cls_result['num_gts'] + + if dataset is None: + label_names = [str(i) for i in range(1, num_classes + 1)] + else: + label_names = get_classes(dataset) + + if not isinstance(mean_ap, list): + mean_ap = [mean_ap] + header = ['class', 'gts', 'dets', 'recall', 'precision', 'ap'] + for i in range(num_scales): + table_data = [header] + for j in range(num_classes): + row_data = [ + label_names[j], num_gts[i, j], results[j]['num_dets'], + '{:.3f}'.format(recalls[i, j]), '{:.3f}'.format( + precisions[i, j]), '{:.3f}'.format(aps[i, j]) + ] + table_data.append(row_data) + table_data.append(['mAP', '', '', '', '', '{:.3f}'.format(mean_ap[i])]) + table = AsciiTable(table_data) + table.inner_footing_row_border = True + print(table.table) diff --git a/mmdet/core/eval/recall.py b/mmdet/core/eval/recall.py new file mode 100644 index 00000000000..2a56f42fdef --- /dev/null +++ b/mmdet/core/eval/recall.py @@ -0,0 +1,185 @@ +import numpy as np +from terminaltables import AsciiTable + +from .bbox_overlaps import bbox_overlaps + + +def _recalls(all_ious, proposal_nums, thrs): + + img_num = all_ious.shape[0] + total_gt_num = sum([ious.shape[0] for ious in all_ious]) + + _ious = np.zeros((proposal_nums.size, total_gt_num), dtype=np.float32) + for k, proposal_num in enumerate(proposal_nums): + tmp_ious = np.zeros(0) + for i in range(img_num): + ious = all_ious[i][:, :proposal_num].copy() + gt_ious = np.zeros((ious.shape[0])) + if ious.size == 0: + tmp_ious = np.hstack((tmp_ious, gt_ious)) + continue + for j in range(ious.shape[0]): + gt_max_overlaps = ious.argmax(axis=1) + max_ious = ious[np.arange(0, ious.shape[0]), gt_max_overlaps] + gt_idx = max_ious.argmax() + gt_ious[j] = max_ious[gt_idx] + box_idx = gt_max_overlaps[gt_idx] + ious[gt_idx, :] = -1 + ious[:, box_idx] = -1 + tmp_ious = np.hstack((tmp_ious, gt_ious)) + _ious[k, :] = tmp_ious + + _ious = np.fliplr(np.sort(_ious, axis=1)) + recalls = np.zeros((proposal_nums.size, thrs.size)) + for i, thr in enumerate(thrs): + recalls[:, i] = (_ious >= thr).sum(axis=1) / float(total_gt_num) + + return recalls + + +def set_recall_param(proposal_nums, iou_thrs): + """Check proposal_nums and iou_thrs and set correct format. + """ + if isinstance(proposal_nums, list): + _proposal_nums = np.array(proposal_nums) + elif isinstance(proposal_nums, int): + _proposal_nums = np.array([proposal_nums]) + else: + _proposal_nums = proposal_nums + + if iou_thrs is None: + _iou_thrs = np.array([0.5]) + elif isinstance(iou_thrs, list): + _iou_thrs = np.array(iou_thrs) + elif isinstance(iou_thrs, float): + _iou_thrs = np.array([iou_thrs]) + else: + _iou_thrs = iou_thrs + + return _proposal_nums, _iou_thrs + + +def eval_recalls(gts, + proposals, + proposal_nums=None, + iou_thrs=None, + print_summary=True): + """Calculate recalls. 
+
+    Args:
+        gts (list or ndarray): a list of arrays of shape (n, 4)
+        proposals (list or ndarray): a list of arrays of shape (k, 4) or
+            (k, 5)
+        proposal_nums (int or list of int or ndarray): top N proposals
+        iou_thrs (float or list or ndarray): iou thresholds
+        print_summary (bool): whether to print the recall summary
+
+    Returns:
+        ndarray: recalls of different ious and proposal nums
+    """
+
+    img_num = len(gts)
+    assert img_num == len(proposals)
+
+    proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs)
+
+    all_ious = []
+    for i in range(img_num):
+        if proposals[i].ndim == 2 and proposals[i].shape[1] == 5:
+            scores = proposals[i][:, 4]
+            sort_idx = np.argsort(scores)[::-1]
+            img_proposal = proposals[i][sort_idx, :]
+        else:
+            img_proposal = proposals[i]
+        prop_num = min(img_proposal.shape[0], proposal_nums[-1])
+        if gts[i] is None or gts[i].shape[0] == 0:
+            ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32)
+        else:
+            ious = bbox_overlaps(gts[i], img_proposal[:prop_num, :4])
+        all_ious.append(ious)
+    all_ious = np.array(all_ious)
+    recalls = _recalls(all_ious, proposal_nums, iou_thrs)
+    if print_summary:
+        print_recall_summary(recalls, proposal_nums, iou_thrs)
+    return recalls
+
+
+def print_recall_summary(recalls,
+                         proposal_nums,
+                         iou_thrs,
+                         row_idxs=None,
+                         col_idxs=None):
+    """Print recalls in a table.
+
+    Args:
+        recalls (ndarray): calculated from `bbox_recalls`
+        proposal_nums (ndarray or list): top N proposals
+        iou_thrs (ndarray or list): iou thresholds
+        row_idxs (ndarray): which rows (proposal nums) to print
+        col_idxs (ndarray): which cols (iou thresholds) to print
+    """
+    proposal_nums = np.array(proposal_nums, dtype=np.int32)
+    iou_thrs = np.array(iou_thrs)
+    if row_idxs is None:
+        row_idxs = np.arange(proposal_nums.size)
+    if col_idxs is None:
+        col_idxs = np.arange(iou_thrs.size)
+    row_header = [''] + iou_thrs[col_idxs].tolist()
+    table_data = [row_header]
+    for i, num in enumerate(proposal_nums[row_idxs]):
+        row = [
+            '{:.3f}'.format(val)
+            for val in recalls[row_idxs[i], col_idxs].tolist()
+        ]
+        row.insert(0, num)
+        table_data.append(row)
+    table = AsciiTable(table_data)
+    print(table.table)
+
+
+def plot_num_recall(recalls, proposal_nums):
+    """Plot Proposal_num-Recalls curve.
+
+    Args:
+        recalls (ndarray or list): shape (k,)
+        proposal_nums (ndarray or list): same shape as `recalls`
+    """
+    if isinstance(proposal_nums, np.ndarray):
+        _proposal_nums = proposal_nums.tolist()
+    else:
+        _proposal_nums = proposal_nums
+    if isinstance(recalls, np.ndarray):
+        _recalls = recalls.tolist()
+    else:
+        _recalls = recalls
+
+    import matplotlib.pyplot as plt
+    f = plt.figure()
+    plt.plot([0] + _proposal_nums, [0] + _recalls)
+    plt.xlabel('Proposal num')
+    plt.ylabel('Recall')
+    plt.axis([0, max(_proposal_nums), 0, 1])
+    f.show()
+
+
+def plot_iou_recall(recalls, iou_thrs):
+    """Plot IoU-Recalls curve.
+
+    Args:
+        recalls (ndarray or list): shape (k,)
+        iou_thrs (ndarray or list): same shape as `recalls`
+    """
+    if isinstance(iou_thrs, np.ndarray):
+        _iou_thrs = iou_thrs.tolist()
+    else:
+        _iou_thrs = iou_thrs
+    if isinstance(recalls, np.ndarray):
+        _recalls = recalls.tolist()
+    else:
+        _recalls = recalls
+
+    import matplotlib.pyplot as plt
+    f = plt.figure()
+    plt.plot(_iou_thrs + [1.0], _recalls + [0.])
+    plt.xlabel('IoU')
+    plt.ylabel('Recall')
+    plt.axis([min(_iou_thrs), 1, 0, 1])
+    f.show()
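A minimal end-to-end sketch of the recall evaluation above (not part of the patch): one image, one gt box, and two scored proposals; the gt is already recovered by the top-1 proposal:

    # illustrative only
    import numpy as np
    from mmdet.core.eval import eval_recalls

    gts = [np.array([[0., 0., 9., 9.]])]
    proposals = [np.array([[0., 0., 9., 9., 0.9],
                           [20., 20., 40., 40., 0.8]])]
    recalls = eval_recalls(gts, proposals, proposal_nums=[1, 2],
                           iou_thrs=[0.5], print_summary=False)
    print(recalls)  # [[1.] [1.]]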
diff --git a/mmdet/core/hooks.py b/mmdet/core/hooks.py new file mode 100644 index 00000000000..3347639d51a --- /dev/null +++ b/mmdet/core/hooks.py @@ -0,0 +1,246 @@
+import os
+import os.path as osp
+import shutil
+import time
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.torchpack import Hook
+from mmdet import collate, scatter
+from pycocotools.cocoeval import COCOeval
+
+from .eval import eval_recalls
+
+
+class EmptyCacheHook(Hook):
+
+    def before_epoch(self, runner):
+        torch.cuda.empty_cache()
+
+    def after_epoch(self, runner):
+        torch.cuda.empty_cache()
+
+
+class DistEvalHook(Hook):
+
+    def __init__(self, dataset, interval=1):
+        self.dataset = dataset
+        self.interval = interval
+        self.lock_dir = None
+
+    def _barrier(self, rank, world_size):
+        """Due to some issues with `torch.distributed.barrier()`, we have to
+        implement this ugly barrier function.
+        """
+        if rank == 0:
+            for i in range(1, world_size):
+                tmp = osp.join(self.lock_dir, '{}.pkl'.format(i))
+                while not (osp.exists(tmp)):
+                    time.sleep(1)
+            for i in range(1, world_size):
+                tmp = osp.join(self.lock_dir, '{}.pkl'.format(i))
+                os.remove(tmp)
+        else:
+            tmp = osp.join(self.lock_dir, '{}.pkl'.format(rank))
+            mmcv.dump([], tmp)
+            while osp.exists(tmp):
+                time.sleep(1)
+
+    def before_run(self, runner):
+        self.lock_dir = osp.join(runner.work_dir, '.lock_map_hook')
+        if runner.rank == 0:
+            if osp.exists(self.lock_dir):
+                shutil.rmtree(self.lock_dir)
+            mmcv.mkdir_or_exist(self.lock_dir)
+
+    def after_train_epoch(self, runner):
+        if not self.every_n_epochs(runner, self.interval):
+            return
+        runner.model.eval()
+        results = [None for _ in range(len(self.dataset))]
+        prog_bar = mmcv.ProgressBar(len(self.dataset))
+        for idx in range(runner.rank, len(self.dataset), runner.world_size):
+            data = self.dataset[idx]
+            device_id = torch.cuda.current_device()
+            imgs_data = tuple(
+                scatter(collate([data], samples_per_gpu=1), [device_id])[0])
+
+            # compute output
+            with torch.no_grad():
+                result = runner.model(
+                    *imgs_data,
+                    return_loss=False,
+                    return_bboxes=True,
+                    rescale=True)
+            results[idx] = result
+
+            batch_size = runner.world_size
+            for _ in range(batch_size):
+                prog_bar.update()
+
+        if runner.rank == 0:
+            print('\n')
+            self._barrier(runner.rank, runner.world_size)
+            for i in range(1, runner.world_size):
+                tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i))
+                tmp_results = mmcv.load(tmp_file)
+                for idx in range(i, len(results), runner.world_size):
+                    results[idx] = tmp_results[idx]
+                os.remove(tmp_file)
+            self.evaluate(runner, results)
+        else:
+            tmp_file = osp.join(runner.work_dir,
+                                'temp_{}.pkl'.format(runner.rank))
+            mmcv.dump(results, tmp_file)
+            self._barrier(runner.rank, runner.world_size)
+        self._barrier(runner.rank, runner.world_size)
+
+    def evaluate(self, runner, results):
+        raise NotImplementedError
+
+
+class CocoEvalMixin(object):
+
+    def _xyxy2xywh(self, bbox):
+        _bbox = bbox.tolist()
+        return [
+            _bbox[0],
+            _bbox[1],
+            _bbox[2] - _bbox[0] + 1,
+            _bbox[3] - _bbox[1] + 1,
+        ]
+
+    def det2json(self, dataset, results):
json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + result = results[idx] + for label in range(len(result)): + bboxes = result[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = self._xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + json_results.append(data) + return json_results + + def segm2json(self, dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + det, seg = results[idx] + for label in range(len(det)): + bboxes = det[label] + segms = seg[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = self._xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + segms[i]['counts'] = segms[i]['counts'].decode() + data['segmentation'] = segms[i] + json_results.append(data) + return json_results + + def proposal2json(self, dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + bboxes = results[idx] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = self._xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = 1 + json_results.append(data) + return json_results + + def results2json(self, dataset, results, out_file): + if isinstance(results[0], list): + json_results = self.det2json(dataset, results) + elif isinstance(results[0], tuple): + json_results = self.segm2json(dataset, results) + elif isinstance(results[0], np.ndarray): + json_results = self.proposal2json(dataset, results) + else: + raise TypeError('invalid type of results') + mmcv.dump(json_results, out_file, file_format='json') + + +class DistEvalRecallHook(DistEvalHook): + + def __init__(self, + dataset, + proposal_nums=(100, 300, 1000), + iou_thrs=np.arange(0.5, 0.96, 0.05)): + super(DistEvalRecallHook, self).__init__(dataset) + self.proposal_nums = np.array(proposal_nums, dtype=np.int32) + self.iou_thrs = np.array(iou_thrs, dtype=np.float32) + + def evaluate(self, runner, results): + # official coco evaluation is too slow, here we use our own + # implementation, which may get slightly different results + gt_bboxes = [] + for i in range(len(self.dataset)): + img_id = self.dataset.img_ids[i] + ann_ids = self.dataset.coco.getAnnIds(imgIds=img_id) + ann_info = self.dataset.coco.loadAnns(ann_ids) + if len(ann_info) == 0: + gt_bboxes.append(np.zeros((0, 4))) + continue + bboxes = [] + for ann in ann_info: + if ann.get('ignore', False) or ann['iscrowd']: + continue + x1, y1, w, h = ann['bbox'] + bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1]) + bboxes = np.array(bboxes, dtype=np.float32) + if bboxes.shape[0] == 0: + bboxes = np.zeros((0, 4)) + gt_bboxes.append(bboxes) + + recalls = eval_recalls( + gt_bboxes, + results, + self.proposal_nums, + self.iou_thrs, + print_summary=False) + ar = recalls.mean(axis=1) + for i, num in enumerate(self.proposal_nums): + runner.log_buffer.output['AR@{}'.format(num)] = ar[i] + runner.log_buffer.ready = True + + +class CocoDistEvalmAPHook(DistEvalHook, CocoEvalMixin): + + def evaluate(self, runner, results): + tmp_file = osp.join(runner.work_dir, 'temp_0.json') + self.results2json(self.dataset, results, tmp_file) + + res_types = ['bbox', 'segm'] if runner.model.with_mask else ['bbox'] + cocoGt = self.dataset.coco + cocoDt = cocoGt.loadRes(tmp_file) + imgIds = cocoGt.getImgIds() + for res_type 
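The json records built here follow the COCO result format; a small worked example of the corner-to-width/height conversion done by _xyxy2xywh above (image_id and category_id are made-up values):

    import numpy as np

    def xyxy2xywh(bbox):
        # Same convention as _xyxy2xywh above: inclusive pixel coordinates,
        # hence the +1 when turning corners into width/height.
        x1, y1, x2, y2 = bbox.tolist()[:4]
        return [x1, y1, x2 - x1 + 1, y2 - y1 + 1]

    det = np.array([10., 20., 49., 59., 0.87])
    record = dict(image_id=42, bbox=xyxy2xywh(det),
                  score=float(det[4]), category_id=1)
    # record['bbox'] == [10.0, 20.0, 40.0, 40.0]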
in res_types: + iou_type = res_type + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + field = '{}_mAP'.format(res_type) + runner.log_buffer.output[field] = cocoEval.stats[0] + runner.log_buffer.ready = True + os.remove(tmp_file) + + +class CocoDistCascadeEvalmAPHook(CocoDistEvalmAPHook): + + def evaluate(self, runner, results): + results = [res[-1] for res in results] + super(CocoDistCascadeEvalmAPHook, self).evaluate(runner, results) diff --git a/mmdet/core/mask_ops/__init__.py b/mmdet/core/mask_ops/__init__.py new file mode 100644 index 00000000000..25850cdc62a --- /dev/null +++ b/mmdet/core/mask_ops/__init__.py @@ -0,0 +1,10 @@ +from .segms import (flip_segms, polys_to_mask, mask_to_bbox, + polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting, + rle_mask_nms, rle_masks_to_boxes) +from .utils import split_combined_gt_polys + +__all__ = [ + 'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box', + 'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes', + 'split_combined_gt_polys' +] diff --git a/mmdet/core/mask_ops/segms.py b/mmdet/core/mask_ops/segms.py new file mode 100644 index 00000000000..b2ae6b69a1f --- /dev/null +++ b/mmdet/core/mask_ops/segms.py @@ -0,0 +1,271 @@ +# This file is copied from Detectron. + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +"""Functions for interacting with segmentation masks in the COCO format. +The following terms are used in this module + mask: a binary mask encoded as a 2D numpy array + segm: a segmentation mask in one of the two COCO formats (polygon or RLE) + polygon: COCO's polygon format + RLE: COCO's run length encoding format +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import pycocotools.mask as mask_util + + +def flip_segms(segms, height, width): + """Left/right flip each mask in a list of masks.""" + + def _flip_poly(poly, width): + flipped_poly = np.array(poly) + flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 + return flipped_poly.tolist() + + def _flip_rle(rle, height, width): + if 'counts' in rle and type(rle['counts']) == list: + # Magic RLE format handling painfully discovered by looking at the + # COCO API showAnns function. 
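CocoDistEvalmAPHook drives pycocotools directly; for clarity, the same evaluation loop outside the hook (file paths are placeholders, and results.json must follow the format produced by results2json above):

    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    coco_gt = COCO('annotations/instances_val2017.json')  # placeholder path
    coco_dt = coco_gt.loadRes('results.json')             # placeholder path
    coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.params.imgIds = coco_gt.getImgIds()
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    print(coco_eval.stats[0])  # stats[0] is mAP @ IoU=0.50:0.95, logged above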
+ rle = mask_util.frPyObjects([rle], height, width) + mask = mask_util.decode(rle) + mask = mask[:, ::-1, :] + rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) + return rle + + flipped_segms = [] + for segm in segms: + if type(segm) == list: + # Polygon format + flipped_segms.append([_flip_poly(poly, width) for poly in segm]) + else: + # RLE format + assert type(segm) == dict + flipped_segms.append(_flip_rle(segm, height, width)) + return flipped_segms + + +def polys_to_mask(polygons, height, width): + """Convert from the COCO polygon segmentation format to a binary mask + encoded as a 2D array of data type numpy.float32. The polygon segmentation + is understood to be enclosed inside a height x width image. The resulting + mask is therefore of shape (height, width). + """ + rle = mask_util.frPyObjects(polygons, height, width) + mask = np.array(mask_util.decode(rle), dtype=np.float32) + # Flatten in case polygons was a list + mask = np.sum(mask, axis=2) + mask = np.array(mask > 0, dtype=np.float32) + return mask + + +def mask_to_bbox(mask): + """Compute the tight bounding box of a binary mask.""" + xs = np.where(np.sum(mask, axis=0) > 0)[0] + ys = np.where(np.sum(mask, axis=1) > 0)[0] + + if len(xs) == 0 or len(ys) == 0: + return None + + x0 = xs[0] + x1 = xs[-1] + y0 = ys[0] + y1 = ys[-1] + return np.array((x0, y0, x1, y1), dtype=np.float32) + + +def polys_to_mask_wrt_box(polygons, box, M): + """Convert from the COCO polygon segmentation format to a binary mask + encoded as a 2D array of data type numpy.float32. The polygon segmentation + is understood to be enclosed in the given box and rasterized to an M x M + mask. The resulting mask is therefore of shape (M, M). + """ + w = box[2] - box[0] + h = box[3] - box[1] + + w = np.maximum(w, 1) + h = np.maximum(h, 1) + + polygons_norm = [] + for poly in polygons: + p = np.array(poly, dtype=np.float32) + p[0::2] = (p[0::2] - box[0]) * M / w + p[1::2] = (p[1::2] - box[1]) * M / h + polygons_norm.append(p) + + rle = mask_util.frPyObjects(polygons_norm, M, M) + mask = np.array(mask_util.decode(rle), dtype=np.float32) + # Flatten in case polygons was a list + mask = np.sum(mask, axis=2) + mask = np.array(mask > 0, dtype=np.float32) + return mask + + +def polys_to_boxes(polys): + """Convert a list of polygons into an array of tight bounding boxes.""" + boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) + for i in range(len(polys)): + poly = polys[i] + x0 = min(min(p[::2]) for p in poly) + x1 = max(max(p[::2]) for p in poly) + y0 = min(min(p[1::2]) for p in poly) + y1 = max(max(p[1::2]) for p in poly) + boxes_from_polys[i, :] = [x0, y0, x1, y1] + + return boxes_from_polys + + +def rle_mask_voting(top_masks, + all_masks, + all_dets, + iou_thresh, + binarize_thresh, + method='AVG'): + """Returns new masks (in correspondence with `top_masks`) by combining + multiple overlapping masks coming from the pool of `all_masks`. Two methods + for combining masks are supported: 'AVG' uses a weighted average of + overlapping mask pixels; 'UNION' takes the union of all mask pixels. 
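polys_to_mask boils down to frPyObjects + decode + flatten; a toy run on a made-up square polygon:

    import numpy as np
    import pycocotools.mask as mask_util

    # A 4-point square polygon ([x0, y0, x1, y1, ...]) inside a 10x10 image.
    polygons = [[2., 2., 7., 2., 7., 7., 2., 7.]]
    rles = mask_util.frPyObjects(polygons, 10, 10)
    mask = mask_util.decode(rles)  # shape (10, 10, 1), one channel per poly
    # Same flattening as polys_to_mask: union across polygons.
    mask = (mask.sum(axis=2) > 0).astype(np.float32)
    print(mask.sum())  # area of the rasterized square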
+ """ + if len(top_masks) == 0: + return + + all_not_crowd = [False] * len(all_masks) + top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd) + decoded_all_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks + ] + decoded_top_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks + ] + all_boxes = all_dets[:, :4].astype(np.int32) + all_scores = all_dets[:, 4] + + # Fill box support with weights + mask_shape = decoded_all_masks[0].shape + mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1])) + for k in range(len(all_masks)): + ref_box = all_boxes[k] + x_0 = max(ref_box[0], 0) + x_1 = min(ref_box[2] + 1, mask_shape[1]) + y_0 = max(ref_box[1], 0) + y_1 = min(ref_box[3] + 1, mask_shape[0]) + mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k] + mask_weights = np.maximum(mask_weights, 1e-5) + + top_segms_out = [] + for k in range(len(top_masks)): + # Corner case of empty mask + if decoded_top_masks[k].sum() == 0: + top_segms_out.append(top_masks[k]) + continue + + inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0] + # Only matches itself + if len(inds_to_vote) == 1: + top_segms_out.append(top_masks[k]) + continue + + masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote] + if method == 'AVG': + ws = mask_weights[inds_to_vote] + soft_mask = np.average(masks_to_vote, axis=0, weights=ws) + mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8) + elif method == 'UNION': + # Any pixel that's on joins the mask + soft_mask = np.sum(masks_to_vote, axis=0) + mask = np.array(soft_mask > 1e-5, dtype=np.uint8) + else: + raise NotImplementedError('Method {} is unknown'.format(method)) + rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0] + top_segms_out.append(rle) + + return top_segms_out + + +def rle_mask_nms(masks, dets, thresh, mode='IOU'): + """Performs greedy non-maximum suppression based on an overlap measurement + between masks. The type of measurement is determined by `mode` and can be + either 'IOU' (standard intersection over union) or 'IOMA' (intersection over + mininum area). + """ + if len(masks) == 0: + return [] + if len(masks) == 1: + return [0] + + if mode == 'IOU': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2)) + all_not_crowds = [False] * len(masks) + ious = mask_util.iou(masks, masks, all_not_crowds) + elif mode == 'IOMA': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2)) + all_crowds = [True] * len(masks) + # ious[m1, m2] = area(intersect(m1, m2)) / area(m2) + ious = mask_util.iou(masks, masks, all_crowds) + # ... 
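Both rle_mask_voting and rle_mask_nms above lean on pycocotools' RLE IoU; a tiny check with two overlapping made-up squares:

    import numpy as np
    import pycocotools.mask as mask_util

    def to_rle(mask):
        return mask_util.encode(np.asfortranarray(mask.astype(np.uint8)))

    a = np.zeros((8, 8)); a[0:4, 0:4] = 1
    b = np.zeros((8, 8)); b[0:4, 2:6] = 1
    # iscrowd=False per mask -> standard IoU, as in rle_mask_voting above.
    ious = mask_util.iou([to_rle(a)], [to_rle(b)], [False])
    print(ious)  # intersection 8 / union 24 = 0.333...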
= max(area(intersect(m1, m2)) / area(m2), + # area(intersect(m2, m1)) / area(m1)) + ious = np.maximum(ious, ious.transpose()) + elif mode == 'CONTAINMENT': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2) + # Which measures how much m2 is contained inside m1 + all_crowds = [True] * len(masks) + ious = mask_util.iou(masks, masks, all_crowds) + else: + raise NotImplementedError('Mode {} is unknown'.format(mode)) + + scores = dets[:, 4] + order = np.argsort(-scores) + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + ovr = ious[i, order[1:]] + inds_to_keep = np.where(ovr <= thresh)[0] + order = order[inds_to_keep + 1] + + return keep + + +def rle_masks_to_boxes(masks): + """Computes the bounding box of each mask in a list of RLE encoded masks.""" + if len(masks) == 0: + return [] + + decoded_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks + ] + + def get_bounds(flat_mask): + inds = np.where(flat_mask > 0)[0] + return inds.min(), inds.max() + + boxes = np.zeros((len(decoded_masks), 4)) + keep = [True] * len(decoded_masks) + for i, mask in enumerate(decoded_masks): + if mask.sum() == 0: + keep[i] = False + continue + flat_mask = mask.sum(axis=0) + x0, x1 = get_bounds(flat_mask) + flat_mask = mask.sum(axis=1) + y0, y1 = get_bounds(flat_mask) + boxes[i, :] = (x0, y0, x1, y1) + + return boxes, np.where(keep)[0] diff --git a/mmdet/core/mask_ops/utils.py b/mmdet/core/mask_ops/utils.py new file mode 100644 index 00000000000..2802430007e --- /dev/null +++ b/mmdet/core/mask_ops/utils.py @@ -0,0 +1,35 @@ +import cvbase as cvb +import numpy as np +import pycocotools.mask as mask_utils + +import mmcv + + +def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask): + """Split the combined 1-D polys into masks. + + A mask is represented as a list of polys, and a poly is represented as + a 1-D array. In dataset, all masks are concatenated into a single 1-D + tensor. Here we need to split the tensor into original representations. 
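The suppression loop in rle_mask_nms works on any precomputed IoU matrix; a traced toy example of the same keep-loop:

    import numpy as np

    # Greedy suppression over a precomputed IoU matrix, mirroring the loop
    # in rle_mask_nms above (all values made up).
    ious = np.array([[1.0, 0.8, 0.1],
                     [0.8, 1.0, 0.2],
                     [0.1, 0.2, 1.0]])
    scores = np.array([0.9, 0.7, 0.6])
    thresh = 0.5

    order = np.argsort(-scores)
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        inds_to_keep = np.where(ious[i, order[1:]] <= thresh)[0]
        order = order[inds_to_keep + 1]
    print(keep)  # [0, 2]: mask 1 is suppressed by mask 0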
+ + Args: + gt_polys (list): a list (length = image num) of 1-D tensors + gt_poly_lens (list): a list (length = image num) of poly length + num_polys_per_mask (list): a list (length = image num) of poly number + of each mask + + Returns: + list: a list (length = image num) of list (length = mask num) of + list (length = poly num) of numpy array + """ + mask_polys_list = [] + for img_id in range(len(gt_polys)): + gt_polys_single = gt_polys[img_id].cpu().numpy() + gt_polys_lens_single = gt_poly_lens[img_id].cpu().numpy().tolist() + num_polys_per_mask_single = num_polys_per_mask[ + img_id].cpu().numpy().tolist() + + split_gt_polys = mmcv.slice_list(gt_polys_single, gt_polys_lens_single) + mask_polys = mmcv.slice_list(split_gt_polys, num_polys_per_mask_single) + mask_polys_list.append(mask_polys) + return mask_polys_list diff --git a/mmdet/core/post_processing/__init__.py b/mmdet/core/post_processing/__init__.py new file mode 100644 index 00000000000..1b24a3fc685 --- /dev/null +++ b/mmdet/core/post_processing/__init__.py @@ -0,0 +1,8 @@ +from .bbox_nms import multiclass_nms +from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, + merge_aug_scores, merge_aug_masks) + +__all__ = [ + 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', + 'merge_aug_scores', 'merge_aug_masks' +] diff --git a/mmdet/core/post_processing/bbox_nms.py b/mmdet/core/post_processing/bbox_nms.py new file mode 100644 index 00000000000..f619d2682a0 --- /dev/null +++ b/mmdet/core/post_processing/bbox_nms.py @@ -0,0 +1,54 @@ +import torch + +from mmdet.ops import nms + + +def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_thr, max_num=-1): + """NMS for multi-class bboxes. + + Args: + multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) + multi_scores (Tensor): shape (n, #class) + score_thr (float): bbox threshold, bboxes with scores lower than it + will not be considered. + nms_thr (float): NMS IoU threshold + max_num (int): if there are more than max_num bboxes after NMS, + only top max_num will be kept. + + Returns: + tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels + are 0-based. 
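split_combined_gt_polys undoes the flattening done at dataset time with mmcv.slice_list; a round-trip on plain lists (polygon values made up):

    import mmcv

    # Two masks: the first made of two polys, the second of one.
    polys = [[0., 0., 4., 0., 4., 4.], [1., 1., 3., 1., 3., 3.],
             [5., 5., 9., 5., 9., 9.]]
    poly_lens = [len(p) for p in polys]             # [6, 6, 6]
    num_polys_per_mask = [2, 1]

    flat = sum(polys, [])                           # combined 1-D form
    split_polys = mmcv.slice_list(flat, poly_lens)  # back to per-poly lists
    masks = mmcv.slice_list(split_polys, num_polys_per_mask)
    print(len(masks[0]), len(masks[1]))             # 2 1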
+    """
+    num_classes = multi_scores.shape[1]
+    bboxes, labels = [], []
+    for i in range(1, num_classes):
+        cls_inds = multi_scores[:, i] > score_thr
+        if not cls_inds.any():
+            continue
+        # get bboxes and scores of this class
+        if multi_bboxes.shape[1] == 4:
+            _bboxes = multi_bboxes[cls_inds, :]
+        else:
+            _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
+        _scores = multi_scores[cls_inds, i]
+        cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
+        # perform nms
+        nms_keep = nms(cls_dets, nms_thr)
+        cls_dets = cls_dets[nms_keep, :]
+        cls_labels = multi_bboxes.new_full(
+            (len(nms_keep), ), i - 1, dtype=torch.long)
+        bboxes.append(cls_dets)
+        labels.append(cls_labels)
+    if bboxes:
+        bboxes = torch.cat(bboxes)
+        labels = torch.cat(labels)
+        if max_num > 0 and bboxes.shape[0] > max_num:
+            _, inds = bboxes[:, -1].sort(descending=True)
+            inds = inds[:max_num]
+            bboxes = bboxes[inds]
+            labels = labels[inds]
+    else:
+        bboxes = multi_bboxes.new_zeros((0, 5))
+        labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
+
+    return bboxes, labels
diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py
new file mode 100644
index 00000000000..5d56e481e5a
--- /dev/null
+++ b/mmdet/core/post_processing/merge_augs.py
@@ -0,0 +1,96 @@
+import numpy as np
+import torch
+
+from mmdet.ops import nms
+
+from ..bbox_ops import bbox_mapping_back
+
+
+def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
+    """Merge augmented proposals (multiscale, flip, etc.)
+
+    Args:
+        aug_proposals (list[Tensor]): proposals from different testing
+            schemes, shape (n, 5). Note that they are not rescaled to the
+            original image size.
+        img_metas (list[dict]): image info including "shape_scale" and "flip".
+        rpn_test_cfg (dict): rpn test config.
+
+    Returns:
+        Tensor: shape (n, 5), proposals corresponding to original image scale.
+    """
+    recovered_proposals = []
+    for proposals, img_info in zip(aug_proposals, img_metas):
+        shape_scale = img_info['shape_scale'][0]
+        flip = img_info['flip'][0]
+        _proposals = proposals.clone()
+        _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], shape_scale,
+                                              flip)
+        recovered_proposals.append(_proposals)
+    aug_proposals = torch.cat(recovered_proposals, dim=0)
+    nms_keep = nms(aug_proposals, rpn_test_cfg.nms_thr,
+                   aug_proposals.get_device())
+    merged_proposals = aug_proposals[nms_keep, :]
+    scores = merged_proposals[:, 4]
+    _, order = scores.sort(0, descending=True)
+    num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
+    order = order[:num]
+    merged_proposals = merged_proposals[order, :]
+    return merged_proposals
+
+
+def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
+    """Merge augmented detection bboxes and scores.
+
+    Args:
+        aug_bboxes (list[Tensor]): shape (n, 4*#class)
+        aug_scores (list[Tensor] or None): shape (n, #class)
+        img_metas (list[dict]): image info including "shape_scale" and "flip".
+        rcnn_test_cfg (dict): rcnn test config.
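A usage sketch for multiclass_nms; it needs the compiled mmdet.ops.nms extension, and the import from mmdet.core matches how bbox_head.py imports it later in this patch (all box and score values are made up):

    import torch

    from mmdet.core import multiclass_nms

    # Two boxes, three classes (class 0 is background), shared-box case (n, 4).
    multi_bboxes = torch.tensor([[10., 10., 50., 50.],
                                 [12., 12., 52., 52.]])
    multi_scores = torch.tensor([[0.1, 0.8, 0.1],
                                 [0.2, 0.7, 0.1]])
    det_bboxes, det_labels = multiclass_nms(
        multi_bboxes, multi_scores, score_thr=0.3, nms_thr=0.5, max_num=100)
    # det_bboxes is (k, 5) with scores appended; labels are 0-based, so the
    # one surviving box (IoU of the pair is about 0.82) gets label 0.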
+ + Returns: + tuple: (bboxes, scores) + """ + recovered_bboxes = [] + for bboxes, img_info in zip(aug_bboxes, img_metas): + shape_scale = img_info['shape_scale'][0] + flip = img_info['flip'][0] + bboxes = bbox_mapping_back(bboxes, shape_scale, flip) + recovered_bboxes.append(bboxes) + bboxes = torch.stack(recovered_bboxes).mean(dim=0) + if aug_scores is None: + return bboxes + else: + scores = torch.stack(aug_scores).mean(dim=0) + return bboxes, scores + + +def merge_aug_scores(aug_scores): + """Merge augmented bbox scores.""" + if isinstance(aug_scores[0], torch.Tensor): + return torch.mean(torch.stack(aug_scores), dim=0) + else: + return np.mean(aug_scores, axis=0) + + +def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None): + """Merge augmented mask prediction. + + Args: + aug_masks (list[ndarray]): shape (n, #class, h, w) + img_shapes (list[ndarray]): shape (3, ). + rcnn_test_cfg (dict): rcnn test config. + + Returns: + tuple: (bboxes, scores) + """ + recovered_masks = [ + mask if not img_info['flip'][0] else mask[..., ::-1] + for mask, img_info in zip(aug_masks, img_metas) + ] + if weights is None: + merged_masks = np.mean(recovered_masks, axis=0) + else: + merged_masks = np.average( + np.array(recovered_masks), axis=0, weights=np.array(weights)) + return merged_masks diff --git a/mmdet/core/targets/__init__.py b/mmdet/core/targets/__init__.py new file mode 100644 index 00000000000..b3b2567efff --- /dev/null +++ b/mmdet/core/targets/__init__.py @@ -0,0 +1,5 @@ +from .anchor_target import anchor_target +from .bbox_target import bbox_target +from .mask_target import mask_target + +__all__ = ['anchor_target', 'bbox_target', 'mask_target'] diff --git a/mmdet/core/targets/anchor_target.py b/mmdet/core/targets/anchor_target.py new file mode 100644 index 00000000000..ec2389f9088 --- /dev/null +++ b/mmdet/core/targets/anchor_target.py @@ -0,0 +1,2 @@ +def anchor_target(): + pass diff --git a/mmdet/core/targets/bbox_target.py b/mmdet/core/targets/bbox_target.py new file mode 100644 index 00000000000..49642c22987 --- /dev/null +++ b/mmdet/core/targets/bbox_target.py @@ -0,0 +1,2 @@ +def bbox_target(): + pass diff --git a/mmdet/core/targets/mask_target.py b/mmdet/core/targets/mask_target.py new file mode 100644 index 00000000000..4c330e13b81 --- /dev/null +++ b/mmdet/core/targets/mask_target.py @@ -0,0 +1,2 @@ +def mask_target(): + pass diff --git a/mmdet/datasets/__init__.py b/mmdet/datasets/__init__.py new file mode 100644 index 00000000000..6045c2b0923 --- /dev/null +++ b/mmdet/datasets/__init__.py @@ -0,0 +1,4 @@ +from .coco import CocoDataset +from .collate import * +from .sampler import * +from .transforms import * diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py new file mode 100644 index 00000000000..e0705e79b61 --- /dev/null +++ b/mmdet/datasets/coco.py @@ -0,0 +1,288 @@ +import os.path as osp + +import mmcv +import numpy as np +from pycocotools.coco import COCO +from torch.utils.data import Dataset + +from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform, + Numpy2Tensor) +from .utils import show_ann, random_scale +from .utils import DataContainer as DC + + +def parse_ann_info(ann_info, cat2label, with_mask=True): + """Parse bbox and mask annotation. + + Args: + ann_info (list[dict]): Annotation info of an image. + cat2label (dict): The mapping from category ids to labels. + with_mask (bool): Whether to parse mask annotations. 
+ + Returns: + tuple: gt_bboxes, gt_labels and gt_mask_info + """ + gt_bboxes = [] + gt_labels = [] + gt_bboxes_ignore = [] + # each mask consists of one or several polys, each poly is a list of float. + if with_mask: + gt_mask_polys = [] + gt_poly_lens = [] + for i, ann in enumerate(ann_info): + if ann.get('ignore', False): + continue + x1, y1, w, h = ann['bbox'] + if ann['area'] <= 0 or w < 1 or h < 1: + continue + bbox = [x1, y1, x1 + w - 1, y1 + h - 1] + if ann['iscrowd']: + gt_bboxes_ignore.append(bbox) + else: + gt_bboxes.append(bbox) + gt_labels.append(cat2label[ann['category_id']]) + if with_mask: + # Note polys are not resized + mask_polys = [ + p for p in ann['segmentation'] if len(p) >= 6 + ] # valid polygons have >= 3 points (6 coordinates) + poly_lens = [len(p) for p in mask_polys] + gt_mask_polys.append(mask_polys) + gt_poly_lens.extend(poly_lens) + if gt_bboxes: + gt_bboxes = np.array(gt_bboxes, dtype=np.float32) + gt_labels = np.array(gt_labels, dtype=np.int64) + else: + gt_bboxes = np.zeros((0, 4), dtype=np.float32) + gt_labels = np.array([], dtype=np.int64) + + if gt_bboxes_ignore: + gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) + else: + gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + + ann = dict( + bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) + + if with_mask: + ann['mask_polys'] = gt_mask_polys + ann['poly_lens'] = gt_poly_lens + return ann + + +class CocoDataset(Dataset): + + def __init__(self, + ann_file, + img_prefix, + img_scale, + img_norm_cfg, + size_divisor=None, + proposal_file=None, + num_max_proposals=1000, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True, + test_mode=False, + debug=False): + # path of the data file + self.coco = COCO(ann_file) + # filter images with no annotation during training + if not test_mode: + self.img_ids, self.img_infos = self._filter_imgs() + else: + self.img_ids = self.coco.getImgIds() + self.img_infos = [ + self.coco.loadImgs(idx)[0] for idx in self.img_ids + ] + assert len(self.img_ids) == len(self.img_infos) + # get the mapping from original category ids to labels + self.cat_ids = self.coco.getCatIds() + self.cat2label = { + cat_id: i + 1 + for i, cat_id in enumerate(self.cat_ids) + } + # prefix of images path + self.img_prefix = img_prefix + # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...] 
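A toy run of parse_ann_info showing the xywh-to-inclusive-xyxy conversion and the crowd handling (category id and sizes are made up):

    import numpy as np

    from mmdet.datasets.coco import parse_ann_info

    ann_info = [
        dict(bbox=[10., 20., 30., 40.], area=1200., iscrowd=0, category_id=18),
        dict(bbox=[5., 5., 10., 10.], area=100., iscrowd=1, category_id=18),
    ]
    cat2label = {18: 1}
    ann = parse_ann_info(ann_info, cat2label, with_mask=False)
    # COCO stores [x, y, w, h]; corners become inclusive, hence the -1:
    print(ann['bboxes'])         # [[10. 20. 39. 59.]]
    print(ann['bboxes_ignore'])  # the crowd box, kept separately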
+ self.img_scales = img_scale if isinstance(img_scale, + list) else [img_scale] + assert mmcv.is_list_of(self.img_scales, tuple) + # color channel order and normalize configs + self.img_norm_cfg = img_norm_cfg + # proposals + self.proposals = mmcv.load( + proposal_file) if proposal_file is not None else None + self.num_max_proposals = num_max_proposals + # flip ratio + self.flip_ratio = flip_ratio + assert flip_ratio >= 0 and flip_ratio <= 1 + # padding border to ensure the image size can be divided by + # size_divisor (used for FPN) + self.size_divisor = size_divisor + # with crowd or not, False when using RetinaNet + self.with_crowd = with_crowd + # with mask or not + self.with_mask = with_mask + # with label is False for RPN + self.with_label = with_label + # in test mode or not + self.test_mode = test_mode + # debug mode or not + self.debug = debug + + # set group flag for the sampler + self._set_group_flag() + # transforms + self.img_transform = ImageTransform( + size_divisor=self.size_divisor, **self.img_norm_cfg) + self.bbox_transform = BboxTransform() + self.mask_transform = PolyMaskTransform() + self.numpy2tensor = Numpy2Tensor() + + def __len__(self): + return len(self.img_ids) + + def _filter_imgs(self, min_size=32): + """Filter images too small or without ground truths.""" + img_ids = list(set([_['image_id'] for _ in self.coco.anns.values()])) + valid_ids = [] + img_infos = [] + for i in img_ids: + info = self.coco.loadImgs(i)[0] + if min(info['width'], info['height']) >= min_size: + valid_ids.append(i) + img_infos.append(info) + return valid_ids, img_infos + + def _load_ann_info(self, idx): + img_id = self.img_ids[idx] + ann_ids = self.coco.getAnnIds(imgIds=img_id) + ann_info = self.coco.loadAnns(ann_ids) + return ann_info + + def _set_group_flag(self): + """Set flag according to image aspect ratio. + + Images with aspect ratio greater than 1 will be set as group 1, + otherwise group 0. + """ + self.flag = np.zeros(len(self.img_ids), dtype=np.uint8) + for i in range(len(self.img_ids)): + img_info = self.img_infos[i] + if img_info['width'] / img_info['height'] > 1: + self.flag[i] = 1 + + def _rand_another(self, idx): + pool = np.where(self.flag == self.flag[idx])[0] + return np.random.choice(pool) + + def __getitem__(self, idx): + if self.test_mode: + return self.prepare_test_img(idx) + while True: + img_info = self.img_infos[idx] + ann_info = self._load_ann_info(idx) + + # load image + img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name'])) + if self.debug: + show_ann(self.coco, img, ann_info) + + # load proposals if necessary + if self.proposals is not None: + proposals = self.proposals[idx][:self.num_max_proposals, :4] + # TODO: Handle empty proposals properly. Currently images with + # no proposals are just ignored, but they can be used for + # training in concept. 
+ if len(proposals) == 0: + idx = self._rand_another(idx) + continue + + ann = parse_ann_info(ann_info, self.cat2label, self.with_mask) + gt_bboxes = ann['bboxes'] + gt_labels = ann['labels'] + gt_bboxes_ignore = ann['bboxes_ignore'] + # skip the image if there is no valid gt bbox + if len(gt_bboxes) == 0: + idx = self._rand_another(idx) + continue + + # apply transforms + flip = True if np.random.rand() < self.flip_ratio else False + img_scale = random_scale(self.img_scales) # sample a scale + img, img_shape, scale_factor = self.img_transform( + img, img_scale, flip) + if self.proposals is not None: + proposals = self.bbox_transform(proposals, img_shape, + scale_factor, flip) + gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, + flip) + gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape, + scale_factor, flip) + + if self.with_mask: + gt_mask_polys, gt_poly_lens, num_polys_per_mask = \ + self.mask_transform( + ann['mask_polys'], ann['poly_lens'], + img_info['height'], img_info['width'], flip) + + ori_shape = (img_info['height'], img_info['width']) + img_meta = dict( + ori_shape=DC(ori_shape), + img_shape=DC(img_shape), + scale_factor=DC(scale_factor), + flip=DC(flip)) + + data = dict( + img=DC(img, stack=True), + img_meta=img_meta, + gt_bboxes=DC(gt_bboxes)) + if self.proposals is not None: + data['proposals'] = DC(proposals) + if self.with_label: + data['gt_labels'] = DC(gt_labels) + if self.with_crowd: + data['gt_bboxes_ignore'] = DC(gt_bboxes_ignore) + if self.with_mask: + data['gt_mask_polys'] = DC(gt_mask_polys) + data['gt_poly_lens'] = DC(gt_poly_lens) + data['num_polys_per_mask'] = DC(num_polys_per_mask) + return data + + def prepare_test_img(self, idx): + """Prepare an image for testing (multi-scale and flipping)""" + img_info = self._load_info(idx, with_ann=False) + img_file = osp.join(self.prefix, img_info['file_name']) + proposal = (self.proposals[idx][:, :4] + if self.proposals is not None else None) + + def prepare_single(img_file, scale, flip, proposal=None): + img_np, shape_scale_np = self.img_transform(img_file, scale, flip) + img, shape_scale = self.numpy2tensor(img_np, shape_scale_np) + img_meta = dict(shape_scale=shape_scale, flip=flip) + if proposal is not None: + proposal = self.bbox_transform(proposal, shape_scale_np, flip) + proposal = self.numpy2tensor(proposal) + return img, img_meta, proposal + + imgs = [] + img_metas = [] + proposals = [] + for scale in self.img_scale: + img, img_meta, proposal = prepare_single(img_file, scale, False, + proposal) + imgs.append(img) + img_metas.append(img_meta) + proposals.append(proposal) + if self.flip_ratio > 0: + img, img_meta, prop = prepare_single(img_file, scale, True, + proposal) + imgs.append(img) + img_metas.append(img_meta) + proposals.append(prop) + if self.proposals is None: + return imgs, img_metas + else: + return imgs, img_metas, proposals diff --git a/mmdet/datasets/collate.py b/mmdet/datasets/collate.py new file mode 100644 index 00000000000..44117d6f2d0 --- /dev/null +++ b/mmdet/datasets/collate.py @@ -0,0 +1,57 @@ +import collections + +import torch +import torch.nn.functional as F +from torch.utils.data.dataloader import default_collate + +from .utils import DataContainer + +# https://github.com/pytorch/pytorch/issues/973 +import resource +rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) +resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) + +__all__ = ['collate'] + + +def collate(batch, samples_per_gpu=1): + + if not isinstance(batch, collections.Sequence): + 
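The stacking branch of collate, continued below, pads each group of images to a common size before stacking; a standalone version of that step (tensor sizes made up):

    import torch
    import torch.nn.functional as F

    # Pad two CHW images to the max H/W in the group, as collate does below.
    imgs = [torch.zeros(3, 600, 800), torch.zeros(3, 640, 720)]
    h = max(img.size(1) for img in imgs)
    w = max(img.size(2) for img in imgs)
    padded = [F.pad(img, (0, w - img.size(2), 0, h - img.size(1)), value=0)
              for img in imgs]
    batch = torch.stack(padded)  # default_collate on tensors amounts to this
    print(batch.shape)           # torch.Size([2, 3, 640, 800])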
raise TypeError("{} is not supported.".format(batch.dtype)) + + if isinstance(batch[0], DataContainer): + assert len(batch) % samples_per_gpu == 0 + stacked = [] + if batch[0].stack: + for i in range(0, len(batch), samples_per_gpu): + assert isinstance(batch[i].data, torch.Tensor) + # TODO: handle tensors other than 3d + assert batch[i].dim() == 3 + c, h, w = batch[0].size() + for sample in batch[i:i + samples_per_gpu]: + assert c == sample.size(0) + h = max(h, sample.size(1)) + w = max(w, sample.size(2)) + padded_samples = [ + F.pad( + sample.data, + (0, w - sample.size(2), 0, h - sample.size(1)), + value=sample.padding_value) + for sample in batch[i:i + samples_per_gpu] + ] + stacked.append(default_collate(padded_samples)) + else: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer(stacked, batch[0].stack, batch[0].padding_value) + elif isinstance(batch[0], collections.Sequence): + transposed = zip(*batch) + return [collate(samples, samples_per_gpu) for samples in transposed] + elif isinstance(batch[0], collections.Mapping): + return { + key: collate([d[key] for d in batch], samples_per_gpu) + for key in batch[0] + } + else: + return default_collate(batch) diff --git a/mmdet/datasets/sampler.py b/mmdet/datasets/sampler.py new file mode 100644 index 00000000000..74089821bf1 --- /dev/null +++ b/mmdet/datasets/sampler.py @@ -0,0 +1,134 @@ +from __future__ import division + +import math +import torch +import numpy as np + +from torch.distributed import get_world_size, get_rank +from torch.utils.data.sampler import Sampler + +__all__ = ['GroupSampler', 'DistributedGroupSampler'] + + +class GroupSampler(Sampler): + + def __init__(self, dataset, samples_per_gpu=1): + assert hasattr(dataset, 'flag') + self.dataset = dataset + self.samples_per_gpu = samples_per_gpu + self.flag = dataset.flag.astype(np.int64) + self.group_sizes = np.bincount(self.flag) + self.num_samples = 0 + for i, size in enumerate(self.group_sizes): + self.num_samples += int(np.ceil( + size / self.samples_per_gpu)) * self.samples_per_gpu + + def __iter__(self): + indices = [] + for i, size in enumerate(self.group_sizes): + if size == 0: + continue + indice = np.where(self.flag == i)[0] + assert len(indice) == size + np.random.shuffle(indice) + num_extra = int(np.ceil(size / self.samples_per_gpu) + ) * self.samples_per_gpu - len(indice) + indice = np.concatenate([indice, indice[:num_extra]]) + indices.append(indice) + indices = np.concatenate(indices) + indices = [ + indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu] + for i in np.random.permutation( + range(len(indices) // self.samples_per_gpu)) + ] + indices = np.concatenate(indices) + indices = torch.from_numpy(indices).long() + assert len(indices) == self.num_samples + return iter(indices) + + def __len__(self): + return self.num_samples + + +class DistributedGroupSampler(Sampler): + """Sampler that restricts data loading to a subset of the dataset. + It is especially useful in conjunction with + :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each + process can pass a DistributedSampler instance as a DataLoader sampler, + and load a subset of the original dataset that is exclusive to it. + .. note:: + Dataset is assumed to be of constant size. + Arguments: + dataset: Dataset used for sampling. + num_replicas (optional): Number of processes participating in + distributed training. 
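GroupSampler rounds each aspect-ratio group up to a multiple of samples_per_gpu by repeating indices, so every batch is homogeneous; the arithmetic on made-up group sizes:

    import numpy as np

    samples_per_gpu = 4
    group_sizes = np.array([10, 7])  # e.g. 10 landscape, 7 portrait images
    num_samples = sum(int(np.ceil(size / samples_per_gpu)) * samples_per_gpu
                      for size in group_sizes)
    print(num_samples)  # 12 + 8 = 20, with 2 + 1 duplicated indices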
+ rank (optional): Rank of the current process within num_replicas. + """ + + def __init__(self, + dataset, + samples_per_gpu=1, + num_replicas=None, + rank=None): + if num_replicas is None: + num_replicas = get_world_size() + if rank is None: + rank = get_rank() + self.dataset = dataset + self.samples_per_gpu = samples_per_gpu + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + + assert hasattr(self.dataset, 'flag') + self.flag = self.dataset.flag + self.group_sizes = np.bincount(self.flag) + + self.num_samples = 0 + for i, j in enumerate(self.group_sizes): + self.num_samples += int( + math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / + self.num_replicas)) * self.samples_per_gpu + self.total_size = self.num_samples * self.num_replicas + + def __iter__(self): + # deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.epoch) + + indices = [] + for i, size in enumerate(self.group_sizes): + if size > 0: + indice = np.where(self.flag == i)[0] + assert len(indice) == size + indice = indice[list(torch.randperm(int(size), + generator=g))].tolist() + extra = int( + math.ceil( + size * 1.0 / self.samples_per_gpu / self.num_replicas) + ) * self.samples_per_gpu * self.num_replicas - len(indice) + indice += indice[:extra] + indices += indice + + assert len(indices) == self.total_size + + indices = [ + indices[j] for i in list( + torch.randperm( + len(indices) // self.samples_per_gpu, generator=g)) + for j in range(i * self.samples_per_gpu, (i + 1) * + self.samples_per_gpu) + ] + + # subsample + offset = self.num_samples * self.rank + indices = indices[offset:offset + self.num_samples] + assert len(indices) == self.num_samples + + return iter(indices) + + def __len__(self): + return self.num_samples + + def set_epoch(self, epoch): + self.epoch = epoch diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py new file mode 100644 index 00000000000..81f3a627d0d --- /dev/null +++ b/mmdet/datasets/transforms.py @@ -0,0 +1,208 @@ +import mmcv +# import cvbase as cvb +import numpy as np +import torch + +from mmdet.core import segms + +__all__ = [ + 'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor' +] + + +class ImageTransform(object): + """Preprocess an image + 1. rescale the image to expected size + 2. normalize the image + 3. flip the image (if needed) + 4. pad the image (if needed) + 5. 
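Seeding the generator with the epoch is what keeps the shuffle identical across ranks; trainers are expected to call set_epoch before each epoch. A quick check of the mechanism:

    import torch

    g = torch.Generator()
    g.manual_seed(3)  # epoch number as the seed
    perm_a = torch.randperm(10, generator=g)
    g.manual_seed(3)
    perm_b = torch.randperm(10, generator=g)
    assert torch.equal(perm_a, perm_b)  # identical on every rank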
transpose to (c, h, w)
+    """
+
+    def __init__(self,
+                 mean=(0, 0, 0),
+                 std=(1, 1, 1),
+                 to_rgb=True,
+                 size_divisor=None):
+        self.mean = np.array(mean, dtype=np.float32)
+        self.std = np.array(std, dtype=np.float32)
+        self.to_rgb = to_rgb
+        self.size_divisor = size_divisor
+
+    def __call__(self, img, scale, flip=False):
+        img, scale_factor = mmcv.imrescale(img, scale, True)
+        img_shape = img.shape
+        img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
+        if flip:
+            img = mmcv.imflip(img)
+        if self.size_divisor is not None:
+            img = mmcv.impad_to_multiple(img, self.size_divisor)
+        img = img.transpose(2, 0, 1)
+        return img, img_shape, scale_factor
+
+    # img, scale = cvb.resize_keep_ar(img_or_path, max_long_edge,
+    #                                 max_short_edge, True)
+    # shape_scale = np.array(img.shape + (scale, ), dtype=np.float32)
+    # if flip:
+    #     img = img[:, ::-1, :].copy()
+    # if self.color_order == 'RGB':
+    #     img = cvb.bgr2rgb(img)
+    # img = img.astype(np.float32)
+    # img -= self.color_mean
+    # img /= self.color_std
+    # if self.size_divisor is None:
+    #     padded_img = img
+    # else:
+    #     pad_h = int(np.ceil(
+    #         img.shape[0] / self.size_divisor)) * self.size_divisor
+    #     pad_w = int(np.ceil(
+    #         img.shape[1] / self.size_divisor)) * self.size_divisor
+    #     padded_img = cvb.pad_img(img, (pad_h, pad_w), pad_val=0)
+    # padded_img = padded_img.transpose(2, 0, 1)
+    # return padded_img, shape_scale
+
+
+class ImageCrop(object):
+    """crop image patches and resize patches into fixed size
+    1. (read and) flip image (if needed)
+    2. crop image patches according to given bboxes
+    3. resize patches into fixed size (default 224x224)
+    4. normalize the image (if needed)
+    5. transpose to (c, h, w) (if needed)
+    """
+
+    def __init__(self,
+                 normalize=True,
+                 transpose=True,
+                 color_order='RGB',
+                 color_mean=(0, 0, 0),
+                 color_std=(1, 1, 1)):
+        self.normalize = normalize
+        self.transpose = transpose
+
+        assert color_order in ['RGB', 'BGR']
+        self.color_order = color_order
+        self.color_mean = np.array(color_mean, dtype=np.float32)
+        self.color_std = np.array(color_std, dtype=np.float32)
+
+    def __call__(self,
+                 img_or_path,
+                 bboxes,
+                 crop_size,
+                 scale_ratio=1.0,
+                 flip=False):
+        img = cvb.read_img(img_or_path)
+        if flip:
+            img = img[:, ::-1, :].copy()
+        crop_imgs = cvb.crop_img(
+            img,
+            bboxes[:, :4],
+            scale_ratio=scale_ratio,
+            pad_fill=self.color_mean)
+        processed_crop_imgs_list = []
+        for i in range(len(crop_imgs)):
+            crop_img = crop_imgs[i]
+            crop_img = cvb.resize(crop_img, crop_size)
+            crop_img = crop_img.astype(np.float32)
+            crop_img -= self.color_mean
+            crop_img /= self.color_std
+            processed_crop_imgs_list.append(crop_img)
+        processed_crop_imgs = np.stack(processed_crop_imgs_list, axis=0)
+        processed_crop_imgs = processed_crop_imgs.transpose(0, 3, 1, 2)
+        return processed_crop_imgs
+
+
+class BboxTransform(object):
+    """Preprocess gt bboxes
+    1. rescale bboxes according to image size
+    2. flip bboxes (if needed)
+    3.
pad the first dimension to `max_num_gts` + """ + + def __init__(self, max_num_gts=None): + self.max_num_gts = max_num_gts + + def __call__(self, bboxes, img_shape, scale_factor, flip=False): + gt_bboxes = bboxes * scale_factor + if flip: + gt_bboxes = mmcv.bbox_flip(gt_bboxes, img_shape) + if self.max_num_gts is None: + return gt_bboxes + else: + num_gts = gt_bboxes.shape[0] + padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) + padded_bboxes[:num_gts, :] = gt_bboxes + return padded_bboxes + + +class PolyMaskTransform(object): + + def __init__(self): + pass + + def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): + """ + Args: + gt_mask_polys(list): a list of masks, each mask is a list of polys, + each poly is a list of numbers + gt_poly_lens(list): a list of int, indicating the size of each poly + """ + if flip: + gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w) + num_polys_per_mask = np.array( + [len(mask_polys) for mask_polys in gt_mask_polys], dtype=np.int64) + gt_poly_lens = np.array(gt_poly_lens, dtype=np.int64) + gt_mask_polys = [ + np.concatenate(mask_polys).astype(np.float32) + for mask_polys in gt_mask_polys + ] + gt_mask_polys = np.concatenate(gt_mask_polys) + return gt_mask_polys, gt_poly_lens, num_polys_per_mask + + +class MaskTransform(object): + """Preprocess masks + 1. resize masks to expected size and stack to a single array + 2. flip the masks (if needed) + 3. pad the masks (if needed) + """ + + def __init__(self, max_num_gts, pad_size=None): + self.max_num_gts = max_num_gts + self.pad_size = pad_size + + def __call__(self, masks, img_size, flip=False): + max_long_edge = max(img_size) + max_short_edge = min(img_size) + masks = [ + cvb.resize_keep_ar( + mask, + max_long_edge, + max_short_edge, + interpolation=cvb.INTER_NEAREST) for mask in masks + ] + masks = np.stack(masks, axis=0) + if flip: + masks = masks[:, ::-1, :] + if self.pad_size is None: + pad_h = masks.shape[1] + pad_w = masks.shape[2] + else: + pad_size = self.pad_size if self.pad_size > 0 else max_long_edge + pad_h = pad_w = pad_size + padded_masks = np.zeros( + (self.max_num_gts, pad_h, pad_w), dtype=masks.dtype) + padded_masks[:masks.shape[0], :masks.shape[1], :masks.shape[2]] = masks + return padded_masks + + +class Numpy2Tensor(object): + + def __init__(self): + pass + + def __call__(self, *args): + if len(args) == 1: + return torch.from_numpy(args[0]) + else: + return tuple([torch.from_numpy(array) for array in args]) diff --git a/mmdet/datasets/utils/__init__.py b/mmdet/datasets/utils/__init__.py new file mode 100644 index 00000000000..de3ea43bdf4 --- /dev/null +++ b/mmdet/datasets/utils/__init__.py @@ -0,0 +1,2 @@ +from .data_container import DataContainer +from .misc import * diff --git a/mmdet/datasets/utils/data_container.py b/mmdet/datasets/utils/data_container.py new file mode 100644 index 00000000000..c27beab37bb --- /dev/null +++ b/mmdet/datasets/utils/data_container.py @@ -0,0 +1,80 @@ +import functools +from collections import Sequence + +import mmcv +import numpy as np +import torch + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. 
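PolyMaskTransform above flattens the per-mask polygon structure into flat arrays that collate can batch (split_combined_gt_polys reverses this later); the shapes on a made-up example:

    import numpy as np

    gt_mask_polys = [[np.array([0., 0., 4., 0., 4., 4.], np.float32)],
                     [np.array([1., 1., 3., 1., 3., 3.], np.float32),
                      np.array([5., 5., 9., 5., 9., 9.], np.float32)]]
    num_polys_per_mask = np.array([len(m) for m in gt_mask_polys])  # [1 2]
    flat = np.concatenate([np.concatenate(m) for m in gt_mask_polys])
    print(num_polys_per_mask, flat.shape)  # [1 2] (18,)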
+ """ + if isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, torch.Tensor): + return data + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError('type {} cannot be converted to tensor.'.format( + type(data))) + + +def assert_tensor_type(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not isinstance(args[0].data, torch.Tensor): + raise AttributeError('{} has no attribute {} for type {}'.format( + args[0].__class__.__name__, func.__name__, args[0].datatype)) + return func(*args, **kwargs) + + return wrapper + + +class DataContainer(object): + + def __init__(self, data, stack=False, padding_value=0): + if isinstance(data, list): + self._data = data + else: + self._data = to_tensor(data) + self._stack = stack + self._padding_value = padding_value + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, repr(self.data)) + + @property + def data(self): + return self._data + + @property + def datatype(self): + if isinstance(self.data, torch.Tensor): + return self.data.type() + else: + return type(self.data) + + @property + def stack(self): + return self._stack + + @property + def padding_value(self): + return self._padding_value + + @assert_tensor_type + def size(self, *args, **kwargs): + return self.data.size(*args, **kwargs) + + @assert_tensor_type + def dim(self): + return self.data.dim() diff --git a/mmdet/datasets/utils/misc.py b/mmdet/datasets/utils/misc.py new file mode 100644 index 00000000000..419c11ad084 --- /dev/null +++ b/mmdet/datasets/utils/misc.py @@ -0,0 +1,62 @@ +import mmcv + +import matplotlib.pyplot as plt +import numpy as np +import pycocotools.mask as maskUtils + + +def random_scale(img_scales, mode='range'): + """Randomly select a scale from a list of scales or scale ranges. + + Args: + img_scales (list[tuple]): Image scale or scale range. + mode (str): "range" or "value". + + Returns: + tuple: Sampled image scale. 
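How DataContainer is meant to be used, matching the fields coco.py wraps above (tensor sizes are made up):

    import torch

    from mmdet.datasets.utils import DataContainer as DC

    img = DC(torch.zeros(3, 600, 800), stack=True)  # padded & stacked by collate
    flip = DC(False)  # bool -> LongTensor([0]) via to_tensor
    print(img.stack, img.dim())      # True 3
    print(flip.stack, flip.datatype) # False torch.LongTensor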
+ """ + num_scales = len(img_scales) + if num_scales == 1: # fixed scale is specified + img_scale = img_scales[0] + elif num_scales == 2: # randomly sample a scale + if mode == 'range': + img_scale_long = [max(s) for s in img_scales] + img_scale_short = [min(s) for s in img_scales] + long_edge = np.random.randint( + min(img_scale_long), + max(img_scale_long) + 1) + short_edge = np.random.randint( + min(img_scale_short), + max(img_scale_short) + 1) + img_scale = (long_edge, short_edge) + elif mode == 'value': + img_scale = img_scales[np.random.randint(num_scales)] + else: + if mode != 'value': + raise ValueError( + 'Only "value" mode supports more than 2 image scales') + img_scale = img_scales[np.random.randint(num_scales)] + return img_scale + + +def show_ann(coco, img, ann_info): + plt.imshow(mmcv.bgr2rgb(img)) + plt.axis('off') + coco.showAnns(ann_info) + plt.show() + + +def draw_bbox_and_segm(img, results, dataset, score_thr=0.5): + bbox_results, segm_results = results + hi_bboxes = [] + for cls_bboxes, cls_segms in zip(bbox_results, segm_results): + if len(cls_bboxes) == 0: + hi_bboxes.append(cls_bboxes) + continue + inds = np.where(cls_bboxes[:, -1] > score_thr)[0] + hi_bboxes.append(cls_bboxes[inds, :]) + color_mask = np.random.random((1, 3)) + for i in inds: + mask = maskUtils.decode(cls_segms[i]).astype(np.bool) + img[mask] = img[mask] * 0.5 + color_mask * 0.5 + mmcv.draw_bboxes_with_label(np.ascontiguousarray(img), hi_bboxes, dataset) diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/mmdet/models/backbones/__init__.py b/mmdet/models/backbones/__init__.py new file mode 100644 index 00000000000..f9e21e83d14 --- /dev/null +++ b/mmdet/models/backbones/__init__.py @@ -0,0 +1 @@ +from .resnet import resnet diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py new file mode 100644 index 00000000000..f8203accd4b --- /dev/null +++ b/mmdet/models/backbones/resnet.py @@ -0,0 +1,325 @@ +import math +import torch.nn as nn +import torch.utils.checkpoint as cp +from torchpack import load_checkpoint + + +def conv3x3(in_planes, out_planes, stride=1, dilation=1): + "3x3 convolution with padding" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='fb'): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride, dilation) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='fb', + with_cp=False): + """Bottleneck block + if style is "fb", the stride-two layer is the 3x3 conv layer, + if style is "msra", the stride-two layer is the first 1x1 conv layer + """ + super(Bottleneck, self).__init__() + assert style in ['fb', 'msra'] + if style == 'fb': 
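random_scale usage with the two-scale 'range' mode used for multiscale training; the scale values here are the common COCO ones, not something this patch prescribes:

    import numpy as np

    from mmdet.datasets.utils import random_scale

    # 'range' mode samples the long/short edges uniformly between the bounds.
    np.random.seed(0)
    scale = random_scale([(1333, 640), (1333, 800)], mode='range')
    print(scale)  # a (long_edge, short_edge) tuple, short edge in [640, 800]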
+ conv1_stride = 1 + conv2_stride = stride + else: + conv1_stride = stride + conv2_stride = 1 + self.conv1 = nn.Conv2d( + inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.bn1 = nn.BatchNorm2d(planes) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + def forward(self, x): + + def _inner_forward(x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +def make_res_layer(block, + inplanes, + planes, + blocks, + stride=1, + dilation=1, + style='fb', + with_cp=False): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + dilation, + downsample, + style=style, + with_cp=with_cp)) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) + + return nn.Sequential(*layers) + + +class ResHead(nn.Module): + + def __init__(self, block, num_blocks, stride=2, dilation=1, style='fb'): + self.layer4 = make_res_layer( + block, + 1024, + 512, + num_blocks, + stride=stride, + dilation=dilation, + style=style) + + def forward(self, x): + return self.layer4(x) + + +class ResNet(nn.Module): + + def __init__(self, + block, + layers, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + style='fb', + sync_bn=False, + with_cp=False): + super(ResNet, self).__init__() + if not len(layers) == len(strides) == len(dilations): + raise ValueError( + 'The number of layers, strides and dilations must be equal, ' + 'but found have {} layers, {} strides and {} dilations'.format( + len(layers), len(strides), len(dilations))) + assert max(out_indices) < len(layers) + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.style = style + self.sync_bn = sync_bn + self.inplanes = 64 + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.res_layers = [] + for i, num_blocks in enumerate(layers): + + stride = strides[i] + dilation = dilations[i] + + layer_name = 'layer{}'.format(i + 1) + planes = 64 * 2**i + res_layer = make_res_layer( + block, + self.inplanes, + planes, + num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + with_cp=with_cp) + self.inplanes = planes * block.expansion + setattr(self, layer_name, res_layer) + self.res_layers.append(layer_name) + self.feat_dim = 
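The 'fb' vs 'msra' distinction above only moves where the stride-two conv sits; assuming the module imports cleanly, this can be checked directly:

    from mmdet.models.backbones.resnet import Bottleneck

    # style='fb'  : 1x1 (s=1) -> 3x3 (s=2) -> 1x1   (a.k.a. ResNet-B)
    # style='msra': 1x1 (s=2) -> 3x3 (s=1) -> 1x1   (original MSRA ResNet)
    block = Bottleneck(256, 64, stride=2, style='fb')
    print(block.conv1.stride, block.conv2.stride)  # (1, 1) (2, 2)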
block.expansion * 64 * 2**(len(layers) - 1) + self.with_cp = with_cp + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + load_checkpoint(self, pretrained, strict=False) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + nn.init.normal_(m.weight, 0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(ResNet, self).train(mode) + if not self.sync_bn: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if mode and self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for param in self.bn1.parameters(): + param.requires_grad = False + self.bn1.eval() + self.bn1.weight.requires_grad = False + self.bn1.bias.requires_grad = False + for i in range(1, self.frozen_stages + 1): + mod = getattr(self, 'layer{}'.format(i)) + mod.eval() + for param in mod.parameters(): + param.requires_grad = False + + +resnet_cfg = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) +} + + +def resnet(depth, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(2, ), + frozen_stages=-1, + style='fb', + sync_bn=False, + with_cp=False): + """Constructs a ResNet model. 
+ + Args: + depth (int): depth of resnet, from {18, 34, 50, 101, 152} + num_stages (int): num of resnet stages, normally 4 + strides (list): strides of the first block of each stage + dilations (list): dilation of each stage + out_indices (list): output from which stages + """ + if depth not in resnet_cfg: + raise KeyError('invalid depth {} for resnet'.format(depth)) + block, layers = resnet_cfg[depth] + model = ResNet(block, layers[:num_stages], strides, dilations, out_indices, + frozen_stages, style, sync_bn, with_cp) + return model diff --git a/mmdet/models/bbox_heads/__init__.py b/mmdet/models/bbox_heads/__init__.py new file mode 100644 index 00000000000..e6709af6176 --- /dev/null +++ b/mmdet/models/bbox_heads/__init__.py @@ -0,0 +1,3 @@ +from .bbox_head import BBoxHead + +__all__ = ['BBoxHead'] diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py new file mode 100644 index 00000000000..9f0c188a459 --- /dev/null +++ b/mmdet/models/bbox_heads/bbox_head.py @@ -0,0 +1,123 @@ +import torch.nn as nn +import torch.nn.functional as F + +from mmdet.core import (bbox_transform_inv, bbox_target, multiclass_nms, + weighted_cross_entropy, weighted_smoothl1, accuracy) + + +class BBoxHead(nn.Module): + """Simplest RoI head, with only two fc layers for classification and + regression respectively""" + + def __init__(self, + exclude_mal_box=True, + with_avg_pool=False, + with_cls=True, + with_reg=True, + roi_feat_size=7, + in_channels=256, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False): + super(BBoxHead, self).__init__() + assert with_cls or with_reg + self.with_avg_pool = with_avg_pool + self.with_cls = with_cls + self.with_reg = with_reg + self.roi_feat_size = roi_feat_size + self.in_channels = in_channels + self.num_classes = num_classes + self.target_means = target_means + self.target_stds = target_stds + self.reg_class_agnostic = reg_class_agnostic + self.exclude_mal_box = exclude_mal_box + + in_channels = self.in_channels + if self.with_avg_pool: + self.avg_pool = nn.AvgPool2d(roi_feat_size) + else: + in_channels *= (self.roi_feat_size * self.roi_feat_size) + if self.with_cls: + self.fc_cls = nn.Linear(in_channels, num_classes) + if self.with_reg: + out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes + self.fc_reg = nn.Linear(in_channels, out_dim_reg) + self.debug_imgs = None + + def init_weights(self): + if self.with_cls: + nn.init.normal_(self.fc_cls.weight, 0, 0.01) + nn.init.constant_(self.fc_cls.bias, 0) + if self.with_reg: + nn.init.normal_(self.fc_reg.weight, 0, 0.001) + nn.init.constant_(self.fc_reg.bias, 0) + + def forward(self, x): + if self.with_avg_pool: + x = self.avg_pool(x) + x = x.view(x.size(0), -1) + cls_score = self.fc_cls(x) if self.with_cls else None + bbox_pred = self.fc_reg(x) if self.with_reg else None + return cls_score, bbox_pred + + def bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes, + pos_gt_labels, rcnn_train_cfg): + reg_num_classes = 1 if self.reg_class_agnostic else self.num_classes + cls_reg_targets = bbox_target( + pos_proposals, + neg_proposals, + pos_gt_bboxes, + pos_gt_labels, + self.target_means, + self.target_stds, + rcnn_train_cfg, + reg_num_classes, + debug_imgs=self.debug_imgs) + return cls_reg_targets + + def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets, + bbox_weights): + losses = dict() + if cls_score is not None: + losses['loss_cls'] = weighted_cross_entropy( + cls_score, labels, label_weights) + 
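Putting the factory together, a hedged construction sketch; it assumes the package and its torchpack dependency import cleanly, and the input size is arbitrary:

    import torch

    from mmdet.models.backbones import resnet

    # ResNet-50, C4 output only (out_indices=(2, )), stem + stage 1 frozen.
    model = resnet(depth=50, out_indices=(2, ), frozen_stages=1)
    model.init_weights(pretrained=None)
    feat = model(torch.randn(1, 3, 224, 224))
    print(feat.shape)  # torch.Size([1, 1024, 14, 14]), stride 16 at stage 3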
losses['acc'] = accuracy(cls_score, labels)
+        if bbox_pred is not None:
+            losses['loss_reg'] = weighted_smoothl1(
+                bbox_pred,
+                bbox_targets,
+                bbox_weights,
+                ave_factor=bbox_targets.size(0))
+        return losses
+
+    def get_det_bboxes(self,
+                       rois,
+                       cls_score,
+                       bbox_pred,
+                       img_shape,
+                       rescale=False,
+                       nms_cfg=None):
+        if isinstance(cls_score, list):
+            cls_score = sum(cls_score) / float(len(cls_score))
+        scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
+
+        if bbox_pred is not None:
+            bboxes = bbox_transform_inv(rois[:, 1:], bbox_pred,
+                                        self.target_means, self.target_stds,
+                                        img_shape)
+        else:
+            bboxes = rois[:, 1:]
+            # TODO: add clip here
+
+        if rescale:
+            bboxes /= img_shape[-1]
+
+        if nms_cfg is None:
+            return bboxes, scores
+        else:
+            det_bboxes, det_labels = multiclass_nms(
+                bboxes, scores, nms_cfg.score_thr, nms_cfg.nms_thr,
+                nms_cfg.max_per_img)
+
+            return det_bboxes, det_labels
diff --git a/mmdet/models/builder.py b/mmdet/models/builder.py
new file mode 100644
index 00000000000..f109d851397
--- /dev/null
+++ b/mmdet/models/builder.py
@@ -0,0 +1,47 @@
+import mmcv
+from torch import nn
+
+from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads,
+               mask_heads)
+
+__all__ = [
+    'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor',
+    'build_bbox_head', 'build_mask_head'
+]
+
+
+def _build_module(cfg, parent=None):
+    return cfg if isinstance(cfg, nn.Module) else mmcv.obj_from_dict(
+        cfg, parent)
+
+
+def build(cfg, parent=None):
+    if isinstance(cfg, list):
+        modules = [_build_module(cfg_, parent) for cfg_ in cfg]
+        return nn.Sequential(*modules)
+    else:
+        return _build_module(cfg, parent)
+
+
+def build_backbone(cfg):
+    return build(cfg, backbones)
+
+
+def build_neck(cfg):
+    return build(cfg, necks)
+
+
+def build_rpn_head(cfg):
+    return build(cfg, rpn_heads)
+
+
+def build_roi_extractor(cfg):
+    return build(cfg, roi_extractors)
+
+
+def build_bbox_head(cfg):
+    return build(cfg, bbox_heads)
+
+
+def build_mask_head(cfg):
+    return build(cfg, mask_heads)
diff --git a/mmdet/models/common/__init__.py b/mmdet/models/common/__init__.py
new file mode 100644
index 00000000000..1a611c25106
--- /dev/null
+++ b/mmdet/models/common/__init__.py
@@ -0,0 +1,4 @@
+from .conv_module import ConvModule
+from .norm import build_norm_layer
+
+__all__ = ['ConvModule', 'build_norm_layer']
diff --git a/mmdet/models/common/conv_module.py b/mmdet/models/common/conv_module.py
new file mode 100644
index 00000000000..25121972da2
--- /dev/null
+++ b/mmdet/models/common/conv_module.py
@@ -0,0 +1,95 @@
+import warnings
+
+import torch.nn as nn
+
+from .norm import build_norm_layer
+
+
+class ConvModule(nn.Module):
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 normalize=None,
+                 activation='relu',
+                 inplace=True,
+                 activate_last=True):
+        super(ConvModule, self).__init__()
+        self.with_norm = normalize is not None
+        self.with_activation = activation is not None
+        self.with_bias = bias
+        self.activation = activation
+        self.activate_last = activate_last
+
+        if self.with_norm and self.with_bias:
+            warnings.warn('ConvModule has norm and bias at the same time')
+
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias=bias)
+
+        self.in_channels = self.conv.in_channels
+        self.out_channels = self.conv.out_channels
+        self.kernel_size = self.conv.kernel_size
+        self.stride = self.conv.stride
+        self.padding =
self.conv.padding
+        self.dilation = self.conv.dilation
+        self.transposed = self.conv.transposed
+        self.output_padding = self.conv.output_padding
+        self.groups = self.conv.groups
+
+        if self.with_norm:
+            # self.norm_type, self.norm_params = parse_norm(normalize)
+            # assert self.norm_type in [None, 'BN', 'SyncBN', 'GN', 'SN']
+            # self.Norm2d = norm_cfg[self.norm_type]
+            if self.activate_last:
+                self.norm = build_norm_layer(normalize, out_channels)
+                # self.norm = self.Norm2d(out_channels, **self.norm_params)
+            else:
+                self.norm = build_norm_layer(normalize, in_channels)
+                # self.norm = self.Norm2d(in_channels, **self.norm_params)
+
+        if self.with_activation:
+            assert activation in ['relu'], 'Only ReLU is supported.'
+            if self.activation == 'relu':
+                self.activate = nn.ReLU(inplace=inplace)
+
+        # Use MSRA (Kaiming) init by default
+        self.init_weights()
+
+    def init_weights(self):
+        nonlinearity = 'relu' if self.activation is None else self.activation
+        nn.init.kaiming_normal_(
+            self.conv.weight, mode='fan_out', nonlinearity=nonlinearity)
+        if self.with_bias:
+            nn.init.constant_(self.conv.bias, 0)
+        if self.with_norm:
+            nn.init.constant_(self.norm.weight, 1)
+            nn.init.constant_(self.norm.bias, 0)
+
+    def forward(self, x, activate=True, norm=True):
+        if self.activate_last:
+            x = self.conv(x)
+            if norm and self.with_norm:
+                x = self.norm(x)
+            if activate and self.with_activation:
+                x = self.activate(x)
+        else:
+            if norm and self.with_norm:
+                x = self.norm(x)
+            if activate and self.with_activation:
+                x = self.activate(x)
+            x = self.conv(x)
+        return x
diff --git a/mmdet/models/common/norm.py b/mmdet/models/common/norm.py
new file mode 100644
index 00000000000..7b82cd046e8
--- /dev/null
+++ b/mmdet/models/common/norm.py
@@ -0,0 +1,17 @@
+import torch.nn as nn
+
+norm_cfg = {'BN': nn.BatchNorm2d, 'SyncBN': None, 'GN': None}
+
+
+def build_norm_layer(cfg, num_features):
+    assert isinstance(cfg, dict) and 'type' in cfg
+    cfg_ = cfg.copy()
+    cfg_.setdefault('eps', 1e-5)
+    layer_type = cfg_.pop('type')
+
+    if layer_type not in norm_cfg:
+        raise KeyError('Unrecognized norm type {}'.format(layer_type))
+    elif norm_cfg[layer_type] is None:
+        raise NotImplementedError
+
+    return norm_cfg[layer_type](num_features, **cfg_)
diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py
new file mode 100644
index 00000000000..6d80c9d9b10
--- /dev/null
+++ b/mmdet/models/detectors/rpn.py
@@ -0,0 +1,100 @@
+import torch.nn as nn
+
+from mmdet.core import tensor2imgs, merge_aug_proposals, bbox_mapping
+from ..
import builder + + +class RPN(nn.Module): + + def __init__(self, + backbone, + neck, + rpn_head, + rpn_train_cfg, + rpn_test_cfg, + pretrained=None): + super(RPN, self).__init__() + self.backbone = builder.build_backbone(backbone) + self.neck = builder.build_neck(neck) if neck is not None else None + self.rpn_head = builder.build_rpn_head(rpn_head) + self.rpn_train_cfg = rpn_train_cfg + self.rpn_test_cfg = rpn_test_cfg + self.init_weights(pretrained=pretrained) + + def init_weights(self, pretrained=None): + if pretrained is not None: + print('load model from: {}'.format(pretrained)) + self.backbone.init_weights(pretrained=pretrained) + if self.neck is not None: + self.neck.init_weights() + self.rpn_head.init_weights() + + def forward(self, + img, + img_meta, + gt_bboxes=None, + return_loss=True, + return_bboxes=False, + rescale=False): + if not return_loss: + return self.test(img, img_meta, rescale) + + img_shapes = img_meta['shape_scale'] + + if self.rpn_train_cfg.get('debug', False): + self.rpn_head.debug_imgs = tensor2imgs(img) + + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + rpn_outs = self.rpn_head(x) + + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, + self.rpn_train_cfg) + losses = self.rpn_head.loss(*rpn_loss_inputs) + return losses + + def test(self, imgs, img_metas, rescale=False): + """Test w/ or w/o augmentations.""" + assert isinstance(imgs, list) and isinstance(img_metas, list) + assert len(imgs) == len(img_metas) + img_per_gpu = imgs[0].size(0) + assert img_per_gpu == 1 + if len(imgs) == 1: + return self.simple_test(imgs[0], img_metas[0], rescale) + else: + return self.aug_test(imgs, img_metas, rescale) + + def simple_test(self, img, img_meta, rescale=False): + img_shapes = img_meta['shape_scale'] + # get feature maps + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + rpn_outs = self.rpn_head(x) + proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) + proposals = self.rpn_head.get_proposals(*proposal_inputs)[0] + if rescale: + proposals[:, :4] /= img_shapes[0][-1] + return proposals.cpu().numpy() + + def aug_test(self, imgs, img_metas, rescale=False): + aug_proposals = [] + for img, img_meta in zip(imgs, img_metas): + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + rpn_outs = self.rpn_head(x) + proposal_inputs = rpn_outs + (img_meta['shape_scale'], + self.rpn_test_cfg) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + assert len(proposal_list) == 1 + aug_proposals.append(proposal_list[0]) # len(proposal_list) = 1 + merged_proposals = merge_aug_proposals(aug_proposals, img_metas, + self.rpn_test_cfg) + if not rescale: + img_shape = img_metas[0]['shape_scale'][0] + flip = img_metas[0]['flip'][0] + merged_proposals[:, :4] = bbox_mapping(merged_proposals[:, :4], + img_shape, flip) + return merged_proposals.cpu().numpy() diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py new file mode 100644 index 00000000000..0c057d606fb --- /dev/null +++ b/mmdet/models/detectors/two_stage.py @@ -0,0 +1,329 @@ +import torch +import torch.nn as nn + +from .. 
import builder
+from mmdet.core.utils import tensor2imgs
+from mmdet.core import (bbox2roi, bbox_mapping, split_combined_gt_polys,
+                        bbox_sampling, multiclass_nms, merge_aug_proposals,
+                        merge_aug_bboxes, merge_aug_masks, bbox2result)
+
+
+class TwoStageDetector(nn.Module):
+
+    def __init__(self,
+                 backbone,
+                 neck,
+                 rpn_head,
+                 roi_block,
+                 bbox_head,
+                 rpn_train_cfg,
+                 rpn_test_cfg,
+                 rcnn_train_cfg,
+                 rcnn_test_cfg,
+                 mask_block=None,
+                 mask_head=None,
+                 pretrained=None):
+        super(TwoStageDetector, self).__init__()
+        self.backbone = builder.build_backbone(backbone)
+        self.neck = builder.build_neck(neck) if neck is not None else None
+        self.rpn_head = builder.build_rpn_head(rpn_head)
+        self.bbox_roi_extractor = builder.build_roi_extractor(roi_block)
+        self.bbox_head = builder.build_bbox_head(bbox_head)
+        self.mask_roi_extractor = builder.build_roi_extractor(mask_block) if (
+            mask_block is not None) else None
+        self.mask_head = builder.build_mask_head(mask_head) if (
+            mask_head is not None) else None
+        self.with_mask = self.mask_head is not None
+
+        self.rpn_train_cfg = rpn_train_cfg
+        self.rpn_test_cfg = rpn_test_cfg
+        self.rcnn_train_cfg = rcnn_train_cfg
+        self.rcnn_test_cfg = rcnn_test_cfg
+        self.init_weights(pretrained=pretrained)
+
+    def init_weights(self, pretrained=None):
+        if pretrained is not None:
+            print('load model from: {}'.format(pretrained))
+        self.backbone.init_weights(pretrained=pretrained)
+        if self.neck is not None:
+            if isinstance(self.neck, nn.Sequential):
+                for m in self.neck:
+                    m.init_weights()
+            else:
+                self.neck.init_weights()
+        self.rpn_head.init_weights()
+        self.bbox_roi_extractor.init_weights()
+        self.bbox_head.init_weights()
+        if self.mask_roi_extractor is not None:
+            self.mask_roi_extractor.init_weights()
+        if self.mask_head is not None:
+            self.mask_head.init_weights()
+
+    def forward(self,
+                img,
+                img_meta,
+                gt_bboxes=None,
+                gt_labels=None,
+                gt_ignore=None,
+                gt_polys=None,
+                gt_poly_lens=None,
+                num_polys_per_mask=None,
+                return_loss=True,
+                return_bboxes=False,
+                rescale=False):
+        if not return_loss:
+            return self.test(img, img_meta, rescale)
+
+        if not self.with_mask:
+            assert (gt_polys is None and gt_poly_lens is None
+                    and num_polys_per_mask is None)
+        else:
+            assert (gt_polys is not None and gt_poly_lens is not None
+                    and num_polys_per_mask is not None)
+            gt_polys = split_combined_gt_polys(gt_polys, gt_poly_lens,
+                                               num_polys_per_mask)
+
+        if self.rpn_train_cfg.get('debug', False):
+            self.rpn_head.debug_imgs = tensor2imgs(img)
+        if self.rcnn_train_cfg.get('debug', False):
+            self.bbox_head.debug_imgs = tensor2imgs(img)
+            if self.mask_head is not None:
+                self.mask_head.debug_imgs = tensor2imgs(img)
+
+        img_shapes = img_meta['shape_scale']
+
+        x = self.backbone(img)
+        if self.neck is not None:
+            x = self.neck(x)
+
+        rpn_outs = self.rpn_head(x)
+        proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg)
+        proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
+
+        (pos_inds, neg_inds, pos_proposals, neg_proposals,
+         pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) = bbox_sampling(
+             proposal_list, gt_bboxes, gt_ignore, gt_labels,
+             self.rcnn_train_cfg)
+
+        labels, label_weights, bbox_targets, bbox_weights = \
+            self.bbox_head.bbox_target(
+                pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
+                self.rcnn_train_cfg)
+
+        rois = bbox2roi([
+            torch.cat([pos, neg], dim=0)
+            for pos, neg in zip(pos_proposals, neg_proposals)
+        ])
+        # TODO: a more flexible way to configure feat maps
+        roi_feats = self.bbox_roi_extractor(
x[:self.bbox_roi_extractor.num_inputs], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + + losses = dict() + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, + self.rpn_train_cfg) + rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) + losses.update(rpn_losses) + + loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, + label_weights, bbox_targets, + bbox_weights) + losses.update(loss_bbox) + + if self.with_mask: + mask_targets = self.mask_head.mask_target( + pos_proposals, pos_assigned_gt_inds, gt_polys, img_shapes, + self.rcnn_train_cfg) + pos_rois = bbox2roi(pos_proposals) + mask_feats = self.mask_roi_extractor( + x[:self.mask_roi_extractor.num_inputs], pos_rois) + mask_pred = self.mask_head(mask_feats) + losses['loss_mask'] = self.mask_head.loss(mask_pred, mask_targets, + torch.cat(pos_gt_labels)) + return losses + + def test(self, imgs, img_metas, rescale=False): + """Test w/ or w/o augmentations.""" + assert isinstance(imgs, list) and isinstance(img_metas, list) + assert len(imgs) == len(img_metas) + img_per_gpu = imgs[0].size(0) + assert img_per_gpu == 1 + if len(imgs) == 1: + return self.simple_test(imgs[0], img_metas[0], rescale) + else: + return self.aug_test(imgs, img_metas, rescale) + + def simple_test_bboxes(self, x, img_meta, rescale=False): + """Test only det bboxes without augmentation.""" + + img_shapes = img_meta['shape_scale'] + rpn_outs = self.rpn_head(x) + proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + + rois = bbox2roi(proposal_list) + roi_feats = self.bbox_roi_extractor( + x[:len(self.bbox_roi_extractor.featmap_strides)], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + # image shape of the first image in the batch (only one) + img_shape = img_shapes[0] + det_bboxes, det_labels = self.bbox_head.get_det_bboxes( + rois, + cls_score, + bbox_pred, + img_shape, + rescale=rescale, + nms_cfg=self.rcnn_test_cfg) + return det_bboxes, det_labels + + def simple_test_mask(self, + x, + img_meta, + det_bboxes, + det_labels, + rescale=False): + # image shape of the first image in the batch (only one) + img_shape = img_meta['shape_scale'][0] + if det_bboxes.shape[0] == 0: + segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] + else: + # if det_bboxes is rescaled to the original image size, we need to + # rescale it back to the testing scale to obtain RoIs. + _bboxes = (det_bboxes[:, :4] * img_shape[-1] + if rescale else det_bboxes) + mask_rois = bbox2roi([_bboxes]) + mask_feats = self.mask_roi_extractor( + x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) + mask_pred = self.mask_head(mask_feats) + segm_result = self.mask_head.get_seg_masks( + mask_pred, det_bboxes, det_labels, img_shape, + self.rcnn_test_cfg, rescale) + return segm_result + + def simple_test(self, img, img_meta, rescale=False): + """Test without augmentation.""" + # get feature maps + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + det_bboxes, det_labels = self.simple_test_bboxes( + x, img_meta, rescale=rescale) + bbox_result = bbox2result(det_bboxes, det_labels, + self.bbox_head.num_classes) + if not self.with_mask: + return bbox_result + + segm_result = self.simple_test_mask( + x, img_meta, det_bboxes, det_labels, rescale=rescale) + + return bbox_result, segm_result + + def aug_test_bboxes(self, imgs, img_metas): + """Test with augmentations for det bboxes.""" + # step 1: get RPN proposals for augmented images, apply NMS to the + # union of all proposals. 
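+        # The merged proposals are in original-image coordinates; step 2
+        # therefore maps them back into each augmented view (bbox_mapping)
+        # before extracting RoI features.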
+        aug_proposals = []
+        for img, img_meta in zip(imgs, img_metas):
+            x = self.backbone(img)
+            if self.neck is not None:
+                x = self.neck(x)
+            rpn_outs = self.rpn_head(x)
+            proposal_inputs = rpn_outs + (img_meta['shape_scale'],
+                                          self.rpn_test_cfg)
+            proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
+            assert len(proposal_list) == 1
+            aug_proposals.append(proposal_list[0])  # len(proposal_list) = 1
+        # after merging, proposals will be rescaled to the original image size
+        merged_proposals = merge_aug_proposals(aug_proposals, img_metas,
+                                               self.rpn_test_cfg)
+        # step 2: Given merged proposals, predict bboxes for augmented images,
+        # output the union of these bboxes.
+        aug_bboxes = []
+        aug_scores = []
+        for img, img_meta in zip(imgs, img_metas):
+            # only one image in the batch
+            img_shape = img_meta['shape_scale'][0]
+            flip = img_meta['flip'][0]
+            proposals = bbox_mapping(merged_proposals[:, :4], img_shape, flip)
+            rois = bbox2roi([proposals])
+            # recompute feature maps to save GPU memory
+            x = self.backbone(img)
+            if self.neck is not None:
+                x = self.neck(x)
+            roi_feats = self.bbox_roi_extractor(
+                x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
+            cls_score, bbox_pred = self.bbox_head(roi_feats)
+            bboxes, scores = self.bbox_head.get_det_bboxes(
+                rois,
+                cls_score,
+                bbox_pred,
+                img_shape,
+                rescale=False,
+                nms_cfg=None)
+            aug_bboxes.append(bboxes)
+            aug_scores.append(scores)
+        # after merging, bboxes will be rescaled to the original image size
+        merged_bboxes, merged_scores = merge_aug_bboxes(
+            aug_bboxes, aug_scores, img_metas, self.rcnn_test_cfg)
+        det_bboxes, det_labels = multiclass_nms(
+            merged_bboxes, merged_scores, self.rcnn_test_cfg.score_thr,
+            self.rcnn_test_cfg.nms_thr, self.rcnn_test_cfg.max_per_img)
+        return det_bboxes, det_labels
+
+    def aug_test_mask(self,
+                      imgs,
+                      img_metas,
+                      det_bboxes,
+                      det_labels,
+                      rescale=False):
+        # step 3: Given merged bboxes, predict masks for augmented images,
+        # scores of masks are averaged across augmented images.
+        if rescale:
+            _det_bboxes = det_bboxes
+        else:
+            _det_bboxes = det_bboxes.clone()
+            _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1]
+        if det_bboxes.shape[0] == 0:
+            segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
+        else:
+            aug_masks = []
+            for img, img_meta in zip(imgs, img_metas):
+                img_shape = img_meta['shape_scale'][0]
+                flip = img_meta['flip'][0]
+                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, flip)
+                mask_rois = bbox2roi([_bboxes])
+                x = self.backbone(img)
+                if self.neck is not None:
+                    x = self.neck(x)
+                mask_feats = self.mask_roi_extractor(
+                    x[:len(self.mask_roi_extractor.featmap_strides)],
+                    mask_rois)
+                mask_pred = self.mask_head(mask_feats)
+                # convert to numpy array to save memory
+                aug_masks.append(mask_pred.sigmoid().cpu().numpy())
+            merged_masks = merge_aug_masks(aug_masks, img_metas,
+                                           self.rcnn_test_cfg)
+            segm_result = self.mask_head.get_seg_masks(
+                merged_masks, _det_bboxes, det_labels,
+                img_metas[0]['shape_scale'][0], self.rcnn_test_cfg, rescale)
+        return segm_result
+
+    def aug_test(self, imgs, img_metas, rescale=False):
+        """Test with augmentations.
+        If rescale is False, the returned bboxes and masks will fit the scale
+        of imgs[0].
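+
+        An illustrative call (``detector`` stands for a constructed
+        TwoStageDetector, and ``imgs``/``img_metas`` are the per-augmentation
+        batches produced by the test data pipeline):
+
+            >>> results = detector.aug_test(imgs, img_metas, rescale=True)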
+ """ + # aug test det bboxes + det_bboxes, det_labels = self.aug_test_bboxes(imgs, img_metas) + if rescale: + _det_bboxes = det_bboxes + else: + _det_bboxes = det_bboxes.clone() + _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1] + bbox_result = bbox2result(_det_bboxes, det_labels, + self.bbox_head.num_classes) + if not self.with_mask: + return bbox_result + segm_result = self.aug_test_mask( + imgs, img_metas, det_bboxes, det_labels, rescale=rescale) + return bbox_result, segm_result diff --git a/mmdet/models/mask_heads/__init__.py b/mmdet/models/mask_heads/__init__.py new file mode 100644 index 00000000000..a21ae9add5a --- /dev/null +++ b/mmdet/models/mask_heads/__init__.py @@ -0,0 +1,3 @@ +from .fcn_mask_head import FCNMaskHead + +__all__ = ['FCNMaskHead'] diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py new file mode 100644 index 00000000000..28865a68f00 --- /dev/null +++ b/mmdet/models/mask_heads/fcn_mask_head.py @@ -0,0 +1,175 @@ +import mmcv +import numpy as np +import pycocotools.mask as mask_util +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp + +from ..common import ConvModule +from mmdet.core import mask_target, mask_cross_entropy + + +class FCNMaskHead(nn.Module): + + def __init__(self, + num_convs=4, + roi_feat_size=14, + in_channels=256, + conv_kernel_size=3, + conv_out_channels=256, + upsample_method='deconv', + upsample_ratio=2, + num_classes=81, + class_agnostic=False, + with_cp=False, + normalize=None): + super(FCNMaskHead, self).__init__() + if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']: + raise ValueError( + 'Invalid upsample method {}, accepted methods ' + 'are "deconv", "nearest", "bilinear"'.format(upsample_method)) + self.num_convs = num_convs + self.roi_feat_size = roi_feat_size # WARN: not used and reserved + self.in_channels = in_channels + self.conv_kernel_size = conv_kernel_size + self.conv_out_channels = conv_out_channels + self.upsample_method = upsample_method + self.upsample_ratio = upsample_ratio + self.num_classes = num_classes + self.class_agnostic = class_agnostic + self.normalize = normalize + self.with_bias = normalize is None + self.with_cp = with_cp + + self.convs = nn.ModuleList() + for i in range(self.num_convs): + in_channels = (self.in_channels + if i == 0 else self.conv_out_channels) + padding = (self.conv_kernel_size - 1) // 2 + self.convs.append( + ConvModule( + in_channels, + self.conv_out_channels, + 3, + padding=padding, + normalize=normalize, + bias=self.with_bias)) + if self.upsample_method is None: + self.upsample = None + elif self.upsample_method == 'deconv': + self.upsample = nn.ConvTranspose2d( + self.conv_out_channels, + self.conv_out_channels, + self.upsample_ratio, + stride=self.upsample_ratio) + else: + self.upsample = nn.Upsample( + scale_factor=self.upsample_ratio, mode=self.upsample_method) + + out_channels = 1 if self.class_agnostic else self.num_classes + self.conv_logits = nn.Conv2d(self.conv_out_channels, out_channels, 1) + self.relu = nn.ReLU(inplace=True) + self.debug_imgs = None + + def init_weights(self): + for m in [self.upsample, self.conv_logits]: + if m is None: + continue + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu') + nn.init.constant_(m.bias, 0) + + def convs_forward(self, x): + + def m_lvl_convs_forward(x): + for conv in self.convs[1:-1]: + x = conv(x) + return x + + if self.num_convs > 0: + x = self.convs[0](x) + if self.num_convs > 1: + if self.with_cp and x.requires_grad: + x = 
cp.checkpoint(m_lvl_convs_forward, x)
+                else:
+                    x = m_lvl_convs_forward(x)
+                x = self.convs[-1](x)
+        return x
+
+    def forward(self, x):
+        x = self.convs_forward(x)
+        if self.upsample is not None:
+            x = self.upsample(x)
+            if self.upsample_method == 'deconv':
+                x = self.relu(x)
+        mask_pred = self.conv_logits(x)
+        return mask_pred
+
+    def mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks,
+                    img_shapes, rcnn_train_cfg):
+        mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,
+                                   gt_masks, img_shapes, rcnn_train_cfg)
+        return mask_targets
+
+    def loss(self, mask_pred, mask_targets, labels):
+        loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels)
+        return loss_mask
+
+    def get_seg_masks(self,
+                      mask_pred,
+                      det_bboxes,
+                      det_labels,
+                      img_shape,
+                      rcnn_test_cfg,
+                      ori_scale,
+                      rescale=True):
+        """Get segmentation masks from mask_pred and bboxes.
+        Args:
+            mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
+                For single-scale testing, mask_pred is the direct output of
+                model, whose type is Tensor, while for multi-scale testing,
+                it will be converted to numpy array outside of this method.
+            det_bboxes (Tensor): shape (n, 4/5)
+            det_labels (Tensor): shape (n, )
+            img_shape (Tensor): shape (3, )
+            rcnn_test_cfg (dict): rcnn testing config
+            ori_scale (dict): original image size, with 'height' and 'width'
+            rescale (bool): whether rescale masks to original image size
+        Returns:
+            list[list]: encoded masks
+        """
+        if isinstance(mask_pred, torch.Tensor):
+            mask_pred = mask_pred.sigmoid().cpu().numpy()
+        assert isinstance(mask_pred, np.ndarray)
+        cls_segms = [[] for _ in range(self.num_classes - 1)]
+        bboxes = det_bboxes.cpu().numpy()[:, :4]
+        labels = det_labels.cpu().numpy() + 1
+        scale_factor = img_shape[-1] if rescale else 1.0
+        img_h = ori_scale['height'] if rescale else np.round(
+            ori_scale['height'].item() * img_shape[-1].item()).astype(np.int32)
+        img_w = ori_scale['width'] if rescale else np.round(
+            ori_scale['width'].item() * img_shape[-1].item()).astype(np.int32)
+
+        for i in range(bboxes.shape[0]):
+            bbox = (bboxes[i, :] / float(scale_factor)).astype(int)
+            label = labels[i]
+            w = bbox[2] - bbox[0] + 1
+            h = bbox[3] - bbox[1] + 1
+            w = max(w, 1)
+            h = max(h, 1)
+
+            if not self.class_agnostic:
+                mask_pred_ = mask_pred[i, label, :, :]
+            else:
+                mask_pred_ = mask_pred[i, 0, :, :]
+
+            im_mask = np.zeros((img_h, img_w), dtype=np.float32)
+
+            im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.resize(
+                mask_pred_, (w, h))
+            # im_mask = cv2.resize(im_mask, (img_w, img_h))
+            im_mask = np.array(
+                im_mask > rcnn_test_cfg.mask_thr_binary, dtype=np.uint8)
+            rle = mask_util.encode(
+                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
+            cls_segms[label - 1].append(rle)
+        return cls_segms
diff --git a/mmdet/models/misc.py b/mmdet/models/misc.py
new file mode 100644
index 00000000000..ad52b587ac1
--- /dev/null
+++ b/mmdet/models/misc.py
@@ -0,0 +1,9 @@
+from functools import partial
+
+from six.moves import map, zip
+
+
+def multi_apply(func, *args, **kwargs):
+    pfunc = partial(func, **kwargs) if kwargs else func
+    map_results = map(pfunc, *args)
+    return tuple(map(list, zip(*map_results)))
diff --git a/mmdet/models/necks/__init__.py b/mmdet/models/necks/__init__.py
new file mode 100644
index 00000000000..0093021ebac
--- /dev/null
+++ b/mmdet/models/necks/__init__.py
@@ -0,0 +1,3 @@
+from .fpn import FPN
+
+__all__ = ['FPN']
diff --git a/mmdet/models/necks/fpn.py b/mmdet/models/necks/fpn.py
new file mode 100644
index 00000000000..c4734e18621
--- /dev/null
+++ b/mmdet/models/necks/fpn.py
@@ -0,0 +1,125 @@
+import torch.nn as
nn +import torch.nn.functional as F +from ..common import ConvModule +from ..weight_init import xavier_init + + +class FPN(nn.Module): + + def __init__(self, + in_channels, + out_channels, + num_outs, + start_level=0, + end_level=-1, + add_extra_convs=False, + normalize=None, + activation=None): + super(FPN, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.num_outs = num_outs + self.activation = activation + self.with_bias = normalize is None + + if end_level == -1: + self.backbone_end_level = self.num_ins + assert num_outs >= self.num_ins - start_level + else: + # if end_level < inputs, no extra level is allowed + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + assert num_outs == end_level - start_level + self.start_level = start_level + self.end_level = end_level + self.add_extra_convs = add_extra_convs + + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + + for i in range(self.start_level, self.backbone_end_level): + l_conv = ConvModule( + in_channels[i], + out_channels, + 1, + normalize=normalize, + bias=self.with_bias, + activation=self.activation, + inplace=False) + fpn_conv = ConvModule( + out_channels, + out_channels, + 3, + padding=1, + normalize=normalize, + bias=self.with_bias, + activation=self.activation, + inplace=False) + + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + # lvl_id = i - self.start_level + # setattr(self, 'lateral_conv{}'.format(lvl_id), l_conv) + # setattr(self, 'fpn_conv{}'.format(lvl_id), fpn_conv) + + # add extra conv layers (e.g., RetinaNet) + extra_levels = num_outs - self.backbone_end_level + self.start_level + if add_extra_convs and extra_levels >= 1: + for i in range(extra_levels): + in_channels = (self.in_channels[self.backbone_end_level - 1] + if i == 0 else out_channels) + extra_fpn_conv = ConvModule( + in_channels, + out_channels, + 3, + stride=2, + padding=1, + normalize=normalize, + bias=self.with_bias, + activation=self.activation, + inplace=False) + self.fpn_convs.append(extra_fpn_conv) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # build laterals + laterals = [ + lateral_conv(inputs[i + self.start_level]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + laterals[i - 1] += F.upsample( + laterals[i], scale_factor=2, mode='nearest') + + # build outputs + # part 1: from original levels + outs = [ + self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) + ] + # part 2: add extra levels + if self.num_outs > len(outs): + # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN) + if not self.add_extra_convs: + for i in range(self.num_outs - used_backbone_levels): + outs.append(F.max_pool2d(outs[-1], 1, stride=2)) + # add conv layers on top of original feature maps (RetinaNet) + else: + orig = inputs[self.backbone_end_level - 1] + outs.append(self.fpn_convs[used_backbone_levels](orig)) + for i in range(used_backbone_levels + 1, self.num_outs): + # BUG: we should add relu before each extra conv + outs.append(self.fpn_convs[i](outs[-1])) + return tuple(outs) diff --git 
a/mmdet/models/roi_extractors/__init__.py b/mmdet/models/roi_extractors/__init__.py new file mode 100644 index 00000000000..e76e689753f --- /dev/null +++ b/mmdet/models/roi_extractors/__init__.py @@ -0,0 +1,3 @@ +from .single_level import SingleLevelRoI + +__all__ = ['SingleLevelRoI'] diff --git a/mmdet/models/roi_extractors/single_level.py b/mmdet/models/roi_extractors/single_level.py new file mode 100644 index 00000000000..3e37ac83d6f --- /dev/null +++ b/mmdet/models/roi_extractors/single_level.py @@ -0,0 +1,73 @@ +from __future__ import division + +import torch +import torch.nn as nn + +from mmdet import ops + + +class SingleLevelRoI(nn.Module): + """Extract RoI features from a single level feature map. Each RoI is + mapped to a level according to its scale.""" + + def __init__(self, + roi_layer, + out_channels, + featmap_strides, + finest_scale=56): + super(SingleLevelRoI, self).__init__() + self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) + self.out_channels = out_channels + self.featmap_strides = featmap_strides + self.finest_scale = finest_scale + + @property + def num_inputs(self): + return len(self.featmap_strides) + + def init_weights(self): + pass + + def build_roi_layers(self, layer_cfg, featmap_strides): + cfg = layer_cfg.copy() + layer_type = cfg.pop('type') + assert hasattr(ops, layer_type) + layer_cls = getattr(ops, layer_type) + roi_layers = nn.ModuleList( + [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) + return roi_layers + + def map_roi_levels(self, rois, num_levels): + """Map rois to corresponding feature levels (0-based) by scales. + + scale < finest_scale: level 0 + finest_scale <= scale < finest_scale * 2: level 1 + finest_scale * 2 <= scale < finest_scale * 4: level 2 + scale >= finest_scale * 4: level 3 + """ + scale = torch.sqrt( + (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) + target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) + target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() + return target_lvls + + def forward(self, feats, rois): + """Extract roi features with the roi layer. If multiple feature levels + are used, then rois are mapped to corresponding levels according to + their scales. 
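+
+        Worked example of the level mapping (with the default
+        finest_scale=56): a 112x112 RoI has scale sqrt(112 * 112) = 112,
+        and floor(log2(112 / 56)) = 1, so it is pooled from feature level 1;
+        a 50x50 RoI gives floor(log2(50 / 56)) = -1, which is clamped to
+        level 0.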
+ """ + if len(feats) == 1: + return self.roi_layers[0](feats[0], rois) + + out_size = self.roi_layers[0].out_size + num_levels = len(feats) + target_lvls = self.map_roi_levels(rois, num_levels) + roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, + out_size, out_size).fill_(0) + for i in range(num_levels): + inds = target_lvls == i + if inds.any(): + rois_ = rois[inds, :] + roi_feats_t = self.roi_layers[i](feats[i], rois_) + roi_feats[inds] += roi_feats_t + return roi_feats diff --git a/mmdet/models/rpn_heads/__init__.py b/mmdet/models/rpn_heads/__init__.py new file mode 100644 index 00000000000..fbc4b3affbf --- /dev/null +++ b/mmdet/models/rpn_heads/__init__.py @@ -0,0 +1,3 @@ +from .rpn_head import RPNHead + +__all__ = ['RPNHead'] diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py new file mode 100644 index 00000000000..f2fce9ebe7a --- /dev/null +++ b/mmdet/models/rpn_heads/rpn_head.py @@ -0,0 +1,237 @@ +from __future__ import division + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from mmdet.core import (AnchorGenerator, anchor_target, bbox_transform_inv, + weighted_cross_entropy, weighted_smoothl1, + weighted_binary_cross_entropy) +from mmdet.ops import nms +from ..misc import multi_apply +from ..weight_init import normal_init + + +class RPNHead(nn.Module): + + def __init__(self, + in_channels, + feat_channels=512, + coarsest_stride=32, + anchor_scales=[8, 16, 32], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + anchor_base_sizes=None, + target_means=(.0, .0, .0, .0), + target_stds=(1.0, 1.0, 1.0, 1.0), + use_sigmoid_cls=False): + super(RPNHead, self).__init__() + self.in_channels = in_channels + self.feat_channels = feat_channels + self.coarsest_stride = coarsest_stride + self.anchor_scales = anchor_scales + self.anchor_ratios = anchor_ratios + self.anchor_strides = anchor_strides + self.anchor_base_sizes = anchor_strides.copy( + ) if anchor_base_sizes is None else anchor_base_sizes + self.target_means = target_means + self.target_stds = target_stds + self.use_sigmoid_cls = use_sigmoid_cls + + self.anchor_generators = [] + for anchor_base in self.anchor_base_sizes: + self.anchor_generators.append( + AnchorGenerator(anchor_base, anchor_scales, anchor_ratios)) + self.rpn_conv = nn.Conv2d(in_channels, feat_channels, 3, padding=1) + self.relu = nn.ReLU(inplace=True) + self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales) + out_channels = (self.num_anchors + if self.use_sigmoid_cls else self.num_anchors * 2) + self.rpn_cls = nn.Conv2d(feat_channels, out_channels, 1) + self.rpn_reg = nn.Conv2d(feat_channels, self.num_anchors * 4, 1) + self.debug_imgs = None + + def init_weights(self): + normal_init(self.rpn_conv, std=0.01) + normal_init(self.rpn_cls, std=0.01) + normal_init(self.rpn_reg, std=0.01) + + def forward_single(self, x): + rpn_feat = self.relu(self.rpn_conv(x)) + rpn_cls_score = self.rpn_cls(rpn_feat) + rpn_bbox_pred = self.rpn_reg(rpn_feat) + return rpn_cls_score, rpn_bbox_pred + + def forward(self, feats): + return multi_apply(self.forward_single, feats) + + def get_anchors(self, featmap_sizes, img_shapes): + """Get anchors given a list of feature map sizes, and get valid flags + at the same time. 
(Extra padding regions should be marked as invalid) + """ + # calculate actual image shapes + padded_img_shapes = [] + for img_shape in img_shapes: + h, w = img_shape[:2] + padded_h = int( + np.ceil(h / self.coarsest_stride) * self.coarsest_stride) + padded_w = int( + np.ceil(w / self.coarsest_stride) * self.coarsest_stride) + padded_img_shapes.append((padded_h, padded_w)) + # generate anchors for different feature levels + # len = feature levels + anchor_list = [] + # len = imgs per gpu + valid_flag_list = [[] for _ in range(len(img_shapes))] + for i in range(len(featmap_sizes)): + anchor_stride = self.anchor_strides[i] + anchors = self.anchor_generators[i].grid_anchors( + featmap_sizes[i], anchor_stride) + anchor_list.append(anchors) + # for each image in this feature level, get valid flags + featmap_size = featmap_sizes[i] + for img_id, (h, w) in enumerate(padded_img_shapes): + valid_feat_h = min( + int(np.ceil(h / anchor_stride)), featmap_size[0]) + valid_feat_w = min( + int(np.ceil(w / anchor_stride)), featmap_size[1]) + flags = self.anchor_generators[i].valid_flags( + featmap_size, (valid_feat_h, valid_feat_w)) + valid_flag_list[img_id].append(flags) + return anchor_list, valid_flag_list + + def loss_single(self, rpn_cls_score, rpn_bbox_pred, labels, label_weights, + bbox_targets, bbox_weights, num_total_samples, cfg): + labels = labels.contiguous().view(-1) + label_weights = label_weights.contiguous().view(-1) + bbox_targets = bbox_targets.contiguous().view(-1, 4) + bbox_weights = bbox_weights.contiguous().view(-1, 4) + if self.use_sigmoid_cls: + rpn_cls_score = rpn_cls_score.permute(0, 2, 3, + 1).contiguous().view(-1) + loss_cls = weighted_binary_cross_entropy( + rpn_cls_score, + labels, + label_weights, + ave_factor=num_total_samples) + else: + rpn_cls_score = rpn_cls_score.permute(0, 2, 3, + 1).contiguous().view(-1, 2) + loss_cls = weighted_cross_entropy( + rpn_cls_score, + labels, + label_weights, + ave_factor=num_total_samples) + rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).contiguous().view( + -1, 4) + loss_reg = weighted_smoothl1( + rpn_bbox_pred, + bbox_targets, + bbox_weights, + beta=cfg.smoothl1_beta, + ave_factor=num_total_samples) + return loss_cls, loss_reg + + def loss(self, rpn_cls_scores, rpn_bbox_preds, gt_bboxes, img_shapes, cfg): + featmap_sizes = [featmap.size()[-2:] for featmap in rpn_cls_scores] + assert len(featmap_sizes) == len(self.anchor_generators) + + anchor_list, valid_flag_list = self.get_anchors( + featmap_sizes, img_shapes) + cls_reg_targets = anchor_target( + anchor_list, valid_flag_list, featmap_sizes, gt_bboxes, img_shapes, + self.target_means, self.target_stds, cfg) + if cls_reg_targets is None: + return None + (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, + num_total_samples) = cls_reg_targets + losses_cls, losses_reg = multi_apply( + self.loss_single, + rpn_cls_scores, + rpn_bbox_preds, + labels_list, + label_weights_list, + bbox_targets_list, + bbox_weights_list, + num_total_samples=num_total_samples, + cfg=cfg) + return dict(loss_rpn_cls=losses_cls, loss_rpn_reg=losses_reg) + + def get_proposals(self, rpn_cls_scores, rpn_bbox_preds, img_shapes, cfg): + img_per_gpu = len(img_shapes) + featmap_sizes = [featmap.size()[-2:] for featmap in rpn_cls_scores] + mlvl_anchors = [ + self.anchor_generators[idx].grid_anchors(featmap_sizes[idx], + self.anchor_strides[idx]) + for idx in range(len(featmap_sizes)) + ] + proposal_list = [] + for img_id in range(img_per_gpu): + rpn_cls_score_list = [ + 
rpn_cls_scores[idx][img_id].detach()
+                for idx in range(len(rpn_cls_scores))
+            ]
+            rpn_bbox_pred_list = [
+                rpn_bbox_preds[idx][img_id].detach()
+                for idx in range(len(rpn_bbox_preds))
+            ]
+            assert len(rpn_cls_score_list) == len(rpn_bbox_pred_list)
+            img_shape = img_shapes[img_id]
+            proposals = self._get_proposals_single(
+                rpn_cls_score_list, rpn_bbox_pred_list, mlvl_anchors,
+                img_shape, cfg)
+            proposal_list.append(proposals)
+        return proposal_list
+
+    def _get_proposals_single(self, rpn_cls_scores, rpn_bbox_preds,
+                              mlvl_anchors, img_shape, cfg):
+        mlvl_proposals = []
+        for idx in range(len(rpn_cls_scores)):
+            rpn_cls_score = rpn_cls_scores[idx]
+            rpn_bbox_pred = rpn_bbox_preds[idx]
+            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
+            anchors = mlvl_anchors[idx]
+            if self.use_sigmoid_cls:
+                rpn_cls_score = rpn_cls_score.permute(1, 2,
+                                                      0).contiguous().view(-1)
+                rpn_cls_prob = F.sigmoid(rpn_cls_score)
+                scores = rpn_cls_prob
+            else:
+                rpn_cls_score = rpn_cls_score.permute(1, 2,
+                                                      0).contiguous().view(
+                                                          -1, 2)
+                rpn_cls_prob = F.softmax(rpn_cls_score, dim=1)
+                scores = rpn_cls_prob[:, 1]
+            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).contiguous().view(
+                -1, 4)
+            _, order = scores.sort(0, descending=True)
+            if cfg.nms_pre > 0:
+                order = order[:cfg.nms_pre]
+                rpn_bbox_pred = rpn_bbox_pred[order, :]
+                anchors = anchors[order, :]
+                scores = scores[order]
+            proposals = bbox_transform_inv(anchors, rpn_bbox_pred,
+                                           self.target_means,
+                                           self.target_stds, img_shape)
+            w = proposals[:, 2] - proposals[:, 0] + 1
+            h = proposals[:, 3] - proposals[:, 1] + 1
+            valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
+                                       (h >= cfg.min_bbox_size)).squeeze()
+            proposals = proposals[valid_inds, :]
+            scores = scores[valid_inds]
+            proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
+            nms_keep = nms(proposals, cfg.nms_thr)[:cfg.nms_post]
+            proposals = proposals[nms_keep, :]
+            mlvl_proposals.append(proposals)
+        proposals = torch.cat(mlvl_proposals, 0)
+        if cfg.nms_across_levels:
+            nms_keep = nms(proposals, cfg.nms_thr)[:cfg.max_num]
+            proposals = proposals[nms_keep, :]
+        else:
+            scores = proposals[:, 4]
+            _, order = scores.sort(0, descending=True)
+            num = min(cfg.max_num, proposals.shape[0])
+            order = order[:num]
+            proposals = proposals[order, :]
+        return proposals
diff --git a/mmdet/models/weight_init.py b/mmdet/models/weight_init.py
new file mode 100644
index 00000000000..2e9b13b4fbc
--- /dev/null
+++ b/mmdet/models/weight_init.py
@@ -0,0 +1,39 @@
+import torch.nn as nn
+
+
+def xavier_init(module, gain=1, bias=0, distribution='normal'):
+    assert distribution in ['uniform', 'normal']
+    if distribution == 'uniform':
+        nn.init.xavier_uniform_(module.weight, gain=gain)
+    else:
+        nn.init.xavier_normal_(module.weight, gain=gain)
+    if hasattr(module, 'bias') and module.bias is not None:
+        nn.init.constant_(module.bias, bias)
+
+
+def normal_init(module, mean=0, std=1, bias=0):
+    nn.init.normal_(module.weight, mean, std)
+    if hasattr(module, 'bias') and module.bias is not None:
+        nn.init.constant_(module.bias, bias)
+
+
+def uniform_init(module, a=0, b=1, bias=0):
+    nn.init.uniform_(module.weight, a, b)
+    if hasattr(module, 'bias') and module.bias is not None:
+        nn.init.constant_(module.bias, bias)
+
+
+def kaiming_init(module,
+                 mode='fan_out',
+                 nonlinearity='relu',
+                 bias=0,
+                 distribution='normal'):
+    assert distribution in ['uniform', 'normal']
+    if distribution == 'uniform':
+        nn.init.kaiming_uniform_(
+            module.weight, mode=mode, nonlinearity=nonlinearity)
+    else:
+        nn.init.kaiming_normal_(
+            module.weight, mode=mode, nonlinearity=nonlinearity)
+    if hasattr(module,
'bias') and module.bias is not None:
+        nn.init.constant_(module.bias, bias)
diff --git a/mmdet/nn/__init__.py b/mmdet/nn/__init__.py
new file mode 100644
index 00000000000..1b627f5e7b8
--- /dev/null
+++ b/mmdet/nn/__init__.py
@@ -0,0 +1 @@
+from .parallel import MMDataParallel, MMDistributedDataParallel
diff --git a/mmdet/nn/parallel/__init__.py b/mmdet/nn/parallel/__init__.py
new file mode 100644
index 00000000000..0ea0a58e4a5
--- /dev/null
+++ b/mmdet/nn/parallel/__init__.py
@@ -0,0 +1,7 @@
+from .data_parallel import MMDataParallel
+from .distributed import MMDistributedDataParallel
+from .scatter_gather import scatter, scatter_kwargs
+
+__all__ = [
+    'MMDataParallel', 'MMDistributedDataParallel', 'scatter', 'scatter_kwargs'
+]
diff --git a/mmdet/nn/parallel/_functions.py b/mmdet/nn/parallel/_functions.py
new file mode 100644
index 00000000000..75bb954dce4
--- /dev/null
+++ b/mmdet/nn/parallel/_functions.py
@@ -0,0 +1,74 @@
+import torch
+from torch.nn.parallel._functions import _get_stream
+
+
+def scatter(input, devices, streams=None):
+    """Scatters tensor across multiple GPUs.
+    """
+    if streams is None:
+        streams = [None] * len(devices)
+
+    if isinstance(input, list):
+        chunk_size = (len(input) - 1) // len(devices) + 1
+        outputs = [
+            scatter(input[i], [devices[i // chunk_size]],
+                    [streams[i // chunk_size]]) for i in range(len(input))
+        ]
+        return outputs
+    elif isinstance(input, torch.Tensor):
+        output = input.contiguous()
+        # TODO: copy to a pinned buffer first (if copying from CPU)
+        stream = streams[0] if output.numel() > 0 else None
+        with torch.cuda.device(devices[0]), torch.cuda.stream(stream):
+            output = output.cuda(devices[0], non_blocking=True)
+        return output
+    else:
+        raise Exception('Unknown type {}.'.format(type(input)))
+
+
+def synchronize_stream(output, devices, streams):
+    if isinstance(output, list):
+        chunk_size = len(output) // len(devices)
+        for i in range(len(devices)):
+            for j in range(chunk_size):
+                synchronize_stream(output[i * chunk_size + j], [devices[i]],
+                                   [streams[i]])
+    elif isinstance(output, torch.Tensor):
+        if output.numel() != 0:
+            with torch.cuda.device(devices[0]):
+                main_stream = torch.cuda.current_stream()
+                main_stream.wait_stream(streams[0])
+                output.record_stream(main_stream)
+    else:
+        raise Exception('Unknown type {}.'.format(type(output)))
+
+
+def get_input_device(input):
+    if isinstance(input, list):
+        for item in input:
+            input_device = get_input_device(item)
+            if input_device != -1:
+                return input_device
+        return -1
+    elif isinstance(input, torch.Tensor):
+        return input.get_device() if input.is_cuda else -1
+    else:
+        raise Exception('Unknown type {}.'.format(type(input)))
+
+
+class Scatter(object):
+
+    @staticmethod
+    def forward(target_gpus, input):
+        input_device = get_input_device(input)
+        streams = None
+        if input_device == -1:
+            # Perform CPU to GPU copies in a background stream
+            streams = [_get_stream(device) for device in target_gpus]
+
+        outputs = scatter(input, target_gpus, streams)
+        # Synchronize with the copy stream
+        if streams is not None:
+            synchronize_stream(outputs, target_gpus, streams)
+
+        return tuple(outputs)
diff --git a/mmdet/nn/parallel/data_parallel.py b/mmdet/nn/parallel/data_parallel.py
new file mode 100644
index 00000000000..6735cb4afb7
--- /dev/null
+++ b/mmdet/nn/parallel/data_parallel.py
@@ -0,0 +1,9 @@
+from torch.nn.parallel import DataParallel
+
+from .scatter_gather import scatter_kwargs
+
+
+class MMDataParallel(DataParallel):
+
+    def scatter(self, inputs, kwargs, device_ids):
+        return scatter_kwargs(inputs, kwargs,
device_ids, dim=self.dim)
diff --git a/mmdet/nn/parallel/distributed.py b/mmdet/nn/parallel/distributed.py
new file mode 100644
index 00000000000..2809778ad93
--- /dev/null
+++ b/mmdet/nn/parallel/distributed.py
@@ -0,0 +1,9 @@
+from torch.nn.parallel import DistributedDataParallel
+
+from .scatter_gather import scatter_kwargs
+
+
+class MMDistributedDataParallel(DistributedDataParallel):
+
+    def scatter(self, inputs, kwargs, device_ids):
+        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
diff --git a/mmdet/nn/parallel/scatter_gather.py b/mmdet/nn/parallel/scatter_gather.py
new file mode 100644
index 00000000000..82511fd1db1
--- /dev/null
+++ b/mmdet/nn/parallel/scatter_gather.py
@@ -0,0 +1,48 @@
+import torch
+from ._functions import Scatter
+from torch.nn.parallel._functions import Scatter as OrigScatter
+from mmdet.datasets.utils import DataContainer
+
+
+def scatter(inputs, target_gpus, dim=0):
+    """Scatter inputs to target gpus.
+
+    The only difference from original :func:`scatter` is to add support for
+    :type:`~mmdet.DataContainer`.
+    """
+
+    def scatter_map(obj):
+        if isinstance(obj, torch.Tensor):
+            return OrigScatter.apply(target_gpus, None, dim, obj)
+        if isinstance(obj, DataContainer) and isinstance(obj.data, list):
+            return Scatter.forward(target_gpus, obj.data)
+        if isinstance(obj, tuple) and len(obj) > 0:
+            return list(zip(*map(scatter_map, obj)))
+        if isinstance(obj, list) and len(obj) > 0:
+            return list(map(list, zip(*map(scatter_map, obj))))
+        if isinstance(obj, dict) and len(obj) > 0:
+            return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
+        return [obj for _ in target_gpus]
+
+    # After scatter_map is called, a scatter_map cell will exist. This cell
+    # has a reference to the actual function scatter_map, which has references
+    # to a closure that has a reference to the scatter_map cell (because the
+    # fn is recursive). To avoid this reference cycle, we set the function to
+    # None, clearing the cell
+    try:
+        return scatter_map(inputs)
+    finally:
+        scatter_map = None
+
+
+def scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
+    """Scatter with support for kwargs dictionary"""
+    inputs = scatter(inputs, target_gpus, dim) if inputs else []
+    kwargs = scatter(kwargs, target_gpus, dim) if kwargs else []
+    if len(inputs) < len(kwargs):
+        inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
+    elif len(kwargs) < len(inputs):
+        kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
+    inputs = tuple(inputs)
+    kwargs = tuple(kwargs)
+    return inputs, kwargs
diff --git a/mmdet/ops/__init__.py b/mmdet/ops/__init__.py
new file mode 100644
index 00000000000..52e5808016c
--- /dev/null
+++ b/mmdet/ops/__init__.py
@@ -0,0 +1,3 @@
+from .nms import nms, soft_nms
+from .roi_align import RoIAlign, roi_align
+from .roi_pool import RoIPool, roi_pool
diff --git a/mmdet/ops/nms/.gitignore b/mmdet/ops/nms/.gitignore
new file mode 100644
index 00000000000..ce1da4c53c0
--- /dev/null
+++ b/mmdet/ops/nms/.gitignore
@@ -0,0 +1 @@
+*.cpp
diff --git a/mmdet/ops/nms/Makefile b/mmdet/ops/nms/Makefile
new file mode 100644
index 00000000000..39556dd28ba
--- /dev/null
+++ b/mmdet/ops/nms/Makefile
@@ -0,0 +1,8 @@
+PYTHON=${PYTHON:-python}
+
+all:
+	echo "Compiling nms kernels..."
+ $(PYTHON) setup.py build_ext --inplace + +clean: + rm *.so diff --git a/mmdet/ops/nms/__init__.py b/mmdet/ops/nms/__init__.py new file mode 100644 index 00000000000..1cf8569b97b --- /dev/null +++ b/mmdet/ops/nms/__init__.py @@ -0,0 +1 @@ +from .nms_wrapper import nms, soft_nms diff --git a/mmdet/ops/nms/cpu_nms.pyx b/mmdet/ops/nms/cpu_nms.pyx new file mode 100644 index 00000000000..1d0bef3321d --- /dev/null +++ b/mmdet/ops/nms/cpu_nms.pyx @@ -0,0 +1,68 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b): + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b): + return a if a <= b else b + +def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): + cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] + cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] + cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] + cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] + cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] + + cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) + cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] + + cdef int ndets = dets.shape[0] + cdef np.ndarray[np.int_t, ndim=1] suppressed = \ + np.zeros((ndets), dtype=np.int) + + # nominal indices + cdef int _i, _j + # sorted indices + cdef int i, j + # temp variables for box i's (the box currently under consideration) + cdef np.float32_t ix1, iy1, ix2, iy2, iarea + # variables for computing overlap with box j (lower scoring box) + cdef np.float32_t xx1, yy1, xx2, yy2 + cdef np.float32_t w, h + cdef np.float32_t inter, ovr + + keep = [] + for _i in range(ndets): + i = order[_i] + if suppressed[i] == 1: + continue + keep.append(i) + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return keep diff --git a/mmdet/ops/nms/cpu_soft_nms.pyx b/mmdet/ops/nms/cpu_soft_nms.pyx new file mode 100644 index 00000000000..05ec5a54462 --- /dev/null +++ b/mmdet/ops/nms/cpu_soft_nms.pyx @@ -0,0 +1,123 @@ +# ---------------------------------------------------------- +# Soft-NMS: Improving Object Detection With One Line of Code +# Copyright (c) University of Maryland, College Park +# Licensed under The MIT License [see LICENSE for details] +# Written by Navaneeth Bodla and Bharat Singh +# ---------------------------------------------------------- + +import numpy as np +cimport numpy as np + + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b): + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b): + return a if a <= b else b + +def cpu_soft_nms( + np.ndarray[float, ndim=2] boxes_in, + float sigma=0.5, + float Nt=0.3, + float threshold=0.001, + unsigned int method=0 +): + boxes = boxes_in.copy() + cdef unsigned int N = boxes.shape[0] + cdef float iw, ih, box_area + cdef float ua + cdef int pos = 0 + cdef float maxscore = 0 + cdef int 
maxpos = 0 + cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov + inds = np.arange(N) + + for i in range(N): + maxscore = boxes[i, 4] + maxpos = i + + tx1 = boxes[i,0] + ty1 = boxes[i,1] + tx2 = boxes[i,2] + ty2 = boxes[i,3] + ts = boxes[i,4] + ti = inds[i] + + pos = i + 1 + # get max box + while pos < N: + if maxscore < boxes[pos, 4]: + maxscore = boxes[pos, 4] + maxpos = pos + pos = pos + 1 + + # add max box as a detection + boxes[i,0] = boxes[maxpos,0] + boxes[i,1] = boxes[maxpos,1] + boxes[i,2] = boxes[maxpos,2] + boxes[i,3] = boxes[maxpos,3] + boxes[i,4] = boxes[maxpos,4] + inds[i] = inds[maxpos] + + # swap ith box with position of max box + boxes[maxpos,0] = tx1 + boxes[maxpos,1] = ty1 + boxes[maxpos,2] = tx2 + boxes[maxpos,3] = ty2 + boxes[maxpos,4] = ts + inds[maxpos] = ti + + tx1 = boxes[i,0] + ty1 = boxes[i,1] + tx2 = boxes[i,2] + ty2 = boxes[i,3] + ts = boxes[i,4] + + pos = i + 1 + # NMS iterations, note that N changes if detection boxes fall below + # threshold + while pos < N: + x1 = boxes[pos, 0] + y1 = boxes[pos, 1] + x2 = boxes[pos, 2] + y2 = boxes[pos, 3] + s = boxes[pos, 4] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + iw = (min(tx2, x2) - max(tx1, x1) + 1) + if iw > 0: + ih = (min(ty2, y2) - max(ty1, y1) + 1) + if ih > 0: + ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) + ov = iw * ih / ua #iou between max box and detection box + + if method == 1: # linear + if ov > Nt: + weight = 1 - ov + else: + weight = 1 + elif method == 2: # gaussian + weight = np.exp(-(ov * ov)/sigma) + else: # original NMS + if ov > Nt: + weight = 0 + else: + weight = 1 + + boxes[pos, 4] = weight*boxes[pos, 4] + + # if box score falls below threshold, discard the box by + # swapping with last box update N + if boxes[pos, 4] < threshold: + boxes[pos,0] = boxes[N-1, 0] + boxes[pos,1] = boxes[N-1, 1] + boxes[pos,2] = boxes[N-1, 2] + boxes[pos,3] = boxes[N-1, 3] + boxes[pos,4] = boxes[N-1, 4] + inds[pos] = inds[N-1] + N = N - 1 + pos = pos - 1 + + pos = pos + 1 + + return boxes[:N], inds[:N] \ No newline at end of file diff --git a/mmdet/ops/nms/gpu_nms.hpp b/mmdet/ops/nms/gpu_nms.hpp new file mode 100644 index 00000000000..2d45e344aeb --- /dev/null +++ b/mmdet/ops/nms/gpu_nms.hpp @@ -0,0 +1,3 @@ +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id, size_t base); +size_t nms_Malloc(); diff --git a/mmdet/ops/nms/gpu_nms.pyx b/mmdet/ops/nms/gpu_nms.pyx new file mode 100644 index 00000000000..e5ae7257873 --- /dev/null +++ b/mmdet/ops/nms/gpu_nms.pyx @@ -0,0 +1,43 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +assert sizeof(int) == sizeof(np.int32_t) + +cdef extern from "gpu_nms.hpp": + void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int, size_t) nogil + size_t nms_Malloc() nogil + +memory_pool = {} + +def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, + np.int32_t device_id=0): + cdef int boxes_num = dets.shape[0] + cdef int boxes_dim = dets.shape[1] + cdef int num_out + cdef size_t base + cdef np.ndarray[np.int32_t, ndim=1] \ + keep = np.zeros(boxes_num, dtype=np.int32) + cdef np.ndarray[np.float32_t, ndim=1] \ + scores = dets[:, 4] + cdef np.ndarray[np.int_t, ndim=1] \ + order = scores.argsort()[::-1] + cdef 
np.ndarray[np.float32_t, ndim=2] \ + sorted_dets = dets[order, :] + cdef float cthresh = thresh + if device_id not in memory_pool: + with nogil: + base = nms_Malloc() + memory_pool[device_id] = base + # print "malloc", base + base = memory_pool[device_id] + with nogil: + _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, cthresh, device_id, base) + keep = keep[:num_out] + return list(order[keep]) diff --git a/mmdet/ops/nms/nms_kernel.cu b/mmdet/ops/nms/nms_kernel.cu new file mode 100644 index 00000000000..4c5f0ec5e10 --- /dev/null +++ b/mmdet/ops/nms/nms_kernel.cu @@ -0,0 +1,188 @@ +// ------------------------------------------------------------------ +// Faster R-CNN +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License [see fast-rcnn/LICENSE for details] +// Written by Shaoqing Ren +// ------------------------------------------------------------------ + +#include +#include +#include +#include "gpu_nms.hpp" + +#define CUDA_CHECK(condition) \ + /* Code block avoids redefinition of cudaError_t error */ \ + do { \ + cudaError_t error = condition; \ + if (error != cudaSuccess) { \ + std::cout << cudaGetErrorString(error) << std::endl; \ + } \ + } while (0) + +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +#define MULTIPLIER 16 +#define LONGLONG_SIZE 64 + +int const threadsPerBlock = + sizeof(unsigned long long) * 8 * + MULTIPLIER; // number of bits for a long long variable + +__device__ inline float devIoU(float const* const a, float const* const b) { + float left = max(a[0], b[0]), right = min(a[2], b[2]); + float top = max(a[1], b[1]), bottom = min(a[3], b[3]); + float width = max(right - left + 1, 0.f), + height = max(bottom - top + 1, 0.f); + float interS = width * height; + float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); + float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); + return interS / (Sa + Sb - interS); +} + +__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, + const float* dev_boxes, + unsigned long long* dev_mask) { + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + __shared__ float block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + unsigned long long ts[MULTIPLIER]; + + if (threadIdx.x < row_size) { +#pragma unroll + for (int i = 0; i < MULTIPLIER; ++i) { + ts[i] = 0; + } + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const float* cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { + ts[i / LONGLONG_SIZE] |= 1ULL << (i % LONGLONG_SIZE); + } + } + const int col_blocks = DIVUP(n_boxes, threadsPerBlock); + +#pragma unroll + for 
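With MULTIPLIER = 16, each thread compares its box against up to threadsPerBlock = 64 * 16 = 1024 column boxes and records the outcomes in sixteen 64-bit words, so the device mask grows as sketched below (a back-of-the-envelope calculation only, mirroring the kernel's constants):

LONGLONG_SIZE = 64   # bits per unsigned long long
MULTIPLIER = 16
threads_per_block = LONGLONG_SIZE * MULTIPLIER  # 1024, as in the kernel

def mask_bytes(n_boxes):
    # DIVUP(n_boxes, threadsPerBlock) block-columns, MULTIPLIER words each
    col_blocks = -(-n_boxes // threads_per_block)
    return n_boxes * col_blocks * MULTIPLIER * 8  # 8 bytes per word

print(mask_bytes(2000))  # 512000 bytes for 2000 boxes (2 column blocks)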
(int i = 0; i < MULTIPLIER; ++i) { + dev_mask[(cur_box_idx * col_blocks + col_start) * MULTIPLIER + i] = + ts[i]; + } + } +} + +void _set_device(int device_id) { + int current_device; + CUDA_CHECK(cudaGetDevice(¤t_device)); + if (current_device == device_id) { + return; + } + // The call to cudaSetDevice must come before any calls to Get, which + // may perform initialization using the GPU. + CUDA_CHECK(cudaSetDevice(device_id)); +} + +const size_t MEMORY_SIZE = 500000000; +size_t nms_Malloc() { + float* boxes_dev = NULL; + CUDA_CHECK(cudaMalloc(&boxes_dev, MEMORY_SIZE)); + return size_t(boxes_dev); +} + +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id, size_t base) { + _set_device(device_id); + + float* boxes_dev = NULL; + unsigned long long* mask_dev = NULL; + + const int col_blocks = DIVUP(boxes_num, threadsPerBlock); + + if (base > 0) { + size_t require_mem = + boxes_num * boxes_dim * sizeof(float) + + boxes_num * col_blocks * sizeof(unsigned long long) * MULTIPLIER; + if (require_mem >= MEMORY_SIZE) { + std::cout << "require_mem: " << require_mem << std::endl; + } + boxes_dev = (float*)(base); + mask_dev = + (unsigned long long*)(base + + 512 * ((unsigned long long)(boxes_num * + boxes_dim * + sizeof(float) / + 512) + + 1)); + } else { + CUDA_CHECK( + cudaMalloc(&boxes_dev, boxes_num * boxes_dim * sizeof(float))); + CUDA_CHECK(cudaMalloc(&mask_dev, MULTIPLIER * boxes_num * col_blocks * + sizeof(unsigned long long))); + } + CUDA_CHECK(cudaMemcpy(boxes_dev, boxes_host, + boxes_num * boxes_dim * sizeof(float), + cudaMemcpyHostToDevice)); + + dim3 blocks(DIVUP(boxes_num, threadsPerBlock), + DIVUP(boxes_num, threadsPerBlock)); + dim3 threads(threadsPerBlock); + nms_kernel<<>>(boxes_num, nms_overlap_thresh, boxes_dev, + mask_dev); + + std::vector mask_host(boxes_num * col_blocks * + MULTIPLIER); + CUDA_CHECK(cudaMemcpy( + &mask_host[0], mask_dev, + sizeof(unsigned long long) * boxes_num * col_blocks * MULTIPLIER, + cudaMemcpyDeviceToHost)); + + std::vector remv(col_blocks * MULTIPLIER); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks * MULTIPLIER); + + int num_to_keep = 0; + for (int i = 0; i < boxes_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + int offset = inblock / LONGLONG_SIZE; + int bit_pos = inblock % LONGLONG_SIZE; + + if (!(remv[nblock * MULTIPLIER + offset] & (1ULL << bit_pos))) { + keep_out[num_to_keep++] = i; + unsigned long long* p = &mask_host[0] + i * col_blocks * MULTIPLIER; + for (int j = nblock * MULTIPLIER + offset; + j < col_blocks * MULTIPLIER; j++) { + remv[j] |= p[j]; + } + } + } + *num_out = num_to_keep; + + if (!base) { + CUDA_CHECK(cudaFree(boxes_dev)); + CUDA_CHECK(cudaFree(mask_dev)); + } +} diff --git a/mmdet/ops/nms/nms_wrapper.py b/mmdet/ops/nms/nms_wrapper.py new file mode 100644 index 00000000000..43d5e5c6e5c --- /dev/null +++ b/mmdet/ops/nms/nms_wrapper.py @@ -0,0 +1,46 @@ +import numpy as np +import torch + +from .gpu_nms import gpu_nms +from .cpu_nms import cpu_nms +from .cpu_soft_nms import cpu_soft_nms + + +def nms(dets, thresh, device_id=None): + """Dispatch to either CPU or GPU NMS implementations.""" + + if isinstance(dets, torch.Tensor): + if dets.is_cuda: + device_id = dets.get_device() + dets = dets.detach().cpu().numpy() + assert isinstance(dets, np.ndarray) + + if dets.shape[0] == 0: + inds = [] + else: + inds = (gpu_nms(dets, thresh, device_id=device_id) + if device_id is not None else cpu_nms(dets, 
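On the host, `_nms` then walks boxes in descending score order and ORs each kept box's mask row into `remv`; a box is kept only if no earlier kept box has already set its bit. The same reduction in pure Python, assuming an `iou` function is given:

def nms_reduce(boxes_sorted, iou, thresh):
    # boxes_sorted: detections in descending score order
    # suppressed[j] plays the role of a set bit in the remv words
    suppressed = [False] * len(boxes_sorted)
    keep = []
    for i, box in enumerate(boxes_sorted):
        if suppressed[i]:
            continue
        keep.append(i)
        for j in range(i + 1, len(boxes_sorted)):
            if not suppressed[j] and iou(box, boxes_sorted[j]) > thresh:
                suppressed[j] = True
    return keep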
thresh)) + + if isinstance(dets, torch.Tensor): + return dets.new_tensor(inds, dtype=torch.long) + else: + return np.array(inds, dtype=np.int) + + +def soft_nms(dets, Nt=0.3, method=1, sigma=0.5, min_score=0): + if isinstance(dets, torch.Tensor): + _dets = dets.detach().cpu().numpy() + else: + _dets = dets.copy() + assert isinstance(_dets, np.ndarray) + + new_dets, inds = cpu_soft_nms( + _dets, Nt=Nt, method=method, sigma=sigma, threshold=min_score) + + if isinstance(dets, torch.Tensor): + return dets.new_tensor( + inds, dtype=torch.long), dets.new_tensor(new_dets) + else: + return np.array( + inds, dtype=np.int), np.array( + new_dets, dtype=np.float32) diff --git a/mmdet/ops/nms/setup.py b/mmdet/ops/nms/setup.py new file mode 100644 index 00000000000..98bf57c8f13 --- /dev/null +++ b/mmdet/ops/nms/setup.py @@ -0,0 +1,91 @@ +import os +from distutils.core import setup +from distutils.extension import Extension + +import numpy as np +from Cython.Build import cythonize +from Cython.Distutils import build_ext + +CUDA_ROOT = '/usr/local/cuda' +CUDA = { + "include": os.path.join(CUDA_ROOT, 'include'), + "lib": os.path.join(CUDA_ROOT, 'lib64'), + "nvcc": os.path.join(CUDA_ROOT, 'bin', "nvcc") +} + +inc_dirs = [CUDA['include'], np.get_include()] + +lib_dirs = [CUDA['lib']] + +# extensions +ext_args = dict( + include_dirs=inc_dirs, + library_dirs=lib_dirs, + language='c++', + libraries=['cudart'], + extra_compile_args={ + "cc": ['-Wno-unused-function', '-Wno-write-strings'], + "nvcc": [ + '-arch=sm_52', '--ptxas-options=-v', '-c', '--compiler-options', + '-fPIC' + ], + }, +) + +extensions = [ + Extension('cpu_nms', ['cpu_nms.pyx'], **ext_args), + Extension('gpu_nms', ['gpu_nms.pyx', 'nms_kernel.cu'], **ext_args), + Extension('cpu_soft_nms', ['cpu_soft_nms.pyx'], **ext_args), +] + + +def customize_compiler_for_nvcc(self): + """Inject deep into distutils to customize how the dispatch + to cc/nvcc works. + If you subclass UnixCCompiler, it's not trivial to get your subclass + injected in, and still have the right customizations (i.e. + distutils.sysconfig.customize_compiler) run on it. So instead of going + the OO route, I have this. Note, it's kind of like a weird functional + subclassing going on.""" + + # tell the compiler it can process .cu + self.src_extensions.append('.cu') + + # save references to the default compiler_so and _compile methods + default_compiler_so = self.compiler_so + super = self._compile + + # now redefine the _compile method. This gets executed for each + # object but distutils doesn't have the ability to change compilers + # based on source extension: we add it. 
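Assuming the extensions above have been built, the wrapper can be exercised as follows (rows are [x1, y1, x2, y2, score]; the box values are made up for illustration):

import numpy as np
from mmdet.ops.nms.nms_wrapper import nms, soft_nms

dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.8],    # near-duplicate of the first box
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)

keep = nms(dets, 0.5)                    # numpy in -> cpu_nms -> [0, 2]
inds, new_dets = soft_nms(dets, Nt=0.5)  # scores decay instead of vanishing
# a CUDA tensor input would dispatch to gpu_nms and return a LongTensor:
# keep = nms(torch.from_numpy(dets).cuda(), 0.5)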
+ def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): + if os.path.splitext(src)[1] == '.cu': + # use the cuda for .cu files + self.set_executable('compiler_so', CUDA['nvcc']) + # use only a subset of the extra_postargs, which are 1-1 translated + # from the extra_compile_args in the Extension class + postargs = extra_postargs['nvcc'] + else: + postargs = extra_postargs['cc'] + + super(obj, src, ext, cc_args, postargs, pp_opts) + # reset the default compiler_so, which we might have changed for cuda + self.compiler_so = default_compiler_so + + # inject our redefined _compile method into the class + self._compile = _compile + + +# run the customize_compiler +class custom_build_ext(build_ext): + + def build_extensions(self): + customize_compiler_for_nvcc(self.compiler) + build_ext.build_extensions(self) + + +setup( + name='nms', + cmdclass={'build_ext': custom_build_ext}, + ext_modules=cythonize(extensions), +) diff --git a/mmdet/ops/roi_align/__init__.py b/mmdet/ops/roi_align/__init__.py new file mode 100644 index 00000000000..ae27e21d6c7 --- /dev/null +++ b/mmdet/ops/roi_align/__init__.py @@ -0,0 +1,2 @@ +from .functions.roi_align import roi_align +from .modules.roi_align import RoIAlign diff --git a/mmdet/ops/roi_align/functions/__init__.py b/mmdet/ops/roi_align/functions/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/mmdet/ops/roi_align/functions/roi_align.py b/mmdet/ops/roi_align/functions/roi_align.py new file mode 100644 index 00000000000..0e546fe5952 --- /dev/null +++ b/mmdet/ops/roi_align/functions/roi_align.py @@ -0,0 +1,61 @@ +from torch.autograd import Function, Variable + +from .. import roi_align_cuda + + +class RoIAlignFunction(Function): + + @staticmethod + def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): + if isinstance(out_size, int): + out_h = out_size + out_w = out_size + elif isinstance(out_size, tuple): + assert len(out_size) == 2 + assert isinstance(out_size[0], int) + assert isinstance(out_size[1], int) + out_h, out_w = out_size + else: + raise TypeError( + '"out_size" must be an integer or tuple of integers') + ctx.spatial_scale = spatial_scale + ctx.sample_num = sample_num + ctx.save_for_backward(rois) + ctx.feature_size = features.size() + + batch_size, num_channels, data_height, data_width = features.size() + num_rois = rois.size(0) + + output = features.new_zeros(num_rois, num_channels, out_h, out_w) + if features.is_cuda: + roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, + sample_num, output) + else: + raise NotImplementedError + + return output + + @staticmethod + def backward(ctx, grad_output): + feature_size = ctx.feature_size + spatial_scale = ctx.spatial_scale + sample_num = ctx.sample_num + rois = ctx.saved_tensors[0] + assert (feature_size is not None and grad_output.is_cuda) + + batch_size, num_channels, data_height, data_width = feature_size + out_w = grad_output.size(3) + out_h = grad_output.size(2) + + grad_input = grad_rois = None + if ctx.needs_input_grad[0]: + grad_input = Variable( + rois.new(batch_size, num_channels, data_height, data_width) + .zero_()) + roi_align_cuda.backward(grad_output, rois, out_h, out_w, + spatial_scale, sample_num, grad_input) + + return grad_input, grad_rois, None, None, None + + +roi_align = RoIAlignFunction.apply diff --git a/mmdet/ops/roi_align/gradcheck.py b/mmdet/ops/roi_align/gradcheck.py new file mode 100644 index 00000000000..e2c51e64bb7 --- /dev/null +++ b/mmdet/ops/roi_align/gradcheck.py @@ -0,0 +1,29 @@ +import numpy as np 
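Stripped of distutils plumbing, the injected `_compile` above simply keys off the file extension; a condensed sketch of that dispatch, reusing the paths and flags from the setup script:

import os

CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
extra_compile_args = {'cc': ['-Wno-unused-function', '-Wno-write-strings'],
                      'nvcc': ['-arch=sm_52', '--compiler-options', '-fPIC']}

def pick_compiler(src):
    # .cu sources go to nvcc with the 'nvcc' flag set; everything else
    # keeps the default compiler and the 'cc' flags
    if os.path.splitext(src)[1] == '.cu':
        return CUDA_NVCC, extra_compile_args['nvcc']
    return 'cc', extra_compile_args['cc']

print(pick_compiler('nms_kernel.cu')[0])  # /usr/local/cuda/bin/nvcc
print(pick_compiler('cpu_nms.c')[0])      # cc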
+import torch +from torch.autograd import gradcheck + +import os.path as osp +import sys +sys.path.append(osp.abspath(osp.join(__file__, '../../'))) +from roi_align import RoIAlign + +feat_size = 15 +spatial_scale = 1.0 / 8 +img_size = feat_size / spatial_scale +num_imgs = 2 +num_rois = 20 + +batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) +rois = np.random.rand(num_rois, 4) * img_size * 0.5 +rois[:, 2:] += img_size * 0.5 +rois = np.hstack((batch_ind, rois)) + +feat = torch.randn( + num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') +rois = torch.from_numpy(rois).float().cuda() +inputs = (feat, rois) +print('Gradcheck for roi align...') +test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) +print(test) +test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) +print(test) diff --git a/mmdet/ops/roi_align/modules/__init__.py b/mmdet/ops/roi_align/modules/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/mmdet/ops/roi_align/modules/roi_align.py b/mmdet/ops/roi_align/modules/roi_align.py new file mode 100644 index 00000000000..b83b74e6b7c --- /dev/null +++ b/mmdet/ops/roi_align/modules/roi_align.py @@ -0,0 +1,16 @@ +from torch.nn.modules.module import Module +from ..functions.roi_align import RoIAlignFunction + + +class RoIAlign(Module): + + def __init__(self, out_size, spatial_scale, sample_num=0): + super(RoIAlign, self).__init__() + + self.out_size = out_size + self.spatial_scale = float(spatial_scale) + self.sample_num = int(sample_num) + + def forward(self, features, rois): + return RoIAlignFunction.apply(features, rois, self.out_size, + self.spatial_scale, self.sample_num) diff --git a/mmdet/ops/roi_align/setup.py b/mmdet/ops/roi_align/setup.py new file mode 100644 index 00000000000..f02a5ea30d6 --- /dev/null +++ b/mmdet/ops/roi_align/setup.py @@ -0,0 +1,12 @@ +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + +setup( + name='roi_align_cuda', + ext_modules=[ + CUDAExtension('roi_align_cuda', [ + 'src/roi_align_cuda.cpp', + 'src/roi_align_kernel.cu', + ]), + ], + cmdclass={'build_ext': BuildExtension}) diff --git a/mmdet/ops/roi_align/src/roi_align_cuda.cpp b/mmdet/ops/roi_align/src/roi_align_cuda.cpp new file mode 100644 index 00000000000..e4c28c14226 --- /dev/null +++ b/mmdet/ops/roi_align/src/roi_align_cuda.cpp @@ -0,0 +1,85 @@ +#include + +#include +#include + +int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, + const float spatial_scale, const int sample_num, + const int channels, const int height, + const int width, const int num_rois, + const int pooled_height, const int pooled_width, + at::Tensor output); + +int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, + const float spatial_scale, const int sample_num, + const int channels, const int height, + const int width, const int num_rois, + const int pooled_height, const int pooled_width, + at::Tensor bottom_grad); + +#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ") +#define CHECK_CONTIGUOUS(x) \ + AT_ASSERT(x.is_contiguous(), #x " must be contiguous ") +#define CHECK_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) + +int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, + int pooled_height, int pooled_width, + float spatial_scale, int sample_num, + at::Tensor output) { + CHECK_INPUT(features); + CHECK_INPUT(rois); + CHECK_INPUT(output); + + // Number of ROIs + int num_rois = 
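`RoIAlign` expects rois as (n, 5) rows of [batch_ind, x1, y1, x2, y2] in image coordinates; a minimal smoke test, assuming the extension is compiled and a GPU is available:

import torch
from mmdet.ops.roi_align import RoIAlign

feat = torch.randn(2, 16, 32, 32).cuda()          # (N, C, H, W) features
rois = torch.tensor([[0., 8., 8., 120., 120.],
                     [1., 0., 0., 63., 63.]]).cuda()

# spatial_scale=1/8 maps image coords onto the 32x32 map; sample_num=2
# places a 2x2 bilinear sampling grid in every output bin
align = RoIAlign(7, spatial_scale=1. / 8, sample_num=2)
print(align(feat, rois).shape)  # torch.Size([2, 16, 7, 7])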
rois.size(0); + int size_rois = rois.size(1); + + if (size_rois != 5) { + printf("wrong roi size\n"); + return 0; + } + + int num_channels = features.size(1); + int data_height = features.size(2); + int data_width = features.size(3); + + ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, + num_channels, data_height, data_width, num_rois, + pooled_height, pooled_width, output); + + return 1; +} + +int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, + int pooled_height, int pooled_width, + float spatial_scale, int sample_num, + at::Tensor bottom_grad) { + CHECK_INPUT(top_grad); + CHECK_INPUT(rois); + CHECK_INPUT(bottom_grad); + + // Number of ROIs + int num_rois = rois.size(0); + int size_rois = rois.size(1); + if (size_rois != 5) { + printf("wrong roi size\n"); + return 0; + } + + int num_channels = bottom_grad.size(1); + int data_height = bottom_grad.size(2); + int data_width = bottom_grad.size(3); + + ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, + num_channels, data_height, data_width, num_rois, + pooled_height, pooled_width, bottom_grad); + + return 1; +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); + m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); +} diff --git a/mmdet/ops/roi_align/src/roi_align_kernel.cu b/mmdet/ops/roi_align/src/roi_align_kernel.cu new file mode 100644 index 00000000000..31be093c038 --- /dev/null +++ b/mmdet/ops/roi_align/src/roi_align_kernel.cu @@ -0,0 +1,319 @@ +#include + +#include +#include + +#include +#include +#include + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + +#define THREADS_PER_BLOCK 1024 + +inline int GET_BLOCKS(const int N) { + int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + int max_block_num = 65000; + return min(optimal_block_num, max_block_num); +} + +template +__device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data, + const int height, const int width, + scalar_t y, scalar_t x) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + return 0; + } + + if (y <= 0) + y = 0; + if (x <= 0) + x = 0; + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (scalar_t)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (scalar_t)x_low; + } else { + x_high = x_low + 1; + } + + scalar_t ly = y - y_low; + scalar_t lx = x - x_low; + scalar_t hy = 1. - ly; + scalar_t hx = 1. 
- lx; + // do bilinear interpolation + scalar_t lt = bottom_data[y_low * width + x_low]; + scalar_t rt = bottom_data[y_low * width + x_high]; + scalar_t lb = bottom_data[y_high * width + x_low]; + scalar_t rb = bottom_data[y_high * width + x_high]; + scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + scalar_t val = (w1 * lt + w2 * rt + w3 * lb + w4 * rb); + + return val; +} + +template +__global__ void +ROIAlignForward(const int nthreads, const scalar_t *bottom_data, + const scalar_t *bottom_rois, const scalar_t spatial_scale, + const int sample_num, const int channels, const int height, + const int width, const int pooled_height, + const int pooled_width, scalar_t *top_data) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the aligned output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const scalar_t *offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale; + scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale; + scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale; + scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale; + + // Force malformed ROIs to be 1x1 + scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.); + scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.); + + scalar_t bin_size_h = roi_height / pooled_height; + scalar_t bin_size_w = roi_width / pooled_width; + + const scalar_t *offset_bottom_data = + bottom_data + (roi_batch_ind * channels + c) * height * width; + + int sample_num_h = (sample_num > 0) + ? sample_num + : ceil(roi_height / pooled_height); // e.g., = 2 + int sample_num_w = + (sample_num > 0) ? 
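The weights w1..w4 computed above are the standard bilinear coefficients; the same interpolation for a single in-bounds point, in NumPy:

import numpy as np

def bilinear(data, y, x):
    # data: (H, W); returns the value at a continuous in-bounds (y, x)
    h, w = data.shape
    y0, x0 = int(y), int(x)
    y1, x1 = min(y0 + 1, h - 1), min(x0 + 1, w - 1)
    ly, lx = y - y0, x - x0
    hy, hx = 1. - ly, 1. - lx
    # corners: top-left, top-right, bottom-left, bottom-right
    return (hy * hx * data[y0, x0] + hy * lx * data[y0, x1] +
            ly * hx * data[y1, x0] + ly * lx * data[y1, x1])

grid = np.arange(16.).reshape(4, 4)
print(bilinear(grid, 1.5, 2.5))  # 8.5, the mean of 6, 7, 10 and 11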
sample_num : ceil(roi_width / pooled_width); + + scalar_t h = (scalar_t)(ph + 0.5) * bin_size_h + roi_start_h; + scalar_t w = (scalar_t)(pw + 0.5) * bin_size_w + roi_start_w; + + int hstart = fminf(floor(h), height - 2); + int wstart = fminf(floor(w), width - 2); + + scalar_t output_val = 0; + for (int iy = 0; iy < sample_num_h; iy++) { + const scalar_t y = roi_start_h + ph * bin_size_h + + (scalar_t)(iy + scalar_t(.5f)) * bin_size_h / + (scalar_t)(sample_num_h); + for (int ix = 0; ix < sample_num_w; ix++) { + const scalar_t x = roi_start_w + pw * bin_size_w + + (scalar_t)(ix + scalar_t(.5f)) * bin_size_w / + (scalar_t)(sample_num_w); + scalar_t val = bilinear_interpolate(offset_bottom_data, + height, width, y, x); + output_val += val; + } + } + output_val /= (sample_num_h * sample_num_w); + top_data[index] = output_val; + } +} + +int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, + const float spatial_scale, const int sample_num, + const int channels, const int height, + const int width, const int num_rois, + const int pooled_height, const int pooled_width, + at::Tensor output) { + const int output_size = num_rois * pooled_height * pooled_width * channels; + AT_DISPATCH_FLOATING_TYPES( + features.type(), "ROIAlignLaucherForward", ([&] { + const scalar_t *bottom_data = features.data(); + const scalar_t *rois_data = rois.data(); + scalar_t *top_data = output.data(); + + ROIAlignForward< + scalar_t><<>>( + output_size, bottom_data, rois_data, scalar_t(spatial_scale), + sample_num, channels, height, width, pooled_height, pooled_width, + top_data); + })); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } + + return 1; +} + +template +__device__ void +bilinear_interpolate_gradient(const int height, const int width, scalar_t y, + scalar_t x, scalar_t &w1, scalar_t &w2, + scalar_t &w3, scalar_t &w4, int &x_low, + int &x_high, int &y_low, int &y_high) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y <= 0) + y = 0; + if (x <= 0) + x = 0; + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (scalar_t)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (scalar_t)x_low; + } else { + x_high = x_low + 1; + } + + scalar_t ly = y - y_low; + scalar_t lx = x - x_low; + scalar_t hy = 1. - ly; + scalar_t hx = 1. 
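Each output bin therefore averages sample_num_h * sample_num_w bilinear samples placed at centered offsets inside the bin. A worked example of the sample-row positions for one bin, with made-up values roi_start_h = 4.0, bin_size_h = 2.0, sample_num_h = 2 and ph = 3:

# mirrors the forward kernel's formula y = roi_start_h + ph*bin_h + (iy+0.5)*bin_h/n
roi_start_h, bin_size_h, n, ph = 4.0, 2.0, 2, 3
ys = [roi_start_h + ph * bin_size_h + (iy + 0.5) * bin_size_h / n
      for iy in range(n)]
print(ys)  # [10.5, 11.5], evenly spaced inside the bin [10.0, 12.0)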
- lx; + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +template +__global__ void +ROIAlignBackward(const int nthreads, const scalar_t *top_diff, + const scalar_t *bottom_rois, const scalar_t spatial_scale, + const int sample_num, const int channels, const int height, + const int width, const int pooled_height, + const int pooled_width, scalar_t *bottom_diff) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the aligned output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const scalar_t *offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale; + scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale; + scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale; + scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale; + + // Force malformed ROIs to be 1x1 + scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.); + scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.); + + scalar_t bin_size_h = roi_height / pooled_height; + scalar_t bin_size_w = roi_width / pooled_width; + + scalar_t *offset_bottom_diff = + bottom_diff + (roi_batch_ind * channels + c) * height * width; + int offset_top = (n * channels + c) * pooled_height * pooled_width + + ph * pooled_width + pw; + scalar_t offset_top_diff = top_diff[offset_top]; + + int sample_num_h = (sample_num > 0) + ? sample_num + : ceil(roi_height / pooled_height); // e.g., = 2 + int sample_num_w = + (sample_num > 0) ? sample_num : ceil(roi_width / pooled_width); + + const scalar_t count = (scalar_t)(sample_num_h * sample_num_w); + + scalar_t h = (scalar_t)(ph + 0.5) * bin_size_h + roi_start_h; + scalar_t w = (scalar_t)(pw + 0.5) * bin_size_w + roi_start_w; + + int hstart = fminf(floor(h), height - 2); + int wstart = fminf(floor(w), width - 2); + + for (int iy = 0; iy < sample_num_h; iy++) { + const scalar_t y = + roi_start_h + ph * bin_size_h + + (scalar_t)(iy + .5f) * bin_size_h / (scalar_t)(sample_num_h); + for (int ix = 0; ix < sample_num_w; ix++) { + const scalar_t x = + roi_start_w + pw * bin_size_w + + (scalar_t)(ix + .5f) * bin_size_w / (scalar_t)(sample_num_w); + scalar_t w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient( + height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); + scalar_t g1 = offset_top_diff * w1 / count; + scalar_t g2 = offset_top_diff * w2 / count; + scalar_t g3 = offset_top_diff * w3 / count; + scalar_t g4 = offset_top_diff * w4 / count; + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + atomicAdd(offset_bottom_diff + y_low * width + x_low, g1); + atomicAdd(offset_bottom_diff + y_low * width + x_high, g2); + atomicAdd(offset_bottom_diff + y_high * width + x_low, g3); + atomicAdd(offset_bottom_diff + y_high * width + x_high, g4); + } + } + } + } +} + +template <> +__global__ void ROIAlignBackward( + const int nthreads, const double *top_diff, const double *bottom_rois, + const double spatial_scale, const int sample_num, const int channels, + const int height, const int width, const int pooled_height, + const int pooled_width, double *bottom_diff) {} + +int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, + const float spatial_scale, const int sample_num, + const int channels, const 
int height, + const int width, const int num_rois, + const int pooled_height, const int pooled_width, + at::Tensor bottom_grad) { + const int output_size = num_rois * pooled_height * pooled_width * channels; + + AT_DISPATCH_FLOATING_TYPES( + top_grad.type(), "ROIAlignLaucherBackward", ([&] { + const scalar_t *top_diff = top_grad.data(); + const scalar_t *rois_data = rois.data(); + scalar_t *bottom_diff = bottom_grad.data(); + if (sizeof(scalar_t) == sizeof(double)) { + fprintf(stderr, "double is not supported\n"); + exit(-1); + } + + ROIAlignBackward< + scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( + output_size, top_diff, rois_data, spatial_scale, sample_num, + channels, height, width, pooled_height, pooled_width, bottom_diff); + })); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } + + return 1; +} diff --git a/mmdet/ops/roi_pool/__init__.py b/mmdet/ops/roi_pool/__init__.py new file mode 100644 index 00000000000..9c8506d319d --- /dev/null +++ b/mmdet/ops/roi_pool/__init__.py @@ -0,0 +1,2 @@ +from .functions.roi_pool import roi_pool +from .modules.roi_pool import RoIPool diff --git a/mmdet/ops/roi_pool/functions/__init__.py b/mmdet/ops/roi_pool/functions/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/mmdet/ops/roi_pool/functions/roi_pool.py b/mmdet/ops/roi_pool/functions/roi_pool.py new file mode 100644 index 00000000000..78ba1395fb9 --- /dev/null +++ b/mmdet/ops/roi_pool/functions/roi_pool.py @@ -0,0 +1,56 @@ +import torch +from torch.autograd import Function + +from .. import roi_pool_cuda + + +class RoIPoolFunction(Function): + + @staticmethod + def forward(ctx, features, rois, out_size, spatial_scale): + if isinstance(out_size, int): + out_h = out_size + out_w = out_size + elif isinstance(out_size, tuple): + assert len(out_size) == 2 + assert isinstance(out_size[0], int) + assert isinstance(out_size[1], int) + out_h, out_w = out_size + else: + raise TypeError( + '"out_size" must be an integer or tuple of integers') + assert features.is_cuda + ctx.save_for_backward(rois) + num_channels = features.size(1) + num_rois = rois.size(0) + out_size = (num_rois, num_channels, out_h, out_w) + output = features.new_zeros(*out_size) + + argmax = features.new_zeros(*out_size, dtype=torch.int) + roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, + output, argmax) + ctx.spatial_scale = spatial_scale + ctx.feature_size = features.size() + ctx.argmax = argmax + + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + spatial_scale = ctx.spatial_scale + feature_size = ctx.feature_size + argmax = ctx.argmax + rois = ctx.saved_tensors[0] + assert feature_size is not None + + grad_input = grad_rois = None + if ctx.needs_input_grad[0]: + grad_input = grad_output.new(feature_size).zero_() + roi_pool_cuda.backward(grad_output, rois, argmax, spatial_scale, + grad_input) + + return grad_input, grad_rois, None, None + + +roi_pool = RoIPoolFunction.apply diff --git a/mmdet/ops/roi_pool/gradcheck.py b/mmdet/ops/roi_pool/gradcheck.py new file mode 100644 index 00000000000..dfc08b2e138 --- /dev/null +++ b/mmdet/ops/roi_pool/gradcheck.py @@ -0,0 +1,15 @@ +import torch +from torch.autograd import gradcheck + +import os.path as osp +import sys +sys.path.append(osp.abspath(osp.join(__file__, '../../'))) +from roi_pool import RoIPool + +feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() +rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 
43, 55], + [1, 67, 40, 110, 120]]).cuda() +inputs = (feat, rois) +print('Gradcheck for roi pooling...') +test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) +print(test) diff --git a/mmdet/ops/roi_pool/modules/__init__.py b/mmdet/ops/roi_pool/modules/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/mmdet/ops/roi_pool/modules/roi_pool.py b/mmdet/ops/roi_pool/modules/roi_pool.py new file mode 100644 index 00000000000..d7fffd08c65 --- /dev/null +++ b/mmdet/ops/roi_pool/modules/roi_pool.py @@ -0,0 +1,14 @@ +from torch.nn.modules.module import Module +from ..functions.roi_pool import roi_pool + + +class RoIPool(Module): + + def __init__(self, out_size, spatial_scale): + super(RoIPool, self).__init__() + + self.out_size = out_size + self.spatial_scale = float(spatial_scale) + + def forward(self, features, rois): + return roi_pool(features, rois, self.out_size, self.spatial_scale) diff --git a/mmdet/ops/roi_pool/setup.py b/mmdet/ops/roi_pool/setup.py new file mode 100644 index 00000000000..16991b88922 --- /dev/null +++ b/mmdet/ops/roi_pool/setup.py @@ -0,0 +1,12 @@ +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + +setup( + name='roi_pool', + ext_modules=[ + CUDAExtension('roi_pool_cuda', [ + 'src/roi_pool_cuda.cpp', + 'src/roi_pool_kernel.cu', + ]) + ], + cmdclass={'build_ext': BuildExtension}) diff --git a/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp new file mode 100644 index 00000000000..799c151d192 --- /dev/null +++ b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp @@ -0,0 +1,86 @@ +#include + +#include +#include + +int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, + const float spatial_scale, const int channels, + const int height, const int width, const int num_rois, + const int pooled_h, const int pooled_w, + at::Tensor output, at::Tensor argmax); + +int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, + const at::Tensor argmax, const float spatial_scale, + const int batch_size, const int channels, + const int height, const int width, + const int num_rois, const int pooled_h, + const int pooled_w, at::Tensor bottom_grad); + +#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ") +#define CHECK_CONTIGUOUS(x) \ + AT_ASSERT(x.is_contiguous(), #x " must be contiguous ") +#define CHECK_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) + +int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, + int pooled_height, int pooled_width, + float spatial_scale, at::Tensor output, + at::Tensor argmax) { + CHECK_INPUT(features); + CHECK_INPUT(rois); + CHECK_INPUT(output); + CHECK_INPUT(argmax); + + // Number of ROIs + int num_rois = rois.size(0); + int size_rois = rois.size(1); + + if (size_rois != 5) { + printf("wrong roi size\n"); + return 0; + } + + int channels = features.size(1); + int height = features.size(2); + int width = features.size(3); + + ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, + num_rois, pooled_height, pooled_width, output, argmax); + + return 1; +} + +int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, + at::Tensor argmax, float spatial_scale, + at::Tensor bottom_grad) { + CHECK_INPUT(top_grad); + CHECK_INPUT(rois); + CHECK_INPUT(argmax); + CHECK_INPUT(bottom_grad); + + int pooled_height = top_grad.size(2); + int pooled_width = top_grad.size(3); + int num_rois = rois.size(0); + int size_rois = rois.size(1); + + if 
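`RoIPool` shares the roi layout with `RoIAlign` but max-pools over integer bins, keeping an argmax map for the backward pass; a minimal smoke test under the same assumptions (built extension, GPU available):

import torch
from mmdet.ops.roi_pool import RoIPool

feat = torch.randn(2, 16, 32, 32).cuda()
rois = torch.tensor([[0., 0., 0., 64., 64.],      # [batch_ind, x1, y1, x2, y2]
                     [1., 16., 16., 96., 96.]]).cuda()

pool = RoIPool(7, spatial_scale=1. / 8)
print(pool(feat, rois).shape)  # torch.Size([2, 16, 7, 7])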
(size_rois != 5) { + printf("wrong roi size\n"); + return 0; + } + int batch_size = bottom_grad.size(0); + int channels = bottom_grad.size(1); + int height = bottom_grad.size(2); + int width = bottom_grad.size(3); + + ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, + channels, height, width, num_rois, pooled_height, + pooled_width, bottom_grad); + + return 1; +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); + m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); +} diff --git a/mmdet/ops/roi_pool/src/roi_pool_kernel.cu b/mmdet/ops/roi_pool/src/roi_pool_kernel.cu new file mode 100644 index 00000000000..c94a9cd7850 --- /dev/null +++ b/mmdet/ops/roi_pool/src/roi_pool_kernel.cu @@ -0,0 +1,193 @@ +#include + +#include +#include + +#include +#include +#include + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + +#define THREADS_PER_BLOCK 1024 + +inline int GET_BLOCKS(const int N) { + int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + int max_block_num = 65000; + return min(optimal_block_num, max_block_num); +} + +template +__global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, + const scalar_t *rois, + const scalar_t spatial_scale, const int channels, + const int height, const int width, + const int pooled_h, const int pooled_w, + scalar_t *top_data, int *argmax_data) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_w; + int ph = (index / pooled_w) % pooled_h; + int c = (index / pooled_w / pooled_h) % channels; + int n = index / pooled_w / pooled_h / channels; + + const scalar_t *offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + // calculate the roi region on feature maps + scalar_t roi_x1 = offset_rois[1] * spatial_scale; + scalar_t roi_y1 = offset_rois[2] * spatial_scale; + scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale; + scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale; + + // force malformed rois to be 1x1 + scalar_t roi_w = roi_x2 - roi_x1; + scalar_t roi_h = roi_y2 - roi_y1; + if (roi_w <= 0 || roi_h <= 0) + continue; + + scalar_t bin_size_w = roi_w / static_cast(pooled_w); + scalar_t bin_size_h = roi_h / static_cast(pooled_h); + + // the corresponding bin region + int bin_x1 = floor(static_cast(pw) * bin_size_w + roi_x1); + int bin_y1 = floor(static_cast(ph) * bin_size_h + roi_y1); + int bin_x2 = ceil(static_cast(pw + 1) * bin_size_w + roi_x1); + int bin_y2 = ceil(static_cast(ph + 1) * bin_size_h + roi_y1); + + // add roi offsets and clip to input boundaries + bin_x1 = min(max(bin_x1, 0), width); + bin_y1 = min(max(bin_y1, 0), height); + bin_x2 = min(max(bin_x2, 0), width); + bin_y2 = min(max(bin_y2, 0), height); + bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1); + + // If nothing is pooled, argmax = -1 causes nothing to be backprop'd + int max_idx = -1; + bottom_data += (roi_batch_ind * channels + c) * height * width; + + // Define an empty pooling region to be zero + scalar_t max_val = is_empty ? 
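The forward kernel maps each pooled cell to a clipped half-open integer window via floor/ceil; the arithmetic for one cell, with made-up roi numbers:

import math

roi_x1, roi_y1, roi_w, roi_h = 2.5, 3.0, 14.0, 10.0  # roi on the feature map
pooled_w = pooled_h = 7
pw = ph = 0                                           # top-left output cell

bin_w, bin_h = roi_w / pooled_w, roi_h / pooled_h
x1 = math.floor(pw * bin_w + roi_x1)        # 2
y1 = math.floor(ph * bin_h + roi_y1)        # 3
x2 = math.ceil((pw + 1) * bin_w + roi_x1)   # 5, ceil of 4.5
y2 = math.ceil((ph + 1) * bin_h + roi_y1)   # 5, ceil of 4.43
# the max (and its argmax) is taken over columns [2, 5) x rows [3, 5)
print((x1, y1, x2, y2))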
0 : bottom_data[bin_y1 * width + bin_x1] - 1; + + for (int h = bin_y1; h < bin_y2; ++h) { + for (int w = bin_x1; w < bin_x2; ++w) { + int offset = h * width + w; + if (bottom_data[offset] > max_val) { + max_val = bottom_data[offset]; + max_idx = offset; + } + } + } + top_data[index] = max_val; + if (argmax_data != NULL) + argmax_data[index] = max_idx; + } +} + +int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, + const float spatial_scale, const int channels, + const int height, const int width, const int num_rois, + const int pooled_h, const int pooled_w, + at::Tensor output, at::Tensor argmax) { + const int output_size = num_rois * channels * pooled_h * pooled_w; + + AT_DISPATCH_FLOATING_TYPES( + features.type(), "ROIPoolLaucherForward", ([&] { + const scalar_t *bottom_data = features.data(); + const scalar_t *rois_data = rois.data(); + scalar_t *top_data = output.data(); + int *argmax_data = argmax.data(); + + ROIPoolForward< + scalar_t><<>>( + output_size, bottom_data, rois_data, scalar_t(spatial_scale), + channels, height, width, pooled_h, pooled_w, top_data, argmax_data); + })); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } + return 1; +} + +template +__global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff, + const scalar_t *rois, const int *argmax_data, + const scalar_t spatial_scale, + const int channels, const int height, + const int width, const int pooled_h, + const int pooled_w, scalar_t *bottom_diff) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int pw = index % pooled_w; + int ph = (index / pooled_w) % pooled_h; + int c = (index / pooled_w / pooled_h) % channels; + int n = index / pooled_w / pooled_h / channels; + + int roi_batch_ind = rois[n * 5]; + int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w + + ph * pooled_w + pw]; + + atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width + + bottom_index, + top_diff[index]); + } +} + +template <> +__global__ void +ROIPoolBackward(const int nthreads, const double *top_diff, + const double *rois, const int *argmax_data, + const double spatial_scale, const int channels, + const int height, const int width, const int pooled_h, + const int pooled_w, double *bottom_diff) { + // CUDA_1D_KERNEL_LOOP(index, nthreads) { + // int pw = index % pooled_w; + // int ph = (index / pooled_w) % pooled_h; + // int c = (index / pooled_w / pooled_h) % channels; + // int n = index / pooled_w / pooled_h / channels; + + // int roi_batch_ind = rois[n * 5]; + // int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w + + // ph * pooled_w + pw]; + + // *(bottom_diff + (roi_batch_ind * channels + c) * height * width + + // bottom_index) +=top_diff[index]; + // } +} + +int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, + const at::Tensor argmax, const float spatial_scale, + const int batch_size, const int channels, + const int height, const int width, + const int num_rois, const int pooled_h, + const int pooled_w, at::Tensor bottom_grad) { + const int output_size = num_rois * pooled_h * pooled_w * channels; + + AT_DISPATCH_FLOATING_TYPES( + top_grad.type(), "ROIPoolLaucherBackward", ([&] { + const scalar_t *top_diff = top_grad.data(); + const scalar_t *rois_data = rois.data(); + const int *argmax_data = argmax.data(); + scalar_t *bottom_diff = bottom_grad.data(); + + if (sizeof(scalar_t) == sizeof(double)) { + 
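Because max pooling is only sensitive to the winning input, the backward kernel routes each upstream gradient to the stored argmax location (with atomicAdd, since rois can overlap); conceptually:

def roi_pool_backward_cell(bottom_diff, argmax_idx, top_diff_val):
    # bottom_diff: flat gradient buffer for one (roi_batch_ind, c) plane
    # argmax_idx == -1 marks an empty pooling region and gets no gradient
    if argmax_idx >= 0:
        bottom_diff[argmax_idx] += top_diff_val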
fprintf(stderr, "double is not supported\n"); + exit(-1); + } + + ROIPoolBackward< + scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( + output_size, top_diff, rois_data, argmax_data, + scalar_t(spatial_scale), channels, height, width, pooled_h, + pooled_w, bottom_diff); + })); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } + + return 1; +} diff --git a/mmdet/version.py b/mmdet/version.py new file mode 100644 index 00000000000..2b8877c5057 --- /dev/null +++ b/mmdet/version.py @@ -0,0 +1 @@ +__version__ = '0.5.0' diff --git a/setup.py b/setup.py new file mode 100644 index 00000000000..8ed19bd5a81 --- /dev/null +++ b/setup.py @@ -0,0 +1,40 @@ +from setuptools import find_packages, setup + + +def readme(): + with open('README.md') as f: + content = f.read() + return content + + +def get_version(): + version_file = 'mmdet/version.py' + with open(version_file, 'r') as f: + exec(compile(f.read(), version_file, 'exec')) + return locals()['__version__'] + + +setup( + name='mmdet', + version=get_version(), + description='Open MMLab Detection Toolbox', + long_description=readme(), + keywords='computer vision, object detection', + packages=find_packages(), + classifiers=[ + 'Development Status :: 4 - Beta', + 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Topic :: Utilities', + ], + license='GPLv3', + setup_requires=['pytest-runner'], + tests_require=['pytest'], + install_requires=['numpy', 'matplotlib', 'six', 'terminaltables'], + zip_safe=False) From 2dba837c62a4cbe7297775a05409e515cc708b73 Mon Sep 17 00:00:00 2001 From: pangjm Date: Wed, 12 Sep 2018 21:39:44 +0800 Subject: [PATCH 02/81] Update single base version --- TDL.md | 29 ++ mmdet/core/__init__.py | 9 +- mmdet/core/bbox_ops/__init__.py | 10 +- mmdet/core/bbox_ops/bbox_target.py | 87 ++++++ mmdet/core/bbox_ops/sampling.py | 42 ++- mmdet/core/bbox_ops/transforms.py | 19 ++ mmdet/core/losses/__init__.py | 12 + mmdet/core/losses/losses.py | 110 ++++++++ mmdet/core/mask_ops/__init__.py | 3 +- mmdet/core/mask_ops/mask_target.py | 35 +++ mmdet/core/post_processing/merge_augs.py | 2 +- mmdet/core/rpn_ops/__init__.py | 2 + mmdet/core/{ => rpn_ops}/anchor_generator.py | 0 mmdet/core/rpn_ops/anchor_target.py | 159 +++++++++++ mmdet/core/targets/__init__.py | 5 - mmdet/core/targets/anchor_target.py | 2 - mmdet/core/targets/bbox_target.py | 2 - mmdet/core/targets/mask_target.py | 2 - mmdet/core/test_engine.py | 14 + mmdet/core/train_engine.py | 40 +++ mmdet/core/utils/__init__.py | 3 + mmdet/core/utils/dist_utils.py | 60 ++++ mmdet/core/{ => utils}/hooks.py | 17 +- mmdet/core/utils/misc.py | 118 ++++++++ mmdet/datasets/coco.py | 28 +- mmdet/datasets/data_engine.py | 29 ++ mmdet/datasets/transforms.py | 9 +- mmdet/models/__init__.py | 1 + mmdet/models/bbox_heads/__init__.py | 3 +- mmdet/models/bbox_heads/bbox_head.py | 14 +- mmdet/models/bbox_heads/convfc_bbox_head.py | 174 ++++++++++++ mmdet/models/builder.py | 3 +- mmdet/models/detectors/__init__.py | 1 + .../detectors/{two_stage.py => detector.py} | 241 ++++++++-------- mmdet/models/detectors/rpn.py | 100 ------- mmdet/models/mask_heads/fcn_mask_head.py | 54 +--- mmdet/models/necks/fpn.py | 4 +- 
mmdet/models/rpn_heads/rpn_head.py | 4 +- mmdet/models/{common => utils}/__init__.py | 2 + mmdet/models/{common => utils}/conv_module.py | 0 mmdet/models/{ => utils}/misc.py | 0 mmdet/models/{common => utils}/norm.py | 0 mmdet/models/{ => utils}/weight_init.py | 0 mmdet/nn/parallel/scatter_gather.py | 2 +- tools/eval.py | 265 ++++++++++++++++++ tools/examples/r50_fpn_frcnn_1x.py | 125 +++++++++ tools/examples/r50_fpn_maskrcnn_1x.py | 136 +++++++++ tools/examples/r50_fpn_rpn_1x.py | 95 +++++++ tools/test.py | 65 +++++ tools/train.py | 85 ++++++ 50 files changed, 1911 insertions(+), 311 deletions(-) create mode 100644 TDL.md create mode 100644 mmdet/core/bbox_ops/bbox_target.py create mode 100644 mmdet/core/losses/__init__.py create mode 100644 mmdet/core/losses/losses.py create mode 100644 mmdet/core/mask_ops/mask_target.py create mode 100644 mmdet/core/rpn_ops/__init__.py rename mmdet/core/{ => rpn_ops}/anchor_generator.py (100%) create mode 100644 mmdet/core/rpn_ops/anchor_target.py delete mode 100644 mmdet/core/targets/__init__.py delete mode 100644 mmdet/core/targets/anchor_target.py delete mode 100644 mmdet/core/targets/bbox_target.py delete mode 100644 mmdet/core/targets/mask_target.py create mode 100644 mmdet/core/test_engine.py create mode 100644 mmdet/core/train_engine.py create mode 100644 mmdet/core/utils/__init__.py create mode 100644 mmdet/core/utils/dist_utils.py rename mmdet/core/{ => utils}/hooks.py (96%) create mode 100644 mmdet/core/utils/misc.py create mode 100644 mmdet/datasets/data_engine.py create mode 100644 mmdet/models/bbox_heads/convfc_bbox_head.py rename mmdet/models/detectors/{two_stage.py => detector.py} (60%) delete mode 100644 mmdet/models/detectors/rpn.py rename mmdet/models/{common => utils}/__init__.py (71%) rename mmdet/models/{common => utils}/conv_module.py (100%) rename mmdet/models/{ => utils}/misc.py (100%) rename mmdet/models/{common => utils}/norm.py (100%) rename mmdet/models/{ => utils}/weight_init.py (100%) create mode 100644 tools/eval.py create mode 100644 tools/examples/r50_fpn_frcnn_1x.py create mode 100644 tools/examples/r50_fpn_maskrcnn_1x.py create mode 100644 tools/examples/r50_fpn_rpn_1x.py create mode 100644 tools/test.py create mode 100644 tools/train.py diff --git a/TDL.md b/TDL.md new file mode 100644 index 00000000000..1679338c047 --- /dev/null +++ b/TDL.md @@ -0,0 +1,29 @@ +### MMCV +- [ ] Implement the 'get' attribute of 'Config' +- [ ] Config bug: None type becomes '{}' with addict +- [ ] Default logger should be enabled only on GPU 0 +- [ ] Unit tests: mmcv and mmcv.torchpack + + +### MMDetection + +#### Basic +- [ ] Implement a non-distributed training function +- [ ] Verify nccl/nccl2/gloo +- [ ] Replace UGLY code: params stuffed into 'args' to form a global flow +- [ ] Replace 'print' with 'logger' + + +#### Testing +- [ ] Implement distributed testing +- [ ] Implement single-GPU testing + + +#### Refactor +- [ ] Reconsider param names +- [ ] Refactor functions in 'core' +- [ ] Merge single test & aug test into one function, and remove other redundancy + +#### New features
- [ ] Plug loss params into Config +- [ ] Multi-head communication diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py index 7992d8deb3b..52ed690e668 100644 --- a/mmdet/core/__init__.py +++ b/mmdet/core/__init__.py @@ -1,6 +1,9 @@ -from .anchor_generator import * +from .train_engine import * +from .test_engine import * +from .rpn_ops import * from .bbox_ops import * from .mask_ops import * +from .losses import * from .eval import * -from .nn import * -from .targets import 
* +from .post_processing import * +from .utils import * diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox_ops/__init__.py index 4bf9aeb74a5..dbdbb970648 100644 --- a/mmdet/core/bbox_ops/__init__.py +++ b/mmdet/core/bbox_ops/__init__.py @@ -1,12 +1,16 @@ from .geometry import bbox_overlaps from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps, - bbox_sampling, sample_positives, sample_negatives) + bbox_sampling, sample_positives, sample_negatives, + sample_proposals) from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip, - bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox) + bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox, + bbox2result) +from .bbox_target import bbox_target __all__ = [ 'bbox_overlaps', 'random_choice', 'bbox_assign', 'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives', 'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip', - 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox' + 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', + 'bbox_target', 'sample_proposals' ] diff --git a/mmdet/core/bbox_ops/bbox_target.py b/mmdet/core/bbox_ops/bbox_target.py new file mode 100644 index 00000000000..ce1f885e184 --- /dev/null +++ b/mmdet/core/bbox_ops/bbox_target.py @@ -0,0 +1,87 @@ +import mmcv +import torch + +from .geometry import bbox_overlaps +from .transforms import bbox_transform, bbox_transform_inv + + +def bbox_target(pos_proposals_list, + neg_proposals_list, + pos_gt_bboxes_list, + pos_gt_labels_list, + cfg, + reg_num_classes=1, + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + return_list=False): + img_per_gpu = len(pos_proposals_list) + all_labels = [] + all_label_weights = [] + all_bbox_targets = [] + all_bbox_weights = [] + for img_id in range(img_per_gpu): + pos_proposals = pos_proposals_list[img_id] + neg_proposals = neg_proposals_list[img_id] + pos_gt_bboxes = pos_gt_bboxes_list[img_id] + pos_gt_labels = pos_gt_labels_list[img_id] + labels, label_weights, bbox_targets, bbox_weights = proposal_target_single( + pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, + reg_num_classes, cfg, target_means, target_stds) + all_labels.append(labels) + all_label_weights.append(label_weights) + all_bbox_targets.append(bbox_targets) + all_bbox_weights.append(bbox_weights) + + if return_list: + return all_labels, all_label_weights, all_bbox_targets, all_bbox_weights + + labels = torch.cat(all_labels, 0) + label_weights = torch.cat(all_label_weights, 0) + bbox_targets = torch.cat(all_bbox_targets, 0) + bbox_weights = torch.cat(all_bbox_weights, 0) + return labels, label_weights, bbox_targets, bbox_weights + + +def proposal_target_single(pos_proposals, + neg_proposals, + pos_gt_bboxes, + pos_gt_labels, + reg_num_classes, + cfg, + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]): + num_pos = pos_proposals.size(0) + num_neg = neg_proposals.size(0) + num_samples = num_pos + num_neg + labels = pos_proposals.new_zeros(num_samples, dtype=torch.long) + label_weights = pos_proposals.new_zeros(num_samples) + bbox_targets = pos_proposals.new_zeros(num_samples, 4) + bbox_weights = pos_proposals.new_zeros(num_samples, 4) + if num_pos > 0: + labels[:num_pos] = pos_gt_labels + pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight + label_weights[:num_pos] = pos_weight + pos_bbox_targets = bbox_transform(pos_proposals, pos_gt_bboxes, + target_means, target_stds) + 
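`proposal_target_single` lays out each image's positives first and negatives after them, so the concatenated batch targets have the following layout (a shape sketch with made-up labels):

import torch

num_pos, num_neg = 2, 3
num_samples = num_pos + num_neg
labels = torch.zeros(num_samples, dtype=torch.long)
labels[:num_pos] = torch.tensor([3, 7])   # gt classes of the positives
label_weights = torch.zeros(num_samples)
label_weights[:num_pos] = 1.0             # pos_weight (cfg.pos_weight <= 0 means 1.0)
label_weights[-num_neg:] = 1.0
bbox_weights = torch.zeros(num_samples, 4)
bbox_weights[:num_pos] = 1                # only positives are regressed
print(labels.tolist())                    # [3, 7, 0, 0, 0]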
bbox_targets[:num_pos, :] = pos_bbox_targets + bbox_weights[:num_pos, :] = 1 + if num_neg > 0: + label_weights[-num_neg:] = 1.0 + if reg_num_classes > 1: + bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights, + labels, reg_num_classes) + + return labels, label_weights, bbox_targets, bbox_weights + + +def expand_target(bbox_targets, bbox_weights, labels, num_classes): + bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), + 4 * num_classes)) + bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), + 4 * num_classes)) + for i in torch.nonzero(labels > 0).squeeze(-1): + start, end = labels[i] * 4, (labels[i] + 1) * 4 + bbox_targets_expand[i, start:end] = bbox_targets[i, :] + bbox_weights_expand[i, start:end] = bbox_weights[i, :] + return bbox_targets_expand, bbox_weights_expand diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox_ops/sampling.py index 9825e3bd15e..eed82049640 100644 --- a/mmdet/core/bbox_ops/sampling.py +++ b/mmdet/core/bbox_ops/sampling.py @@ -42,7 +42,7 @@ def bbox_assign(proposals, min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, for RPN, it is usually set as 0, for Fast R-CNN, it is usually set as pos_iou_thr - crowd_thr: ignore proposals which have iof(intersection over foreground) with + crowd_thr: ignore proposals which have iof(intersection over foreground) with crowd bboxes over crowd_thr Returns: tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) @@ -253,3 +253,43 @@ def bbox_sampling(assigned_gt_inds, max_overlaps, neg_balance_thr, neg_hard_fraction) return pos_inds, neg_inds + + + +def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list, + gt_labels_list, cfg): + cfg_list = [cfg for _ in range(len(proposals_list))] + results = map(sample_proposals_single, proposals_list, gt_bboxes_list, + gt_crowds_list, gt_labels_list, cfg_list) + # list of tuple to tuple of list + return tuple(map(list, zip(*results))) + + +def sample_proposals_single(proposals, + gt_bboxes, + gt_crowds, + gt_labels, + cfg): + proposals = proposals[:, :4] + assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ + bbox_assign( + proposals, gt_bboxes, gt_crowds, gt_labels, cfg.pos_iou_thr, + cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr) + if cfg.add_gt_as_proposals: + proposals = torch.cat([gt_bboxes, proposals], dim=0) + gt_assign_self = torch.arange( + 1, len(gt_labels) + 1, dtype=torch.long, device=proposals.device) + assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds]) + assigned_labels = torch.cat([gt_labels, assigned_labels]) + + pos_inds, neg_inds = bbox_sampling( + assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub, + cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr) + pos_proposals = proposals[pos_inds] + neg_proposals = proposals[neg_inds] + pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1 + pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] + pos_gt_labels = assigned_labels[pos_inds] + + return (pos_inds, neg_inds, pos_proposals, neg_proposals, + pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox_ops/transforms.py index 6f83a1dc56e..a9f1e2a45fa 100644 --- a/mmdet/core/bbox_ops/transforms.py +++ b/mmdet/core/bbox_ops/transforms.py @@ -126,3 +126,22 @@ def roi2bbox(rois): bbox = rois[inds, 1:] bbox_list.append(bbox) return bbox_list + + +def bbox2result(bboxes, labels, num_classes): + """Convert detection results to a list of numpy arrays + 
Args: + bboxes (Tensor): shape (n, 5) + labels (Tensor): shape (n, ) + num_classes (int): class number, including background class + Returns: + list(ndarray): bbox results of each class + """ + if bboxes.shape[0] == 0: + return [ + np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1) + ] + else: + bboxes = bboxes.cpu().numpy() + labels = labels.cpu().numpy() + return [bboxes[labels == i, :] for i in range(num_classes - 1)] diff --git a/mmdet/core/losses/__init__.py b/mmdet/core/losses/__init__.py new file mode 100644 index 00000000000..3e4447ff0a6 --- /dev/null +++ b/mmdet/core/losses/__init__.py @@ -0,0 +1,12 @@ +from .losses import ( + weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy, + sigmoid_focal_loss, weighted_sigmoid_focal_loss, mask_cross_entropy, + weighted_mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy) + +__all__ = [ + 'weighted_nll_loss', 'weighted_cross_entropy', + 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', + 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', + 'weighted_mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', + 'accuracy' +] diff --git a/mmdet/core/losses/losses.py b/mmdet/core/losses/losses.py new file mode 100644 index 00000000000..575c91d0536 --- /dev/null +++ b/mmdet/core/losses/losses.py @@ -0,0 +1,110 @@ +# TODO merge naive and weighted loss to one function. +import torch +import torch.nn.functional as F + +from ..bbox_ops import bbox_transform_inv, bbox_overlaps + + +def weighted_nll_loss(pred, label, weight, ave_factor=None): + if ave_factor is None: + ave_factor = max(torch.sum(weight > 0).float().item(), 1.) + raw = F.nll_loss(pred, label, size_average=False, reduce=False) + return torch.sum(raw * weight)[None] / ave_factor + + +def weighted_cross_entropy(pred, label, weight, ave_factor=None): + if ave_factor is None: + ave_factor = max(torch.sum(weight > 0).float().item(), 1.) + raw = F.cross_entropy(pred, label, size_average=False, reduce=False) + return torch.sum(raw * weight)[None] / ave_factor + + +def weighted_binary_cross_entropy(pred, label, weight, ave_factor=None): + if ave_factor is None: + ave_factor = max(torch.sum(weight > 0).float().item(), 1.) 
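`bbox2result` buckets detections by foreground class and drops the background slot; for example, with num_classes = 4 (three foreground classes plus background):

import torch
from mmdet.core.bbox_ops import bbox2result

bboxes = torch.tensor([[10., 10., 50., 50., .9],
                       [12., 11., 51., 49., .8],
                       [60., 60., 90., 95., .7]])
labels = torch.tensor([0, 0, 2])          # two class-0 boxes, one class-2 box

result = bbox2result(bboxes, labels, num_classes=4)
print([r.shape for r in result])          # [(2, 5), (0, 5), (1, 5)]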
+    return F.binary_cross_entropy_with_logits(
+        pred, label.float(), weight.float(),
+        size_average=False)[None] / ave_factor
+
+
+def sigmoid_focal_loss(pred,
+                       target,
+                       weight,
+                       gamma=2.0,
+                       alpha=0.25,
+                       size_average=True):
+    pred_sigmoid = pred.sigmoid()
+    pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
+    weight = (alpha * target + (1 - alpha) * (1 - target)) * weight
+    weight = weight * pt.pow(gamma)
+    return F.binary_cross_entropy_with_logits(
+        pred, target, weight, size_average=size_average)
+
+
+def weighted_sigmoid_focal_loss(pred,
+                                target,
+                                weight,
+                                gamma=2.0,
+                                alpha=0.25,
+                                ave_factor=None,
+                                num_classes=80):
+    if ave_factor is None:
+        ave_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6
+    return sigmoid_focal_loss(
+        pred, target, weight, gamma=gamma, alpha=alpha,
+        size_average=False)[None] / ave_factor
+
+
+def mask_cross_entropy(pred, target, label):
+    num_rois = pred.size()[0]
+    inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
+    pred_slice = pred[inds, label].squeeze(1)
+    return F.binary_cross_entropy_with_logits(
+        pred_slice, target, size_average=True)[None]
+
+
+def weighted_mask_cross_entropy(pred, target, weight, label):
+    num_rois = pred.size()[0]
+    # 1e-6 guards the division when a batch has no positive samples
+    num_samples = torch.sum(weight > 0).float().item() + 1e-6
+    inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
+    pred_slice = pred[inds, label].squeeze(1)
+    return F.binary_cross_entropy_with_logits(
+        pred_slice, target, weight, size_average=False)[None] / num_samples
+
+
+def smooth_l1_loss(pred, target, beta=1.0, size_average=True, reduce=True):
+    assert beta > 0
+    assert pred.size() == target.size() and target.numel() > 0
+    diff = torch.abs(pred - target)
+    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
+                       diff - 0.5 * beta)
+    if size_average:
+        loss /= pred.numel()
+    if reduce:
+        loss = loss.sum()
+    return loss
+
+
+def weighted_smoothl1(pred, target, weight, beta=1.0, ave_factor=None):
+    if ave_factor is None:
+        ave_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
+    loss = smooth_l1_loss(pred, target, beta, size_average=False, reduce=False)
+    return torch.sum(loss * weight)[None] / ave_factor
+
+
+def accuracy(pred, target, topk=1):
+    if isinstance(topk, int):
+        topk = (topk, )
+        return_single = True
+    else:
+        return_single = False
+
+    maxk = max(topk)
+    _, pred_label = pred.topk(maxk, 1, True, True)
+    pred_label = pred_label.t()
+    correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
+
+    res = []
+    for k in topk:
+        correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
+        res.append(correct_k.mul_(100.0 / pred.size(0)))
+    return res[0] if return_single else res
diff --git a/mmdet/core/mask_ops/__init__.py b/mmdet/core/mask_ops/__init__.py
index 25850cdc62a..4669ba1f910 100644
--- a/mmdet/core/mask_ops/__init__.py
+++ b/mmdet/core/mask_ops/__init__.py
@@ -2,9 +2,10 @@
     polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting, rle_mask_nms,
     rle_masks_to_boxes)
 from .utils import split_combined_gt_polys
+from .mask_target import mask_target
 
 __all__ = [
     'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box',
     'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes',
-    'split_combined_gt_polys'
+    'split_combined_gt_polys', 'mask_target'
 ]
diff --git a/mmdet/core/mask_ops/mask_target.py b/mmdet/core/mask_ops/mask_target.py
new file mode 100644
index 00000000000..3fb65e35874
--- /dev/null
+++ b/mmdet/core/mask_ops/mask_target.py
@@ -0,0 +1,35 @@
+import torch
+import numpy as np
+
+from .segms import
polys_to_mask_wrt_box + + +def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_polys_list, + img_meta, cfg): + cfg_list = [cfg for _ in range(len(pos_proposals_list))] + img_metas = [img_meta for _ in range(len(pos_proposals_list))] + mask_targets = map(mask_target_single, pos_proposals_list, + pos_assigned_gt_inds_list, gt_polys_list, img_metas, + cfg_list) + mask_targets = torch.cat(tuple(mask_targets), dim=0) + return mask_targets + + +def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_polys, + img_meta, cfg): + + mask_size = cfg.mask_size + num_pos = pos_proposals.size(0) + mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size)) + if num_pos > 0: + pos_proposals = pos_proposals.cpu().numpy() + pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() + scale_factor = img_meta['scale_factor'][0].cpu().numpy() + for i in range(num_pos): + bbox = pos_proposals[i, :] / scale_factor + polys = gt_polys[pos_assigned_gt_inds[i]] + mask = polys_to_mask_wrt_box(polys, bbox, mask_size) + mask = np.array(mask > 0, dtype=np.float32) + mask_targets[i, ...] = torch.from_numpy(mask).to( + mask_targets.device) + return mask_targets diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py index 5d56e481e5a..35dfce24f91 100644 --- a/mmdet/core/post_processing/merge_augs.py +++ b/mmdet/core/post_processing/merge_augs.py @@ -1,6 +1,6 @@ import torch -from mmcv.ops import nms +from mmdet.ops import nms import numpy as np from ..bbox_ops import bbox_mapping_back diff --git a/mmdet/core/rpn_ops/__init__.py b/mmdet/core/rpn_ops/__init__.py new file mode 100644 index 00000000000..4d5f9244dde --- /dev/null +++ b/mmdet/core/rpn_ops/__init__.py @@ -0,0 +1,2 @@ +from .anchor_generator import * +from .anchor_target import * diff --git a/mmdet/core/anchor_generator.py b/mmdet/core/rpn_ops/anchor_generator.py similarity index 100% rename from mmdet/core/anchor_generator.py rename to mmdet/core/rpn_ops/anchor_generator.py diff --git a/mmdet/core/rpn_ops/anchor_target.py b/mmdet/core/rpn_ops/anchor_target.py new file mode 100644 index 00000000000..a6bba8ed221 --- /dev/null +++ b/mmdet/core/rpn_ops/anchor_target.py @@ -0,0 +1,159 @@ +import torch +import numpy as np +from ..bbox_ops import (bbox_assign, bbox_transform, bbox_sampling) + + +def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list, + img_shapes, target_means, target_stds, cfg): + """Compute anchor regression and classification targets + + Args: + anchor_list(list): anchors of each feature map level + featuremap_sizes(list): feature map sizes + gt_bboxes_list(list): ground truth bbox of images in a mini-batch + img_shapes(list): shape of each image in a mini-batch + cfg(dict): configs + + Returns: + tuple + """ + if len(featmap_sizes) == len(anchor_list): + all_anchors = torch.cat(anchor_list, 0) + anchor_nums = [anchors.size(0) for anchors in anchor_list] + use_isomerism_anchors = False + elif len(img_shapes) == len(anchor_list): + # using different anchors for different images + all_anchors_list = [ + torch.cat(anchor_list[img_id], 0) + for img_id in range(len(img_shapes)) + ] + anchor_nums = [anchors.size(0) for anchors in anchor_list[0]] + use_isomerism_anchors = True + else: + raise ValueError('length of anchor_list should be equal to number of ' + 'feature lvls or number of images in a batch') + all_labels = [] + all_label_weights = [] + all_bbox_targets = [] + all_bbox_weights = [] + num_total_sampled = 0 + for img_id in range(len(img_shapes)): + if 
isinstance(valid_flag_list[img_id], list): + valid_flags = torch.cat(valid_flag_list[img_id], 0) + else: + valid_flags = valid_flag_list[img_id] + if use_isomerism_anchors: + all_anchors = all_anchors_list[img_id] + inside_flags = anchor_inside_flags(all_anchors, valid_flags, + img_shapes[img_id][:2], + cfg.allowed_border) + if not inside_flags.any(): + return None + gt_bboxes = gt_bboxes_list[img_id] + anchor_targets = anchor_target_single(all_anchors, inside_flags, + gt_bboxes, target_means, + target_stds, cfg) + (labels, label_weights, bbox_targets, bbox_weights, pos_inds, + neg_inds) = anchor_targets + all_labels.append(labels) + all_label_weights.append(label_weights) + all_bbox_targets.append(bbox_targets) + all_bbox_weights.append(bbox_weights) + num_total_sampled += max(pos_inds.numel() + neg_inds.numel(), 1) + all_labels = torch.stack(all_labels, 0) + all_label_weights = torch.stack(all_label_weights, 0) + all_bbox_targets = torch.stack(all_bbox_targets, 0) + all_bbox_weights = torch.stack(all_bbox_weights, 0) + # split into different feature levels + labels_list = [] + label_weights_list = [] + bbox_targets_list = [] + bbox_weights_list = [] + start = 0 + for anchor_num in anchor_nums: + end = start + anchor_num + labels_list.append(all_labels[:, start:end].squeeze(0)) + label_weights_list.append(all_label_weights[:, start:end].squeeze(0)) + bbox_targets_list.append(all_bbox_targets[:, start:end].squeeze(0)) + bbox_weights_list.append(all_bbox_weights[:, start:end].squeeze(0)) + start = end + return (labels_list, label_weights_list, bbox_targets_list, + bbox_weights_list, num_total_sampled) + + +def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, + target_stds, cfg): + num_total_anchors = all_anchors.size(0) + anchors = all_anchors[inside_flags, :] + assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign( + anchors, + gt_bboxes, + pos_iou_thr=cfg.pos_iou_thr, + neg_iou_thr=cfg.neg_iou_thr, + min_pos_iou=cfg.min_pos_iou) + pos_inds, neg_inds = bbox_sampling(assigned_gt_inds, cfg.anchor_batch_size, + cfg.pos_fraction, cfg.neg_pos_ub, + cfg.pos_balance_sampling, max_overlaps, + cfg.neg_balance_thr) + + bbox_targets = torch.zeros_like(anchors) + bbox_weights = torch.zeros_like(anchors) + labels = torch.zeros_like(assigned_gt_inds) + label_weights = torch.zeros_like(assigned_gt_inds, dtype=torch.float) + + if len(pos_inds) > 0: + pos_inds = unique(pos_inds) + pos_anchors = anchors[pos_inds, :] + pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :] + pos_bbox_targets = bbox_transform(pos_anchors, pos_gt_bbox, + target_means, target_stds) + bbox_targets[pos_inds, :] = pos_bbox_targets + bbox_weights[pos_inds, :] = 1.0 + labels[pos_inds] = 1 + if cfg.pos_weight <= 0: + label_weights[pos_inds] = 1.0 + else: + label_weights[pos_inds] = cfg.pos_weight + if len(neg_inds) > 0: + neg_inds = unique(neg_inds) + label_weights[neg_inds] = 1.0 + + # map up to original set of anchors + labels = unmap(labels, num_total_anchors, inside_flags) + label_weights = unmap(label_weights, num_total_anchors, inside_flags) + bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) + bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) + + return (labels, label_weights, bbox_targets, bbox_weights, pos_inds, + neg_inds) + +def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0): + img_h, img_w = img_shape.float() + if allowed_border >= 0: + inside_flags = valid_flags & \ + (all_anchors[:, 0] >= -allowed_border) & \ + 
+        (all_anchors[:, 1] >= -allowed_border) & \
+        (all_anchors[:, 2] < img_w + allowed_border) & \
+        (all_anchors[:, 3] < img_h + allowed_border)
+    else:
+        inside_flags = valid_flags
+    return inside_flags
+
+def unique(tensor):
+    if tensor.is_cuda:
+        u_tensor = np.unique(tensor.cpu().numpy())
+        return tensor.new_tensor(u_tensor)
+    else:
+        return torch.unique(tensor)
+
+def unmap(data, count, inds, fill=0):
+    """ Unmap a subset of item (data) back to the original set of items (of
+    size count) """
+    if data.dim() == 1:
+        ret = data.new_full((count, ), fill)
+        ret[inds] = data
+    else:
+        new_size = (count, ) + data.size()[1:]
+        ret = data.new_full(new_size, fill)
+        ret[inds, :] = data
+    return ret
diff --git a/mmdet/core/targets/__init__.py b/mmdet/core/targets/__init__.py
deleted file mode 100644
index b3b2567efff..00000000000
--- a/mmdet/core/targets/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .anchor_target import anchor_target
-from .bbox_target import bbox_target
-from .mask_target import mask_target
-
-__all__ = ['anchor_target', 'bbox_target', 'mask_target']
diff --git a/mmdet/core/targets/anchor_target.py b/mmdet/core/targets/anchor_target.py
deleted file mode 100644
index ec2389f9088..00000000000
--- a/mmdet/core/targets/anchor_target.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def anchor_target():
-    pass
diff --git a/mmdet/core/targets/bbox_target.py b/mmdet/core/targets/bbox_target.py
deleted file mode 100644
index 49642c22987..00000000000
--- a/mmdet/core/targets/bbox_target.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def bbox_target():
-    pass
diff --git a/mmdet/core/targets/mask_target.py b/mmdet/core/targets/mask_target.py
deleted file mode 100644
index 4c330e13b81..00000000000
--- a/mmdet/core/targets/mask_target.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def mask_target():
-    pass
diff --git a/mmdet/core/test_engine.py b/mmdet/core/test_engine.py
new file mode 100644
index 00000000000..4825beda640
--- /dev/null
+++ b/mmdet/core/test_engine.py
@@ -0,0 +1,14 @@
+from mmdet.datasets import collate
+from mmdet.nn.parallel import scatter
+
+__all__ = ['_data_func']
+
+def _data_func(data, gpu_id):
+    imgs, img_metas = tuple(
+        scatter(collate([data], samples_per_gpu=1), [gpu_id])[0])
+    return dict(
+        img=imgs,
+        img_meta=img_metas,
+        return_loss=False,
+        return_bboxes=True,
+        rescale=True)
diff --git a/mmdet/core/train_engine.py b/mmdet/core/train_engine.py
new file mode 100644
index 00000000000..cc745faad87
--- /dev/null
+++ b/mmdet/core/train_engine.py
@@ -0,0 +1,40 @@
+import numpy as np
+import torch
+from collections import OrderedDict
+from mmdet.nn.parallel import scatter
+
+
+def parse_losses(losses):
+    log_vars = OrderedDict()
+    for loss_key, loss_value in losses.items():
+        if isinstance(loss_value, dict):
+            for _key, _value in loss_value.items():
+                if isinstance(_value, list):
+                    _value = sum([_loss.mean() for _loss in _value])
+                else:
+                    _value = _value.mean()
+                log_vars[_key] = _value
+        elif isinstance(loss_value, list):
+            log_vars[loss_key] = sum(_loss.mean() for _loss in loss_value)
+        else:
+            log_vars[loss_key] = loss_value.mean()
+
+    loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
+    log_vars['loss'] = loss
+    for _key, _value in log_vars.items():
+        log_vars[_key] = _value.item()
+
+    return loss, log_vars
+
+
+def batch_processor(model, data, train_mode, args=None):
+    data = scatter(data, [torch.cuda.current_device()])[0]
+    losses = model(**data)
+    loss, log_vars = parse_losses(losses)
+
+    outputs = dict(
+        loss=loss / args.world_size,
+        log_vars=log_vars,
num_samples=len(data['img'].data)) + + return outputs diff --git a/mmdet/core/utils/__init__.py b/mmdet/core/utils/__init__.py new file mode 100644 index 00000000000..2b6e79d62e6 --- /dev/null +++ b/mmdet/core/utils/__init__.py @@ -0,0 +1,3 @@ +from .dist_utils import * +from .hooks import * +from .misc import * diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py new file mode 100644 index 00000000000..47279c7bf8f --- /dev/null +++ b/mmdet/core/utils/dist_utils.py @@ -0,0 +1,60 @@ +import os +import torch +import torch.multiprocessing as mp +import torch.distributed as dist +from torch.nn.utils import clip_grad +from mmcv.torchpack import Hook, OptimizerStepperHook + +__all__ = [ + 'init_dist', 'average_gradients', 'broadcast_params', + 'DistOptimizerStepperHook', 'DistSamplerSeedHook' +] + + +def init_dist(world_size, + rank, + backend='gloo', + master_ip='127.0.0.1', + port=29500): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + os.environ['MASTER_ADDR'] = master_ip + os.environ['MASTER_PORT'] = str(port) + if backend == 'nccl': + dist.init_process_group(backend='nccl') + else: + dist.init_process_group( + backend='gloo', rank=rank, world_size=world_size) + + +def average_gradients(model): + for param in model.parameters(): + if param.requires_grad and not (param.grad is None): + dist.all_reduce(param.grad.data) + + +def broadcast_params(model): + for p in model.state_dict().values(): + dist.broadcast(p, 0) + + +class DistOptimizerStepperHook(OptimizerStepperHook): + + def after_train_iter(self, runner): + runner.optimizer.zero_grad() + runner.outputs['loss'].backward() + average_gradients(runner.model) + if self.grad_clip: + clip_grad.clip_grad_norm_( + filter(lambda p: p.requires_grad, runner.model.parameters()), + max_norm=self.max_norm, + norm_type=self.norm_type) + runner.optimizer.step() + + +class DistSamplerSeedHook(Hook): + + def before_epoch(self, runner): + runner.data_loader.sampler.set_epoch(runner.epoch) diff --git a/mmdet/core/hooks.py b/mmdet/core/utils/hooks.py similarity index 96% rename from mmdet/core/hooks.py rename to mmdet/core/utils/hooks.py index 3347639d51a..f97e1fb2906 100644 --- a/mmdet/core/hooks.py +++ b/mmdet/core/utils/hooks.py @@ -7,10 +7,16 @@ import numpy as np import torch from mmcv.torchpack import Hook -from mmdet import collate, scatter +from mmdet.datasets import collate +from mmdet.nn.parallel import scatter from pycocotools.cocoeval import COCOeval -from .eval import eval_recalls +from ..eval import eval_recalls + +__all__ = [ + 'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook', + 'CocoDistEvalmAPHook' +] class EmptyCacheHook(Hook): @@ -237,10 +243,3 @@ def evaluate(self, runner, results): runner.log_buffer.output[field] = cocoEval.stats[0] runner.log_buffer.ready = True os.remove(tmp_file) - - -class CocoDistCascadeEvalmAPHook(CocoDistEvalmAPHook): - - def evaluate(self, runner, results): - results = [res[-1] for res in results] - super(CocoDistCascadeEvalmAPHook, self).evaluate(runner, results) diff --git a/mmdet/core/utils/misc.py b/mmdet/core/utils/misc.py new file mode 100644 index 00000000000..0f9c05e4577 --- /dev/null +++ b/mmdet/core/utils/misc.py @@ -0,0 +1,118 @@ +import subprocess + +import mmcv +import numpy as np +import torch + +__all__ = ['tensor2imgs', 'unique', 'unmap', 'results2json'] + + +def tensor2imgs(tensor, + color_order='RGB', + color_mean=(0.485, 0.456, 0.406), + 
color_std=(0.229, 0.224, 0.225)): + assert color_order in ['RGB', 'BGR'] + img_per_gpu = tensor.size(0) + color_mean = np.array(color_mean, dtype=np.float32) + color_std = np.array(color_std, dtype=np.float32) + imgs = [] + for img_id in range(img_per_gpu): + img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) + if color_order == 'RGB': + img = mmcv.rgb2bgr(img) + img = img * color_std + color_mean + imgs.append(np.ascontiguousarray(img)) + return imgs + + +def unique(tensor): + if tensor.is_cuda: + u_tensor = np.unique(tensor.cpu().numpy()) + return tensor.new_tensor(u_tensor) + else: + return torch.unique(tensor) + + +def unmap(data, count, inds, fill=0): + """ Unmap a subset of item (data) back to the original set of items (of + size count) """ + if data.dim() == 1: + ret = data.new_full((count, ), fill) + ret[inds] = data + else: + new_size = (count, ) + data.size()[1:] + ret = data.new_full(new_size, fill) + ret[inds, :] = data + return ret + +def xyxy2xywh(bbox): + _bbox = bbox.tolist() + return [ + _bbox[0], + _bbox[1], + _bbox[2] - _bbox[0] + 1, + _bbox[3] - _bbox[1] + 1, + ] + +def det2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + result = results[idx] + for label in range(len(result)): + bboxes = result[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + json_results.append(data) + return json_results + + +def segm2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + det, seg = results[idx] + for label in range(len(det)): + bboxes = det[label] + segms = seg[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + segms[i]['counts'] = segms[i]['counts'].decode() + data['segmentation'] = segms[i] + json_results.append(data) + return json_results + + +def proposal2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + bboxes = results[idx] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = 1 + json_results.append(data) + return json_results + + +def results2json(dataset, results, out_file): + if isinstance(results[0], list): + json_results = det2json(dataset, results) + elif isinstance(results[0], tuple): + json_results = segm2json(dataset, results) + elif isinstance(results[0], np.ndarray): + json_results = proposal2json(dataset, results) + else: + raise TypeError('invalid type of results') + mmcv.dump(json_results, out_file) diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index e0705e79b61..a7eedca6a2c 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -71,7 +71,6 @@ def parse_ann_info(ann_info, cat2label, with_mask=True): class CocoDataset(Dataset): - def __init__(self, ann_file, img_prefix, @@ -253,31 +252,38 @@ def __getitem__(self, idx): def prepare_test_img(self, idx): """Prepare an image for testing (multi-scale and flipping)""" - img_info = self._load_info(idx, with_ann=False) - img_file = osp.join(self.prefix, img_info['file_name']) + img_info = self.img_infos[idx] + img = mmcv.imread(osp.join(self.img_prefix, 
img_info['file_name']))
         proposal = (self.proposals[idx][:, :4]
                     if self.proposals is not None else None)
 
-        def prepare_single(img_file, scale, flip, proposal=None):
-            img_np, shape_scale_np = self.img_transform(img_file, scale, flip)
-            img, shape_scale = self.numpy2tensor(img_np, shape_scale_np)
-            img_meta = dict(shape_scale=shape_scale, flip=flip)
+        def prepare_single(img, scale, flip, proposal=None):
+            _img, _img_shape, _scale_factor = self.img_transform(
+                img, scale, flip)
+            img, img_shape, scale_factor = self.numpy2tensor(
+                _img, _img_shape, _scale_factor)
+            ori_shape = (img_info['height'], img_info['width'])
+            img_meta = dict(
+                ori_shape=ori_shape,
+                img_shape=img_shape,
+                scale_factor=scale_factor,
+                flip=flip)
             if proposal is not None:
-                proposal = self.bbox_transform(proposal, shape_scale_np, flip)
+                proposal = self.bbox_transform(proposal, _scale_factor, flip)
                 proposal = self.numpy2tensor(proposal)
             return img, img_meta, proposal
 
         imgs = []
         img_metas = []
         proposals = []
-        for scale in self.img_scale:
-            img, img_meta, proposal = prepare_single(img_file, scale, False,
+        for scale in self.img_scales:
+            img, img_meta, proposal = prepare_single(img, scale, False,
                                                      proposal)
             imgs.append(img)
             img_metas.append(img_meta)
             proposals.append(proposal)
             if self.flip_ratio > 0:
-                img, img_meta, prop = prepare_single(img_file, scale, True,
+                img, img_meta, prop = prepare_single(img, scale, True,
                                                      proposal)
                 imgs.append(img)
                 img_metas.append(img_meta)
diff --git a/mmdet/datasets/data_engine.py b/mmdet/datasets/data_engine.py
new file mode 100644
index 00000000000..0c89f21878a
--- /dev/null
+++ b/mmdet/datasets/data_engine.py
@@ -0,0 +1,29 @@
+from functools import partial
+import torch
+from .coco import CocoDataset
+from .collate import collate
+from .sampler import GroupSampler, DistributedGroupSampler
+
+
+def build_data(cfg, args):
+    dataset = CocoDataset(**cfg)
+
+    if args.dist:
+        sampler = DistributedGroupSampler(dataset, args.img_per_gpu,
+                                          args.world_size, args.rank)
+        batch_size = args.img_per_gpu
+        num_workers = args.data_workers
+    else:
+        sampler = GroupSampler(dataset, args.img_per_gpu)
+        batch_size = args.world_size * args.img_per_gpu
+        num_workers = args.world_size * args.data_workers
+
+    loader = torch.utils.data.DataLoader(
+        dataset,
+        batch_size=batch_size,
+        sampler=sampler,
+        num_workers=num_workers,
+        collate_fn=partial(collate, samples_per_gpu=args.img_per_gpu),
+        pin_memory=False)
+
+    return loader
diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py
index 81f3a627d0d..1532fe074f2 100644
--- a/mmdet/datasets/transforms.py
+++ b/mmdet/datasets/transforms.py
@@ -1,9 +1,8 @@
 import mmcv
-# import cvbase as cvb
 import numpy as np
 import torch
 
-from mmdet.core import segms
+from mmdet.core.mask_ops import segms
 
 __all__ = [
     'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor'
]
@@ -64,7 +63,7 @@ def __call__(self, img, scale, flip=False):
 class ImageCrop(object):
     """crop image patches and resize patches into fixed size
 
-    1. (read and) flip image (if needed)
+    1. (read and) flip image (if needed)
     2. crop image patches according to given bboxes
     3. resize patches into fixed size (default 224x224)
     4.
normalize the image (if needed) @@ -126,6 +125,8 @@ def __call__(self, bboxes, img_shape, scale_factor, flip=False): gt_bboxes = bboxes * scale_factor if flip: gt_bboxes = mmcv.bbox_flip(gt_bboxes, img_shape) + gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) + gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) if self.max_num_gts is None: return gt_bboxes else: @@ -205,4 +206,4 @@ def __call__(self, *args): if len(args) == 1: return torch.from_numpy(args[0]) else: - return tuple([torch.from_numpy(array) for array in args]) + return tuple([torch.from_numpy(np.array(array)) for array in args]) diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py index e69de29bb2d..2209550509f 100644 --- a/mmdet/models/__init__.py +++ b/mmdet/models/__init__.py @@ -0,0 +1 @@ +from .detectors import Detector diff --git a/mmdet/models/bbox_heads/__init__.py b/mmdet/models/bbox_heads/__init__.py index e6709af6176..49a86359429 100644 --- a/mmdet/models/bbox_heads/__init__.py +++ b/mmdet/models/bbox_heads/__init__.py @@ -1,3 +1,4 @@ from .bbox_head import BBoxHead +from .convfc_bbox_head import ConvFCRoIHead, SharedFCRoIHead -__all__ = ['BBoxHead'] +__all__ = ['BBoxHead', 'ConvFCRoIHead', 'SharedFCRoIHead'] diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py index 9f0c188a459..5f6e1136eed 100644 --- a/mmdet/models/bbox_heads/bbox_head.py +++ b/mmdet/models/bbox_heads/bbox_head.py @@ -1,7 +1,7 @@ import torch.nn as nn import torch.nn.functional as F -from mmdet.core import (bbox_transform_inv, bbox_target, multiclass_nms, +from mmdet.core import (bbox_transform_inv, multiclass_nms, bbox_target, weighted_cross_entropy, weighted_smoothl1, accuracy) @@ -10,7 +10,6 @@ class BBoxHead(nn.Module): regression respectively""" def __init__(self, - exclude_mal_box=True, with_avg_pool=False, with_cls=True, with_reg=True, @@ -31,7 +30,6 @@ def __init__(self, self.target_means = target_means self.target_stds = target_stds self.reg_class_agnostic = reg_class_agnostic - self.exclude_mal_box = exclude_mal_box in_channels = self.in_channels if self.with_avg_pool: @@ -61,7 +59,7 @@ def forward(self, x): bbox_pred = self.fc_reg(x) if self.with_reg else None return cls_score, bbox_pred - def bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes, + def get_bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, rcnn_train_cfg): reg_num_classes = 1 if self.reg_class_agnostic else self.num_classes cls_reg_targets = bbox_target( @@ -69,11 +67,10 @@ def bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes, neg_proposals, pos_gt_bboxes, pos_gt_labels, - self.target_means, - self.target_stds, rcnn_train_cfg, reg_num_classes, - debug_imgs=self.debug_imgs) + target_means=self.target_means, + target_stds=self.target_stds) return cls_reg_targets def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets, @@ -96,6 +93,7 @@ def get_det_bboxes(self, cls_score, bbox_pred, img_shape, + scale_factor, rescale=False, nms_cfg=None): if isinstance(cls_score, list): @@ -111,7 +109,7 @@ def get_det_bboxes(self, # TODO: add clip here if rescale: - bboxes /= img_shape[-1] + bboxes /= scale_factor.float() if nms_cfg is None: return bboxes, scores diff --git a/mmdet/models/bbox_heads/convfc_bbox_head.py b/mmdet/models/bbox_heads/convfc_bbox_head.py new file mode 100644 index 00000000000..02e2a6b6d85 --- /dev/null +++ b/mmdet/models/bbox_heads/convfc_bbox_head.py @@ -0,0 +1,174 @@ +import torch.nn as nn + +from .bbox_head import 
BBoxHead +from ..utils import ConvModule + + +class ConvFCRoIHead(BBoxHead): + """More general bbox head, with shared conv and fc layers and two optional + separated branches. + + /-> cls convs -> cls fcs -> cls + shared convs -> shared fcs + \-> reg convs -> reg fcs -> reg + """ + + def __init__(self, + num_shared_convs=0, + num_shared_fcs=0, + num_cls_convs=0, + num_cls_fcs=0, + num_reg_convs=0, + num_reg_fcs=0, + conv_out_channels=256, + fc_out_channels=1024, + *args, + **kwargs): + super(ConvFCRoIHead, self).__init__(*args, **kwargs) + assert (num_shared_convs + num_shared_fcs + num_cls_convs + num_cls_fcs + + num_reg_convs + num_reg_fcs > 0) + if num_cls_convs > 0 or num_reg_convs > 0: + assert num_shared_fcs == 0 + if not self.with_cls: + assert num_cls_convs == 0 and num_cls_fcs == 0 + if not self.with_reg: + assert num_reg_convs == 0 and num_reg_fcs == 0 + self.num_shared_convs = num_shared_convs + self.num_shared_fcs = num_shared_fcs + self.num_cls_convs = num_cls_convs + self.num_cls_fcs = num_cls_fcs + self.num_reg_convs = num_reg_convs + self.num_reg_fcs = num_reg_fcs + self.conv_out_channels = conv_out_channels + self.fc_out_channels = fc_out_channels + + # add shared convs and fcs + self.shared_convs, self.shared_fcs, last_layer_dim = self._add_conv_fc_branch( + self.num_shared_convs, self.num_shared_fcs, self.in_channels, True) + self.shared_out_channels = last_layer_dim + + # add cls specific branch + self.cls_convs, self.cls_fcs, self.cls_last_dim = self._add_conv_fc_branch( + self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels) + + # add reg specific branch + self.reg_convs, self.reg_fcs, self.reg_last_dim = self._add_conv_fc_branch( + self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels) + + if self.num_shared_fcs == 0 and not self.with_avg_pool: + if self.num_cls_fcs == 0: + self.cls_last_dim *= (self.roi_feat_size * self.roi_feat_size) + if self.num_reg_fcs == 0: + self.reg_last_dim *= (self.roi_feat_size * self.roi_feat_size) + + self.relu = nn.ReLU(inplace=True) + # reconstruct fc_cls and fc_reg since input channels are changed + if self.with_cls: + self.fc_cls = nn.Linear(self.cls_last_dim, self.num_classes) + if self.with_reg: + out_dim_reg = (4 if self.reg_class_agnostic else + 4 * self.num_classes) + self.fc_reg = nn.Linear(self.reg_last_dim, out_dim_reg) + + def _add_conv_fc_branch(self, + num_branch_convs, + num_branch_fcs, + in_channels, + is_shared=False): + """Add shared or separable branch + + convs -> avg pool (optional) -> fcs + """ + last_layer_dim = in_channels + # add branch specific conv layers + branch_convs = nn.ModuleList() + if num_branch_convs > 0: + for i in range(num_branch_convs): + conv_in_channels = (last_layer_dim + if i == 0 else self.conv_out_channels) + branch_convs.append( + ConvModule( + conv_in_channels, + self.conv_out_channels, + 3, + padding=1, + normalize=self.normalize, + bias=self.with_bias)) + last_layer_dim = self.conv_out_channels + # add branch specific fc layers + branch_fcs = nn.ModuleList() + if num_branch_fcs > 0: + # for shared branch, only consider self.with_avg_pool + # for separated branches, also consider self.num_shared_fcs + if (is_shared + or self.num_shared_fcs == 0) and not self.with_avg_pool: + last_layer_dim *= (self.roi_feat_size * self.roi_feat_size) + for i in range(num_branch_fcs): + fc_in_channels = (last_layer_dim + if i == 0 else self.fc_out_channels) + branch_fcs.append( + nn.Linear(fc_in_channels, self.fc_out_channels)) + last_layer_dim = self.fc_out_channels + return 
branch_convs, branch_fcs, last_layer_dim + + def init_weights(self): + super(ConvFCRoIHead, self).init_weights() + for module_list in [self.shared_fcs, self.cls_fcs, self.reg_fcs]: + for m in module_list.modules(): + if isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + nn.init.constant_(m.bias, 0) + + def forward(self, x): + # shared part + if self.num_shared_convs > 0: + for conv in self.shared_convs: + x = conv(x) + + if self.num_shared_fcs > 0: + if self.with_avg_pool: + x = self.avg_pool(x) + x = x.view(x.size(0), -1) + for fc in self.shared_fcs: + x = self.relu(fc(x)) + # separate branches + x_cls = x + x_reg = x + + for conv in self.cls_convs: + x_cls = conv(x_cls) + if x_cls.dim() > 2: + if self.with_avg_pool: + x_cls = self.avg_pool(x_cls) + x_cls = x_cls.view(x_cls.size(0), -1) + for fc in self.cls_fcs: + x_cls = self.relu(fc(x_cls)) + + for conv in self.reg_convs: + x_reg = conv(x_reg) + if x_reg.dim() > 2: + if self.with_avg_pool: + x_reg = self.avg_pool(x_reg) + x_reg = x_reg.view(x_reg.size(0), -1) + for fc in self.reg_fcs: + x_reg = self.relu(fc(x_reg)) + + cls_score = self.fc_cls(x_cls) if self.with_cls else None + bbox_pred = self.fc_reg(x_reg) if self.with_reg else None + return cls_score, bbox_pred + + +class SharedFCRoIHead(ConvFCRoIHead): + + def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs): + assert num_fcs >= 1 + super(SharedFCRoIHead, self).__init__( + num_shared_convs=0, + num_shared_fcs=num_fcs, + num_cls_convs=0, + num_cls_fcs=0, + num_reg_convs=0, + num_reg_fcs=0, + fc_out_channels=fc_out_channels, + *args, + **kwargs) diff --git a/mmdet/models/builder.py b/mmdet/models/builder.py index f109d851397..c3b058507fc 100644 --- a/mmdet/models/builder.py +++ b/mmdet/models/builder.py @@ -1,4 +1,5 @@ import mmcv +from mmcv import torchpack from torch import nn from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads, @@ -11,7 +12,7 @@ def _build_module(cfg, parrent=None): - return cfg if isinstance(cfg, nn.Module) else mmcv.obj_from_dict( + return cfg if isinstance(cfg, nn.Module) else torchpack.obj_from_dict( cfg, parrent) diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py index e69de29bb2d..5b690f8d77d 100644 --- a/mmdet/models/detectors/__init__.py +++ b/mmdet/models/detectors/__init__.py @@ -0,0 +1 @@ +from .detector import Detector diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/detector.py similarity index 60% rename from mmdet/models/detectors/two_stage.py rename to mmdet/models/detectors/detector.py index 0c057d606fb..80b7d4438cb 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/detector.py @@ -2,137 +2,141 @@ import torch.nn as nn from .. 
import builder -from mmdet.core.utils import tensor2imgs from mmdet.core import (bbox2roi, bbox_mapping, split_combined_gt_polys, - bbox_sampling, multiclass_nms, merge_aug_proposals, - merge_aug_bboxes, merge_aug_masks, bbox2result) + bbox2result, multiclass_nms, merge_aug_proposals, + merge_aug_bboxes, merge_aug_masks, sample_proposals) -class TwoStageDetector(nn.Module): - +class Detector(nn.Module): def __init__(self, backbone, - neck, - rpn_head, - roi_block, - bbox_head, - rpn_train_cfg, - rpn_test_cfg, - rcnn_train_cfg, - rcnn_test_cfg, + neck=None, + rpn_head=None, + roi_block=None, + bbox_head=None, mask_block=None, mask_head=None, + rpn_train_cfg=None, + rpn_test_cfg=None, + rcnn_train_cfg=None, + rcnn_test_cfg=None, pretrained=None): - super(TwoStageDetector, self).__init__() + super(Detector, self).__init__() self.backbone = builder.build_backbone(backbone) - self.neck = builder.build_neck(neck) if neck is not None else None - self.rpn_head = builder.build_rpn_head(rpn_head) - self.bbox_roi_extractor = builder.build_roi_block(roi_block) - self.bbox_head = builder.build_bbox_head(bbox_head) - self.mask_roi_extractor = builder.build_roi_block(mask_block) if ( - mask_block is not None) else None - self.mask_head = builder.build_mask_head(mask_head) if ( - mask_head is not None) else None - self.with_mask = False if self.mask_head is None else True - self.rpn_train_cfg = rpn_train_cfg - self.rpn_test_cfg = rpn_test_cfg - self.rcnn_train_cfg = rcnn_train_cfg - self.rcnn_test_cfg = rcnn_test_cfg + self.with_neck = True if neck is not None else False + if self.with_neck: + self.neck = builder.build_neck(neck) + + self.with_rpn = True if rpn_head is not None else False + if self.with_rpn: + self.rpn_head = builder.build_rpn_head(rpn_head) + self.rpn_train_cfg = rpn_train_cfg + self.rpn_test_cfg = rpn_test_cfg + + self.with_bbox = True if bbox_head is not None else False + if self.with_bbox: + self.bbox_roi_extractor = builder.build_roi_extractor(roi_block) + self.bbox_head = builder.build_bbox_head(bbox_head) + self.rcnn_train_cfg = rcnn_train_cfg + self.rcnn_test_cfg = rcnn_test_cfg + + self.with_mask = True if mask_head is not None else False + if self.with_mask: + self.mask_roi_extractor = builder.build_roi_extractor(mask_block) + self.mask_head = builder.build_mask_head(mask_head) + self.init_weights(pretrained=pretrained) def init_weights(self, pretrained=None): if pretrained is not None: print('load model from: {}'.format(pretrained)) self.backbone.init_weights(pretrained=pretrained) - if self.neck is not None: + if self.with_neck: if isinstance(self.neck, nn.Sequential): for m in self.neck: m.init_weights() else: self.neck.init_weights() - self.rpn_head.init_weights() - self.bbox_roi_extractor.init_weights() - self.bbox_head.init_weights() - if self.mask_roi_extractor is not None: + if self.with_rpn: + self.rpn_head.init_weights() + if self.with_bbox: + self.bbox_roi_extractor.init_weights() + self.bbox_head.init_weights() + if self.with_mask: self.mask_roi_extractor.init_weights() - if self.mask_head is not None: self.mask_head.init_weights() def forward(self, img, img_meta, gt_bboxes=None, + proposals=None, gt_labels=None, - gt_ignore=None, - gt_polys=None, + gt_bboxes_ignore=None, + gt_mask_polys=None, gt_poly_lens=None, num_polys_per_mask=None, return_loss=True, - return_bboxes=False, + return_bboxes=True, rescale=False): - if not return_loss: - return self.test(img, img_meta, rescale) + assert proposals is not None or self.with_rpn, "Only one of proposals file and RPN can 
exist." - if not self.with_mask: - assert (gt_polys is None and gt_poly_lens is None - and num_polys_per_mask is None) + if not return_loss: + return self.test(img, img_meta, proposals, rescale) else: - assert (gt_polys is not None and gt_poly_lens is not None - and num_polys_per_mask is not None) - gt_polys = split_combined_gt_polys(gt_polys, gt_poly_lens, - num_polys_per_mask) - - if self.rpn_train_cfg.get('debug', False): - self.rpn_head.debug_imgs = tensor2imgs(img) - if self.rcnn_train_cfg.get('debug', False): - self.bbox_head.debug_imgs = tensor2imgs(img) - if self.mask_head is not None: - self.mask_head.debug_imgs = tensor2imgs(img) - - img_shapes = img_meta['shape_scale'] + losses = dict() + img_shapes = img_meta['img_shape'] x = self.backbone(img) - if self.neck is not None: + + if self.with_neck: x = self.neck(x) - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + if self.with_rpn: + rpn_outs = self.rpn_head(x) + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, + self.rpn_train_cfg) + rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) + losses.update(rpn_losses) - (pos_inds, neg_inds, pos_proposals, neg_proposals, - pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) = bbox_sampling( - proposal_list, gt_bboxes, gt_ignore, gt_labels, - self.rcnn_train_cfg) + if self.with_bbox: + if self.with_rpn: + proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + else: + proposal_list = proposals - labels, label_weights, bbox_targets, bbox_weights = \ - self.bbox_head.proposal_target( - pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, - self.rcnn_train_cfg) + (pos_inds, neg_inds, pos_proposals, neg_proposals, + pos_assigned_gt_inds, + pos_gt_bboxes, pos_gt_labels) = sample_proposals( + proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels, + self.rcnn_train_cfg) - rois = bbox2roi([ - torch.cat([pos, neg], dim=0) - for pos, neg in zip(pos_proposals, neg_proposals) - ]) - # TODO: a more flexible way to configurate feat maps - roi_feats = self.bbox_roi_extractor( - x[:self.bbox_roi_extractor.num_inputs], rois) - cls_score, bbox_pred = self.bbox_head(roi_feats) + labels, label_weights, bbox_targets, bbox_weights = \ + self.bbox_head.get_bbox_target( + pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, + self.rcnn_train_cfg) - losses = dict() - rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, - self.rpn_train_cfg) - rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) - losses.update(rpn_losses) + rois = bbox2roi([ + torch.cat([pos, neg], dim=0) + for pos, neg in zip(pos_proposals, neg_proposals) + ]) + # TODO: a more flexible way to configurate feat maps + roi_feats = self.bbox_roi_extractor( + x[:self.bbox_roi_extractor.num_inputs], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) - loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, - label_weights, bbox_targets, - bbox_weights) - losses.update(loss_bbox) + loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, + label_weights, bbox_targets, + bbox_weights) + losses.update(loss_bbox) if self.with_mask: - mask_targets = self.mask_head.mask_target( - pos_proposals, pos_assigned_gt_inds, gt_polys, img_shapes, + gt_polys = split_combined_gt_polys(gt_mask_polys, gt_poly_lens, + num_polys_per_mask) + mask_targets = self.mask_head.get_mask_target( + pos_proposals, pos_assigned_gt_inds, gt_polys, img_meta, self.rcnn_train_cfg) 
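
Both the training and testing paths in this detector funnel per-image box lists through bbox2roi before RoI feature extraction. The helper's body is not shown in this patch, so the following is a minimal sketch of the packing convention it is assumed to follow (a batch-index column prepended to xyxy boxes, which is the layout the RoIAlign/RoIPool ops in mmdet/ops consume); bbox2roi_sketch is an illustrative name, not the real import:

    import torch

    def bbox2roi_sketch(bbox_list):
        # bbox_list: one (n_i, 4) tensor of xyxy boxes per image in the batch
        rois = []
        for img_id, bboxes in enumerate(bbox_list):
            img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)
            rois.append(torch.cat([img_inds, bboxes[:, :4]], dim=-1))
        # (sum of n_i, 5) rows of [batch_ind, x1, y1, x2, y2]
        return torch.cat(rois, dim=0)

    rois = bbox2roi_sketch([torch.tensor([[10., 10., 50., 60.]]),
                            torch.tensor([[5., 5., 30., 30.]])])
    # tensor([[ 0., 10., 10., 50., 60.],
    #         [ 1.,  5.,  5., 30., 30.]])
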
pos_rois = bbox2roi(pos_proposals) mask_feats = self.mask_roi_extractor( @@ -142,36 +146,40 @@ def forward(self, torch.cat(pos_gt_labels)) return losses - def test(self, imgs, img_metas, rescale=False): + def test(self, imgs, img_metas, proposals=None, rescale=False): """Test w/ or w/o augmentations.""" assert isinstance(imgs, list) and isinstance(img_metas, list) assert len(imgs) == len(img_metas) img_per_gpu = imgs[0].size(0) assert img_per_gpu == 1 if len(imgs) == 1: - return self.simple_test(imgs[0], img_metas[0], rescale) + return self.simple_test(imgs[0], img_metas[0], proposals, rescale) else: - return self.aug_test(imgs, img_metas, rescale) - - def simple_test_bboxes(self, x, img_meta, rescale=False): - """Test only det bboxes without augmentation.""" + return self.aug_test(imgs, img_metas, proposals, rescale) - img_shapes = img_meta['shape_scale'] + def simple_test_rpn(self, x, img_meta): + img_shapes = img_meta['img_shape'] + scale_factor = img_meta['scale_factor'] rpn_outs = self.rpn_head(x) proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs)[0] + return proposal_list - rois = bbox2roi(proposal_list) + def simple_test_bboxes(self, x, img_meta, proposals, rescale=False): + """Test only det bboxes without augmentation.""" + rois = bbox2roi(proposals) roi_feats = self.bbox_roi_extractor( x[:len(self.bbox_roi_extractor.featmap_strides)], rois) cls_score, bbox_pred = self.bbox_head(roi_feats) # image shape of the first image in the batch (only one) - img_shape = img_shapes[0] + img_shape = img_meta['img_shape'][0] + scale_factor = img_meta['scale_factor'] det_bboxes, det_labels = self.bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, + scale_factor, rescale=rescale, nms_cfg=self.rcnn_test_cfg) return det_bboxes, det_labels @@ -183,41 +191,52 @@ def simple_test_mask(self, det_labels, rescale=False): # image shape of the first image in the batch (only one) - img_shape = img_meta['shape_scale'][0] + img_shape = img_meta['img_shape'][0] + scale_factor = img_meta['scale_factor'] if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] else: # if det_bboxes is rescaled to the original image size, we need to # rescale it back to the testing scale to obtain RoIs. 
- _bboxes = (det_bboxes[:, :4] * img_shape[-1] + _bboxes = (det_bboxes[:, :4] * scale_factor.float() if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor( x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head(mask_feats) segm_result = self.mask_head.get_seg_masks( - mask_pred, det_bboxes, det_labels, img_shape, - self.rcnn_test_cfg, rescale) + mask_pred, + det_bboxes, + det_labels, + self.rcnn_test_cfg, + ori_scale=img_meta['ori_shape']) return segm_result - def simple_test(self, img, img_meta, rescale=False): + def simple_test(self, img, img_meta, proposals=None, rescale=False): """Test without augmentation.""" # get feature maps x = self.backbone(img) - if self.neck is not None: + if self.with_neck: x = self.neck(x) - det_bboxes, det_labels = self.simple_test_bboxes( - x, img_meta, rescale=rescale) - bbox_result = bbox2result(det_bboxes, det_labels, - self.bbox_head.num_classes) - if not self.with_mask: - return bbox_result - - segm_result = self.simple_test_mask( - x, img_meta, det_bboxes, det_labels, rescale=rescale) + if self.with_rpn: + proposals = self.simple_test_rpn(x, img_meta) + if self.with_bbox: + # BUG proposals shape? + det_bboxes, det_labels = self.simple_test_bboxes( + x, img_meta, [proposals], rescale=rescale) + bbox_result = bbox2result(det_bboxes, det_labels, + self.bbox_head.num_classes) + if not self.with_mask: + return bbox_result - return bbox_result, segm_result + segm_result = self.simple_test_mask( + x, img_meta, det_bboxes, det_labels, rescale=rescale) + return bbox_result, segm_result + else: + proposals[:, :4] /= img_meta['scale_factor'].float() + return proposals.cpu().numpy() + # TODO aug test haven't been verified def aug_test_bboxes(self, imgs, img_metas): """Test with augmentations for det bboxes.""" # step 1: get RPN proposals for augmented images, apply NMS to the diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py deleted file mode 100644 index 6d80c9d9b10..00000000000 --- a/mmdet/models/detectors/rpn.py +++ /dev/null @@ -1,100 +0,0 @@ -import torch.nn as nn - -from mmdet.core import tensor2imgs, merge_aug_proposals, bbox_mapping -from .. 
import builder - - -class RPN(nn.Module): - - def __init__(self, - backbone, - neck, - rpn_head, - rpn_train_cfg, - rpn_test_cfg, - pretrained=None): - super(RPN, self).__init__() - self.backbone = builder.build_backbone(backbone) - self.neck = builder.build_neck(neck) if neck is not None else None - self.rpn_head = builder.build_rpn_head(rpn_head) - self.rpn_train_cfg = rpn_train_cfg - self.rpn_test_cfg = rpn_test_cfg - self.init_weights(pretrained=pretrained) - - def init_weights(self, pretrained=None): - if pretrained is not None: - print('load model from: {}'.format(pretrained)) - self.backbone.init_weights(pretrained=pretrained) - if self.neck is not None: - self.neck.init_weights() - self.rpn_head.init_weights() - - def forward(self, - img, - img_meta, - gt_bboxes=None, - return_loss=True, - return_bboxes=False, - rescale=False): - if not return_loss: - return self.test(img, img_meta, rescale) - - img_shapes = img_meta['shape_scale'] - - if self.rpn_train_cfg.get('debug', False): - self.rpn_head.debug_imgs = tensor2imgs(img) - - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - rpn_outs = self.rpn_head(x) - - rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, - self.rpn_train_cfg) - losses = self.rpn_head.loss(*rpn_loss_inputs) - return losses - - def test(self, imgs, img_metas, rescale=False): - """Test w/ or w/o augmentations.""" - assert isinstance(imgs, list) and isinstance(img_metas, list) - assert len(imgs) == len(img_metas) - img_per_gpu = imgs[0].size(0) - assert img_per_gpu == 1 - if len(imgs) == 1: - return self.simple_test(imgs[0], img_metas[0], rescale) - else: - return self.aug_test(imgs, img_metas, rescale) - - def simple_test(self, img, img_meta, rescale=False): - img_shapes = img_meta['shape_scale'] - # get feature maps - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposals = self.rpn_head.get_proposals(*proposal_inputs)[0] - if rescale: - proposals[:, :4] /= img_shapes[0][-1] - return proposals.cpu().numpy() - - def aug_test(self, imgs, img_metas, rescale=False): - aug_proposals = [] - for img, img_meta in zip(imgs, img_metas): - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_meta['shape_scale'], - self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) - assert len(proposal_list) == 1 - aug_proposals.append(proposal_list[0]) # len(proposal_list) = 1 - merged_proposals = merge_aug_proposals(aug_proposals, img_metas, - self.rpn_test_cfg) - if not rescale: - img_shape = img_metas[0]['shape_scale'][0] - flip = img_metas[0]['flip'][0] - merged_proposals[:, :4] = bbox_mapping(merged_proposals[:, :4], - img_shape, flip) - return merged_proposals.cpu().numpy() diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py index 28865a68f00..016c05204bd 100644 --- a/mmdet/models/mask_heads/fcn_mask_head.py +++ b/mmdet/models/mask_heads/fcn_mask_head.py @@ -3,10 +3,9 @@ import pycocotools.mask as mask_util import torch import torch.nn as nn -import torch.utils.checkpoint as cp -from ..common import ConvModule -from mmdet.core import mask_target, mask_cross_entropy +from ..utils import ConvModule +from mmdet.core import mask_cross_entropy, mask_target class FCNMaskHead(nn.Module): @@ -21,7 +20,6 @@ def __init__(self, upsample_ratio=2, num_classes=81, class_agnostic=False, - 
with_cp=False, normalize=None): super(FCNMaskHead, self).__init__() if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']: @@ -39,7 +37,6 @@ def __init__(self, self.class_agnostic = class_agnostic self.normalize = normalize self.with_bias = normalize is None - self.with_cp = with_cp self.convs = nn.ModuleList() for i in range(self.num_convs): @@ -79,25 +76,9 @@ def init_weights(self): m.weight, mode='fan_out', nonlinearity='relu') nn.init.constant_(m.bias, 0) - def convs_forward(self, x): - - def m_lvl_convs_forward(x): - for conv in self.convs[1:-1]: - x = conv(x) - return x - - if self.num_convs > 0: - x = self.convs[0](x) - if self.num_convs > 1: - if self.with_cp and x.requires_grad: - x = cp.checkpoint(m_lvl_convs_forward, x) - else: - x = m_lvl_convs_forward(x) - x = self.convs[-1](x) - return x - def forward(self, x): - x = self.convs_forward(x) + for conv in self.convs: + x = conv(x) if self.upsample is not None: x = self.upsample(x) if self.upsample_method == 'deconv': @@ -105,24 +86,18 @@ def forward(self, x): mask_pred = self.conv_logits(x) return mask_pred - def mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks, - img_shapes, rcnn_train_cfg): + def get_mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks, + img_meta, rcnn_train_cfg): mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds, - gt_masks, img_shapes, rcnn_train_cfg) + gt_masks, img_meta, rcnn_train_cfg) return mask_targets def loss(self, mask_pred, mask_targets, labels): loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels) return loss_mask - def get_seg_masks(self, - mask_pred, - det_bboxes, - det_labels, - img_shape, - rcnn_test_cfg, - ori_scale, - rescale=True): + def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, + ori_scale): """Get segmentation masks from mask_pred and bboxes Args: mask_pred (Tensor or ndarray): shape (n, #class+1, h, w). 
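
The body of get_seg_masks below resizes each RoI's predicted mask to its detected box, binarizes it, and pastes it into a full-image canvas before RLE encoding. A self-contained sketch of that paste step, with cv2.resize standing in for mmcv.imresize and a hypothetical 0.5 threshold (the real one comes from rcnn_test_cfg):

    import cv2
    import numpy as np

    def paste_mask(mask_prob, bbox, img_h, img_w, thr=0.5):
        # mask_prob: (28, 28) probabilities for one RoI; bbox: xyxy, image coords
        x1, y1 = max(int(bbox[0]), 0), max(int(bbox[1]), 0)
        x2 = min(int(bbox[2]), img_w - 1)
        y2 = min(int(bbox[3]), img_h - 1)
        w, h = x2 - x1 + 1, y2 - y1 + 1
        canvas = np.zeros((img_h, img_w), dtype=np.uint8)
        canvas[y1:y1 + h, x1:x1 + w] = cv2.resize(mask_prob, (w, h)) > thr
        return canvas

    full_mask = paste_mask(np.random.rand(28, 28).astype(np.float32),
                           [12, 20, 80, 90], img_h=120, img_w=160)
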
@@ -143,14 +118,11 @@ def get_seg_masks(self, cls_segms = [[] for _ in range(self.num_classes - 1)] bboxes = det_bboxes.cpu().numpy()[:, :4] labels = det_labels.cpu().numpy() + 1 - scale_factor = img_shape[-1] if rescale else 1.0 - img_h = ori_scale['height'] if rescale else np.round( - ori_scale['height'].item() * img_shape[-1].item()).astype(np.int32) - img_w = ori_scale['width'] if rescale else np.round( - ori_scale['width'].item() * img_shape[-1].item()).astype(np.int32) + img_h = ori_scale[0] + img_w = ori_scale[1] for i in range(bboxes.shape[0]): - bbox = (bboxes[i, :] / float(scale_factor)).astype(int) + bbox = bboxes[i, :].astype(int) label = labels[i] w = bbox[2] - bbox[0] + 1 h = bbox[3] - bbox[1] + 1 @@ -164,7 +136,7 @@ def get_seg_masks(self, im_mask = np.zeros((img_h, img_w), dtype=np.float32) - im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.resize( + im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.imresize( mask_pred_, (w, h)) # im_mask = cv2.resize(im_mask, (img_w, img_h)) im_mask = np.array( diff --git a/mmdet/models/necks/fpn.py b/mmdet/models/necks/fpn.py index c4734e18621..8b5b49826ba 100644 --- a/mmdet/models/necks/fpn.py +++ b/mmdet/models/necks/fpn.py @@ -1,7 +1,7 @@ import torch.nn as nn import torch.nn.functional as F -from ..common import ConvModule -from ..weight_init import xavier_init +from ..utils import ConvModule +from ..utils import xavier_init class FPN(nn.Module): diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py index f2fce9ebe7a..7ffd441f694 100644 --- a/mmdet/models/rpn_heads/rpn_head.py +++ b/mmdet/models/rpn_heads/rpn_head.py @@ -9,8 +9,8 @@ weighted_cross_entropy, weighted_smoothl1, weighted_binary_cross_entropy) from mmdet.ops import nms -from ..misc import multi_apply -from ..weight_init import normal_init +from ..utils import multi_apply +from ..utils import normal_init class RPNHead(nn.Module): diff --git a/mmdet/models/common/__init__.py b/mmdet/models/utils/__init__.py similarity index 71% rename from mmdet/models/common/__init__.py rename to mmdet/models/utils/__init__.py index 1a611c25106..f11af964480 100644 --- a/mmdet/models/common/__init__.py +++ b/mmdet/models/utils/__init__.py @@ -1,4 +1,6 @@ from .conv_module import ConvModule from .norm import build_norm_layer +from .misc import * +from .weight_init import * __all__ = ['ConvModule', 'build_norm_layer'] diff --git a/mmdet/models/common/conv_module.py b/mmdet/models/utils/conv_module.py similarity index 100% rename from mmdet/models/common/conv_module.py rename to mmdet/models/utils/conv_module.py diff --git a/mmdet/models/misc.py b/mmdet/models/utils/misc.py similarity index 100% rename from mmdet/models/misc.py rename to mmdet/models/utils/misc.py diff --git a/mmdet/models/common/norm.py b/mmdet/models/utils/norm.py similarity index 100% rename from mmdet/models/common/norm.py rename to mmdet/models/utils/norm.py diff --git a/mmdet/models/weight_init.py b/mmdet/models/utils/weight_init.py similarity index 100% rename from mmdet/models/weight_init.py rename to mmdet/models/utils/weight_init.py diff --git a/mmdet/nn/parallel/scatter_gather.py b/mmdet/nn/parallel/scatter_gather.py index 82511fd1db1..47f794e8916 100644 --- a/mmdet/nn/parallel/scatter_gather.py +++ b/mmdet/nn/parallel/scatter_gather.py @@ -1,7 +1,7 @@ import torch from ._functions import Scatter from torch.nn.parallel._functions import Scatter as OrigScatter -from detkit.datasets.utils import DataContainer +from mmdet.datasets.utils import DataContainer def 
scatter(inputs, target_gpus, dim=0): diff --git a/tools/eval.py b/tools/eval.py new file mode 100644 index 00000000000..20cc571e94b --- /dev/null +++ b/tools/eval.py @@ -0,0 +1,265 @@ +from argparse import ArgumentParser +from multiprocessing import Pool +import matplotlib.pyplot as plt +import numpy as np +import copy +import os + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + + +def generate_area_range(splitRng=32, stop_size=128): + areaRng = [[0**2, 1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2]] + start = 0 + while start < stop_size: + end = start + splitRng + areaRng.append([start * start, end * end]) + start = end + areaRng.append([start * start, 1e5**2]) + return areaRng + + +def print_summarize(iouThr=None, + iouThrs=None, + precision=None, + recall=None, + areaRng_id=4, + areaRngs=None, + maxDets_id=2, + maxDets=None): + assert (precision is not None) or (recall is not None) + iStr = ' {:<18} {} @[ IoU={:<9} | size={:>5}-{:>5} | maxDets={:>3d} ] = {:0.3f}' + titleStr = 'Average Precision' if precision is not None else 'Average Recall' + typeStr = '(AP)' if precision is not None else '(AR)' + iouStr = '{:0.2f}:{:0.2f}'.format(iouThrs[0], iouThrs[-1]) \ + if iouThr is None else '{:0.2f}'.format(iouThr) + + aind = [areaRng_id] + mind = [maxDets_id] + if precision is not None: + # dimension of precision: [TxRxKxAxM] + s = precision + # IoU + if iouThr is not None: + t = np.where(iouThr == iouThrs)[0] + s = s[t] + s = s[:, :, :, aind, mind] + else: + # dimension of recall: [TxKxAxM] + s = recall + if iouThr is not None: + t = np.where(iouThr == iouThrs)[0] + s = s[t] + s = s[:, :, aind, mind] + if len(s[s > -1]) == 0: + mean_s = -1 + else: + mean_s = np.mean(s[s > -1]) + print( + iStr.format( + titleStr, typeStr, iouStr, np.sqrt(areaRngs[areaRng_id][0]), + np.sqrt(areaRngs[areaRng_id][1]) + if np.sqrt(areaRngs[areaRng_id][1]) < 999 else 'max', + maxDets[maxDets_id], mean_s)) + + +def eval_results(res_file, ann_file, res_types, splitRng): + for res_type in res_types: + assert res_type in ['proposal', 'bbox', 'segm', 'keypoints'] + + areaRng = generate_area_range(splitRng) + cocoGt = COCO(ann_file) + cocoDt = cocoGt.loadRes(res_file) + imgIds = cocoGt.getImgIds() + for res_type in res_types: + iou_type = 'bbox' if res_type == 'proposal' else res_type + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.imgIds = imgIds + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = [100, 300, 1000] + cocoEval.params.areaRng = areaRng + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + ps = cocoEval.eval['precision'] + rc = cocoEval.eval['recall'] + for i in range(len(areaRng)): + print_summarize(None, cocoEval.params.iouThrs, ps, None, i, + areaRng, 2, cocoEval.params.maxDets) + + +def makeplot(rs, ps, outDir, class_name): + cs = np.vstack([ + np.ones((2, 3)), + np.array([.31, .51, .74]), + np.array([.75, .31, .30]), + np.array([.36, .90, .38]), + np.array([.50, .39, .64]), + np.array([1, .6, 0]) + ]) + areaNames = ['all', 'small', 'medium', 'large'] + types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN'] + for i in range(len(areaNames)): + area_ps = ps[..., i, 0] + figure_tile = class_name + '-' + areaNames[i] + aps = [ps_.mean() for ps_ in area_ps] + ps_curve = [ + ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps + ] + ps_curve.insert(0, np.zeros(ps_curve[0].shape)) + fig = plt.figure() + ax = plt.subplot(111) + for k in range(len(types)): + ax.plot(rs, ps_curve[k + 1], 
color=[0, 0, 0], linewidth=0.5) + ax.fill_between( + rs, + ps_curve[k], + ps_curve[k + 1], + color=cs[k], + label=str('[{:.3f}'.format(aps[k]) + ']' + types[k])) + plt.xlabel('recall') + plt.ylabel('precision') + plt.xlim(0, 1.) + plt.ylim(0, 1.) + plt.title(figure_tile) + plt.legend() + # plt.show() + fig.savefig(outDir + '/{}.png'.format(figure_tile)) + plt.close(fig) + + +def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type): + nm = cocoGt.loadCats(catId)[0] + print('--------------analyzing {}-{}---------------'.format( + k + 1, nm['name'])) + ps_ = {} + dt = copy.deepcopy(cocoDt) + nm = cocoGt.loadCats(catId)[0] + imgIds = cocoGt.getImgIds() + dt_anns = dt.dataset['annotations'] + select_dt_anns = [] + for ann in dt_anns: + if ann['category_id'] == catId: + select_dt_anns.append(ann) + dt.dataset['annotations'] = select_dt_anns + dt.createIndex() + # compute precision but ignore superclass confusion + gt = copy.deepcopy(cocoGt) + child_catIds = gt.getCatIds(supNms=[nm['supercategory']]) + for idx, ann in enumerate(gt.dataset['annotations']): + if (ann['category_id'] in child_catIds + and ann['category_id'] != catId): + gt.dataset['annotations'][idx]['ignore'] = 1 + gt.dataset['annotations'][idx]['iscrowd'] = 1 + gt.dataset['annotations'][idx]['category_id'] = catId + cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.params.maxDets = [100] + cocoEval.params.iouThrs = [.1] + cocoEval.params.useCats = 1 + cocoEval.evaluate() + cocoEval.accumulate() + ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :] + ps_['ps_supercategory'] = ps_supercategory + # compute precision but ignore any class confusion + gt = copy.deepcopy(cocoGt) + for idx, ann in enumerate(gt.dataset['annotations']): + if ann['category_id'] != catId: + gt.dataset['annotations'][idx]['ignore'] = 1 + gt.dataset['annotations'][idx]['iscrowd'] = 1 + gt.dataset['annotations'][idx]['category_id'] = catId + cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.params.maxDets = [100] + cocoEval.params.iouThrs = [.1] + cocoEval.params.useCats = 1 + cocoEval.evaluate() + cocoEval.accumulate() + ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :] + ps_['ps_allcategory'] = ps_allcategory + return k, ps_ + + +def analyze_results(res_file, ann_file, res_types, out_dir): + for res_type in res_types: + assert res_type in ['bbox', 'segm'] + + directory = os.path.dirname(out_dir + '/') + if not os.path.exists(directory): + print('-------------create {}-----------------'.format(out_dir)) + os.makedirs(directory) + + cocoGt = COCO(ann_file) + cocoDt = cocoGt.loadRes(res_file) + imgIds = cocoGt.getImgIds() + for res_type in res_types: + iou_type = res_type + cocoEval = COCOeval( + copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.params.iouThrs = [.75, .5, .1] + cocoEval.params.maxDets = [100] + cocoEval.evaluate() + cocoEval.accumulate() + ps = cocoEval.eval['precision'] + ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))]) + catIds = cocoGt.getCatIds() + recThrs = cocoEval.params.recThrs + with Pool(processes=48) as pool: + args = [(k, cocoDt, cocoGt, catId, iou_type) + for k, catId in enumerate(catIds)] + analyze_results = pool.starmap(analyze_individual_category, args) + for k, catId in enumerate(catIds): + nm = cocoGt.loadCats(catId)[0] + print('--------------saving {}-{}---------------'.format( + k + 1, nm['name'])) + analyze_result = analyze_results[k] + assert k == 
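# analyze_individual_category (above) isolates each confusion type with a
# relabeling trick: ground truths marked ignore=1/iscrowd=1 and rewritten to
# category_id=catId let COCOeval match detections of other classes without
# counting them as false positives, which is what separates the
# supercategory-only (ps_supercategory) and any-class (ps_allcategory)
# precision slices gathered here.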
analyze_result[0] + ps_supercategory = analyze_result[1]['ps_supercategory'] + ps_allcategory = analyze_result[1]['ps_allcategory'] + # compute precision but ignore superclass confusion + ps[3, :, k, :, :] = ps_supercategory + # compute precision but ignore any class confusion + ps[4, :, k, :, :] = ps_allcategory + # fill in background and false negative errors and plot + ps[ps == -1] = 0 + ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0) + ps[6, :, k, :, :] = 1.0 + makeplot(recThrs, ps[:, :, k], out_dir, nm['name']) + makeplot(recThrs, ps, out_dir, 'all') + + +def main(): + parser = ArgumentParser(description='COCO Evaluation') + parser.add_argument('result', help='result file path') + parser.add_argument( + '--ann', + default='/mnt/SSD/dataset/coco/annotations/instances_minival2017.json', + help='annotation file path') + parser.add_argument( + '--types', type=str, nargs='+', default=['bbox'], help='result types') + parser.add_argument( + '--analyze', action='store_true', help='whether to analyze results') + parser.add_argument( + '--out_dir', + type=str, + default=None, + help='dir to save analyze result images') + parser.add_argument( + '--splitRng', + type=int, + default=32, + help='range to split area in evaluation') + args = parser.parse_args() + if not args.analyze: + eval_results(args.result, args.ann, args.types, splitRng=args.splitRng) + else: + assert args.out_dir is not None + analyze_results( + args.result, args.ann, args.types, out_dir=args.out_dir) + + +if __name__ == '__main__': + main() diff --git a/tools/examples/r50_fpn_frcnn_1x.py b/tools/examples/r50_fpn_frcnn_1x.py new file mode 100644 index 00000000000..6814445f8e1 --- /dev/null +++ b/tools/examples/r50_fpn_frcnn_1x.py @@ -0,0 +1,125 @@ +# model settings +model = dict( + pretrained= + '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + backbone=dict( + type='resnet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='fb'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + coarsest_stride=32, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True), + roi_block=dict( + type='SingleLevelRoI', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCRoIHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False)) +meta_params = dict( + rpn_train_cfg = dict( + pos_fraction=0.5, + pos_balance_sampling=False, + neg_pos_ub=256, + allowed_border=0, + anchor_batch_size=256, + pos_iou_thr=0.7, + neg_iou_thr=0.3, + neg_balance_thr=0, + min_pos_iou=1e-3, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rpn_test_cfg = dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn_train_cfg = dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + crowd_thr=1.1, + roi_batch_size=512, + add_gt_as_proposals=True, + pos_fraction=0.25, + pos_balance_sampling=False, + neg_pos_ub=512, + neg_balance_thr=0, + pos_weight=-1, + debug=False), + rcnn_test_cfg = dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5) +) +# dataset settings +data_root = 
'/mnt/lustre/pangjiangmiao/dataset/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True) +img_per_gpu = 1 +data_workers = 2 +train_dataset = dict( + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5) +test_dataset = dict( + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +# learning policy +lr_policy = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.333, + step=[8, 11]) +max_epoch = 12 +checkpoint_config = dict(interval=1) +dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +# logging settings +log_level = 'INFO' +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + ]) +# yapf:enable +work_dir = './model/r50_fpn_frcnn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/tools/examples/r50_fpn_maskrcnn_1x.py b/tools/examples/r50_fpn_maskrcnn_1x.py new file mode 100644 index 00000000000..49b32037ec5 --- /dev/null +++ b/tools/examples/r50_fpn_maskrcnn_1x.py @@ -0,0 +1,136 @@ +# model settings +model = dict( + pretrained= + '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + backbone=dict( + type='resnet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='fb'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + coarsest_stride=32, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True), + roi_block=dict( + type='SingleLevelRoI', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCRoIHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False), + mask_block=dict( + type='SingleLevelRoI', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=81)) +meta_params = dict( + rpn_train_cfg=dict( + pos_fraction=0.5, + pos_balance_sampling=False, + neg_pos_ub=256, + allowed_border=0, + anchor_batch_size=256, + pos_iou_thr=0.7, + neg_iou_thr=0.3, + neg_balance_thr=0, + min_pos_iou=1e-3, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rpn_test_cfg=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn_train_cfg=dict( + mask_size=28, + pos_iou_thr=0.5, + neg_iou_thr=0.5, + crowd_thr=1.1, + roi_batch_size=512, + add_gt_as_proposals=True, + pos_fraction=0.25, + pos_balance_sampling=False, + neg_pos_ub=512, + neg_balance_thr=0, + pos_weight=-1, + debug=False), + rcnn_test_cfg=dict( + 
score_thr=1e-3, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) +# dataset settings +data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_per_gpu = 1 +data_workers = 2 +train_dataset = dict( + with_mask=True, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5) +test_dataset = dict( + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +# learning policy +lr_policy = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.333, + step=[8, 11]) +max_epoch = 12 +checkpoint_config = dict(interval=1) +dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +# logging settings +log_level = 'INFO' +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + ]) +# yapf:enable +work_dir = './model/r50_fpn_mask_rcnn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/tools/examples/r50_fpn_rpn_1x.py b/tools/examples/r50_fpn_rpn_1x.py new file mode 100644 index 00000000000..45c0a1a6c46 --- /dev/null +++ b/tools/examples/r50_fpn_rpn_1x.py @@ -0,0 +1,95 @@ +# model settings +model = dict( + pretrained= + '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + backbone=dict( + type='resnet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='fb'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + coarsest_stride=32, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True)) +meta_params = dict( + rpn_train_cfg=dict( + pos_fraction=0.5, + pos_balance_sampling=False, + neg_pos_ub=256, + allowed_border=0, + anchor_batch_size=256, + pos_iou_thr=0.7, + neg_iou_thr=0.3, + neg_balance_thr=0, + min_pos_iou=1e-3, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rpn_test_cfg=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0)) +# dataset settings +data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_per_gpu = 1 +data_workers = 2 +train_dataset = dict( + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5) +test_dataset = dict( + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + test_mode=True) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +# learning policy +lr_policy = dict( + policy='step', + warmup='linear', 
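These example files are plain Python modules. A minimal sketch of how one is consumed, mirroring tools/train.py later in this patch (the config path here is only an assumption):

    from mmcv import Config
    from mmdet.models import Detector

    cfg = Config.fromfile('tools/examples/r50_fpn_rpn_1x.py')
    model = Detector(**cfg.model, **cfg.meta_params)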
+ warmup_iters=500, + warmup_ratio=0.333, + step=[8, 11]) +max_epoch = 12 +checkpoint_config = dict(interval=1) +dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +# logging settings +log_level = 'INFO' +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + ]) +# yapf:enable +work_dir = './model/r50_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/tools/test.py b/tools/test.py new file mode 100644 index 00000000000..2d062489100 --- /dev/null +++ b/tools/test.py @@ -0,0 +1,65 @@ +import os.path as osp +import sys +sys.path.append(osp.abspath(osp.join(__file__, '../../'))) +sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv') +import argparse + +import numpy as np +import torch + +import mmcv +from mmcv import Config +from mmcv.torchpack import load_checkpoint, parallel_test +from mmdet.core import _data_func, results2json +from mmdet.datasets import CocoDataset +from mmdet.datasets.data_engine import build_data +from mmdet.models import Detector + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMDet test detector') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument('--world_size', default=1, type=int) + parser.add_argument('--out', help='output result file') + parser.add_argument( + '--out_json', action='store_true', help='get json output file') + args = parser.parse_args() + return args + + +args = parse_args() + + +def main(): + cfg = Config.fromfile(args.config) + cfg.model['pretrained'] = None + # TODO: support img_per_gpu > 1 + assert cfg.img_per_gpu == 1 + + if args.world_size == 1: + # TODO verify this part + args.dist = False + args.img_per_gpu = cfg.img_per_gpu + args.data_workers = cfg.data_workers + model = Detector(**cfg.model, **cfg.meta_params) + load_checkpoint(model, args.checkpoint) + test_loader = build_data(cfg.test_dataset, args) + model = torch.nn.DataParallel(model, device_ids=[0]) + # TODO write single_test + outputs = single_test(test_loader, model) + else: + test_dataset = CocoDataset(**cfg.test_dataset) + model = dict(cfg.model, **cfg.meta_params) + outputs = parallel_test(Detector, model, + args.checkpoint, test_dataset, _data_func, + range(args.world_size)) + + if args.out: + mmcv.dump(outputs, args.out, protocol=4) + if args.out_json: + results2json(test_dataset, outputs, args.out + '.json') + + +if __name__ == '__main__': + main() diff --git a/tools/train.py b/tools/train.py new file mode 100644 index 00000000000..0cb2450acf5 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,85 @@ +from __future__ import division +import argparse +import sys +import os.path as osp +sys.path.append(osp.abspath(osp.join(__file__, '../../'))) +sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv') + +import torch +import torch.multiprocessing as mp +from mmcv import Config +from mmcv.torchpack import Runner +from mmdet.core import (batch_processor, init_dist, broadcast_params, + DistOptimizerStepperHook, DistSamplerSeedHook) +from mmdet.datasets.data_engine import build_data +from mmdet.models import Detector +from mmdet.nn.parallel import MMDataParallel + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMDet train val detector') + parser.add_argument('config', help='train config file path') + parser.add_argument('--validate', action='store_true', help='validate') + parser.add_argument( +
'--dist', action='store_true', help='distributed training or not') + parser.add_argument('--world_size', default=1, type=int) + parser.add_argument('--rank', default=0, type=int) + args = parser.parse_args() + + return args + + +args = parse_args() + + +def main(): + # Fetch config information first; both branches below read it + cfg = Config.fromfile(args.config) + + # Enable distributed training or not + if args.dist: + print('Enable distributed training.') + mp.set_start_method("spawn", force=True) + init_dist( + args.world_size, + args.rank, + **cfg.dist_params) + else: + print('Disabled distributed training.') + + # TODO more flexible + args.img_per_gpu = cfg.img_per_gpu + args.data_workers = cfg.data_workers + + # prepare training loader + train_loader = [build_data(cfg.train_dataset, args)] + if args.validate: + val_loader = build_data(cfg.val_dataset, args) + train_loader.append(val_loader) + + # build model + model = Detector(**cfg.model, **cfg.meta_params) + if args.dist: + model = model.cuda() + broadcast_params(model) + else: + device_ids = args.rank % torch.cuda.device_count() + model = MMDataParallel(model, device_ids=device_ids).cuda() + + # register hooks + runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, + cfg.log_level) + optimizer_stepper = DistOptimizerStepperHook( + **cfg.grad_clip_config) if args.dist else cfg.grad_clip_config + runner.register_training_hooks(cfg.lr_policy, optimizer_stepper, + cfg.checkpoint_config, cfg.log_config) + if args.dist: + runner.register_hook(DistSamplerSeedHook()) + if cfg.resume_from: + runner.resume(cfg.resume_from) + elif cfg.load_from: + runner.load_checkpoint(cfg.load_from) + runner.run(train_loader, cfg.workflow, cfg.max_epoch, args=args) + + +if __name__ == "__main__": + main() From fe0284cdc099f78574747567a3f343daa682541c Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Tue, 18 Sep 2018 16:58:05 +0800 Subject: [PATCH 03/81] fix extension to fit pytorch 0.4.1 api --- mmdet/ops/roi_align/src/roi_align_cuda.cpp | 4 ++-- mmdet/ops/roi_pool/src/roi_pool_cuda.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mmdet/ops/roi_align/src/roi_align_cuda.cpp b/mmdet/ops/roi_align/src/roi_align_cuda.cpp index e4c28c14226..8551bc51888 100644 --- a/mmdet/ops/roi_align/src/roi_align_cuda.cpp +++ b/mmdet/ops/roi_align/src/roi_align_cuda.cpp @@ -17,9 +17,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const int pooled_height, const int pooled_width, at::Tensor bottom_grad); -#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ") +#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_ASSERT(x.is_contiguous(), #x " must be contiguous ") + AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp index 799c151d192..b05e870600f 100644 --- a/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp +++ b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp @@ -16,9 +16,9 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const int num_rois, const int pooled_h, const int pooled_w, at::Tensor bottom_grad); -#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_ASSERT(x.is_contiguous(), #x " must be contiguous ") +
AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) From 5137592075b39f240b9d45af549d5042bdbe89d5 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 23 Sep 2018 18:55:01 +0800 Subject: [PATCH 04/81] add fp16 support for forwarding --- mmdet/ops/roi_align/src/roi_align_kernel.cu | 92 +++++++++------------ mmdet/ops/roi_pool/gradcheck.py | 2 +- mmdet/ops/roi_pool/src/roi_pool_kernel.cu | 65 +++++---------- 3 files changed, 61 insertions(+), 98 deletions(-) diff --git a/mmdet/ops/roi_align/src/roi_align_kernel.cu b/mmdet/ops/roi_align/src/roi_align_kernel.cu index 31be093c038..341d858de52 100644 --- a/mmdet/ops/roi_align/src/roi_align_kernel.cu +++ b/mmdet/ops/roi_align/src/roi_align_kernel.cu @@ -1,14 +1,10 @@ #include +#include -#include -#include +using namespace at; // temporal fix for pytorch<=0.4.1 (see #9848) -#include -#include -#include - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ i += blockDim.x * gridDim.x) #define THREADS_PER_BLOCK 1024 @@ -28,10 +24,8 @@ __device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data, return 0; } - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; + if (y <= 0) y = 0; + if (x <= 0) x = 0; int y_low = (int)y; int x_low = (int)x; @@ -69,12 +63,13 @@ __device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data, } template -__global__ void -ROIAlignForward(const int nthreads, const scalar_t *bottom_data, - const scalar_t *bottom_rois, const scalar_t spatial_scale, - const int sample_num, const int channels, const int height, - const int width, const int pooled_height, - const int pooled_width, scalar_t *top_data) { +__global__ void ROIAlignForward(const int nthreads, const scalar_t *bottom_data, + const scalar_t *bottom_rois, + const scalar_t spatial_scale, + const int sample_num, const int channels, + const int height, const int width, + const int pooled_height, const int pooled_width, + scalar_t *top_data) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the aligned output int pw = index % pooled_width; @@ -101,7 +96,7 @@ ROIAlignForward(const int nthreads, const scalar_t *bottom_data, int sample_num_h = (sample_num > 0) ? sample_num - : ceil(roi_height / pooled_height); // e.g., = 2 + : ceil(roi_height / pooled_height); // e.g., = 2 int sample_num_w = (sample_num > 0) ? 
sample_num : ceil(roi_width / pooled_width); @@ -137,17 +132,17 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, const int pooled_height, const int pooled_width, at::Tensor output) { const int output_size = num_rois * pooled_height * pooled_width * channels; - AT_DISPATCH_FLOATING_TYPES( + AT_DISPATCH_FLOATING_TYPES_AND_HALF( features.type(), "ROIAlignLaucherForward", ([&] { const scalar_t *bottom_data = features.data(); const scalar_t *rois_data = rois.data(); scalar_t *top_data = output.data(); - ROIAlignForward< - scalar_t><<>>( - output_size, bottom_data, rois_data, scalar_t(spatial_scale), - sample_num, channels, height, width, pooled_height, pooled_width, - top_data); + ROIAlignForward + <<>>( + output_size, bottom_data, rois_data, scalar_t(spatial_scale), + sample_num, channels, height, width, pooled_height, + pooled_width, top_data); })); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err) { @@ -159,11 +154,12 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, } template -__device__ void -bilinear_interpolate_gradient(const int height, const int width, scalar_t y, - scalar_t x, scalar_t &w1, scalar_t &w2, - scalar_t &w3, scalar_t &w4, int &x_low, - int &x_high, int &y_low, int &y_high) { +__device__ void bilinear_interpolate_gradient(const int height, const int width, + scalar_t y, scalar_t x, + scalar_t &w1, scalar_t &w2, + scalar_t &w3, scalar_t &w4, + int &x_low, int &x_high, + int &y_low, int &y_high) { // deal with cases that inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) { w1 = w2 = w3 = w4 = 0.; @@ -171,10 +167,8 @@ bilinear_interpolate_gradient(const int height, const int width, scalar_t y, return; } - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; + if (y <= 0) y = 0; + if (x <= 0) x = 0; y_low = (int)y; x_low = (int)x; @@ -204,12 +198,11 @@ bilinear_interpolate_gradient(const int height, const int width, scalar_t y, } template -__global__ void -ROIAlignBackward(const int nthreads, const scalar_t *top_diff, - const scalar_t *bottom_rois, const scalar_t spatial_scale, - const int sample_num, const int channels, const int height, - const int width, const int pooled_height, - const int pooled_width, scalar_t *bottom_diff) { +__global__ void ROIAlignBackward( + const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois, + const scalar_t spatial_scale, const int sample_num, const int channels, + const int height, const int width, const int pooled_height, + const int pooled_width, scalar_t *bottom_diff) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the aligned output int pw = index % pooled_width; @@ -239,7 +232,7 @@ ROIAlignBackward(const int nthreads, const scalar_t *top_diff, int sample_num_h = (sample_num > 0) ? sample_num - : ceil(roi_height / pooled_height); // e.g., = 2 + : ceil(roi_height / pooled_height); // e.g., = 2 int sample_num_w = (sample_num > 0) ? 
sample_num : ceil(roi_width / pooled_width); @@ -279,13 +272,6 @@ ROIAlignBackward(const int nthreads, const scalar_t *top_diff, } } -template <> -__global__ void ROIAlignBackward( - const int nthreads, const double *top_diff, const double *bottom_rois, - const double spatial_scale, const int sample_num, const int channels, - const int height, const int width, const int pooled_height, - const int pooled_width, double *bottom_diff) {} - int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale, const int sample_num, const int channels, const int height, @@ -294,6 +280,7 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, at::Tensor bottom_grad) { const int output_size = num_rois * pooled_height * pooled_width * channels; + // TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved AT_DISPATCH_FLOATING_TYPES( top_grad.type(), "ROIAlignLaucherBackward", ([&] { const scalar_t *top_diff = top_grad.data(); @@ -304,10 +291,11 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, exit(-1); } - ROIAlignBackward< - scalar_t><<>>( - output_size, top_diff, rois_data, spatial_scale, sample_num, - channels, height, width, pooled_height, pooled_width, bottom_diff); + ROIAlignBackward + <<>>( + output_size, top_diff, rois_data, spatial_scale, sample_num, + channels, height, width, pooled_height, pooled_width, + bottom_diff); })); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err) { diff --git a/mmdet/ops/roi_pool/gradcheck.py b/mmdet/ops/roi_pool/gradcheck.py index dfc08b2e138..c27d317a03b 100644 --- a/mmdet/ops/roi_pool/gradcheck.py +++ b/mmdet/ops/roi_pool/gradcheck.py @@ -4,7 +4,7 @@ import os.path as osp import sys sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -from roi_pooling import RoIPool +from roi_pool import RoIPool feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], diff --git a/mmdet/ops/roi_pool/src/roi_pool_kernel.cu b/mmdet/ops/roi_pool/src/roi_pool_kernel.cu index c94a9cd7850..d2cefa662f9 100644 --- a/mmdet/ops/roi_pool/src/roi_pool_kernel.cu +++ b/mmdet/ops/roi_pool/src/roi_pool_kernel.cu @@ -1,14 +1,10 @@ #include +#include -#include -#include +using namespace at; // temporal fix for pytorch<=0.4.1 (see #9848) -#include -#include -#include - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ i += blockDim.x * gridDim.x) #define THREADS_PER_BLOCK 1024 @@ -44,8 +40,7 @@ __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, // force malformed rois to be 1x1 scalar_t roi_w = roi_x2 - roi_x1; scalar_t roi_h = roi_y2 - roi_y1; - if (roi_w <= 0 || roi_h <= 0) - continue; + if (roi_w <= 0 || roi_h <= 0) continue; scalar_t bin_size_w = roi_w / static_cast(pooled_w); scalar_t bin_size_h = roi_h / static_cast(pooled_h); @@ -68,7 +63,8 @@ __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, bottom_data += (roi_batch_ind * channels + c) * height * width; // Define an empty pooling region to be zero - scalar_t max_val = is_empty ? 0 : bottom_data[bin_y1 * width + bin_x1] - 1; + scalar_t max_val = is_empty ? 
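// Note on the hunk just below: with AT_DISPATCH_FLOATING_TYPES_AND_HALF the
// kernel is also instantiated for at::Half, where a bare literal 0 no longer
// shares a common type with the Half-typed alternative of the ternary, hence
// the explicit static_cast<scalar_t>(0). The backward kernels keep plain
// AT_DISPATCH_FLOATING_TYPES for now, since their atomicAdd accumulation has
// no half-precision overload yet (see the TODOs in this patch).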
static_cast(0) + : bottom_data[bin_y1 * width + bin_x1] - 1; for (int h = bin_y1; h < bin_y2; ++h) { for (int w = bin_x1; w < bin_x2; ++w) { @@ -80,8 +76,7 @@ __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, } } top_data[index] = max_val; - if (argmax_data != NULL) - argmax_data[index] = max_idx; + if (argmax_data != NULL) argmax_data[index] = max_idx; } } @@ -92,17 +87,18 @@ int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, at::Tensor output, at::Tensor argmax) { const int output_size = num_rois * channels * pooled_h * pooled_w; - AT_DISPATCH_FLOATING_TYPES( + AT_DISPATCH_FLOATING_TYPES_AND_HALF( features.type(), "ROIPoolLaucherForward", ([&] { const scalar_t *bottom_data = features.data(); const scalar_t *rois_data = rois.data(); scalar_t *top_data = output.data(); int *argmax_data = argmax.data(); - ROIPoolForward< - scalar_t><<>>( - output_size, bottom_data, rois_data, scalar_t(spatial_scale), - channels, height, width, pooled_h, pooled_w, top_data, argmax_data); + ROIPoolForward + <<>>( + output_size, bottom_data, rois_data, scalar_t(spatial_scale), + channels, height, width, pooled_h, pooled_w, top_data, + argmax_data); })); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err) { @@ -135,28 +131,6 @@ __global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff, } } -template <> -__global__ void -ROIPoolBackward(const int nthreads, const double *top_diff, - const double *rois, const int *argmax_data, - const double spatial_scale, const int channels, - const int height, const int width, const int pooled_h, - const int pooled_w, double *bottom_diff) { - // CUDA_1D_KERNEL_LOOP(index, nthreads) { - // int pw = index % pooled_w; - // int ph = (index / pooled_w) % pooled_h; - // int c = (index / pooled_w / pooled_h) % channels; - // int n = index / pooled_w / pooled_h / channels; - - // int roi_batch_ind = rois[n * 5]; - // int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w + - // ph * pooled_w + pw]; - - // *(bottom_diff + (roi_batch_ind * channels + c) * height * width + - // bottom_index) +=top_diff[index]; - // } -} - int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const at::Tensor argmax, const float spatial_scale, const int batch_size, const int channels, @@ -165,6 +139,7 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const int pooled_w, at::Tensor bottom_grad) { const int output_size = num_rois * pooled_h * pooled_w * channels; + // TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved AT_DISPATCH_FLOATING_TYPES( top_grad.type(), "ROIPoolLaucherBackward", ([&] { const scalar_t *top_diff = top_grad.data(); @@ -177,11 +152,11 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, exit(-1); } - ROIPoolBackward< - scalar_t><<>>( - output_size, top_diff, rois_data, argmax_data, - scalar_t(spatial_scale), channels, height, width, pooled_h, - pooled_w, bottom_diff); + ROIPoolBackward + <<>>( + output_size, top_diff, rois_data, argmax_data, + scalar_t(spatial_scale), channels, height, width, pooled_h, + pooled_w, bottom_diff); })); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err) { From 314afb0f56c14cd604502cdd87b17172b4c88a7a Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 23 Sep 2018 22:42:03 +0800 Subject: [PATCH 05/81] refactor DataContainer and datasets --- mmdet/datasets/__init__.py | 5 +- mmdet/datasets/coco.py | 78 ++++++++-------- mmdet/datasets/data_engine.py | 
29 ------ mmdet/datasets/loader/__init__.py | 7 ++ mmdet/datasets/loader/build_loader.py | 39 ++++++++ mmdet/datasets/{ => loader}/collate.py | 21 ++++- mmdet/datasets/{ => loader}/sampler.py | 2 - mmdet/datasets/transforms.py | 122 +++---------------------- mmdet/datasets/utils/data_container.py | 36 ++------ mmdet/datasets/utils/misc.py | 24 +++++ 10 files changed, 151 insertions(+), 212 deletions(-) delete mode 100644 mmdet/datasets/data_engine.py create mode 100644 mmdet/datasets/loader/__init__.py create mode 100644 mmdet/datasets/loader/build_loader.py rename mmdet/datasets/{ => loader}/collate.py (73%) rename mmdet/datasets/{ => loader}/sampler.py (98%) diff --git a/mmdet/datasets/__init__.py b/mmdet/datasets/__init__.py index 6045c2b0923..c5ec4e8f9f9 100644 --- a/mmdet/datasets/__init__.py +++ b/mmdet/datasets/__init__.py @@ -1,4 +1,3 @@ from .coco import CocoDataset -from .collate import * -from .sampler import * -from .transforms import * + +__all__ = ['CocoDataset'] diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index a7eedca6a2c..8e7d9feffb9 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -7,7 +7,7 @@ from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform, Numpy2Tensor) -from .utils import show_ann, random_scale +from .utils import to_tensor, show_ann, random_scale from .utils import DataContainer as DC @@ -71,6 +71,7 @@ def parse_ann_info(ann_info, cat2label, with_mask=True): class CocoDataset(Dataset): + def __init__(self, ann_file, img_prefix, @@ -227,27 +228,28 @@ def __getitem__(self, idx): ann['mask_polys'], ann['poly_lens'], img_info['height'], img_info['width'], flip) - ori_shape = (img_info['height'], img_info['width']) + ori_shape = (img_info['height'], img_info['width'], 3) img_meta = dict( - ori_shape=DC(ori_shape), - img_shape=DC(img_shape), - scale_factor=DC(scale_factor), - flip=DC(flip)) + ori_shape=ori_shape, + img_shape=img_shape, + scale_factor=scale_factor, + flip=flip) data = dict( - img=DC(img, stack=True), - img_meta=img_meta, - gt_bboxes=DC(gt_bboxes)) + img=DC(to_tensor(img), stack=True), + img_meta=DC(img_meta, cpu_only=True), + gt_bboxes=DC(to_tensor(gt_bboxes))) if self.proposals is not None: - data['proposals'] = DC(proposals) + data['proposals'] = DC(to_tensor(proposals)) if self.with_label: - data['gt_labels'] = DC(gt_labels) + data['gt_labels'] = DC(to_tensor(gt_labels)) if self.with_crowd: - data['gt_bboxes_ignore'] = DC(gt_bboxes_ignore) + data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) if self.with_mask: - data['gt_mask_polys'] = DC(gt_mask_polys) - data['gt_poly_lens'] = DC(gt_poly_lens) - data['num_polys_per_mask'] = DC(num_polys_per_mask) + data['gt_masks'] = dict( + polys=DC(gt_mask_polys, cpu_only=True), + poly_lens=DC(gt_poly_lens, cpu_only=True), + polys_per_mask=DC(num_polys_per_mask, cpu_only=True)) return data def prepare_test_img(self, idx): @@ -258,37 +260,37 @@ def prepare_test_img(self, idx): if self.proposals is not None else None) def prepare_single(img, scale, flip, proposal=None): - _img, _img_shape, _scale_factor = self.img_transform( + _img, img_shape, scale_factor = self.img_transform( img, scale, flip) - img, img_shape, scale_factor = self.numpy2tensor( - _img, _img_shape, _scale_factor) - ori_shape = (img_info['height'], img_info['width']) - img_meta = dict( - ori_shape=ori_shape, + _img = to_tensor(_img) + _img_meta = dict( + ori_shape=(img_info['height'], img_info['width'], 3), img_shape=img_shape, scale_factor=scale_factor, flip=flip) if proposal is not 
None: - proposal = self.bbox_transform(proposal, _scale_factor, flip) - proposal = self.numpy2tensor(proposal) - return img, img_meta, proposal + _proposal = self.bbox_transform(proposal, scale_factor, flip) + _proposal = to_tensor(_proposal) + else: + _proposal = None + return _img, _img_meta, _proposal imgs = [] img_metas = [] proposals = [] for scale in self.img_scales: - img, img_meta, proposal = prepare_single(img, scale, False, - proposal) - imgs.append(img) - img_metas.append(img_meta) - proposals.append(proposal) + _img, _img_meta, _proposal = prepare_single( + img, scale, False, proposal) + imgs.append(_img) + img_metas.append(DC(_img_meta, cpu_only=True)) + proposals.append(_proposal) if self.flip_ratio > 0: - img, img_meta, prop = prepare_single(img, scale, True, - proposal) - imgs.append(img) - img_metas.append(img_meta) - proposals.append(prop) - if self.proposals is None: - return imgs, img_metas - else: - return imgs, img_metas, proposals + _img, _img_meta, _proposal = prepare_single( + img, scale, True, proposal) + imgs.append(_img) + img_metas.append(DC(_img_meta, cpu_only=True)) + proposals.append(_proposal) + data = dict(img=imgs, img_meta=img_metas) + if self.proposals is not None: + data['proposals'] = proposals + return data diff --git a/mmdet/datasets/data_engine.py b/mmdet/datasets/data_engine.py deleted file mode 100644 index 0c89f21878a..00000000000 --- a/mmdet/datasets/data_engine.py +++ /dev/null @@ -1,29 +0,0 @@ -from functools import partial -import torch -from .coco import CocoDataset -from .collate import collate -from .sampler import GroupSampler, DistributedGroupSampler - - -def build_data(cfg, args): - dataset = CocoDataset(**cfg) - - if args.dist: - sampler = DistributedGroupSampler(dataset, args.img_per_gpu, - args.world_size, args.rank) - batch_size = args.img_per_gpu - num_workers = args.data_workers - else: - sampler = GroupSampler(dataset, args.img_per_gpu) - batch_size = args.world_size * args.img_per_gpu - num_workers = args.world_size * args.data_workers - - loader = torch.utils.data.DataLoader( - dataset, - batch_size=args.img_per_gpu, - sampler=sampler, - num_workers=num_workers, - collate_fn=partial(collate, samples_per_gpu=args.img_per_gpu), - pin_memory=False) - - return loader diff --git a/mmdet/datasets/loader/__init__.py b/mmdet/datasets/loader/__init__.py new file mode 100644 index 00000000000..27796d0e9de --- /dev/null +++ b/mmdet/datasets/loader/__init__.py @@ -0,0 +1,7 @@ +from .build_loader import build_dataloader +from .collate import collate +from .sampler import GroupSampler, DistributedGroupSampler + +__all__ = [ + 'collate', 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' +] diff --git a/mmdet/datasets/loader/build_loader.py b/mmdet/datasets/loader/build_loader.py new file mode 100644 index 00000000000..a4e7d7d1e63 --- /dev/null +++ b/mmdet/datasets/loader/build_loader.py @@ -0,0 +1,39 @@ +from functools import partial + +from torch.utils.data import DataLoader + +from .collate import collate +from .sampler import GroupSampler, DistributedGroupSampler + + +def build_dataloader(dataset, + imgs_per_gpu, + workers_per_gpu, + num_gpus, + dist=True, + world_size=1, + rank=0, + **kwargs): + if dist: + sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, + rank) + batch_size = imgs_per_gpu + num_workers = workers_per_gpu + else: + sampler = GroupSampler(dataset, imgs_per_gpu) + batch_size = num_gpus * imgs_per_gpu + num_workers = num_gpus * workers_per_gpu + + if not kwargs.get('shuffle', True): + sampler 
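# In short: distributed runs get a DistributedGroupSampler with a per-process
# batch of imgs_per_gpu, while a single-process run drives all GPUs at once,
# so batch_size = num_gpus * imgs_per_gpu. A hypothetical non-distributed call:
#   build_dataloader(dataset, imgs_per_gpu=2, workers_per_gpu=2, num_gpus=8,
#                    dist=False)  # -> DataLoader with batch_size 16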
= None + + data_loader = DataLoader( + dataset, + batch_size=batch_size, + sampler=sampler, + num_workers=num_workers, + collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), + pin_memory=False, + **kwargs) + + return data_loader diff --git a/mmdet/datasets/collate.py b/mmdet/datasets/loader/collate.py similarity index 73% rename from mmdet/datasets/collate.py rename to mmdet/datasets/loader/collate.py index 44117d6f2d0..fa1335ca75a 100644 --- a/mmdet/datasets/collate.py +++ b/mmdet/datasets/loader/collate.py @@ -4,17 +4,24 @@ import torch.nn.functional as F from torch.utils.data.dataloader import default_collate -from .utils import DataContainer +from ..utils import DataContainer # https://github.com/pytorch/pytorch/issues/973 import resource rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) -__all__ = ['collate'] - def collate(batch, samples_per_gpu=1): + """Puts each data field into a tensor/DataContainer with outer dimension + batch size. + + Extend default_collate to add support for :type:`~mmdet.DataContainer`. + There are 3 cases for data containers. + 1. cpu_only = True, e.g., meta data + 2. cpu_only = False, stack = True, e.g., images tensors + 3. cpu_only = False, stack = False, e.g., gt bboxes + """ if not isinstance(batch, collections.Sequence): raise TypeError("{} is not supported.".format(batch.dtype)) @@ -22,7 +29,13 @@ def collate(batch, samples_per_gpu=1): if isinstance(batch[0], DataContainer): assert len(batch) % samples_per_gpu == 0 stacked = [] - if batch[0].stack: + if batch[0].cpu_only: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer( + stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) + elif batch[0].stack: for i in range(0, len(batch), samples_per_gpu): assert isinstance(batch[i].data, torch.Tensor) # TODO: handle tensors other than 3d diff --git a/mmdet/datasets/sampler.py b/mmdet/datasets/loader/sampler.py similarity index 98% rename from mmdet/datasets/sampler.py rename to mmdet/datasets/loader/sampler.py index 74089821bf1..5c060cd926e 100644 --- a/mmdet/datasets/sampler.py +++ b/mmdet/datasets/loader/sampler.py @@ -7,8 +7,6 @@ from torch.distributed import get_world_size, get_rank from torch.utils.data.sampler import Sampler -__all__ = ['GroupSampler', 'DistributedGroupSampler'] - class GroupSampler(Sampler): diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index 1532fe074f2..3a41e8d4cd4 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -29,7 +29,7 @@ def __init__(self, self.size_divisor = size_divisor def __call__(self, img, scale, flip=False): - img, scale_factor = mmcv.imrescale(img, scale, True) + img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) img_shape = img.shape img = mmcv.imnorm(img, self.mean, self.std, self.to_rgb) if flip: @@ -39,76 +39,20 @@ def __call__(self, img, scale, flip=False): img = img.transpose(2, 0, 1) return img, img_shape, scale_factor - # img, scale = cvb.resize_keep_ar(img_or_path, max_long_edge, - # max_short_edge, True) - # shape_scale = np.array(img.shape + (scale, ), dtype=np.float32) - # if flip: - # img = img[:, ::-1, :].copy() - # if self.color_order == 'RGB': - # img = cvb.bgr2rgb(img) - # img = img.astype(np.float32) - # img -= self.color_mean - # img /= self.color_std - # if self.size_divisor is None: - # padded_img = img - # else: - # pad_h = int(np.ceil( - # 
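The DataContainer-aware collate above is easiest to see on a toy batch. A minimal sketch, assuming collate recurses into dict fields the way default_collate does:

    import numpy as np
    from mmdet.datasets.loader import collate
    from mmdet.datasets.utils import DataContainer as DC, to_tensor

    sample = dict(
        img=DC(to_tensor(np.zeros((3, 600, 800), np.float32)), stack=True),
        img_meta=DC(dict(ori_shape=(600, 800, 3), flip=False), cpu_only=True),
        gt_bboxes=DC(to_tensor(np.array([[10., 10., 50., 50.]], np.float32))))
    batch = collate([sample, sample], samples_per_gpu=2)
    # batch['img'].data       -> [one stacked, padded 4-D tensor for both samples]
    # batch['img_meta'].data  -> [[meta, meta]], kept on CPU (cpu_only)
    # batch['gt_bboxes'].data -> [[tensor, tensor]], scattered but not stacked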
img.shape[0] / self.size_divisor)) * self.size_divisor - # pad_w = int(np.ceil( - # img.shape[1] / self.size_divisor)) * self.size_divisor - # padded_img = cvb.pad_img(img, (pad_h, pad_w), pad_val=0) - # padded_img = padded_img.transpose(2, 0, 1) - # return padded_img, shape_scale - - -class ImageCrop(object): - """crop image patches and resize patches into fixed size - 1. (read and) flip image (if needed) - 2. crop image patches according to given bboxes - 3. resize patches into fixed size (default 224x224) - 4. normalize the image (if needed) - 5. transpose to (c, h, w) (if needed) - """ - def __init__(self, - normalize=True, - transpose=True, - color_order='RGB', - color_mean=(0, 0, 0), - color_std=(1, 1, 1)): - self.normalize = normalize - self.transpose = transpose - - assert color_order in ['RGB', 'BGR'] - self.color_order = color_order - self.color_mean = np.array(color_mean, dtype=np.float32) - self.color_std = np.array(color_std, dtype=np.float32) - - def __call__(self, - img_or_path, - bboxes, - crop_size, - scale_ratio=1.0, - flip=False): - img = cvb.read_img(img_or_path) - if flip: - img = img[:, ::-1, :].copy() - crop_imgs = cvb.crop_img( - img, - bboxes[:, :4], - scale_ratio=scale_ratio, - pad_fill=self.color_mean) - processed_crop_imgs_list = [] - for i in range(len(crop_imgs)): - crop_img = crop_imgs[i] - crop_img = cvb.resize(crop_img, crop_size) - crop_img = crop_img.astype(np.float32) - crop_img -= self.color_mean - crop_img /= self.color_std - processed_crop_imgs_list.append(crop_img) - processed_crop_imgs = np.stack(processed_crop_imgs_list, axis=0) - processed_crop_imgs = processed_crop_imgs.transpose(0, 3, 1, 2) - return processed_crop_imgs +def bbox_flip(bboxes, img_shape): + """Flip bboxes horizontally. + + Args: + bboxes(ndarray): shape (..., 4*k) + img_shape(tuple): (height, width) + """ + assert bboxes.shape[-1] % 4 == 0 + w = img_shape[1] + flipped = bboxes.copy() + flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 + flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 + return flipped class BboxTransform(object): @@ -124,7 +68,7 @@ def __init__(self, max_num_gts=None): def __call__(self, bboxes, img_shape, scale_factor, flip=False): gt_bboxes = bboxes * scale_factor if flip: - gt_bboxes = mmcv.bbox_flip(gt_bboxes, img_shape) + gt_bboxes = bbox_flip(gt_bboxes, img_shape) gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) if self.max_num_gts is None: @@ -161,42 +105,6 @@ def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): return gt_mask_polys, gt_poly_lens, num_polys_per_mask -class MaskTransform(object): - """Preprocess masks - 1. resize masks to expected size and stack to a single array - 2. flip the masks (if needed) - 3. 
pad the masks (if needed) - """ - - def __init__(self, max_num_gts, pad_size=None): - self.max_num_gts = max_num_gts - self.pad_size = pad_size - - def __call__(self, masks, img_size, flip=False): - max_long_edge = max(img_size) - max_short_edge = min(img_size) - masks = [ - cvb.resize_keep_ar( - mask, - max_long_edge, - max_short_edge, - interpolation=cvb.INTER_NEAREST) for mask in masks - ] - masks = np.stack(masks, axis=0) - if flip: - masks = masks[:, ::-1, :] - if self.pad_size is None: - pad_h = masks.shape[1] - pad_w = masks.shape[2] - else: - pad_size = self.pad_size if self.pad_size > 0 else max_long_edge - pad_h = pad_w = pad_size - padded_masks = np.zeros( - (self.max_num_gts, pad_h, pad_w), dtype=masks.dtype) - padded_masks[:masks.shape[0], :masks.shape[1], :masks.shape[2]] = masks - return padded_masks - - class Numpy2Tensor(object): def __init__(self): diff --git a/mmdet/datasets/utils/data_container.py b/mmdet/datasets/utils/data_container.py index c27beab37bb..d690f6798ce 100644 --- a/mmdet/datasets/utils/data_container.py +++ b/mmdet/datasets/utils/data_container.py @@ -1,32 +1,8 @@ import functools -from collections import Sequence -import mmcv -import numpy as np import torch -def to_tensor(data): - """Convert objects of various python types to :obj:`torch.Tensor`. - - Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, - :class:`Sequence`, :class:`int` and :class:`float`. - """ - if isinstance(data, np.ndarray): - return torch.from_numpy(data) - elif isinstance(data, torch.Tensor): - return data - elif isinstance(data, Sequence) and not mmcv.is_str(data): - return torch.tensor(data) - elif isinstance(data, int): - return torch.LongTensor([data]) - elif isinstance(data, float): - return torch.FloatTensor([data]) - else: - raise TypeError('type {} cannot be converted to tensor.'.format( - type(data))) - - def assert_tensor_type(func): @functools.wraps(func) @@ -41,11 +17,9 @@ def wrapper(*args, **kwargs): class DataContainer(object): - def __init__(self, data, stack=False, padding_value=0): - if isinstance(data, list): - self._data = data - else: - self._data = to_tensor(data) + def __init__(self, data, stack=False, padding_value=0, cpu_only=False): + self._data = data + self._cpu_only = cpu_only self._stack = stack self._padding_value = padding_value @@ -63,6 +37,10 @@ def datatype(self): else: return type(self.data) + @property + def cpu_only(self): + return self._cpu_only + @property def stack(self): return self._stack diff --git a/mmdet/datasets/utils/misc.py b/mmdet/datasets/utils/misc.py index 419c11ad084..22f67a1c35c 100644 --- a/mmdet/datasets/utils/misc.py +++ b/mmdet/datasets/utils/misc.py @@ -1,10 +1,34 @@ +from collections import Sequence + import mmcv +import torch import matplotlib.pyplot as plt import numpy as np import pycocotools.mask as maskUtils +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. 
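A quick sketch of the conversions, for illustration:

        >>> to_tensor(np.ones((2, 2)))  # ndarray -> torch.from_numpy(...)
        >>> to_tensor([1, 2, 3])        # sequence -> torch.tensor([1, 2, 3])
        >>> to_tensor(1)                # int -> torch.LongTensor([1])
        >>> to_tensor(0.5)              # float -> torch.FloatTensor([0.5])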
+ """ + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError('type {} cannot be converted to tensor.'.format( + type(data))) + + def random_scale(img_scales, mode='range'): """Randomly select a scale from a list of scales or scale ranges. From c40d818238a096868a50d0aa2f0fea32154f0160 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 23 Sep 2018 22:56:07 +0800 Subject: [PATCH 06/81] adjust the structure of detectors --- mmdet/models/__init__.py | 3 +- mmdet/models/backbones/resnet.py | 2 +- mmdet/models/bbox_heads/bbox_head.py | 4 +- mmdet/models/builder.py | 23 ++-- mmdet/models/detectors/__init__.py | 5 +- mmdet/models/detectors/base.py | 66 ++++++++++ mmdet/models/detectors/detector.py | 1 + mmdet/models/detectors/faster_rcnn.py | 0 mmdet/models/detectors/mask_rcnn.py | 0 mmdet/models/detectors/rpn.py | 86 +++++++++++++ mmdet/models/detectors/testing_mixins.py | 146 ++++++++++++++++++++++ mmdet/models/detectors/two_stage.py | 149 +++++++++++++++++++++++ mmdet/models/necks/fpn.py | 2 +- mmdet/models/rpn_heads/rpn_head.py | 44 +++---- 14 files changed, 490 insertions(+), 41 deletions(-) create mode 100644 mmdet/models/detectors/base.py create mode 100644 mmdet/models/detectors/faster_rcnn.py create mode 100644 mmdet/models/detectors/mask_rcnn.py create mode 100644 mmdet/models/detectors/rpn.py create mode 100644 mmdet/models/detectors/testing_mixins.py create mode 100644 mmdet/models/detectors/two_stage.py diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py index 2209550509f..07930688e53 100644 --- a/mmdet/models/__init__.py +++ b/mmdet/models/__init__.py @@ -1 +1,2 @@ -from .detectors import Detector +from .detectors import * +from .builder import * diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py index f8203accd4b..51bacc49970 100644 --- a/mmdet/models/backbones/resnet.py +++ b/mmdet/models/backbones/resnet.py @@ -1,7 +1,7 @@ import math import torch.nn as nn import torch.utils.checkpoint as cp -from torchpack import load_checkpoint +from mmcv.torchpack import load_checkpoint def conv3x3(in_planes, out_planes, stride=1, dilation=1): diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py index 5f6e1136eed..da923ecf2d0 100644 --- a/mmdet/models/bbox_heads/bbox_head.py +++ b/mmdet/models/bbox_heads/bbox_head.py @@ -60,7 +60,7 @@ def forward(self, x): return cls_score, bbox_pred def get_bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes, - pos_gt_labels, rcnn_train_cfg): + pos_gt_labels, rcnn_train_cfg): reg_num_classes = 1 if self.reg_class_agnostic else self.num_classes cls_reg_targets = bbox_target( pos_proposals, @@ -85,7 +85,7 @@ def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets, bbox_pred, bbox_targets, bbox_weights, - ave_factor=bbox_targets.size(0)) + avg_factor=bbox_targets.size(0)) return losses def get_det_bboxes(self, diff --git a/mmdet/models/builder.py b/mmdet/models/builder.py index c3b058507fc..4bbc94aa41b 100644 --- a/mmdet/models/builder.py +++ b/mmdet/models/builder.py @@ -1,27 +1,26 @@ -import mmcv -from mmcv import torchpack +from mmcv import torchpack as tp from torch import nn from . 
import (backbones, necks, roi_extractors, rpn_heads, bbox_heads, - mask_heads) + mask_heads, detectors) __all__ = [ 'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor', - 'build_bbox_head', 'build_mask_head' + 'build_bbox_head', 'build_mask_head', 'build_detector' ] -def _build_module(cfg, parrent=None): - return cfg if isinstance(cfg, nn.Module) else torchpack.obj_from_dict( - cfg, parrent) +def _build_module(cfg, parrent=None, default_args=None): + return cfg if isinstance(cfg, nn.Module) else tp.obj_from_dict( + cfg, parrent, default_args) -def build(cfg, parrent=None): +def build(cfg, parrent=None, default_args=None): if isinstance(cfg, list): - modules = [_build_module(cfg_, parrent) for cfg_ in cfg] + modules = [_build_module(cfg_, parrent, default_args) for cfg_ in cfg] return nn.Sequential(*modules) else: - return _build_module(cfg, parrent) + return _build_module(cfg, parrent, default_args) def build_backbone(cfg): @@ -46,3 +45,7 @@ def build_bbox_head(cfg): def build_mask_head(cfg): return build(cfg, mask_heads) + + +def build_detector(cfg, train_cfg=None, test_cfg=None): + return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg)) diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py index 5b690f8d77d..fe3fc62a819 100644 --- a/mmdet/models/detectors/__init__.py +++ b/mmdet/models/detectors/__init__.py @@ -1 +1,4 @@ -from .detector import Detector +from .base import BaseDetector +from .rpn import RPN + +__all__ = ['BaseDetector', 'RPN'] diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py new file mode 100644 index 00000000000..494f62208b1 --- /dev/null +++ b/mmdet/models/detectors/base.py @@ -0,0 +1,66 @@ +from abc import ABCMeta, abstractmethod + +import torch +import torch.nn as nn + + +class BaseDetector(nn.Module): + """Base class for detectors""" + + __metaclass__ = ABCMeta + + def __init__(self): + super(BaseDetector, self).__init__() + + @abstractmethod + def init_weights(self): + pass + + @abstractmethod + def extract_feat(self, imgs): + pass + + def extract_feats(self, imgs): + if isinstance(imgs, torch.Tensor): + return self.extract_feat(imgs) + elif isinstance(imgs, list): + for img in imgs: + yield self.extract_feat(img) + + @abstractmethod + def forward_train(self, imgs, img_metas, **kwargs): + pass + + @abstractmethod + def simple_test(self, img, img_meta, **kwargs): + pass + + @abstractmethod + def aug_test(self, imgs, img_metas, **kwargs): + pass + + def forward_test(self, imgs, img_metas, **kwargs): + for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: + if not isinstance(var, list): + raise TypeError('{} must be a list, but got {}'.format( + name, type(var))) + + num_augs = len(imgs) + if num_augs != len(img_metas): + raise ValueError( + 'num of augmentations ({}) != num of image meta ({})'.format( + len(imgs), len(img_metas))) + # TODO: remove the restriction of imgs_per_gpu == 1 when prepared + imgs_per_gpu = imgs[0].size(0) + assert imgs_per_gpu == 1 + + if num_augs == 1: + return self.simple_test(imgs[0], img_metas[0], **kwargs) + else: + return self.aug_test(imgs, img_metas, **kwargs) + + def forward(self, img, img_meta, return_loss=True, **kwargs): + if return_loss: + return self.forward_train(img, img_meta, **kwargs) + else: + return self.forward_test(img, img_meta, **kwargs) diff --git a/mmdet/models/detectors/detector.py b/mmdet/models/detectors/detector.py index 80b7d4438cb..363131e8ece 100644 --- a/mmdet/models/detectors/detector.py +++ 
b/mmdet/models/detectors/detector.py @@ -8,6 +8,7 @@ class Detector(nn.Module): + def __init__(self, backbone, neck=None, diff --git a/mmdet/models/detectors/faster_rcnn.py b/mmdet/models/detectors/faster_rcnn.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/mmdet/models/detectors/mask_rcnn.py b/mmdet/models/detectors/mask_rcnn.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py new file mode 100644 index 00000000000..8d3dfd17c6c --- /dev/null +++ b/mmdet/models/detectors/rpn.py @@ -0,0 +1,86 @@ +import mmcv + +from mmdet.core import tensor2imgs, bbox_mapping +from .base import BaseDetector +from .testing_mixins import RPNTestMixin +from .. import builder + + +class RPN(BaseDetector, RPNTestMixin): + + def __init__(self, + backbone, + neck, + rpn_head, + train_cfg, + test_cfg, + pretrained=None): + super(RPN, self).__init__() + self.backbone = builder.build_backbone(backbone) + self.neck = builder.build_neck(neck) if neck is not None else None + self.rpn_head = builder.build_rpn_head(rpn_head) + self.train_cfg = train_cfg + self.test_cfg = test_cfg + self.init_weights(pretrained=pretrained) + + def init_weights(self, pretrained=None): + if pretrained is not None: + print('load model from: {}'.format(pretrained)) + self.backbone.init_weights(pretrained=pretrained) + if self.neck is not None: + self.neck.init_weights() + self.rpn_head.init_weights() + + def extract_feat(self, img): + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + return x + + def forward_train(self, img, img_meta, gt_bboxes=None): + if self.train_cfg.rpn.get('debug', False): + self.rpn_head.debug_imgs = tensor2imgs(img) + + x = self.extract_feat(img) + rpn_outs = self.rpn_head(x) + + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) + losses = self.rpn_head.loss(*rpn_loss_inputs) + return losses + + def simple_test(self, img, img_meta, rescale=False): + x = self.extract_feat(img) + proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) + if rescale: + for proposals, meta in zip(proposal_list, img_meta): + proposals[:, :4] /= meta['scale_factor'] + # TODO: remove this restriction + return proposal_list[0].cpu().numpy() + + def aug_test(self, imgs, img_metas, rescale=False): + proposal_list = self.aug_test_rpn( + self.extract_feats(imgs), img_metas, self.test_cfg.rpn) + if not rescale: + for proposals, img_meta in zip(proposal_list, img_metas[0]): + img_shape = img_meta['img_shape'] + scale_factor = img_meta['scale_factor'] + flip = img_meta['flip'] + proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, + scale_factor, flip) + # TODO: remove this restriction + return proposal_list[0].cpu().numpy() + + def show_result(self, data, result, img_norm_cfg): + """Show RPN proposals on the image. + + Although we assume batch size is 1, this method supports arbitrary + batch size. 
+ """ + img_tensor = data['img'][0] + img_metas = data['img_meta'][0].data[0] + imgs = tensor2imgs(img_tensor, **img_norm_cfg) + assert len(imgs) == len(img_metas) + for img, img_meta in zip(imgs, img_metas): + h, w, _ = img_meta['img_shape'] + img_show = img[:h, :w, :] + mmcv.imshow_bboxes(img_show, result, top_k=20) diff --git a/mmdet/models/detectors/testing_mixins.py b/mmdet/models/detectors/testing_mixins.py new file mode 100644 index 00000000000..364fd4e6d1a --- /dev/null +++ b/mmdet/models/detectors/testing_mixins.py @@ -0,0 +1,146 @@ +from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals, + merge_aug_bboxes, merge_aug_masks, multiclass_nms) + + +class RPNTestMixin(object): + + def simple_test_rpn(self, x, img_meta, rpn_test_cfg): + rpn_outs = self.rpn_head(x) + proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + return proposal_list + + def aug_test_rpn(self, feats, img_metas, rpn_test_cfg): + imgs_per_gpu = len(img_metas[0]) + aug_proposals = [[] for _ in range(imgs_per_gpu)] + for x, img_meta in zip(feats, img_metas): + proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg) + for i, proposals in enumerate(proposal_list): + aug_proposals[i].append(proposals) + # after merging, proposals will be rescaled to the original image size + merged_proposals = [ + merge_aug_proposals(proposals, img_meta, rpn_test_cfg) + for proposals, img_meta in zip(aug_proposals, img_metas) + ] + return merged_proposals + + +class BBoxTestMixin(object): + + def simple_test_bboxes(self, + x, + img_meta, + proposals, + rcnn_test_cfg, + rescale=False): + """Test only det bboxes without augmentation.""" + rois = bbox2roi(proposals) + roi_feats = self.bbox_roi_extractor( + x[:len(self.bbox_roi_extractor.featmap_strides)], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + img_shape = img_meta[0]['img_shape'] + scale_factor = img_meta[0]['scale_factor'] + det_bboxes, det_labels = self.bbox_head.get_det_bboxes( + rois, + cls_score, + bbox_pred, + img_shape, + scale_factor, + rescale=rescale, + nms_cfg=rcnn_test_cfg) + return det_bboxes, det_labels + + def aug_test_bboxes(self, feats, img_metas, proposals, rcnn_test_cfg): + aug_bboxes = [] + aug_scores = [] + for x, img_meta in zip(feats, img_metas): + # only one image in the batch + img_shape = img_meta[0]['img_shape'] + scale_factor = img_meta[0]['scale_factor'] + flip = img_meta[0]['flip'] + proposals = bbox_mapping(proposals[:, :4], img_shape, scale_factor, + flip) + rois = bbox2roi([proposals]) + # recompute feature maps to save GPU memory + roi_feats = self.bbox_roi_extractor( + x[:len(self.bbox_roi_extractor.featmap_strides)], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + bboxes, scores = self.bbox_head.get_det_bboxes( + rois, + cls_score, + bbox_pred, + img_shape, + rescale=False, + nms_cfg=None) + aug_bboxes.append(bboxes) + aug_scores.append(scores) + # after merging, bboxes will be rescaled to the original image size + merged_bboxes, merged_scores = merge_aug_bboxes( + aug_bboxes, aug_scores, img_metas, self.rcnn_test_cfg) + det_bboxes, det_labels = multiclass_nms( + merged_bboxes, merged_scores, self.rcnn_test_cfg.score_thr, + self.rcnn_test_cfg.nms_thr, self.rcnn_test_cfg.max_per_img) + return det_bboxes, det_labels + + +class MaskTestMixin(object): + + def simple_test_mask(self, + x, + img_meta, + det_bboxes, + det_labels, + rescale=False): + # image shape of the first image in the batch (only one) + img_shape = img_meta[0]['img_shape'] + 
scale_factor = img_meta[0]['scale_factor'] + if det_bboxes.shape[0] == 0: + segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] + else: + # if det_bboxes is rescaled to the original image size, we need to + # rescale it back to the testing scale to obtain RoIs. + _bboxes = (det_bboxes[:, :4] * scale_factor + if rescale else det_bboxes) + mask_rois = bbox2roi([_bboxes]) + mask_feats = self.mask_roi_extractor( + x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) + mask_pred = self.mask_head(mask_feats) + segm_result = self.mask_head.get_seg_masks( + mask_pred, det_bboxes, det_labels, img_shape, + self.rcnn_test_cfg, rescale) + return segm_result + + def aug_test_mask(self, + feats, + img_metas, + det_bboxes, + det_labels, + rescale=False): + if rescale: + _det_bboxes = det_bboxes + else: + _det_bboxes = det_bboxes.clone() + _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] + if det_bboxes.shape[0] == 0: + segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] + else: + aug_masks = [] + for x, img_meta in zip(feats, img_metas): + img_shape = img_meta[0]['img_shape'] + scale_factor = img_meta[0]['scale_factor'] + flip = img_meta[0]['flip'] + _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, + scale_factor, flip) + mask_rois = bbox2roi([_bboxes]) + mask_feats = self.mask_roi_extractor( + x[:len(self.mask_roi_extractor.featmap_strides)], + mask_rois) + mask_pred = self.mask_head(mask_feats) + # convert to numpy array to save memory + aug_masks.append(mask_pred.sigmoid().cpu().numpy()) + merged_masks = merge_aug_masks(aug_masks, img_metas, + self.rcnn_test_cfg) + segm_result = self.mask_head.get_seg_masks( + merged_masks, _det_bboxes, det_labels, + img_metas[0]['shape_scale'][0], self.rcnn_test_cfg, rescale) + return segm_result diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py new file mode 100644 index 00000000000..db497fd6986 --- /dev/null +++ b/mmdet/models/detectors/two_stage.py @@ -0,0 +1,149 @@ +import torch +import torch.nn as nn + +from .base import BaseDetector +from .testing_mixins import RPNTestMixin, BBoxTestMixin +from ..
import builder +from mmdet.core import bbox2roi, bbox2result, sample_proposals + + +class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin): + + def __init__(self, + backbone, + neck=None, + rpn_head=None, + bbox_roi_extractor=None, + bbox_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None): + super(TwoStageDetector, self).__init__() + self.backbone = builder.build_backbone(backbone) + + self.with_neck = True if neck is not None else False + if self.with_neck: + self.neck = builder.build_neck(neck) + + self.with_rpn = True if rpn_head is not None else False + if self.with_rpn: + self.rpn_head = builder.build_rpn_head(rpn_head) + + self.with_bbox = True if bbox_head is not None else False + if self.with_bbox: + self.bbox_roi_extractor = builder.build_roi_extractor( + bbox_roi_extractor) + self.bbox_head = builder.build_bbox_head(bbox_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + self.init_weights(pretrained=pretrained) + + def init_weights(self, pretrained=None): + if pretrained is not None: + print('load model from: {}'.format(pretrained)) + self.backbone.init_weights(pretrained=pretrained) + if self.with_neck: + if isinstance(self.neck, nn.Sequential): + for m in self.neck: + m.init_weights() + else: + self.neck.init_weights() + if self.with_rpn: + self.rpn_head.init_weights() + if self.with_bbox: + self.bbox_roi_extractor.init_weights() + self.bbox_head.init_weights() + + def extract_feat(self, img): + x = self.backbone(img) + if self.with_neck: + x = self.neck(x) + return x + + def forward_train(self, + img, + img_meta, + gt_bboxes, + gt_bboxes_ignore, + gt_labels, + proposals=None): + losses = dict() + + x = self.extract_feat(img) + + if self.with_rpn: + rpn_outs = self.rpn_head(x) + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, + self.train_cfg.rpn) + rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) + losses.update(rpn_losses) + + proposal_inputs = rpn_outs + (img_meta, self.test_cfg.rpn) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + + else: + proposal_list = proposals + + (pos_inds, neg_inds, pos_proposals, neg_proposals, + pos_assigned_gt_inds, + pos_gt_bboxes, pos_gt_labels) = sample_proposals( + proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels, + self.train_cfg.rcnn) + + labels, label_weights, bbox_targets, bbox_weights = \ + self.bbox_head.get_bbox_target( + pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, + self.train_cfg.rcnn) + + rois = bbox2roi([ + torch.cat([pos, neg], dim=0) + for pos, neg in zip(pos_proposals, neg_proposals) + ]) + # TODO: a more flexible way to configure feat maps + roi_feats = self.bbox_roi_extractor( + x[:self.bbox_roi_extractor.num_inputs], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + + loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, + label_weights, bbox_targets, + bbox_weights) + losses.update(loss_bbox) + + return losses + + def simple_test(self, img, img_meta, proposals=None, rescale=False): + """Test without augmentation.""" + x = self.extract_feat(img) + if proposals is None: + proposals = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) + if self.with_bbox: + # BUG proposals shape?
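+            # (editor's note, not part of the original patch: bbox2roi()
+            # inside simple_test_bboxes() expects a list with one (n, 4) or
+            # (n, 5) proposal tensor per image; simple_test_rpn() above
+            # already returns such a list, so the [proposals] wrapping below
+            # only matches an externally supplied single proposal tensor,
+            # which is likely what the BUG note above refers to.)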
+ det_bboxes, det_labels = self.simple_test_bboxes( + x, img_meta, [proposals], self.test_cfg.rcnn, rescale=rescale) + bbox_result = bbox2result(det_bboxes, det_labels, + self.bbox_head.num_classes) + return bbox_result + else: + proposals[:, :4] /= img_meta[0]['scale_factor'] + return proposals.cpu().numpy() + + def aug_test(self, imgs, img_metas, rescale=False): + """Test with augmentations. + + If rescale is False, then returned bboxes and masks will fit the scale + of imgs[0]. + """ + proposals = self.aug_test_rpn( + self.extract_feats(imgs), img_metas, self.test_cfg.rpn) + det_bboxes, det_labels = self.aug_test_bboxes( + self.extract_feats(imgs), img_metas, proposals, self.test_cfg.rcnn) + if rescale: + _det_bboxes = det_bboxes + else: + _det_bboxes = det_bboxes.clone() + _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1] + bbox_result = bbox2result(_det_bboxes, det_labels, + self.bbox_head.num_classes) + return bbox_result diff --git a/mmdet/models/necks/fpn.py b/mmdet/models/necks/fpn.py index 8b5b49826ba..b4e21864bff 100644 --- a/mmdet/models/necks/fpn.py +++ b/mmdet/models/necks/fpn.py @@ -101,7 +101,7 @@ def forward(self, inputs): # build top-down path used_backbone_levels = len(laterals) for i in range(used_backbone_levels - 1, 0, -1): - laterals[i - 1] += F.upsample( + laterals[i - 1] += F.interpolate( laterals[i], scale_factor=2, mode='nearest') # build outputs diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py index 7ffd441f694..e81f19310e8 100644 --- a/mmdet/models/rpn_heads/rpn_head.py +++ b/mmdet/models/rpn_heads/rpn_head.py @@ -9,8 +9,7 @@ weighted_cross_entropy, weighted_smoothl1, weighted_binary_cross_entropy) from mmdet.ops import nms -from ..utils import multi_apply -from ..utils import normal_init +from ..utils import multi_apply, normal_init class RPNHead(nn.Module): @@ -66,14 +65,14 @@ def forward_single(self, x): def forward(self, feats): return multi_apply(self.forward_single, feats) - def get_anchors(self, featmap_sizes, img_shapes): + def get_anchors(self, featmap_sizes, img_metas): """Get anchors given a list of feature map sizes, and get valid flags at the same time.
(Extra padding regions should be marked as invalid) """ # calculate actual image shapes padded_img_shapes = [] - for img_shape in img_shapes: - h, w = img_shape[:2] + for img_meta in img_metas: + h, w = img_meta['img_shape'][:2] padded_h = int( np.ceil(h / self.coarsest_stride) * self.coarsest_stride) padded_w = int( @@ -83,7 +82,7 @@ def get_anchors(self, featmap_sizes, img_shapes): # len = feature levels anchor_list = [] # len = imgs per gpu - valid_flag_list = [[] for _ in range(len(img_shapes))] + valid_flag_list = [[] for _ in range(len(img_metas))] for i in range(len(featmap_sizes)): anchor_stride = self.anchor_strides[i] anchors = self.anchor_generators[i].grid_anchors( @@ -103,26 +102,22 @@ def get_anchors(self, featmap_sizes, img_shapes): def loss_single(self, rpn_cls_score, rpn_bbox_pred, labels, label_weights, bbox_targets, bbox_weights, num_total_samples, cfg): + # classification loss labels = labels.contiguous().view(-1) label_weights = label_weights.contiguous().view(-1) - bbox_targets = bbox_targets.contiguous().view(-1, 4) - bbox_weights = bbox_weights.contiguous().view(-1, 4) if self.use_sigmoid_cls: rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1).contiguous().view(-1) - loss_cls = weighted_binary_cross_entropy( - rpn_cls_score, - labels, - label_weights, - ave_factor=num_total_samples) + criterion = weighted_binary_cross_entropy else: rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1).contiguous().view(-1, 2) - loss_cls = weighted_cross_entropy( - rpn_cls_score, - labels, - label_weights, - ave_factor=num_total_samples) + criterion = weighted_cross_entropy + loss_cls = criterion( + rpn_cls_score, labels, label_weights, avg_factor=num_total_samples) + # regression loss + bbox_targets = bbox_targets.contiguous().view(-1, 4) + bbox_weights = bbox_weights.contiguous().view(-1, 4) rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).contiguous().view( -1, 4) loss_reg = weighted_smoothl1( @@ -130,7 +125,7 @@ def loss_single(self, rpn_cls_score, rpn_bbox_pred, labels, label_weights, bbox_targets, bbox_weights, beta=cfg.smoothl1_beta, - ave_factor=num_total_samples) + avg_factor=num_total_samples) return loss_cls, loss_reg def loss(self, rpn_cls_scores, rpn_bbox_preds, gt_bboxes, img_shapes, cfg): @@ -158,8 +153,8 @@ def loss(self, rpn_cls_scores, rpn_bbox_preds, gt_bboxes, img_shapes, cfg): cfg=cfg) return dict(loss_rpn_cls=losses_cls, loss_rpn_reg=losses_reg) - def get_proposals(self, rpn_cls_scores, rpn_bbox_preds, img_shapes, cfg): - img_per_gpu = len(img_shapes) + def get_proposals(self, rpn_cls_scores, rpn_bbox_preds, img_meta, cfg): + num_imgs = len(img_meta) featmap_sizes = [featmap.size()[-2:] for featmap in rpn_cls_scores] mlvl_anchors = [ self.anchor_generators[idx].grid_anchors(featmap_sizes[idx], @@ -167,7 +162,7 @@ def get_proposals(self, rpn_cls_scores, rpn_bbox_preds, img_shapes, cfg): for idx in range(len(featmap_sizes)) ] proposal_list = [] - for img_id in range(img_per_gpu): + for img_id in range(num_imgs): rpn_cls_score_list = [ rpn_cls_scores[idx][img_id].detach() for idx in range(len(rpn_cls_scores)) @@ -177,10 +172,9 @@ def get_proposals(self, rpn_cls_scores, rpn_bbox_preds, img_shapes, cfg): for idx in range(len(rpn_bbox_preds)) ] assert len(rpn_cls_score_list) == len(rpn_bbox_pred_list) - img_shape = img_shapes[img_id] proposals = self._get_proposals_single( rpn_cls_score_list, rpn_bbox_pred_list, mlvl_anchors, - img_shape, cfg) + img_meta[img_id]['img_shape'], cfg) proposal_list.append(proposals) return proposal_list @@ -195,7 +189,7 @@ def 
_get_proposals_single(self, rpn_cls_scores, rpn_bbox_preds, if self.use_sigmoid_cls: rpn_cls_score = rpn_cls_score.permute(1, 2, 0).contiguous().view(-1) - rpn_cls_prob = F.sigmoid(rpn_cls_score) + rpn_cls_prob = rpn_cls_score.sigmoid() scores = rpn_cls_prob else: rpn_cls_score = rpn_cls_score.permute(1, 2, From a32b43b248dc826fb37d4d318ee5e48fab99be7a Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 23 Sep 2018 22:59:12 +0800 Subject: [PATCH 07/81] bug fix and cleaning --- .gitignore | 3 +- mmdet/core/__init__.py | 2 - mmdet/core/bbox_ops/sampling.py | 9 +- mmdet/core/bbox_ops/transforms.py | 8 +- mmdet/core/eval/__init__.py | 3 +- mmdet/core/eval/coco_utils.py | 26 ++ mmdet/core/losses/losses.py | 71 ++--- mmdet/core/mask_ops/utils.py | 4 - mmdet/core/post_processing/merge_augs.py | 18 +- mmdet/core/rpn_ops/anchor_generator.py | 5 +- mmdet/core/rpn_ops/anchor_target.py | 33 ++- mmdet/core/test_engine.py | 14 - mmdet/core/train_engine.py | 40 --- mmdet/core/utils/dist_utils.py | 13 +- mmdet/core/utils/hooks.py | 2 +- mmdet/core/utils/misc.py | 22 +- mmdet/nn/parallel/scatter_gather.py | 18 +- setup.py | 2 +- tools/coco_eval.py | 23 ++ .../{examples => configs}/r50_fpn_frcnn_1x.py | 93 +++--- .../r50_fpn_maskrcnn_1x.py | 87 +++--- tools/{examples => configs}/r50_fpn_rpn_1x.py | 80 +++--- tools/dist_train.sh | 9 + tools/eval.py | 265 ------------------ tools/test.py | 103 ++++--- tools/train.py | 121 +++++--- 26 files changed, 456 insertions(+), 618 deletions(-) create mode 100644 mmdet/core/eval/coco_utils.py delete mode 100644 mmdet/core/test_engine.py delete mode 100644 mmdet/core/train_engine.py create mode 100644 tools/coco_eval.py rename tools/{examples => configs}/r50_fpn_frcnn_1x.py (64%) rename tools/{examples => configs}/r50_fpn_maskrcnn_1x.py (72%) rename tools/{examples => configs}/r50_fpn_rpn_1x.py (54%) create mode 100755 tools/dist_train.sh delete mode 100644 tools/eval.py diff --git a/.gitignore b/.gitignore index ffbae97a51e..d5ef5f5a1a6 100644 --- a/.gitignore +++ b/.gitignore @@ -104,4 +104,5 @@ venv.bak/ .mypy_cache/ # cython generated cpp -mmdet/ops/nms/*.cpp \ No newline at end of file +mmdet/ops/nms/*.cpp +data diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py index 52ed690e668..0327750379f 100644 --- a/mmdet/core/__init__.py +++ b/mmdet/core/__init__.py @@ -1,5 +1,3 @@ -from .train_engine import * -from .test_engine import * from .rpn_ops import * from .bbox_ops import * from .mask_ops import * diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox_ops/sampling.py index eed82049640..d751f8ede43 100644 --- a/mmdet/core/bbox_ops/sampling.py +++ b/mmdet/core/bbox_ops/sampling.py @@ -244,6 +244,7 @@ def bbox_sampling(assigned_gt_inds, num_expected_pos = int(num_expected * pos_fraction) pos_inds = sample_positives(assigned_gt_inds, num_expected_pos, pos_balance_sampling) + pos_inds = pos_inds.unique() num_sampled_pos = pos_inds.numel() num_neg_max = int( neg_pos_ub * @@ -252,10 +253,10 @@ def bbox_sampling(assigned_gt_inds, neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg, max_overlaps, neg_balance_thr, neg_hard_fraction) + neg_inds = neg_inds.unique() return pos_inds, neg_inds - def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list, gt_labels_list, cfg): cfg_list = [cfg for _ in range(len(proposals_list))] @@ -265,11 +266,7 @@ def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list, return tuple(map(list, zip(*results))) -def sample_proposals_single(proposals, - gt_bboxes, - gt_crowds, - gt_labels, - cfg): +def 
sample_proposals_single(proposals, gt_bboxes, gt_crowds, gt_labels, cfg): proposals = proposals[:, :4] assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ bbox_assign( diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox_ops/transforms.py index a9f1e2a45fa..ca45d157dce 100644 --- a/mmdet/core/bbox_ops/transforms.py +++ b/mmdet/core/bbox_ops/transforms.py @@ -84,18 +84,18 @@ def bbox_flip(bboxes, img_shape): return mmcv.bbox_flip(bboxes, img_shape) -def bbox_mapping(bboxes, img_shape, flip): +def bbox_mapping(bboxes, img_shape, scale_factor, flip): """Map bboxes from the original image scale to testing scale""" - new_bboxes = bboxes * img_shape[-1] + new_bboxes = bboxes * scale_factor if flip: new_bboxes = bbox_flip(new_bboxes, img_shape) return new_bboxes -def bbox_mapping_back(bboxes, img_shape, flip): +def bbox_mapping_back(bboxes, img_shape, scale_factor, flip): """Map bboxes from testing scale to original image scale""" new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes - new_bboxes = new_bboxes / img_shape[-1] + new_bboxes = new_bboxes / scale_factor return new_bboxes diff --git a/mmdet/core/eval/__init__.py b/mmdet/core/eval/__init__.py index fe4893a0af6..c46d860d4b1 100644 --- a/mmdet/core/eval/__init__.py +++ b/mmdet/core/eval/__init__.py @@ -1,6 +1,7 @@ from .class_names import (voc_classes, imagenet_det_classes, imagenet_vid_classes, coco_classes, dataset_aliases, get_classes) +from .coco_utils import coco_eval from .mean_ap import average_precision, eval_map, print_map_summary from .recall import (eval_recalls, print_recall_summary, plot_num_recall, plot_iou_recall) @@ -9,5 +10,5 @@ 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 'coco_classes', 'dataset_aliases', 'get_classes', 'average_precision', 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', - 'plot_num_recall', 'plot_iou_recall' + 'plot_num_recall', 'plot_iou_recall', 'coco_eval' ] diff --git a/mmdet/core/eval/coco_utils.py b/mmdet/core/eval/coco_utils.py new file mode 100644 index 00000000000..cff6f678e4f --- /dev/null +++ b/mmdet/core/eval/coco_utils.py @@ -0,0 +1,26 @@ +import mmcv +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + + +def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)): + assert result_file.endswith('.json') + for res_type in result_types: + assert res_type in ['proposal', 'bbox', 'segm', 'keypoints'] + + if mmcv.is_str(coco): + coco = COCO(coco) + assert isinstance(coco, COCO) + + coco_dets = coco.loadRes(result_file) + img_ids = coco.getImgIds() + for res_type in result_types: + iou_type = 'bbox' if res_type == 'proposal' else res_type + cocoEval = COCOeval(coco, coco_dets, iou_type) + cocoEval.params.imgIds = img_ids + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = list(max_dets) + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() diff --git a/mmdet/core/losses/losses.py b/mmdet/core/losses/losses.py index 575c91d0536..ce3f963ca71 100644 --- a/mmdet/core/losses/losses.py +++ b/mmdet/core/losses/losses.py @@ -1,30 +1,28 @@ -# TODO merge naive and weighted loss to one function. +# TODO merge naive and weighted loss. import torch import torch.nn.functional as F -from ..bbox_ops import bbox_transform_inv, bbox_overlaps +def weighted_nll_loss(pred, label, weight, avg_factor=None): + if avg_factor is None: + avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 
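+    # (editor's note, not part of the original patch: avg_factor defaults
+    # to the number of targets with a positive weight, clamped to at least
+    # one, so the loss is averaged over the sampled anchors/RoIs rather
+    # than over every element; e.g. weight = [1., 1., 0., 0.] gives
+    # avg_factor = 2.0.)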
+ raw = F.nll_loss(pred, label, reduction='none') + return torch.sum(raw * weight)[None] / avg_factor -def weighted_nll_loss(pred, label, weight, ave_factor=None): - if ave_factor is None: - ave_factor = max(torch.sum(weight > 0).float().item(), 1.) - raw = F.nll_loss(pred, label, size_average=False, reduce=False) - return torch.sum(raw * weight)[None] / ave_factor +def weighted_cross_entropy(pred, label, weight, avg_factor=None): + if avg_factor is None: + avg_factor = max(torch.sum(weight > 0).float().item(), 1.) + raw = F.cross_entropy(pred, label, reduction='none') + return torch.sum(raw * weight)[None] / avg_factor -def weighted_cross_entropy(pred, label, weight, ave_factor=None): - if ave_factor is None: - ave_factor = max(torch.sum(weight > 0).float().item(), 1.) - raw = F.cross_entropy(pred, label, size_average=False, reduce=False) - return torch.sum(raw * weight)[None] / ave_factor - -def weighted_binary_cross_entropy(pred, label, weight, ave_factor=None): - if ave_factor is None: - ave_factor = max(torch.sum(weight > 0).float().item(), 1.) +def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None): + if avg_factor is None: + avg_factor = max(torch.sum(weight > 0).float().item(), 1.) return F.binary_cross_entropy_with_logits( pred, label.float(), weight.float(), - size_average=False)[None] / ave_factor + reduction='sum')[None] / avg_factor def sigmoid_focal_loss(pred, @@ -46,13 +44,13 @@ def weighted_sigmoid_focal_loss(pred, weight, gamma=2.0, alpha=0.25, - ave_factor=None, + avg_factor=None, num_classes=80): - if ave_factor is None: - ave_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6 + if avg_factor is None: + avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6 return sigmoid_focal_loss( pred, target, weight, gamma=gamma, alpha=alpha, - size_average=False)[None] / ave_factor + reduction='sum')[None] / avg_factor def mask_cross_entropy(pred, target, label): @@ -60,7 +58,7 @@ def mask_cross_entropy(pred, target, label): inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) pred_slice = pred[inds, label].squeeze(1) return F.binary_cross_entropy_with_logits( - pred_slice, target, size_average=True)[None] + pred_slice, target, reduction='sum')[None] def weighted_mask_cross_entropy(pred, target, weight, label): @@ -73,24 +71,27 @@ def weighted_mask_cross_entropy(pred, target, weight, label): pred_slice, target, weight, size_average=False)[None] / num_samples -def smooth_l1_loss(pred, target, beta=1.0, size_average=True, reduce=True): +def smooth_l1_loss(pred, target, beta=1.0, reduction='elementwise_mean'): assert beta > 0 assert pred.size() == target.size() and target.numel() > 0 diff = torch.abs(pred - target) loss = torch.where(diff < beta, 0.5 * diff * diff / beta, diff - 0.5 * beta) - if size_average: - loss /= pred.numel() - if reduce: - loss = loss.sum() - return loss - - -def weighted_smoothl1(pred, target, weight, beta=1.0, ave_factor=None): - if ave_factor is None: - ave_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6 - loss = smooth_l1_loss(pred, target, beta, size_average=False, reduce=False) - return torch.sum(loss * weight)[None] / ave_factor + reduction = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction == 0: + return loss + elif reduction == 1: + return loss.sum() / pred.numel() + elif reduction == 2: + return loss.sum() + + +def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None): + if avg_factor is None: + avg_factor = torch.sum(weight 
> 0).float().item() / 4 + 1e-6 + loss = smooth_l1_loss(pred, target, beta, reduction='none') + return torch.sum(loss * weight)[None] / avg_factor def accuracy(pred, target, topk=1): diff --git a/mmdet/core/mask_ops/utils.py b/mmdet/core/mask_ops/utils.py index 2802430007e..4da4a8369b1 100644 --- a/mmdet/core/mask_ops/utils.py +++ b/mmdet/core/mask_ops/utils.py @@ -1,7 +1,3 @@ -import cvbase as cvb -import numpy as np -import pycocotools.mask as mask_utils - import mmcv diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py index 35dfce24f91..0472aaf80fd 100644 --- a/mmdet/core/post_processing/merge_augs.py +++ b/mmdet/core/post_processing/merge_augs.py @@ -1,8 +1,8 @@ import torch -from mmdet.ops import nms import numpy as np +from mmdet.ops import nms from ..bbox_ops import bbox_mapping_back @@ -21,11 +21,12 @@ def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): """ recovered_proposals = [] for proposals, img_info in zip(aug_proposals, img_metas): - shape_scale = img_info['shape_scale'][0] - flip = img_info['flip'][0] + img_shape = img_info['img_shape'] + scale_factor = img_info['scale_factor'] + flip = img_info['flip'] _proposals = proposals.clone() - _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], shape_scale, - flip) + _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, + scale_factor, flip) recovered_proposals.append(_proposals) aug_proposals = torch.cat(recovered_proposals, dim=0) nms_keep = nms(aug_proposals, rpn_test_cfg.nms_thr, @@ -53,9 +54,10 @@ def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): """ recovered_bboxes = [] for bboxes, img_info in zip(aug_bboxes, img_metas): - shape_scale = img_info['shape_scale'][0] - flip = img_info['flip'][0] - bboxes = bbox_mapping_back(bboxes, shape_scale, flip) + img_shape = img_info['img_shape'] + scale_factor = img_info['scale_factor'] + flip = img_info['flip'] + bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) recovered_bboxes.append(bboxes) bboxes = torch.stack(recovered_bboxes).mean(dim=0) if aug_scores is None: diff --git a/mmdet/core/rpn_ops/anchor_generator.py b/mmdet/core/rpn_ops/anchor_generator.py index e7a1fa256fb..84600be331e 100644 --- a/mmdet/core/rpn_ops/anchor_generator.py +++ b/mmdet/core/rpn_ops/anchor_generator.py @@ -50,15 +50,18 @@ def _meshgrid(self, x, y, row_major=True): return yy, xx def grid_anchors(self, featmap_size, stride=16, device='cuda'): + base_anchors = self.base_anchors.to(device) + feat_h, feat_w = featmap_size shift_x = torch.arange(0, feat_w, device=device) * stride shift_y = torch.arange(0, feat_h, device=device) * stride shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) + shifts = shifts.type_as(base_anchors) # first feat_w elements correspond to the first row of shifts # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get # shifted anchors (K, A, 4), reshape to (K*A, 4) - base_anchors = self.base_anchors.to(device) + all_anchors = base_anchors[None, :, :] + shifts[:, None, :] all_anchors = all_anchors.view(-1, 4) # first A rows correspond to A anchors of (0, 0) in feature map, diff --git a/mmdet/core/rpn_ops/anchor_target.py b/mmdet/core/rpn_ops/anchor_target.py index a6bba8ed221..6062633c0c2 100644 --- a/mmdet/core/rpn_ops/anchor_target.py +++ b/mmdet/core/rpn_ops/anchor_target.py @@ -4,12 +4,14 @@ def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list, - img_shapes, target_means, 
target_stds, cfg): - """Compute anchor regression and classification targets + img_metas, target_means, target_stds, cfg): + """Compute regression and classification targets for anchors. + + There may be multiple feature levels, Args: anchor_list(list): anchors of each feature map level - featuremap_sizes(list): feature map sizes + featmap_sizes(list): feature map sizes gt_bboxes_list(list): ground truth bbox of images in a mini-batch img_shapes(list): shape of each image in a mini-batch cfg(dict): configs @@ -17,15 +19,16 @@ def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list, Returns: tuple """ - if len(featmap_sizes) == len(anchor_list): + num_imgs = len(img_metas) + num_levels = len(featmap_sizes) + if len(anchor_list) == num_levels: all_anchors = torch.cat(anchor_list, 0) anchor_nums = [anchors.size(0) for anchors in anchor_list] use_isomerism_anchors = False - elif len(img_shapes) == len(anchor_list): + elif len(anchor_list) == num_imgs: # using different anchors for different images all_anchors_list = [ - torch.cat(anchor_list[img_id], 0) - for img_id in range(len(img_shapes)) + torch.cat(anchor_list[img_id], 0) for img_id in range(num_imgs) ] anchor_nums = [anchors.size(0) for anchors in anchor_list[0]] use_isomerism_anchors = True @@ -37,7 +40,7 @@ def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list, all_bbox_targets = [] all_bbox_weights = [] num_total_sampled = 0 - for img_id in range(len(img_shapes)): + for img_id in range(num_imgs): if isinstance(valid_flag_list[img_id], list): valid_flags = torch.cat(valid_flag_list[img_id], 0) else: @@ -45,7 +48,7 @@ def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list, if use_isomerism_anchors: all_anchors = all_anchors_list[img_id] inside_flags = anchor_inside_flags(all_anchors, valid_flags, - img_shapes[img_id][:2], + img_metas[img_id]['img_shape'][:2], cfg.allowed_border) if not inside_flags.any(): return None @@ -83,7 +86,7 @@ def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list, def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, target_stds, cfg): - num_total_anchors = all_anchors.size(0) + # assign gt and sample anchors anchors = all_anchors[inside_flags, :] assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign( anchors, @@ -99,10 +102,9 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, bbox_targets = torch.zeros_like(anchors) bbox_weights = torch.zeros_like(anchors) labels = torch.zeros_like(assigned_gt_inds) - label_weights = torch.zeros_like(assigned_gt_inds, dtype=torch.float) + label_weights = torch.zeros_like(assigned_gt_inds, dtype=anchors.dtype) if len(pos_inds) > 0: - pos_inds = unique(pos_inds) pos_anchors = anchors[pos_inds, :] pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :] pos_bbox_targets = bbox_transform(pos_anchors, pos_gt_bbox, @@ -115,10 +117,10 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, else: label_weights[pos_inds] = cfg.pos_weight if len(neg_inds) > 0: - neg_inds = unique(neg_inds) label_weights[neg_inds] = 1.0 # map up to original set of anchors + num_total_anchors = all_anchors.size(0) labels = unmap(labels, num_total_anchors, inside_flags) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) @@ -127,8 +129,9 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, return (labels, label_weights, 
bbox_targets, bbox_weights, pos_inds, neg_inds) + def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0): - img_h, img_w = img_shape.float() + img_h, img_w = img_shape[:2] if allowed_border >= 0: inside_flags = valid_flags & \ (all_anchors[:, 0] >= -allowed_border) & \ @@ -139,6 +142,7 @@ def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0): inside_flags = valid_flags return inside_flags + def unique(tensor): if tensor.is_cuda: u_tensor = np.unique(tensor.cpu().numpy()) @@ -146,6 +150,7 @@ def unique(tensor): else: return torch.unique(tensor) + def unmap(data, count, inds, fill=0): """ Unmap a subset of item (data) back to the original set of items (of size count) """ diff --git a/mmdet/core/test_engine.py b/mmdet/core/test_engine.py deleted file mode 100644 index 4825beda640..00000000000 --- a/mmdet/core/test_engine.py +++ /dev/null @@ -1,14 +0,0 @@ -from mmdet.datasets import collate -from mmdet.nn.parallel import scatter - -__all__ = ['_data_func'] - -def _data_func(data, gpu_id): - imgs, img_metas = tuple( - scatter(collate([data], samples_per_gpu=1), [gpu_id])[0]) - return dict( - img=imgs, - img_meta=img_metas, - return_loss=False, - return_bboxes=True, - rescale=True) diff --git a/mmdet/core/train_engine.py b/mmdet/core/train_engine.py deleted file mode 100644 index cc745faad87..00000000000 --- a/mmdet/core/train_engine.py +++ /dev/null @@ -1,40 +0,0 @@ -import numpy as np -import torch -from collections import OrderedDict -from mmdet.nn.parallel import scatter - - -def parse_losses(losses): - log_vars = OrderedDict() - for loss_key, loss_value in losses.items(): - if isinstance(loss_value, dict): - for _key, _value in loss_value.items(): - if isinstance(_value, list): - _value = sum([_loss.mean() for _loss in _value]) - else: - _value = _value.mean() - log_vars[_keys] = _value - elif isinstance(loss_value, list): - log_vars[loss_key] = sum(_loss.mean() for _loss in loss_value) - else: - log_vars[loss_key] = loss_value.mean() - - loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) - log_vars['loss'] = loss - for _key, _value in log_vars.items(): - log_vars[_key] = _value.item() - - return loss, log_vars - - -def batch_processor(model, data, train_mode, args=None): - data = scatter(data, [torch.cuda.current_device()])[0] - losses = model(**data) - loss, log_vars = parse_losses(losses) - - outputs = dict( - loss=loss / args.world_size, - log_vars=log_vars, - num_samples=len(data['img'].data)) - - return outputs diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index 47279c7bf8f..79644b80617 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -3,11 +3,11 @@ import torch.multiprocessing as mp import torch.distributed as dist from torch.nn.utils import clip_grad -from mmcv.torchpack import Hook, OptimizerStepperHook +from mmcv.torchpack import Hook, OptimizerHook __all__ = [ - 'init_dist', 'average_gradients', 'broadcast_params', - 'DistOptimizerStepperHook', 'DistSamplerSeedHook' + 'init_dist', 'average_gradients', 'broadcast_params', 'DistOptimizerHook', + 'DistSamplerSeedHook' ] @@ -40,17 +40,16 @@ def broadcast_params(model): dist.broadcast(p, 0) -class DistOptimizerStepperHook(OptimizerStepperHook): +class DistOptimizerHook(OptimizerHook): def after_train_iter(self, runner): runner.optimizer.zero_grad() runner.outputs['loss'].backward() average_gradients(runner.model) - if self.grad_clip: + if self.grad_clip is not None: clip_grad.clip_grad_norm_( 
filter(lambda p: p.requires_grad, runner.model.parameters()), - max_norm=self.max_norm, - norm_type=self.norm_type) + **self.grad_clip) runner.optimizer.step() diff --git a/mmdet/core/utils/hooks.py b/mmdet/core/utils/hooks.py index f97e1fb2906..8a52d11ba41 100644 --- a/mmdet/core/utils/hooks.py +++ b/mmdet/core/utils/hooks.py @@ -7,7 +7,7 @@ import numpy as np import torch from mmcv.torchpack import Hook -from mmdet.datasets import collate +from mmdet.datasets.loader import collate from mmdet.nn.parallel import scatter from pycocotools.cocoeval import COCOeval diff --git a/mmdet/core/utils/misc.py b/mmdet/core/utils/misc.py index 0f9c05e4577..5b09456d3b1 100644 --- a/mmdet/core/utils/misc.py +++ b/mmdet/core/utils/misc.py @@ -1,5 +1,3 @@ -import subprocess - import mmcv import numpy as np import torch @@ -7,20 +5,14 @@ __all__ = ['tensor2imgs', 'unique', 'unmap', 'results2json'] -def tensor2imgs(tensor, - color_order='RGB', - color_mean=(0.485, 0.456, 0.406), - color_std=(0.229, 0.224, 0.225)): - assert color_order in ['RGB', 'BGR'] - img_per_gpu = tensor.size(0) - color_mean = np.array(color_mean, dtype=np.float32) - color_std = np.array(color_std, dtype=np.float32) +def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): + num_imgs = tensor.size(0) + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) imgs = [] - for img_id in range(img_per_gpu): + for img_id in range(num_imgs): img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) - if color_order == 'RGB': - img = mmcv.rgb2bgr(img) - img = img * color_std + color_mean + img = mmcv.imdenormalize(img, mean, std, to_bgr=to_rgb).astype(np.uint8) imgs.append(np.ascontiguousarray(img)) return imgs @@ -45,6 +37,7 @@ def unmap(data, count, inds, fill=0): ret[inds, :] = data return ret + def xyxy2xywh(bbox): _bbox = bbox.tolist() return [ @@ -54,6 +47,7 @@ def xyxy2xywh(bbox): _bbox[3] - _bbox[1] + 1, ] + def det2json(dataset, results): json_results = [] for idx in range(len(dataset)): diff --git a/mmdet/nn/parallel/scatter_gather.py b/mmdet/nn/parallel/scatter_gather.py index 47f794e8916..f5f7c588f4b 100644 --- a/mmdet/nn/parallel/scatter_gather.py +++ b/mmdet/nn/parallel/scatter_gather.py @@ -14,14 +14,24 @@ def scatter(inputs, target_gpus, dim=0): def scatter_map(obj): if isinstance(obj, torch.Tensor): return OrigScatter.apply(target_gpus, None, dim, obj) - if isinstance(obj, DataContainer) and isinstance(obj.data, list): - return Scatter.forward(target_gpus, obj.data) + if isinstance(obj, DataContainer): + # print('data container', obj) + if obj.cpu_only: + return obj.data + else: + return Scatter.forward(target_gpus, obj.data) if isinstance(obj, tuple) and len(obj) > 0: return list(zip(*map(scatter_map, obj))) if isinstance(obj, list) and len(obj) > 0: - return list(map(list, zip(*map(scatter_map, obj)))) + # print('list', obj) + out = list(map(list, zip(*map(scatter_map, obj)))) + # print('list out', out) + return out if isinstance(obj, dict) and len(obj) > 0: - return list(map(type(obj), zip(*map(scatter_map, obj.items())))) + # print('dict\n', obj) + out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) + # print('dict output\n', out) + return out return [obj for targets in target_gpus] # After scatter_map is called, a scatter_map cell will exist.
This cell diff --git a/setup.py b/setup.py index 8ed19bd5a81..759e8ebf37e 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ def readme(): def get_version(): - version_file = 'mmcv/version.py' + version_file = 'mmdet/version.py' with open(version_file, 'r') as f: exec(compile(f.read(), version_file, 'exec')) return locals()['__version__'] diff --git a/tools/coco_eval.py b/tools/coco_eval.py new file mode 100644 index 00000000000..93554be29a2 --- /dev/null +++ b/tools/coco_eval.py @@ -0,0 +1,23 @@ +from argparse import ArgumentParser + +from mmdet.core import coco_eval + + +def main(): + parser = ArgumentParser(description='COCO Evaluation') + parser.add_argument('result', help='result file path') + parser.add_argument('--ann', help='annotation file path') + parser.add_argument( + '--types', type=str, nargs='+', default=['bbox'], help='result types') + parser.add_argument( + '--max-dets', + type=int, + nargs='+', + default=[100, 300, 1000], + help='result types') + args = parser.parse_args() + coco_eval(args.result, args.types, args.ann, args.max_dets) + + +if __name__ == '__main__': + main() diff --git a/tools/examples/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py similarity index 64% rename from tools/examples/r50_fpn_frcnn_1x.py rename to tools/configs/r50_fpn_frcnn_1x.py index 6814445f8e1..5389fb9f2c9 100644 --- a/tools/examples/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -1,7 +1,7 @@ # model settings model = dict( - pretrained= - '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + type='FasterRCNN', + pretrained='modelzoo://resnet50', backbone=dict( type='resnet', depth=50, @@ -25,7 +25,7 @@ target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], use_sigmoid_cls=True), - roi_block=dict( + bbox_roi_extractor=dict( type='SingleLevelRoI', roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), out_channels=256, @@ -40,8 +40,9 @@ target_means=[0., 0., 0., 0.], target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=False)) -meta_params = dict( - rpn_train_cfg = dict( +# model training and testing settings +train_cfg = dict( + rpn=dict( pos_fraction=0.5, pos_balance_sampling=False, neg_pos_ub=256, @@ -54,14 +55,7 @@ pos_weight=-1, smoothl1_beta=1 / 9.0, debug=False), - rpn_test_cfg = dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn_train_cfg = dict( + rcnn=dict( pos_iou_thr=0.5, neg_iou_thr=0.5, crowd_thr=1.1, @@ -72,54 +66,65 @@ neg_pos_ub=512, neg_balance_thr=0, pos_weight=-1, - debug=False), - rcnn_test_cfg = dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5) -) + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5)) # dataset settings -data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +dataset_type = 'CocoDataset' +data_root = '../data/coco/' img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - to_rgb=True) -img_per_gpu = 1 -data_workers = 2 -train_dataset = dict( - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5) -test_dataset = dict( - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32) + mean=[123.675, 
116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + flip_ratio=0, + img_norm_cfg=img_norm_cfg, + size_divisor=32)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) # learning policy -lr_policy = dict( +lr_config = dict( policy='step', warmup='linear', warmup_iters=500, - warmup_ratio=0.333, + warmup_ratio=1.0 / 3, step=[8, 11]) -max_epoch = 12 checkpoint_config = dict(interval=1) -dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') -# logging settings -log_level = 'INFO' # yapf:disable log_config = dict( interval=50, hooks=[ dict(type='TextLoggerHook'), - # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + # dict(type='TensorboardLoggerHook', log_dir=work_dir + '/log') ]) # yapf:enable -work_dir = './model/r50_fpn_frcnn_1x' +# runtime settings +total_epochs = 12 +device_ids = range(8) +dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +log_level = 'INFO' +work_dir = './work_dirs/fpn_faster_rcnn_r50_1x' load_from = None resume_from = None workflow = [('train', 1)] diff --git a/tools/examples/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py similarity index 72% rename from tools/examples/r50_fpn_maskrcnn_1x.py rename to tools/configs/r50_fpn_maskrcnn_1x.py index 49b32037ec5..6d1b367a598 100644 --- a/tools/examples/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -1,7 +1,7 @@ # model settings model = dict( - pretrained= - '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + type='MaskRCNN', + pretrained='modelzoo://resnet50', backbone=dict( type='resnet', depth=50, @@ -25,7 +25,7 @@ target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], use_sigmoid_cls=True), - roi_block=dict( + bbox_roi_extractor=dict( type='SingleLevelRoI', roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), out_channels=256, @@ -40,7 +40,7 @@ target_means=[0., 0., 0., 0.], target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=False), - mask_block=dict( + mask_roi_extractor=dict( type='SingleLevelRoI', roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), out_channels=256, @@ -51,8 +51,9 @@ in_channels=256, conv_out_channels=256, num_classes=81)) -meta_params = dict( - rpn_train_cfg=dict( +# model training and testing settings +train_cfg = dict( + rpn=dict( pos_fraction=0.5, pos_balance_sampling=False, neg_pos_ub=256, @@ -65,14 +66,7 @@ pos_weight=-1, smoothl1_beta=1 / 9.0, debug=False), - rpn_test_cfg=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn_train_cfg=dict( + rcnn=dict( mask_size=28, pos_iou_thr=0.5, neg_iou_thr=0.5, @@ -84,44 +78,52 @@ neg_pos_ub=512, neg_balance_thr=0, pos_weight=-1, - debug=False), - rcnn_test_cfg=dict( + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( score_thr=1e-3, 
max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) # dataset settings -data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +dataset_type = 'CocoDataset' +data_root = '../data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -img_per_gpu = 1 -data_workers = 2 -train_dataset = dict( - with_mask=True, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5) -test_dataset = dict( - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + flip_ratio=0, + img_norm_cfg=img_norm_cfg, + size_divisor=32)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) # learning policy -lr_policy = dict( +lr_config = dict( policy='step', warmup='linear', warmup_iters=500, warmup_ratio=0.333, step=[8, 11]) -max_epoch = 12 checkpoint_config = dict(interval=1) -dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') -# logging settings -log_level = 'INFO' # yapf:disable log_config = dict( interval=50, @@ -130,7 +132,12 @@ # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), ]) # yapf:enable -work_dir = './model/r50_fpn_mask_rcnn_1x' +# runtime settings +total_epochs = 12 +device_ids = range(8) +dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +log_level = 'INFO' +work_dir = './work_dirs/fpn_mask_rcnn_r50_1x' load_from = None resume_from = None workflow = [('train', 1)] diff --git a/tools/examples/r50_fpn_rpn_1x.py b/tools/configs/r50_fpn_rpn_1x.py similarity index 54% rename from tools/examples/r50_fpn_rpn_1x.py rename to tools/configs/r50_fpn_rpn_1x.py index 45c0a1a6c46..df31a59e4b0 100644 --- a/tools/examples/r50_fpn_rpn_1x.py +++ b/tools/configs/r50_fpn_rpn_1x.py @@ -1,7 +1,7 @@ # model settings model = dict( - pretrained= - '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + type='RPN', + pretrained='modelzoo://resnet50', backbone=dict( type='resnet', depth=50, @@ -25,8 +25,9 @@ target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], use_sigmoid_cls=True)) -meta_params = dict( - rpn_train_cfg=dict( +# model training and testing settings +train_cfg = dict( + rpn=dict( pos_fraction=0.5, pos_balance_sampling=False, neg_pos_ub=256, @@ -38,8 +39,9 @@ min_pos_iou=1e-3, pos_weight=-1, smoothl1_beta=1 / 9.0, - debug=False), - rpn_test_cfg=dict( + debug=False)) +test_cfg = dict( + rpn=dict( nms_across_levels=False, nms_pre=2000, nms_post=2000, @@ -47,49 +49,61 @@ nms_thr=0.7, min_bbox_size=0)) # dataset settings -data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +dataset_type = 'CocoDataset' +data_root = '../data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -img_per_gpu = 1 -data_workers = 
2 -train_dataset = dict( - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5) -test_dataset = dict( - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - test_mode=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=False, + with_crowd=False, + with_label=False, + test_mode=False), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + flip_ratio=0, + img_norm_cfg=img_norm_cfg, + size_divisor=32, + with_mask=False, + with_label=False, + test_mode=True)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) -# learning policy -lr_policy = dict( +# runner configs +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +lr_config = dict( policy='step', warmup='linear', warmup_iters=500, - warmup_ratio=0.333, + warmup_ratio=1.0 / 3, step=[8, 11]) -max_epoch = 12 checkpoint_config = dict(interval=1) -dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') -# logging settings -log_level = 'INFO' # yapf:disable log_config = dict( interval=50, hooks=[ dict(type='TextLoggerHook'), - # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + # dict(type='TensorboardLoggerHook', log_dir=work_dir + '/log') ]) # yapf:enable -work_dir = './model/r50_fpn_1x' +# runtime settings +total_epochs = 12 +device_ids = range(8) +dist_params = dict(backend='gloo', port='29500', master_ip='127.0.0.1') +log_level = 'INFO' +work_dir = './work_dirs/fpn_rpn_r50_1x' load_from = None resume_from = None workflow = [('train', 1)] diff --git a/tools/dist_train.sh b/tools/dist_train.sh new file mode 100755 index 00000000000..0214b05d22f --- /dev/null +++ b/tools/dist_train.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +PYTHON=${PYTHON:-"python"} + +$PYTHON train.py $1 --dist --world-size $2 --rank 0 & +let MAX_RANK=$2-1 +for i in `seq 1 $MAX_RANK`; do + $PYTHON train.py $1 --dist --world-size $2 --rank $i > /dev/null 2>&1 & +done diff --git a/tools/eval.py b/tools/eval.py deleted file mode 100644 index 20cc571e94b..00000000000 --- a/tools/eval.py +++ /dev/null @@ -1,265 +0,0 @@ -from argparse import ArgumentParser -from multiprocessing import Pool -import matplotlib.pyplot as plt -import numpy as np -import copy -import os - -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - - -def generate_area_range(splitRng=32, stop_size=128): - areaRng = [[0**2, 1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2]] - start = 0 - while start < stop_size: - end = start + splitRng - areaRng.append([start * start, end * end]) - start = end - areaRng.append([start * start, 1e5**2]) - return areaRng - - -def print_summarize(iouThr=None, - iouThrs=None, - precision=None, - recall=None, - areaRng_id=4, - areaRngs=None, - maxDets_id=2, - maxDets=None): - assert (precision is not None) or (recall is not None) - iStr = ' {:<18} {} @[ IoU={:<9} | size={:>5}-{:>5} | maxDets={:>3d} ] = {:0.3f}' - 
titleStr = 'Average Precision' if precision is not None else 'Average Recall' - typeStr = '(AP)' if precision is not None else '(AR)' - iouStr = '{:0.2f}:{:0.2f}'.format(iouThrs[0], iouThrs[-1]) \ - if iouThr is None else '{:0.2f}'.format(iouThr) - - aind = [areaRng_id] - mind = [maxDets_id] - if precision is not None: - # dimension of precision: [TxRxKxAxM] - s = precision - # IoU - if iouThr is not None: - t = np.where(iouThr == iouThrs)[0] - s = s[t] - s = s[:, :, :, aind, mind] - else: - # dimension of recall: [TxKxAxM] - s = recall - if iouThr is not None: - t = np.where(iouThr == iouThrs)[0] - s = s[t] - s = s[:, :, aind, mind] - if len(s[s > -1]) == 0: - mean_s = -1 - else: - mean_s = np.mean(s[s > -1]) - print( - iStr.format( - titleStr, typeStr, iouStr, np.sqrt(areaRngs[areaRng_id][0]), - np.sqrt(areaRngs[areaRng_id][1]) - if np.sqrt(areaRngs[areaRng_id][1]) < 999 else 'max', - maxDets[maxDets_id], mean_s)) - - -def eval_results(res_file, ann_file, res_types, splitRng): - for res_type in res_types: - assert res_type in ['proposal', 'bbox', 'segm', 'keypoints'] - - areaRng = generate_area_range(splitRng) - cocoGt = COCO(ann_file) - cocoDt = cocoGt.loadRes(res_file) - imgIds = cocoGt.getImgIds() - for res_type in res_types: - iou_type = 'bbox' if res_type == 'proposal' else res_type - cocoEval = COCOeval(cocoGt, cocoDt, iou_type) - cocoEval.params.imgIds = imgIds - if res_type == 'proposal': - cocoEval.params.useCats = 0 - cocoEval.params.maxDets = [100, 300, 1000] - cocoEval.params.areaRng = areaRng - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - ps = cocoEval.eval['precision'] - rc = cocoEval.eval['recall'] - for i in range(len(areaRng)): - print_summarize(None, cocoEval.params.iouThrs, ps, None, i, - areaRng, 2, cocoEval.params.maxDets) - - -def makeplot(rs, ps, outDir, class_name): - cs = np.vstack([ - np.ones((2, 3)), - np.array([.31, .51, .74]), - np.array([.75, .31, .30]), - np.array([.36, .90, .38]), - np.array([.50, .39, .64]), - np.array([1, .6, 0]) - ]) - areaNames = ['all', 'small', 'medium', 'large'] - types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN'] - for i in range(len(areaNames)): - area_ps = ps[..., i, 0] - figure_tile = class_name + '-' + areaNames[i] - aps = [ps_.mean() for ps_ in area_ps] - ps_curve = [ - ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps - ] - ps_curve.insert(0, np.zeros(ps_curve[0].shape)) - fig = plt.figure() - ax = plt.subplot(111) - for k in range(len(types)): - ax.plot(rs, ps_curve[k + 1], color=[0, 0, 0], linewidth=0.5) - ax.fill_between( - rs, - ps_curve[k], - ps_curve[k + 1], - color=cs[k], - label=str('[{:.3f}'.format(aps[k]) + ']' + types[k])) - plt.xlabel('recall') - plt.ylabel('precision') - plt.xlim(0, 1.) - plt.ylim(0, 1.) 
- plt.title(figure_tile) - plt.legend() - # plt.show() - fig.savefig(outDir + '/{}.png'.format(figure_tile)) - plt.close(fig) - - -def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type): - nm = cocoGt.loadCats(catId)[0] - print('--------------analyzing {}-{}---------------'.format( - k + 1, nm['name'])) - ps_ = {} - dt = copy.deepcopy(cocoDt) - nm = cocoGt.loadCats(catId)[0] - imgIds = cocoGt.getImgIds() - dt_anns = dt.dataset['annotations'] - select_dt_anns = [] - for ann in dt_anns: - if ann['category_id'] == catId: - select_dt_anns.append(ann) - dt.dataset['annotations'] = select_dt_anns - dt.createIndex() - # compute precision but ignore superclass confusion - gt = copy.deepcopy(cocoGt) - child_catIds = gt.getCatIds(supNms=[nm['supercategory']]) - for idx, ann in enumerate(gt.dataset['annotations']): - if (ann['category_id'] in child_catIds - and ann['category_id'] != catId): - gt.dataset['annotations'][idx]['ignore'] = 1 - gt.dataset['annotations'][idx]['iscrowd'] = 1 - gt.dataset['annotations'][idx]['category_id'] = catId - cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type) - cocoEval.params.imgIds = imgIds - cocoEval.params.maxDets = [100] - cocoEval.params.iouThrs = [.1] - cocoEval.params.useCats = 1 - cocoEval.evaluate() - cocoEval.accumulate() - ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :] - ps_['ps_supercategory'] = ps_supercategory - # compute precision but ignore any class confusion - gt = copy.deepcopy(cocoGt) - for idx, ann in enumerate(gt.dataset['annotations']): - if ann['category_id'] != catId: - gt.dataset['annotations'][idx]['ignore'] = 1 - gt.dataset['annotations'][idx]['iscrowd'] = 1 - gt.dataset['annotations'][idx]['category_id'] = catId - cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type) - cocoEval.params.imgIds = imgIds - cocoEval.params.maxDets = [100] - cocoEval.params.iouThrs = [.1] - cocoEval.params.useCats = 1 - cocoEval.evaluate() - cocoEval.accumulate() - ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :] - ps_['ps_allcategory'] = ps_allcategory - return k, ps_ - - -def analyze_results(res_file, ann_file, res_types, out_dir): - for res_type in res_types: - assert res_type in ['bbox', 'segm'] - - directory = os.path.dirname(out_dir + '/') - if not os.path.exists(directory): - print('-------------create {}-----------------'.format(out_dir)) - os.makedirs(directory) - - cocoGt = COCO(ann_file) - cocoDt = cocoGt.loadRes(res_file) - imgIds = cocoGt.getImgIds() - for res_type in res_types: - iou_type = res_type - cocoEval = COCOeval( - copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type) - cocoEval.params.imgIds = imgIds - cocoEval.params.iouThrs = [.75, .5, .1] - cocoEval.params.maxDets = [100] - cocoEval.evaluate() - cocoEval.accumulate() - ps = cocoEval.eval['precision'] - ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))]) - catIds = cocoGt.getCatIds() - recThrs = cocoEval.params.recThrs - with Pool(processes=48) as pool: - args = [(k, cocoDt, cocoGt, catId, iou_type) - for k, catId in enumerate(catIds)] - analyze_results = pool.starmap(analyze_individual_category, args) - for k, catId in enumerate(catIds): - nm = cocoGt.loadCats(catId)[0] - print('--------------saving {}-{}---------------'.format( - k + 1, nm['name'])) - analyze_result = analyze_results[k] - assert k == analyze_result[0] - ps_supercategory = analyze_result[1]['ps_supercategory'] - ps_allcategory = analyze_result[1]['ps_allcategory'] - # compute precision but ignore superclass confusion - ps[3, :, k, :, :] = ps_supercategory - # compute precision but 
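# The relabel-and-ignore idiom used twice above, pulled out as a hypothetical
# helper: flagging a GT box with ignore=1/iscrowd=1 removes it from both the
# match pool and the false-negative count, so re-evaluating detections against
# this modified GT isolates one confusion type at a time.
import copy

def ignore_all_but(coco_gt, cat_id):
    gt = copy.deepcopy(coco_gt)
    for ann in gt.dataset['annotations']:
        if ann['category_id'] != cat_id:
            ann['ignore'] = 1
            ann['iscrowd'] = 1
            ann['category_id'] = cat_id
    gt.createIndex()
    return gt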
ignore any class confusion - ps[4, :, k, :, :] = ps_allcategory - # fill in background and false negative errors and plot - ps[ps == -1] = 0 - ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0) - ps[6, :, k, :, :] = 1.0 - makeplot(recThrs, ps[:, :, k], out_dir, nm['name']) - makeplot(recThrs, ps, out_dir, 'all') - - -def main(): - parser = ArgumentParser(description='COCO Evaluation') - parser.add_argument('result', help='result file path') - parser.add_argument( - '--ann', - default='/mnt/SSD/dataset/coco/annotations/instances_minival2017.json', - help='annotation file path') - parser.add_argument( - '--types', type=str, nargs='+', default=['bbox'], help='result types') - parser.add_argument( - '--analyze', action='store_true', help='whether to analyze results') - parser.add_argument( - '--out_dir', - type=str, - default=None, - help='dir to save analyze result images') - parser.add_argument( - '--splitRng', - type=int, - default=32, - help='range to split area in evaluation') - args = parser.parse_args() - if not args.analyze: - eval_results(args.result, args.ann, args.types, splitRng=args.splitRng) - else: - assert args.out_dir is not None - analyze_results( - args.result, args.ann, args.types, out_dir=args.out_dir) - - -if __name__ == '__main__': - main() diff --git a/tools/test.py b/tools/test.py index 2d062489100..773136d4c8a 100644 --- a/tools/test.py +++ b/tools/test.py @@ -1,29 +1,53 @@ -import os.path as osp -import sys -sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv') import argparse -import numpy as np import torch - import mmcv -from mmcv import Config -from mmcv.torchpack import load_checkpoint, parallel_test -from mmdet.core import _data_func, results2json -from mmdet.datasets import CocoDataset -from mmdet.datasets.data_engine import build_data -from mmdet.models import Detector +from mmcv.torchpack import load_checkpoint, parallel_test, obj_from_dict + +from mmdet import datasets +from mmdet.core import results2json, coco_eval +from mmdet.datasets.loader import collate, build_dataloader +from mmdet.models import build_detector, detectors +from mmdet.nn.parallel import scatter, MMDataParallel + + +def single_test(model, data_loader, show=False): + model.eval() + results = [] + prog_bar = mmcv.ProgressBar(len(data_loader.dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(**data, return_loss=False, rescale=not show) + results.append(result) + + if show: + model.module.show_result(data, result, + data_loader.dataset.img_norm_cfg) + + batch_size = data['img'][0].size(0) + for _ in range(batch_size): + prog_bar.update() + return results + + +def _data_func(data, device_id): + data = scatter(collate([data], samples_per_gpu=1), [device_id])[0] + return dict(**data, return_loss=False, rescale=True) def parse_args(): parser = argparse.ArgumentParser(description='MMDet test detector') parser.add_argument('config', help='test config file path') parser.add_argument('checkpoint', help='checkpoint file') - parser.add_argument('--world_size', default=1, type=int) + parser.add_argument('--gpus', default=1, type=int) parser.add_argument('--out', help='output result file') parser.add_argument( - '--out_json', action='store_true', help='get json output file') + '--eval', + type=str, + nargs='+', + choices=['proposal', 'bbox', 'segm', 'keypoints'], + help='eval types') + parser.add_argument('--show', action='store_true', help='show results') args = parser.parse_args() return args @@ 
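# A usage sketch of the plumbing above (model and dataset are assumed to be
# built as in main() below): one sample is collated into a batch of one,
# scattered to a single GPU, and run with return_loss=False, which is exactly
# what _data_func feeds to parallel_test.
import torch
from mmdet.datasets.loader import collate
from mmdet.nn.parallel import scatter

def test_one_sample(model, dataset, device_id=0):
    data = scatter(collate([dataset[0]], samples_per_gpu=1), [device_id])[0]
    with torch.no_grad():
        return model(**data, return_loss=False, rescale=True)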
-32,33 +56,38 @@ def parse_args(): def main(): - cfg = Config.fromfile(args.config) - cfg.model['pretrained'] = None - # TODO this img_per_gpu - cfg.img_per_gpu == 1 - - if args.world_size == 1: - # TODO verify this part - args.dist = False - args.img_per_gpu = cfg.img_per_gpu - args.data_workers = cfg.data_workers - model = Detector(**cfg.model, **meta_params) + cfg = mmcv.Config.fromfile(args.config) + cfg.model.pretrained = None + cfg.data.test.test_mode = True + + dataset = obj_from_dict(cfg.data.test, datasets) + if args.gpus == 1: + model = build_detector( + cfg.model, train_cfg=None, test_cfg=cfg.test_cfg) load_checkpoint(model, args.checkpoint) - test_loader = build_data(cfg.test_dataset, args) - model = torch.nn.DataParallel(model, device_ids=0) - # TODO write single_test - outputs = single_test(test_loader, model) + model = MMDataParallel(model, device_ids=[0]) + + data_loader = build_dataloader( + dataset, + imgs_per_gpu=1, + workers_per_gpu=cfg.data.workers_per_gpu, + num_gpus=1, + dist=False, + shuffle=False) + outputs = single_test(model, data_loader, args.show) else: - test_dataset = CocoDataset(**cfg.test_dataset) - model = dict(cfg.model, **cfg.meta_params) - outputs = parallel_test(Detector, model, - args.checkpoint, test_dataset, _data_func, - range(args.world_size)) + model_args = cfg.model.copy() + model_args.update(train_cfg=None, test_cfg=cfg.test_cfg) + model_type = getattr(detectors, model_args.pop('type')) + outputs = parallel_test(model_type, model_args, args.checkpoint, + dataset, _data_func, range(args.gpus)) if args.out: - mmcv.dump(outputs, args.out, protocol=4) - if args.out_json: - results2json(test_dataset, outputs, args.out + '.json') + mmcv.dump(outputs, args.out) + if args.eval: + json_file = args.out + '.json' + results2json(dataset, outputs, json_file) + coco_eval(json_file, args.eval, dataset.coco) if __name__ == '__main__': diff --git a/tools/train.py b/tools/train.py index 0cb2450acf5..742bd1bbf03 100644 --- a/tools/train.py +++ b/tools/train.py @@ -1,28 +1,61 @@ from __future__ import division + import argparse -import sys -import os.path as osp -sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv') +from collections import OrderedDict import torch -import torch.multiprocessing as mp from mmcv import Config -from mmcv.torchpack import Runner -from mmdet.core import (batch_processor, init_dist, broadcast_params, - DistOptimizerStepperHook, DistSamplerSeedHook) -from mmdet.datasets.data_engine import build_data -from mmdet.models import Detector -from mmdet.nn.parallel import MMDataParallel +from mmcv.torchpack import Runner, obj_from_dict + +from mmdet import datasets +from mmdet.core import init_dist, DistOptimizerHook, DistSamplerSeedHook +from mmdet.datasets.loader import build_dataloader +from mmdet.models import build_detector +from mmdet.nn.parallel import MMDataParallel, MMDistributedDataParallel + + +def parse_losses(losses): + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError( + '{} is not a tensor or list of tensors'.format(loss_name)) + + loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) + + log_vars['loss'] = loss + for name in log_vars: + log_vars[name] = log_vars[name].item() + + return loss, log_vars + + 
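# A quick check of parse_losses' contract (runnable with the definition
# above): only keys containing 'loss' are summed into the total, lists of
# per-level tensors are reduced first, and everything is logged as a float.
import torch

fake = dict(
    loss_rpn_cls=[torch.tensor(0.2), torch.tensor(0.1)],  # per-level list
    loss_rpn_reg=torch.tensor(0.4),
    acc=torch.tensor(0.9))                                # logged, not summed
total, log_vars = parse_losses(fake)
assert abs(log_vars['loss'] - 0.7) < 1e-6 and 'acc' in log_vars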
+def batch_processor(model, data, train_mode, args=None): + losses = model(**data) + loss, log_vars = parse_losses(losses) + + outputs = dict( + loss=loss / args.world_size, + log_vars=log_vars, + num_samples=len(data['img'].data)) + + return outputs def parse_args(): - parser = argparse.ArgumentParser(description='MMDet train val detector') + parser = argparse.ArgumentParser(description='Train a detector') parser.add_argument('config', help='train config file path') - parser.add_argument('--validate', action='store_true', help='validate') parser.add_argument( - '--dist', action='store_true', help='distributed training or not') - parser.add_argument('--world_size', default=1, type=int) + '--validate', + action='store_true', + help='whether to add a validate phase') + parser.add_argument( + '--dist', action='store_true', help='use distributed training or not') + parser.add_argument('--world-size', default=1, type=int) parser.add_argument('--rank', default=0, type=int) args = parser.parse_args() @@ -33,53 +66,57 @@ def parse_args(): def main(): - # Enable distributed training or not + # get config from file + cfg = Config.fromfile(args.config) + cfg.update(world_size=args.world_size, rank=args.rank) + + # init distributed environment if necessary if args.dist: print('Enable distributed training.') - mp.set_start_method("spawn", force=True) - init_dist( - args.world_size, - args.rank, - **cfg.dist_params) + init_dist(args.world_size, args.rank, **cfg.dist_params) else: print('Disabled distributed training.') - # Fetch config information - cfg = Config.fromfile(args.config) - # TODO more flexible - args.img_per_gpu = cfg.img_per_gpu - args.data_workers = cfg.data_workers - - # prepare training loader - train_loader = [build_data(cfg.train_dataset, args)] + # prepare data loaders + train_dataset = obj_from_dict(cfg.data.train, datasets) + data_loaders = [ + build_dataloader( + train_dataset, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, + len(cfg.device_ids), args.dist, cfg.world_size, cfg.rank) + ] if args.validate: - val_loader = build_data(cfg.val_dataset, args) - train_loader.append(val_loader) + val_dataset = obj_from_dict(cfg.data.val, datasets) + data_loaders.append( + build_dataloader( + val_dataset, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, + len(cfg.device_ids), args.dist, cfg.world_size, cfg.rank)) # build model - model = Detector(**cfg.model, **cfg.meta_params) + model = build_detector( + cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) if args.dist: - model = model.cuda() - broadcast_params(model) + model = MMDistributedDataParallel( + model, device_ids=[cfg.rank], broadcast_buffers=False).cuda() else: - device_ids = args.rank % torch.cuda.device_count() - model = MMDataParallel(model, device_ids=device_ids).cuda() + model = MMDataParallel(model, device_ids=cfg.device_ids).cuda() - # register hooks + # build runner runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, cfg.log_level) - optimizer_stepper = DistOptimizerStepperHook( - **cfg.grad_clip_config) if args.dist else cfg.grad_clip_config - runner.register_training_hooks(cfg.lr_policy, optimizer_stepper, + # register hooks + optimizer_config = DistOptimizerHook( + **cfg.optimizer_config) if args.dist else cfg.optimizer_config + runner.register_training_hooks(cfg.lr_config, optimizer_config, cfg.checkpoint_config, cfg.log_config) if args.dist: runner.register_hook(DistSamplerSeedHook()) + if cfg.resume_from: runner.resume(cfg.resume_from) elif cfg.load_from: 
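# How the workflow list from the configs is consumed (a sketch of the assumed
# Runner semantics, not mmcv's actual code): each (mode, n) pair runs n epochs
# in that mode on the data loader with the same index, so [('train', 1)]
# trains every epoch, while [('train', 1), ('val', 1)] would alternate one
# training and one validation epoch, matching the --validate flag above.
def run_sketch(runner, data_loaders, workflow, max_epochs):
    epoch = 0
    while epoch < max_epochs:
        for i, (mode, n) in enumerate(workflow):
            for _ in range(n):
                if mode == 'train' and epoch >= max_epochs:
                    return
                getattr(runner, mode)(data_loaders[i])
                if mode == 'train':
                    epoch += 1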
runner.load_checkpoint(cfg.load_from) - runner.run(train_loader, cfg.workflow, cfg.max_epoch, args=args) + runner.run(data_loaders, cfg.workflow, cfg.total_epochs, args=args) -if __name__ == "__main__": +if __name__ == '__main__': main() From ffd94ba7e00e3eccd988269126c4cd02f86af9e8 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 23 Sep 2018 23:05:59 +0800 Subject: [PATCH 08/81] remove args from batch_processor --- tools/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/train.py b/tools/train.py index 742bd1bbf03..2111546a39d 100644 --- a/tools/train.py +++ b/tools/train.py @@ -34,7 +34,7 @@ def parse_losses(losses): return loss, log_vars -def batch_processor(model, data, train_mode, args=None): +def batch_processor(model, data, train_mode): losses = model(**data) loss, log_vars = parse_losses(losses) @@ -115,7 +115,7 @@ def main(): runner.resume(cfg.resume_from) elif cfg.load_from: runner.load_checkpoint(cfg.load_from) - runner.run(data_loaders, cfg.workflow, cfg.total_epochs, args=args) + runner.run(data_loaders, cfg.workflow, cfg.total_epochs) if __name__ == '__main__': From 56b9749c128aeae02afa19fb602b913e8922cbc2 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 24 Sep 2018 15:34:18 +0800 Subject: [PATCH 09/81] modify distributed training api and use coalesced all_reduce --- mmdet/core/utils/dist_utils.py | 82 ++++++++++++++++++++------- mmdet/datasets/loader/build_loader.py | 4 +- tools/configs/r50_fpn_frcnn_1x.py | 3 +- tools/configs/r50_fpn_maskrcnn_1x.py | 3 +- tools/configs/r50_fpn_rpn_1x.py | 3 +- tools/train.py | 54 +++++++++--------- 6 files changed, 93 insertions(+), 56 deletions(-) diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index 79644b80617..2c0a9283964 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -1,51 +1,89 @@ import os +from collections import OrderedDict + import torch import torch.multiprocessing as mp import torch.distributed as dist +from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors from torch.nn.utils import clip_grad from mmcv.torchpack import Hook, OptimizerHook __all__ = [ - 'init_dist', 'average_gradients', 'broadcast_params', 'DistOptimizerHook', - 'DistSamplerSeedHook' + 'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook' ] -def init_dist(world_size, - rank, - backend='gloo', - master_ip='127.0.0.1', - port=29500): +def init_dist(launcher, backend='nccl', **kwargs): if mp.get_start_method(allow_none=True) is None: mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_pytorch(backend, **kwargs) + else: + raise ValueError('Invalid launcher type: {}'.format(launcher)) + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) num_gpus = torch.cuda.device_count() torch.cuda.set_device(rank % num_gpus) - os.environ['MASTER_ADDR'] = master_ip - os.environ['MASTER_PORT'] = str(port) - if backend == 'nccl': - dist.init_process_group(backend='nccl') - else: - dist.init_process_group( - backend='gloo', rank=rank, world_size=world_size) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_mpi(backend, **kwargs): + raise NotImplementedError -def average_gradients(model): - for param in model.parameters(): - if param.requires_grad and not (param.grad is None): - 
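# What the 'pytorch' launcher path above relies on, as a runnable single
# process sketch (gloo backend, so no GPU is needed): torch.distributed.launch
# sets RANK/WORLD_SIZE/MASTER_ADDR/MASTER_PORT in the environment, and
# init_process_group picks them up via the default env:// init method.
import os
import torch.distributed as dist

os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
os.environ.setdefault('MASTER_PORT', '29500')
os.environ.setdefault('RANK', '0')
os.environ.setdefault('WORLD_SIZE', '1')
dist.init_process_group(backend='gloo')
print(dist.get_rank(), dist.get_world_size())  # 0 1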
dist.all_reduce(param.grad.data) +def _init_dist_slurm(backend, **kwargs): + raise NotImplementedError -def broadcast_params(model): - for p in model.state_dict().values(): - dist.broadcast(p, 0) +# modified from https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9 +def coalesce_all_reduce(tensors): + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + + for tp in buckets: + bucket = buckets[tp] + coalesced = _flatten_dense_tensors(bucket) + dist.all_reduce(coalesced) + coalesced /= dist.get_world_size() + + for buf, synced in zip(bucket, + _unflatten_dense_tensors(coalesced, bucket)): + buf.copy_(synced) + + +def reduce_grads(model, coalesce=True): + grads = [ + param.grad.data for param in model.parameters() + if param.requires_grad and param.grad is not None + ] + if coalesce: + coalesce_all_reduce(grads) + else: + for tensor in grads: + dist.all_reduce(tensor) class DistOptimizerHook(OptimizerHook): + def __init__(self, grad_clip=None, coalesce=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + def after_train_iter(self, runner): runner.optimizer.zero_grad() runner.outputs['loss'].backward() - average_gradients(runner.model) + reduce_grads(runner.model, self.coalesce) if self.grad_clip is not None: clip_grad.clip_grad_norm_( filter(lambda p: p.requires_grad, runner.model.parameters()), diff --git a/mmdet/datasets/loader/build_loader.py b/mmdet/datasets/loader/build_loader.py index a4e7d7d1e63..34fe2d2f0b0 100644 --- a/mmdet/datasets/loader/build_loader.py +++ b/mmdet/datasets/loader/build_loader.py @@ -1,5 +1,6 @@ from functools import partial +from mmcv.torchpack import get_dist_info from torch.utils.data import DataLoader from .collate import collate @@ -11,10 +12,9 @@ def build_dataloader(dataset, workers_per_gpu, num_gpus, dist=True, - world_size=1, - rank=0, **kwargs): if dist: + rank, world_size = get_dist_info() sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, rank) batch_size = imgs_per_gpu diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index 5389fb9f2c9..71505ae6e93 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -121,8 +121,7 @@ # yapf:enable # runtime settings total_epochs = 12 -device_ids = range(8) -dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = './work_dirs/fpn_faster_rcnn_r50_1x' load_from = None diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index 6d1b367a598..e6b353585f7 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -134,8 +134,7 @@ # yapf:enable # runtime settings total_epochs = 12 -device_ids = range(8) -dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = './work_dirs/fpn_mask_rcnn_r50_1x' load_from = None diff --git a/tools/configs/r50_fpn_rpn_1x.py b/tools/configs/r50_fpn_rpn_1x.py index df31a59e4b0..c982f0402b3 100644 --- a/tools/configs/r50_fpn_rpn_1x.py +++ b/tools/configs/r50_fpn_rpn_1x.py @@ -100,8 +100,7 @@ # yapf:enable # runtime settings total_epochs = 12 -device_ids = range(8) -dist_params = dict(backend='gloo', port='29500', master_ip='127.0.0.1') +dist_params = dict(backend='gloo') log_level = 'INFO' work_dir = './work_dirs/fpn_rpn_r50_1x' load_from = None diff --git 
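# The flatten / all_reduce / unflatten round trip behind coalesce_all_reduce
# above, shown on plain tensors (runnable after the init_process_group sketch
# earlier; with world_size 1 the all_reduce is an identity and the divide is
# by 1):
import torch
import torch.distributed as dist
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

grads = [torch.ones(3), torch.arange(4.0)]
flat = _flatten_dense_tensors(grads)   # one contiguous buffer, one comm op
dist.all_reduce(flat)
flat /= dist.get_world_size()
for g, synced in zip(grads, _unflatten_dense_tensors(flat, grads)):
    g.copy_(synced)                    # write the averaged values back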
a/tools/train.py b/tools/train.py index 2111546a39d..f6161a6a931 100644 --- a/tools/train.py +++ b/tools/train.py @@ -39,9 +39,7 @@ def batch_processor(model, data, train_mode): loss, log_vars = parse_losses(losses) outputs = dict( - loss=loss / args.world_size, - log_vars=log_vars, - num_samples=len(data['img'].data)) + loss=loss, log_vars=log_vars, num_samples=len(data['img'].data)) return outputs @@ -54,61 +52,65 @@ def parse_args(): action='store_true', help='whether to add a validate phase') parser.add_argument( - '--dist', action='store_true', help='use distributed training or not') - parser.add_argument('--world-size', default=1, type=int) - parser.add_argument('--rank', default=0, type=int) + '--gpus', type=int, default=1, help='number of gpus to use') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) args = parser.parse_args() return args -args = parse_args() - - def main(): - # get config from file + args = parse_args() + cfg = Config.fromfile(args.config) - cfg.update(world_size=args.world_size, rank=args.rank) + cfg.update(gpus=args.gpus) # init distributed environment if necessary - if args.dist: - print('Enable distributed training.') - init_dist(args.world_size, args.rank, **cfg.dist_params) - else: + if args.launcher == 'none': + dist = False print('Disabled distributed training.') + else: + dist = True + print('Enabled distributed training.') + init_dist(args.launcher, **cfg.dist_args) # prepare data loaders train_dataset = obj_from_dict(cfg.data.train, datasets) data_loaders = [ - build_dataloader( - train_dataset, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, - len(cfg.device_ids), args.dist, cfg.world_size, cfg.rank) + build_dataloader(train_dataset, cfg.data.imgs_per_gpu, + cfg.data.workers_per_gpu, cfg.gpus, dist) ] if args.validate: val_dataset = obj_from_dict(cfg.data.val, datasets) data_loaders.append( - build_dataloader( - val_dataset, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, - len(cfg.device_ids), args.dist, cfg.world_size, cfg.rank)) + build_dataloader(val_dataset, cfg.data.imgs_per_gpu, + cfg.data.workers_per_gpu, cfg.gpus, dist)) # build model model = build_detector( cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) - if args.dist: + if dist: model = MMDistributedDataParallel( - model, device_ids=[cfg.rank], broadcast_buffers=False).cuda() + model, + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False).cuda() else: - model = MMDataParallel(model, device_ids=cfg.device_ids).cuda() + model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() # build runner runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, cfg.log_level) # register hooks optimizer_config = DistOptimizerHook( - **cfg.optimizer_config) if args.dist else cfg.optimizer_config + **cfg.optimizer_config) if dist else cfg.optimizer_config runner.register_training_hooks(cfg.lr_config, optimizer_config, cfg.checkpoint_config, cfg.log_config) - if args.dist: + if dist: runner.register_hook(DistSamplerSeedHook()) if cfg.resume_from: From 12239608abf23f4a9eae5dfe4dc95ec0d23bbee8 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 24 Sep 2018 15:39:13 +0800 Subject: [PATCH 10/81] fix typo --- tools/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/train.py b/tools/train.py index f6161a6a931..10a9172c68e 100644 --- a/tools/train.py +++ b/tools/train.py @@ -77,7 +77,7 @@ def main(): else: 
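# With the signature change above, callers no longer thread world_size/rank
# through by hand; a sketch of building a plain single-GPU loader (dataset is
# assumed to be constructed via obj_from_dict, as in the tools):
from mmcv.torchpack import get_dist_info
from mmdet.datasets.loader import build_dataloader

rank, world_size = get_dist_info()     # (0, 1) outside torch.distributed
loader = build_dataloader(dataset, imgs_per_gpu=1, workers_per_gpu=2,
                          num_gpus=1, dist=False, shuffle=False)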
dist = True print('Enabled distributed training.') - init_dist(args.launcher, **cfg.dist_args) + init_dist(args.launcher, **cfg.dist_params) # prepare data loaders train_dataset = obj_from_dict(cfg.data.train, datasets) From ed3b239251123a197c13345f448e81b9300e1224 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 24 Sep 2018 16:29:18 +0800 Subject: [PATCH 11/81] fix typo --- mmdet/core/utils/dist_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index 2c0a9283964..7ffa7a09348 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -19,9 +19,9 @@ def init_dist(launcher, backend='nccl', **kwargs): if launcher == 'pytorch': _init_dist_pytorch(backend, **kwargs) elif launcher == 'mpi': - _init_dist_pytorch(backend, **kwargs) + _init_dist_mpi(backend, **kwargs) elif launcher == 'slurm': - _init_dist_pytorch(backend, **kwargs) + _init_dist_slurm(backend, **kwargs) else: raise ValueError('Invalid launcher type: {}'.format(launcher)) From 6ec1fd18e8d2e5141f33c92098dc883be47bd193 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Tue, 25 Sep 2018 01:47:02 +0800 Subject: [PATCH 12/81] modify MMDistributedDataParallel, no longer inherited from DistributedDataParallel --- mmdet/nn/parallel/distributed.py | 44 ++++++++++++++++++++++++++++++-- tools/dist_train.sh | 6 +---- tools/train.py | 5 +--- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/mmdet/nn/parallel/distributed.py b/mmdet/nn/parallel/distributed.py index 2809778ad93..1db0ea6d00f 100644 --- a/mmdet/nn/parallel/distributed.py +++ b/mmdet/nn/parallel/distributed.py @@ -1,9 +1,49 @@ -from torch.nn.parallel import DistributedDataParallel +import torch +import torch.distributed as dist +import torch.nn as nn +from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, + _take_tensors) from .scatter_gather import scatter_kwargs -class MMDistributedDataParallel(DistributedDataParallel): +class MMDistributedDataParallel(nn.Module): + + def __init__(self, module, dim=0, broadcast_buffers=True): + super(MMDistributedDataParallel, self).__init__() + self.module = module + self.dim = dim + self.broadcast_buffers = broadcast_buffers + + self.first_synced = False + self.broadcast_bucket_size = 32 * 1024 * 1024 + + def _dist_broadcast_coalesced(self, tensors, buffer_size): + for tensors in _take_tensors(tensors, buffer_size): + flat_tensors = _flatten_dense_tensors(tensors) + dist.broadcast(flat_tensors, 0) + for tensor, synced in zip( + tensors, _unflatten_dense_tensors(flat_tensors, tensors)): + tensor.copy_(synced) + + def sync_params(self): + module_states = list(self.module.state_dict().values()) + if len(module_states) > 0: + self._dist_broadcast_coalesced(module_states, + self.broadcast_bucket_size) + if self.broadcast_buffers: + buffers = [b.data for b in self.module._all_buffers()] + if len(buffers) > 0: + self._dist_broadcast_coalesced(buffers, + self.broadcast_bucket_size) def scatter(self, inputs, kwargs, device_ids): return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def forward(self, *inputs, **kwargs): + if not self.first_synced: + self.sync_params() + self.first_synced = True + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + return self.module(*inputs[0], **kwargs[0]) diff --git a/tools/dist_train.sh b/tools/dist_train.sh index 0214b05d22f..8b79c6158da 100755 --- a/tools/dist_train.sh +++ b/tools/dist_train.sh @@ -2,8 +2,4 @@ 
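# The bucketed rank-0 broadcast behind _dist_broadcast_coalesced above, as a
# sketch runnable in a single 'gloo' process (reuse the init_process_group
# snippet shown earlier): tensors are packed into ~32 MB buckets, each bucket
# is flattened and broadcast once, then copied back in place.
import torch
import torch.distributed as dist
from torch._utils import (_flatten_dense_tensors, _take_tensors,
                          _unflatten_dense_tensors)

states = [torch.randn(8), torch.randn(3, 3)]
for bucket in _take_tensors(states, 32 * 1024 * 1024):
    flat = _flatten_dense_tensors(bucket)
    dist.broadcast(flat, 0)
    for t, synced in zip(bucket, _unflatten_dense_tensors(flat, bucket)):
        t.copy_(synced)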
PYTHON=${PYTHON:-"python"} -$PYTHON train.py $1 --dist --world-size $2 --rank 0 & -let MAX_RANK=$2-1 -for i in `seq 1 $MAX_RANK`; do - $PYTHON train.py $1 --dist --world-size $2 --rank $i > /dev/null 2>&1 & -done +$PYTHON -m torch.distributed.launch --nproc_per_node=$2 train.py $1 --launcher pytorch \ No newline at end of file diff --git a/tools/train.py b/tools/train.py index 10a9172c68e..8acb6308496 100644 --- a/tools/train.py +++ b/tools/train.py @@ -95,10 +95,7 @@ def main(): model = build_detector( cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) if dist: - model = MMDistributedDataParallel( - model, - device_ids=[torch.cuda.current_device()], - broadcast_buffers=False).cuda() + model = MMDistributedDataParallel(model).cuda() else: model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() From 47f236693255475ee11039828652d5ee852c36b8 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Tue, 25 Sep 2018 01:51:38 +0800 Subject: [PATCH 13/81] move _sync_param from forward to init --- mmdet/nn/parallel/distributed.py | 7 ++----- tools/train.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/mmdet/nn/parallel/distributed.py b/mmdet/nn/parallel/distributed.py index 1db0ea6d00f..a2e1d557b3e 100644 --- a/mmdet/nn/parallel/distributed.py +++ b/mmdet/nn/parallel/distributed.py @@ -15,8 +15,8 @@ def __init__(self, module, dim=0, broadcast_buffers=True): self.dim = dim self.broadcast_buffers = broadcast_buffers - self.first_synced = False self.broadcast_bucket_size = 32 * 1024 * 1024 + self._sync_params() def _dist_broadcast_coalesced(self, tensors, buffer_size): for tensors in _take_tensors(tensors, buffer_size): @@ -26,7 +26,7 @@ def _dist_broadcast_coalesced(self, tensors, buffer_size): tensors, _unflatten_dense_tensors(flat_tensors, tensors)): tensor.copy_(synced) - def sync_params(self): + def _sync_params(self): module_states = list(self.module.state_dict().values()) if len(module_states) > 0: self._dist_broadcast_coalesced(module_states, @@ -41,9 +41,6 @@ def scatter(self, inputs, kwargs, device_ids): return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) def forward(self, *inputs, **kwargs): - if not self.first_synced: - self.sync_params() - self.first_synced = True inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) return self.module(*inputs[0], **kwargs[0]) diff --git a/tools/train.py b/tools/train.py index 8acb6308496..8fd43807967 100644 --- a/tools/train.py +++ b/tools/train.py @@ -95,7 +95,7 @@ def main(): model = build_detector( cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) if dist: - model = MMDistributedDataParallel(model).cuda() + model = MMDistributedDataParallel(model.cuda()) else: model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() From 8f6f6ab34e38bbeed25b90c230e1fb20704c3b39 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Tue, 25 Sep 2018 21:27:58 +0800 Subject: [PATCH 14/81] move from models to core --- mmdet/core/utils/misc.py | 16 ++++++++-------- mmdet/models/utils/__init__.py | 1 - mmdet/models/utils/misc.py | 9 --------- 3 files changed, 8 insertions(+), 18 deletions(-) delete mode 100644 mmdet/models/utils/misc.py diff --git a/mmdet/core/utils/misc.py b/mmdet/core/utils/misc.py index 5b09456d3b1..02d0b40c1e7 100644 --- a/mmdet/core/utils/misc.py +++ b/mmdet/core/utils/misc.py @@ -1,8 +1,10 @@ +from functools import partial + import mmcv import numpy as np -import torch +from six.moves import map, zip -__all__ = ['tensor2imgs', 'unique', 'unmap', 'results2json'] +__all__ = 
['tensor2imgs', 'multi_apply', 'unmap', 'results2json'] def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): @@ -17,12 +19,10 @@ def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): return imgs -def unique(tensor): - if tensor.is_cuda: - u_tensor = np.unique(tensor.cpu().numpy()) - return tensor.new_tensor(u_tensor) - else: - return torch.unique(tensor) +def multi_apply(func, *args, **kwargs): + pfunc = partial(func, **kwargs) if kwargs else func + map_results = map(pfunc, *args) + return tuple(map(list, zip(*map_results))) def unmap(data, count, inds, fill=0): diff --git a/mmdet/models/utils/__init__.py b/mmdet/models/utils/__init__.py index f11af964480..f9c0dac6f53 100644 --- a/mmdet/models/utils/__init__.py +++ b/mmdet/models/utils/__init__.py @@ -1,6 +1,5 @@ from .conv_module import ConvModule from .norm import build_norm_layer -from .misc import * from .weight_init import * __all__ = ['ConvModule', 'build_norm_layer'] diff --git a/mmdet/models/utils/misc.py b/mmdet/models/utils/misc.py deleted file mode 100644 index ad52b587ac1..00000000000 --- a/mmdet/models/utils/misc.py +++ /dev/null @@ -1,9 +0,0 @@ -from functools import partial - -from six.moves import map, zip - - -def multi_apply(func, *args, **kwargs): - pfunc = partial(func, **kwargs) if kwargs else func - map_results = map(pfunc, *args) - return tuple(map(list, zip(*map_results))) From fb05c8688c7dcb3eaa847080e9d4bf62c2837311 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Tue, 25 Sep 2018 21:46:30 +0800 Subject: [PATCH 15/81] refactor rpn target computing --- mmdet/core/rpn_ops/anchor_target.py | 161 ++++++++++++--------------- mmdet/datasets/coco.py | 6 +- mmdet/datasets/transforms.py | 5 +- mmdet/models/rpn_heads/rpn_head.py | 90 +++++++++------ tools/configs/r50_fpn_frcnn_1x.py | 1 - tools/configs/r50_fpn_maskrcnn_1x.py | 1 - tools/configs/r50_fpn_rpn_1x.py | 3 +- 7 files changed, 137 insertions(+), 130 deletions(-) diff --git a/mmdet/core/rpn_ops/anchor_target.py b/mmdet/core/rpn_ops/anchor_target.py index 6062633c0c2..3cf651b5c46 100644 --- a/mmdet/core/rpn_ops/anchor_target.py +++ b/mmdet/core/rpn_ops/anchor_target.py @@ -1,93 +1,85 @@ import torch -import numpy as np -from ..bbox_ops import (bbox_assign, bbox_transform, bbox_sampling) +from ..bbox_ops import bbox_assign, bbox_transform, bbox_sampling +from ..utils import multi_apply -def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list, - img_metas, target_means, target_stds, cfg): - """Compute regression and classification targets for anchors. - There may be multiple feature levels, +def anchor_target(anchor_list, valid_flag_list, gt_bboxes_list, img_metas, + target_means, target_stds, cfg): + """Compute regression and classification targets for anchors. Args: - anchor_list(list): anchors of each feature map level - featmap_sizes(list): feature map sizes - gt_bboxes_list(list): ground truth bbox of images in a mini-batch - img_shapes(list): shape of each image in a mini-batch - cfg(dict): configs + anchor_list (list[list]): Multi level anchors of each image. + valid_flag_list (list[list]): Multi level valid flags of each image. + gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image. + img_metas (list[dict]): Meta info of each image. + target_means (Iterable): Mean value of regression targets. + target_stds (Iterable): Std value of regression targets. + cfg (dict): RPN train configs. 
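# multi_apply (defined above) is the workhorse of the refactor that follows:
# it maps a function over per-image argument lists and transposes the per-call
# result tuples into per-output lists. A minimal illustration:
def add_and_mul(x, y):
    return x + y, x * y

sums, prods = multi_apply(add_and_mul, [1, 2], [3, 4])
assert sums == [4, 6] and prods == [3, 8]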
Returns: tuple """ num_imgs = len(img_metas) - num_levels = len(featmap_sizes) - if len(anchor_list) == num_levels: - all_anchors = torch.cat(anchor_list, 0) - anchor_nums = [anchors.size(0) for anchors in anchor_list] - use_isomerism_anchors = False - elif len(anchor_list) == num_imgs: - # using different anchors for different images - all_anchors_list = [ - torch.cat(anchor_list[img_id], 0) for img_id in range(num_imgs) - ] - anchor_nums = [anchors.size(0) for anchors in anchor_list[0]] - use_isomerism_anchors = True - else: - raise ValueError('length of anchor_list should be equal to number of ' - 'feature lvls or number of images in a batch') - all_labels = [] - all_label_weights = [] - all_bbox_targets = [] - all_bbox_weights = [] - num_total_sampled = 0 - for img_id in range(num_imgs): - if isinstance(valid_flag_list[img_id], list): - valid_flags = torch.cat(valid_flag_list[img_id], 0) - else: - valid_flags = valid_flag_list[img_id] - if use_isomerism_anchors: - all_anchors = all_anchors_list[img_id] - inside_flags = anchor_inside_flags(all_anchors, valid_flags, - img_metas[img_id]['img_shape'][:2], - cfg.allowed_border) - if not inside_flags.any(): - return None - gt_bboxes = gt_bboxes_list[img_id] - anchor_targets = anchor_target_single(all_anchors, inside_flags, - gt_bboxes, target_means, - target_stds, cfg) - (labels, label_weights, bbox_targets, bbox_weights, pos_inds, - neg_inds) = anchor_targets - all_labels.append(labels) - all_label_weights.append(label_weights) - all_bbox_targets.append(bbox_targets) - all_bbox_weights.append(bbox_weights) - num_total_sampled += max(pos_inds.numel() + neg_inds.numel(), 1) - all_labels = torch.stack(all_labels, 0) - all_label_weights = torch.stack(all_label_weights, 0) - all_bbox_targets = torch.stack(all_bbox_targets, 0) - all_bbox_weights = torch.stack(all_bbox_weights, 0) - # split into different feature levels - labels_list = [] - label_weights_list = [] - bbox_targets_list = [] - bbox_weights_list = [] + assert len(anchor_list) == len(valid_flag_list) == num_imgs + + # anchor number of multi levels + num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] + # concat all level anchors and flags to a single tensor + for i in range(num_imgs): + assert len(anchor_list[i]) == len(valid_flag_list[i]) + anchor_list[i] = torch.cat(anchor_list[i]) + valid_flag_list[i] = torch.cat(valid_flag_list[i]) + + # compute targets for each image + means_replicas = [target_means for _ in range(num_imgs)] + stds_replicas = [target_stds for _ in range(num_imgs)] + cfg_replicas = [cfg for _ in range(num_imgs)] + (all_labels, all_label_weights, all_bbox_targets, + all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply( + anchor_target_single, anchor_list, valid_flag_list, gt_bboxes_list, + img_metas, means_replicas, stds_replicas, cfg_replicas) + # no valid anchors + if any([labels is None for labels in all_labels]): + return None + # sampled anchors of all images + num_total_samples = sum([ + max(pos_inds.numel() + neg_inds.numel(), 1) + for pos_inds, neg_inds in zip(pos_inds_list, neg_inds_list) + ]) + # split targets to a list w.r.t. 
multiple levels + labels_list = images_to_levels(all_labels, num_level_anchors) + label_weights_list = images_to_levels(all_label_weights, num_level_anchors) + bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors) + bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors) + return (labels_list, label_weights_list, bbox_targets_list, + bbox_weights_list, num_total_samples) + + +def images_to_levels(target, num_level_anchors): + """Convert targets by image to targets by feature level. + + [target_img0, target_img1] -> [target_level0, target_level1, ...] + """ + target = torch.stack(target, 0) + level_targets = [] start = 0 - for anchor_num in anchor_nums: - end = start + anchor_num - labels_list.append(all_labels[:, start:end].squeeze(0)) - label_weights_list.append(all_label_weights[:, start:end].squeeze(0)) - bbox_targets_list.append(all_bbox_targets[:, start:end].squeeze(0)) - bbox_weights_list.append(all_bbox_weights[:, start:end].squeeze(0)) + for n in num_level_anchors: + end = start + n + level_targets.append(target[:, start:end].squeeze(0)) start = end - return (labels_list, label_weights_list, bbox_targets_list, - bbox_weights_list, num_total_sampled) + return level_targets -def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, - target_stds, cfg): +def anchor_target_single(flat_anchors, valid_flags, gt_bboxes, img_meta, + target_means, target_stds, cfg): + inside_flags = anchor_inside_flags(flat_anchors, valid_flags, + img_meta['img_shape'][:2], + cfg.allowed_border) + if not inside_flags.any(): + return (None, ) * 6 # assign gt and sample anchors - anchors = all_anchors[inside_flags, :] + anchors = flat_anchors[inside_flags, :] assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign( anchors, gt_bboxes, @@ -120,7 +112,7 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, label_weights[neg_inds] = 1.0 # map up to original set of anchors - num_total_anchors = all_anchors.size(0) + num_total_anchors = flat_anchors.size(0) labels = unmap(labels, num_total_anchors, inside_flags) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) @@ -130,27 +122,20 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, neg_inds) -def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0): +def anchor_inside_flags(flat_anchors, valid_flags, img_shape, + allowed_border=0): img_h, img_w = img_shape[:2] if allowed_border >= 0: inside_flags = valid_flags & \ - (all_anchors[:, 0] >= -allowed_border) & \ - (all_anchors[:, 1] >= -allowed_border) & \ - (all_anchors[:, 2] < img_w + allowed_border) & \ - (all_anchors[:, 3] < img_h + allowed_border) + (flat_anchors[:, 0] >= -allowed_border) & \ + (flat_anchors[:, 1] >= -allowed_border) & \ + (flat_anchors[:, 2] < img_w + allowed_border) & \ + (flat_anchors[:, 3] < img_h + allowed_border) else: inside_flags = valid_flags return inside_flags -def unique(tensor): - if tensor.is_cuda: - u_tensor = np.unique(tensor.cpu().numpy()) - return tensor.new_tensor(u_tensor) - else: - return torch.unique(tensor) - - def unmap(data, count, inds, fill=0): """ Unmap a subset of item (data) back to the original set of items (of size count) """ diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index 8e7d9feffb9..b803360072b 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -212,7 +212,7 @@ def __getitem__(self, idx): # apply 
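# A concrete run of images_to_levels above: two images and two levels with 3
# and 2 anchors each; per-image flat targets come back as one [num_imgs, n]
# tensor per level.
import torch

per_image = [torch.arange(5), torch.arange(5) + 10]
level0, level1 = images_to_levels(per_image, [3, 2])
assert level0.shape == (2, 3)
assert level1.tolist() == [[3, 4], [13, 14]]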
transforms flip = True if np.random.rand() < self.flip_ratio else False img_scale = random_scale(self.img_scales) # sample a scale - img, img_shape, scale_factor = self.img_transform( + img, img_shape, pad_shape, scale_factor = self.img_transform( img, img_scale, flip) if self.proposals is not None: proposals = self.bbox_transform(proposals, img_shape, @@ -232,6 +232,7 @@ def __getitem__(self, idx): img_meta = dict( ori_shape=ori_shape, img_shape=img_shape, + pad_shape=pad_shape, scale_factor=scale_factor, flip=flip) @@ -260,12 +261,13 @@ def prepare_test_img(self, idx): if self.proposals is not None else None) def prepare_single(img, scale, flip, proposal=None): - _img, img_shape, scale_factor = self.img_transform( + _img, img_shape, pad_shape, scale_factor = self.img_transform( img, scale, flip) _img = to_tensor(_img) _img_meta = dict( ori_shape=(img_info['height'], img_info['width'], 3), img_shape=img_shape, + pad_shape=pad_shape, scale_factor=scale_factor, flip=flip) if proposal is not None: diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index 3a41e8d4cd4..6cdba4e972e 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -36,8 +36,11 @@ def __call__(self, img, scale, flip=False): img = mmcv.imflip(img) if self.size_divisor is not None: img = mmcv.impad_to_multiple(img, self.size_divisor) + pad_shape = img.shape + else: + pad_shape = img_shape img = img.transpose(2, 0, 1) - return img, img_shape, scale_factor + return img, img_shape, pad_shape, scale_factor def bbox_flip(bboxes, img_shape): diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py index e81f19310e8..68a81833e09 100644 --- a/mmdet/models/rpn_heads/rpn_head.py +++ b/mmdet/models/rpn_heads/rpn_head.py @@ -6,18 +6,35 @@ import torch.nn.functional as F from mmdet.core import (AnchorGenerator, anchor_target, bbox_transform_inv, - weighted_cross_entropy, weighted_smoothl1, + multi_apply, weighted_cross_entropy, weighted_smoothl1, weighted_binary_cross_entropy) from mmdet.ops import nms -from ..utils import multi_apply, normal_init +from ..utils import normal_init class RPNHead(nn.Module): + """Network head of RPN. + + / - rpn_cls (1x1 conv) + input - rpn_conv (3x3 conv) - + \ - rpn_reg (1x1 conv) + + Args: + in_channels (int): Number of channels in the input feature map. + feat_channels (int): Number of channels for the RPN feature map. + anchor_scales (Iterable): Anchor scales. + anchor_ratios (Iterable): Anchor aspect ratios. + anchor_strides (Iterable): Anchor strides. + anchor_base_sizes (Iterable): Anchor base sizes. + target_means (Iterable): Mean values of regression targets. + target_stds (Iterable): Std values of regression targets. + use_sigmoid_cls (bool): Whether to use sigmoid loss for classification. + (softmax by default) + """ def __init__(self, in_channels, - feat_channels=512, - coarsest_stride=32, + feat_channels=256, anchor_scales=[8, 16, 32], anchor_ratios=[0.5, 1.0, 2.0], anchor_strides=[4, 8, 16, 32, 64], @@ -28,7 +45,6 @@ def __init__(self, super(RPNHead, self).__init__() self.in_channels = in_channels self.feat_channels = feat_channels - self.coarsest_stride = coarsest_stride self.anchor_scales = anchor_scales self.anchor_ratios = anchor_ratios self.anchor_strides = anchor_strides @@ -66,38 +82,42 @@ def forward(self, feats): return multi_apply(self.forward_single, feats) def get_anchors(self, featmap_sizes, img_metas): - """Get anchors given a list of feature map sizes, and get valid flags - at the same time. 
(Extra padding regions should be marked as invalid) + """Get anchors according to feature map sizes. + + Args: + featmap_sizes (list[tuple]): Multi-level feature map sizes. + img_metas (list[dict]): Image meta info. + + Returns: + tuple: anchors of each image, valid flags of each image """ - # calculate actual image shapes - padded_img_shapes = [] - for img_meta in img_metas: - h, w = img_meta['img_shape'][:2] - padded_h = int( - np.ceil(h / self.coarsest_stride) * self.coarsest_stride) - padded_w = int( - np.ceil(w / self.coarsest_stride) * self.coarsest_stride) - padded_img_shapes.append((padded_h, padded_w)) - # generate anchors for different feature levels - # len = feature levels - anchor_list = [] - # len = imgs per gpu - valid_flag_list = [[] for _ in range(len(img_metas))] - for i in range(len(featmap_sizes)): - anchor_stride = self.anchor_strides[i] + num_imgs = len(img_metas) + num_levels = len(featmap_sizes) + + # since feature map sizes of all images are the same, we only compute + # anchors for one time + multi_level_anchors = [] + for i in range(num_levels): anchors = self.anchor_generators[i].grid_anchors( - featmap_sizes[i], anchor_stride) - anchor_list.append(anchors) - # for each image in this feature level, get valid flags - featmap_size = featmap_sizes[i] - for img_id, (h, w) in enumerate(padded_img_shapes): - valid_feat_h = min( - int(np.ceil(h / anchor_stride)), featmap_size[0]) - valid_feat_w = min( - int(np.ceil(w / anchor_stride)), featmap_size[1]) + featmap_sizes[i], self.anchor_strides[i]) + multi_level_anchors.append(anchors) + anchor_list = [multi_level_anchors for _ in range(num_imgs)] + + # for each image, we compute valid flags of multi level anchors + valid_flag_list = [] + for img_id, img_meta in enumerate(img_metas): + multi_level_flags = [] + for i in range(num_levels): + anchor_stride = self.anchor_strides[i] + feat_h, feat_w = featmap_sizes[i] + h, w, _ = img_meta['pad_shape'] + valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h) + valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w) flags = self.anchor_generators[i].valid_flags( - featmap_size, (valid_feat_h, valid_feat_w)) - valid_flag_list[img_id].append(flags) + (feat_h, feat_w), (valid_feat_h, valid_feat_w)) + multi_level_flags.append(flags) + valid_flag_list.append(multi_level_flags) + return anchor_list, valid_flag_list def loss_single(self, rpn_cls_score, rpn_bbox_pred, labels, label_weights, @@ -135,7 +155,7 @@ def loss(self, rpn_cls_scores, rpn_bbox_preds, gt_bboxes, img_shapes, cfg): anchor_list, valid_flag_list = self.get_anchors( featmap_sizes, img_shapes) cls_reg_targets = anchor_target( - anchor_list, valid_flag_list, featmap_sizes, gt_bboxes, img_shapes, + anchor_list, valid_flag_list, gt_bboxes, img_shapes, self.target_means, self.target_stds, cfg) if cls_reg_targets is None: return None diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index 71505ae6e93..4ce93e623e3 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -18,7 +18,6 @@ type='RPNHead', in_channels=256, feat_channels=256, - coarsest_stride=32, anchor_scales=[8], anchor_ratios=[0.5, 1.0, 2.0], anchor_strides=[4, 8, 16, 32, 64], diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index e6b353585f7..931f051b356 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -18,7 +18,6 @@ type='RPNHead', in_channels=256, feat_channels=256, - coarsest_stride=32, 
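# Back-of-envelope check of the shared-anchor logic above: with one scale and
# three ratios per level, an 800x1344 padded input (sizes assumed here for
# illustration) yields these per-level grids and roughly a quarter-million
# flat anchors per image, which is what anchor_target then consumes.
featmap_sizes = [(200, 336), (100, 168), (50, 84), (25, 42), (13, 21)]
num_anchors = sum(h * w * 3 for h, w in featmap_sizes)
assert num_anchors == 268569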
anchor_scales=[8], anchor_ratios=[0.5, 1.0, 2.0], anchor_strides=[4, 8, 16, 32, 64], diff --git a/tools/configs/r50_fpn_rpn_1x.py b/tools/configs/r50_fpn_rpn_1x.py index c982f0402b3..a00cab9de80 100644 --- a/tools/configs/r50_fpn_rpn_1x.py +++ b/tools/configs/r50_fpn_rpn_1x.py @@ -18,7 +18,6 @@ type='RPNHead', in_channels=256, feat_channels=256, - coarsest_stride=32, anchor_scales=[8], anchor_ratios=[0.5, 1.0, 2.0], anchor_strides=[4, 8, 16, 32, 64], @@ -104,5 +103,5 @@ log_level = 'INFO' work_dir = './work_dirs/fpn_rpn_r50_1x' load_from = None -resume_from = None +resume_from = None workflow = [('train', 1)] From 86313b69abbc062e7ae9d039ffd63bb86f77f857 Mon Sep 17 00:00:00 2001 From: pangjm Date: Tue, 25 Sep 2018 23:28:27 +0800 Subject: [PATCH 16/81] rm useless files --- TDL.md | 29 --- mmdet/models/detectors/detector.py | 349 ----------------------------- 2 files changed, 378 deletions(-) delete mode 100644 TDL.md delete mode 100644 mmdet/models/detectors/detector.py diff --git a/TDL.md b/TDL.md deleted file mode 100644 index 1679338c047..00000000000 --- a/TDL.md +++ /dev/null @@ -1,29 +0,0 @@ -### MMCV -- [ ] Implement the attr 'get' of 'Config' -- [ ] Config bugs: None type to '{}' with addict -- [ ] Default logger should be only with gpu0 -- [ ] Unit Test: mmcv and mmcv.torchpack - - -### MMDetection - -#### Basic -- [ ] Implement training function without distributed -- [ ] Verify nccl/nccl2/gloo -- [ ] Replace UGLY code: params plug in 'args' to reach a global flow -- [ ] Replace 'print' by 'logger' - - -#### Testing -- [ ] Implement distributed testing -- [ ] Implement single gpu testing - - -#### Refactor -- [ ] Re-consider params names -- [ ] Refactor functions in 'core' -- [ ] Merge single test & aug test as one function, so as other redundancy - -#### New features -- [ ] Plug loss params into Config -- [ ] Multi-head communication diff --git a/mmdet/models/detectors/detector.py b/mmdet/models/detectors/detector.py deleted file mode 100644 index 363131e8ece..00000000000 --- a/mmdet/models/detectors/detector.py +++ /dev/null @@ -1,349 +0,0 @@ -import torch -import torch.nn as nn - -from .. 
import builder -from mmdet.core import (bbox2roi, bbox_mapping, split_combined_gt_polys, - bbox2result, multiclass_nms, merge_aug_proposals, - merge_aug_bboxes, merge_aug_masks, sample_proposals) - - -class Detector(nn.Module): - - def __init__(self, - backbone, - neck=None, - rpn_head=None, - roi_block=None, - bbox_head=None, - mask_block=None, - mask_head=None, - rpn_train_cfg=None, - rpn_test_cfg=None, - rcnn_train_cfg=None, - rcnn_test_cfg=None, - pretrained=None): - super(Detector, self).__init__() - self.backbone = builder.build_backbone(backbone) - - self.with_neck = True if neck is not None else False - if self.with_neck: - self.neck = builder.build_neck(neck) - - self.with_rpn = True if rpn_head is not None else False - if self.with_rpn: - self.rpn_head = builder.build_rpn_head(rpn_head) - self.rpn_train_cfg = rpn_train_cfg - self.rpn_test_cfg = rpn_test_cfg - - self.with_bbox = True if bbox_head is not None else False - if self.with_bbox: - self.bbox_roi_extractor = builder.build_roi_extractor(roi_block) - self.bbox_head = builder.build_bbox_head(bbox_head) - self.rcnn_train_cfg = rcnn_train_cfg - self.rcnn_test_cfg = rcnn_test_cfg - - self.with_mask = True if mask_head is not None else False - if self.with_mask: - self.mask_roi_extractor = builder.build_roi_extractor(mask_block) - self.mask_head = builder.build_mask_head(mask_head) - - self.init_weights(pretrained=pretrained) - - def init_weights(self, pretrained=None): - if pretrained is not None: - print('load model from: {}'.format(pretrained)) - self.backbone.init_weights(pretrained=pretrained) - if self.with_neck: - if isinstance(self.neck, nn.Sequential): - for m in self.neck: - m.init_weights() - else: - self.neck.init_weights() - if self.with_rpn: - self.rpn_head.init_weights() - if self.with_bbox: - self.bbox_roi_extractor.init_weights() - self.bbox_head.init_weights() - if self.with_mask: - self.mask_roi_extractor.init_weights() - self.mask_head.init_weights() - - def forward(self, - img, - img_meta, - gt_bboxes=None, - proposals=None, - gt_labels=None, - gt_bboxes_ignore=None, - gt_mask_polys=None, - gt_poly_lens=None, - num_polys_per_mask=None, - return_loss=True, - return_bboxes=True, - rescale=False): - assert proposals is not None or self.with_rpn, "Only one of proposals file and RPN can exist." 
- - if not return_loss: - return self.test(img, img_meta, proposals, rescale) - else: - losses = dict() - - img_shapes = img_meta['img_shape'] - x = self.backbone(img) - - if self.with_neck: - x = self.neck(x) - - if self.with_rpn: - rpn_outs = self.rpn_head(x) - rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, - self.rpn_train_cfg) - rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) - losses.update(rpn_losses) - - if self.with_bbox: - if self.with_rpn: - proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) - else: - proposal_list = proposals - - (pos_inds, neg_inds, pos_proposals, neg_proposals, - pos_assigned_gt_inds, - pos_gt_bboxes, pos_gt_labels) = sample_proposals( - proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels, - self.rcnn_train_cfg) - - labels, label_weights, bbox_targets, bbox_weights = \ - self.bbox_head.get_bbox_target( - pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, - self.rcnn_train_cfg) - - rois = bbox2roi([ - torch.cat([pos, neg], dim=0) - for pos, neg in zip(pos_proposals, neg_proposals) - ]) - # TODO: a more flexible way to configurate feat maps - roi_feats = self.bbox_roi_extractor( - x[:self.bbox_roi_extractor.num_inputs], rois) - cls_score, bbox_pred = self.bbox_head(roi_feats) - - loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, - label_weights, bbox_targets, - bbox_weights) - losses.update(loss_bbox) - - if self.with_mask: - gt_polys = split_combined_gt_polys(gt_mask_polys, gt_poly_lens, - num_polys_per_mask) - mask_targets = self.mask_head.get_mask_target( - pos_proposals, pos_assigned_gt_inds, gt_polys, img_meta, - self.rcnn_train_cfg) - pos_rois = bbox2roi(pos_proposals) - mask_feats = self.mask_roi_extractor( - x[:self.mask_roi_extractor.num_inputs], pos_rois) - mask_pred = self.mask_head(mask_feats) - losses['loss_mask'] = self.mask_head.loss(mask_pred, mask_targets, - torch.cat(pos_gt_labels)) - return losses - - def test(self, imgs, img_metas, proposals=None, rescale=False): - """Test w/ or w/o augmentations.""" - assert isinstance(imgs, list) and isinstance(img_metas, list) - assert len(imgs) == len(img_metas) - img_per_gpu = imgs[0].size(0) - assert img_per_gpu == 1 - if len(imgs) == 1: - return self.simple_test(imgs[0], img_metas[0], proposals, rescale) - else: - return self.aug_test(imgs, img_metas, proposals, rescale) - - def simple_test_rpn(self, x, img_meta): - img_shapes = img_meta['img_shape'] - scale_factor = img_meta['scale_factor'] - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs)[0] - return proposal_list - - def simple_test_bboxes(self, x, img_meta, proposals, rescale=False): - """Test only det bboxes without augmentation.""" - rois = bbox2roi(proposals) - roi_feats = self.bbox_roi_extractor( - x[:len(self.bbox_roi_extractor.featmap_strides)], rois) - cls_score, bbox_pred = self.bbox_head(roi_feats) - # image shape of the first image in the batch (only one) - img_shape = img_meta['img_shape'][0] - scale_factor = img_meta['scale_factor'] - det_bboxes, det_labels = self.bbox_head.get_det_bboxes( - rois, - cls_score, - bbox_pred, - img_shape, - scale_factor, - rescale=rescale, - nms_cfg=self.rcnn_test_cfg) - return det_bboxes, det_labels - - def simple_test_mask(self, - x, - img_meta, - det_bboxes, - det_labels, - rescale=False): - # image shape of the first image in the batch (only one) - img_shape = img_meta['img_shape'][0] - 
scale_factor = img_meta['scale_factor'] - if det_bboxes.shape[0] == 0: - segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] - else: - # if det_bboxes is rescaled to the original image size, we need to - # rescale it back to the testing scale to obtain RoIs. - _bboxes = (det_bboxes[:, :4] * scale_factor.float() - if rescale else det_bboxes) - mask_rois = bbox2roi([_bboxes]) - mask_feats = self.mask_roi_extractor( - x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) - mask_pred = self.mask_head(mask_feats) - segm_result = self.mask_head.get_seg_masks( - mask_pred, - det_bboxes, - det_labels, - self.rcnn_test_cfg, - ori_scale=img_meta['ori_shape']) - return segm_result - - def simple_test(self, img, img_meta, proposals=None, rescale=False): - """Test without augmentation.""" - # get feature maps - x = self.backbone(img) - if self.with_neck: - x = self.neck(x) - if self.with_rpn: - proposals = self.simple_test_rpn(x, img_meta) - if self.with_bbox: - # BUG proposals shape? - det_bboxes, det_labels = self.simple_test_bboxes( - x, img_meta, [proposals], rescale=rescale) - bbox_result = bbox2result(det_bboxes, det_labels, - self.bbox_head.num_classes) - if not self.with_mask: - return bbox_result - - segm_result = self.simple_test_mask( - x, img_meta, det_bboxes, det_labels, rescale=rescale) - return bbox_result, segm_result - else: - proposals[:, :4] /= img_meta['scale_factor'].float() - return proposals.cpu().numpy() - - # TODO aug test haven't been verified - def aug_test_bboxes(self, imgs, img_metas): - """Test with augmentations for det bboxes.""" - # step 1: get RPN proposals for augmented images, apply NMS to the - # union of all proposals. - aug_proposals = [] - for img, img_meta in zip(imgs, img_metas): - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_meta['shape_scale'], - self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) - assert len(proposal_list) == 1 - aug_proposals.append(proposal_list[0]) # len(proposal_list) = 1 - # after merging, proposals will be rescaled to the original image size - merged_proposals = merge_aug_proposals(aug_proposals, img_metas, - self.rpn_test_cfg) - # step 2: Given merged proposals, predict bboxes for augmented images, - # output the union of these bboxes. 
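Step 1 relies on bbox_mapping to move boxes between the original image frame and each augmented view; the flip half of that mapping is easy to sanity-check in isolation. A hedged sketch, assuming the same "w - 1 - x" flip convention as bbox_flip in mmdet/core/bbox_ops/transforms.py:

    import torch

    def hflip_boxes(bboxes, img_w):
        # mirror [x1, y1, x2, y2] boxes horizontally; x1 and x2 swap roles
        flipped = bboxes.clone()
        flipped[:, 0] = img_w - 1 - bboxes[:, 2]
        flipped[:, 2] = img_w - 1 - bboxes[:, 0]
        return flipped

    b = torch.tensor([[10., 20., 50., 80.]])
    print(hflip_boxes(b, img_w=100))              # tensor([[49., 20., 89., 80.]])
    print(hflip_boxes(hflip_boxes(b, 100), 100))  # flipping twice restores the input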
- aug_bboxes = [] - aug_scores = [] - for img, img_meta in zip(imgs, img_metas): - # only one image in the batch - img_shape = img_meta['shape_scale'][0] - flip = img_meta['flip'][0] - proposals = bbox_mapping(merged_proposals[:, :4], img_shape, flip) - rois = bbox2roi([proposals]) - # recompute feature maps to save GPU memory - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - roi_feats = self.bbox_roi_extractor( - x[:len(self.bbox_roi_extractor.featmap_strides)], rois) - cls_score, bbox_pred = self.bbox_head(roi_feats) - bboxes, scores = self.bbox_head.get_det_bboxes( - rois, - cls_score, - bbox_pred, - img_shape, - rescale=False, - nms_cfg=None) - aug_bboxes.append(bboxes) - aug_scores.append(scores) - # after merging, bboxes will be rescaled to the original image size - merged_bboxes, merged_scores = merge_aug_bboxes( - aug_bboxes, aug_scores, img_metas, self.rcnn_test_cfg) - det_bboxes, det_labels = multiclass_nms( - merged_bboxes, merged_scores, self.rcnn_test_cfg.score_thr, - self.rcnn_test_cfg.nms_thr, self.rcnn_test_cfg.max_per_img) - return det_bboxes, det_labels - - def aug_test_mask(self, - imgs, - img_metas, - det_bboxes, - det_labels, - rescale=False): - # step 3: Given merged bboxes, predict masks for augmented images, - # scores of masks are averaged across augmented images. - if rescale: - _det_bboxes = det_bboxes - else: - _det_bboxes = det_bboxes.clone() - _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1] - if det_bboxes.shape[0] == 0: - segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] - else: - aug_masks = [] - for img, img_meta in zip(imgs, img_metas): - img_shape = img_meta['shape_scale'][0] - flip = img_meta['flip'][0] - _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, flip) - mask_rois = bbox2roi([_bboxes]) - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - mask_feats = self.mask_roi_extractor( - x[:len(self.mask_roi_extractor.featmap_strides)], - mask_rois) - mask_pred = self.mask_head(mask_feats) - # convert to numpy array to save memory - aug_masks.append(mask_pred.sigmoid().cpu().numpy()) - merged_masks = merge_aug_masks(aug_masks, img_metas, - self.rcnn_test_cfg) - segm_result = self.mask_head.get_seg_masks( - merged_masks, _det_bboxes, det_labels, - img_metas[0]['shape_scale'][0], self.rcnn_test_cfg, rescale) - return segm_result - - def aug_test(self, imgs, img_metas, rescale=False): - """Test with augmentations. - If rescale is False, then returned bboxes and masks will fit the scale - if imgs[0]. 
- """ - # aug test det bboxes - det_bboxes, det_labels = self.aug_test_bboxes(imgs, img_metas) - if rescale: - _det_bboxes = det_bboxes - else: - _det_bboxes = det_bboxes.clone() - _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1] - bbox_result = bbox2result(_det_bboxes, det_labels, - self.bbox_head.num_classes) - if not self.with_mask: - return bbox_result - segm_result = self.aug_test_mask( - imgs, img_metas, det_bboxes, det_labels, rescale=rescale) - return bbox_result, segm_result From 75ece9908c26b0de1efe82da73baff5b99666cad Mon Sep 17 00:00:00 2001 From: pangjm Date: Tue, 25 Sep 2018 23:34:53 +0800 Subject: [PATCH 17/81] add Faster RCNN & Mask RCNN training API and some test related --- mmdet/core/bbox_ops/__init__.py | 5 +- mmdet/core/losses/losses.py | 2 +- mmdet/core/mask_ops/__init__.py | 4 +- mmdet/core/mask_ops/mask_target.py | 20 +-- mmdet/core/mask_ops/utils.py | 21 ++- mmdet/models/detectors/__init__.py | 4 +- mmdet/models/detectors/faster_rcnn.py | 23 ++++ mmdet/models/detectors/mask_rcnn.py | 27 ++++ mmdet/models/detectors/two_stage.py | 139 +++++++++++++------- mmdet/models/mask_heads/fcn_mask_head.py | 12 +- mmdet/models/roi_extractors/single_level.py | 31 +++++ 11 files changed, 209 insertions(+), 79 deletions(-) diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox_ops/__init__.py index dbdbb970648..885dab67c5a 100644 --- a/mmdet/core/bbox_ops/__init__.py +++ b/mmdet/core/bbox_ops/__init__.py @@ -1,7 +1,6 @@ from .geometry import bbox_overlaps from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps, - bbox_sampling, sample_positives, sample_negatives, - sample_proposals) + bbox_sampling, sample_positives, sample_negatives) from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip, bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) @@ -12,5 +11,5 @@ 'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives', 'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', - 'bbox_target', 'sample_proposals' + 'bbox_target' ] diff --git a/mmdet/core/losses/losses.py b/mmdet/core/losses/losses.py index ce3f963ca71..4f183e13d8a 100644 --- a/mmdet/core/losses/losses.py +++ b/mmdet/core/losses/losses.py @@ -58,7 +58,7 @@ def mask_cross_entropy(pred, target, label): inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) pred_slice = pred[inds, label].squeeze(1) return F.binary_cross_entropy_with_logits( - pred_slice, target, reduction='sum')[None] + pred_slice, target, reduction='elementwise_mean')[None] def weighted_mask_cross_entropy(pred, target, weight, label): diff --git a/mmdet/core/mask_ops/__init__.py b/mmdet/core/mask_ops/__init__.py index 4669ba1f910..ea6179c5c38 100644 --- a/mmdet/core/mask_ops/__init__.py +++ b/mmdet/core/mask_ops/__init__.py @@ -1,11 +1,11 @@ from .segms import (flip_segms, polys_to_mask, mask_to_bbox, polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting, rle_mask_nms, rle_masks_to_boxes) -from .utils import split_combined_gt_polys +from .utils import split_combined_polys from .mask_target import mask_target __all__ = [ 'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box', 'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes', - 'split_combined_gt_polys', 'mask_target' + 'split_combined_polys', 'mask_target' ] diff --git a/mmdet/core/mask_ops/mask_target.py b/mmdet/core/mask_ops/mask_target.py index 3fb65e35874..5f635992aac 100644 --- 
a/mmdet/core/mask_ops/mask_target.py +++ b/mmdet/core/mask_ops/mask_target.py @@ -4,27 +4,31 @@ from .segms import polys_to_mask_wrt_box -def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_polys_list, - img_meta, cfg): +def mask_target(pos_proposals_list, + pos_assigned_gt_inds_list, + gt_polys_list, + img_meta, + cfg): cfg_list = [cfg for _ in range(len(pos_proposals_list))] - img_metas = [img_meta for _ in range(len(pos_proposals_list))] mask_targets = map(mask_target_single, pos_proposals_list, - pos_assigned_gt_inds_list, gt_polys_list, img_metas, + pos_assigned_gt_inds_list, gt_polys_list, img_meta, cfg_list) mask_targets = torch.cat(tuple(mask_targets), dim=0) return mask_targets -def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_polys, - img_meta, cfg): - +def mask_target_single(pos_proposals, + pos_assigned_gt_inds, + gt_polys, + img_meta, + cfg): mask_size = cfg.mask_size num_pos = pos_proposals.size(0) mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size)) if num_pos > 0: pos_proposals = pos_proposals.cpu().numpy() pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() - scale_factor = img_meta['scale_factor'][0].cpu().numpy() + scale_factor = img_meta['scale_factor'] for i in range(num_pos): bbox = pos_proposals[i, :] / scale_factor polys = gt_polys[pos_assigned_gt_inds[i]] diff --git a/mmdet/core/mask_ops/utils.py b/mmdet/core/mask_ops/utils.py index 4da4a8369b1..a68312b179e 100644 --- a/mmdet/core/mask_ops/utils.py +++ b/mmdet/core/mask_ops/utils.py @@ -1,7 +1,7 @@ import mmcv -def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask): +def split_combined_polys(polys, poly_lens, polys_per_mask): """Split the combined 1-D polys into masks. A mask is represented as a list of polys, and a poly is represented as @@ -9,9 +9,9 @@ def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask): tensor. Here we need to split the tensor into original representations. 
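The combined representation flattens every polygon of every mask in an image into one 1-D array, with poly_lens recording each polygon's length and polys_per_mask recording how many polygons make up each mask. A toy round trip with made-up lengths, using the same mmcv.slice_list helper the loop below relies on:

    import mmcv

    # one image: mask A has two polygons (lengths 6 and 4), mask B has one (length 8)
    polys_single = list(range(18))           # 6 + 4 + 8 flattened coordinates
    poly_lens_single = [6, 4, 8]             # length of each polygon
    polys_per_mask_single = [2, 1]           # polygons per mask

    split_polys = mmcv.slice_list(polys_single, poly_lens_single)
    mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
    print([len(m) for m in mask_polys])      # [2, 1]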
Args: - gt_polys (list): a list (length = image num) of 1-D tensors - gt_poly_lens (list): a list (length = image num) of poly length - num_polys_per_mask (list): a list (length = image num) of poly number + polys (list): a list (length = image num) of 1-D tensors + poly_lens (list): a list (length = image num) of poly length + polys_per_mask (list): a list (length = image num) of poly number of each mask Returns: @@ -19,13 +19,12 @@ def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask): list (length = poly num) of numpy array """ mask_polys_list = [] - for img_id in range(len(gt_polys)): - gt_polys_single = gt_polys[img_id].cpu().numpy() - gt_polys_lens_single = gt_poly_lens[img_id].cpu().numpy().tolist() - num_polys_per_mask_single = num_polys_per_mask[ - img_id].cpu().numpy().tolist() + for img_id in range(len(polys)): + polys_single = polys[img_id] + polys_lens_single = poly_lens[img_id].tolist() + polys_per_mask_single = polys_per_mask[img_id].tolist() - split_gt_polys = mmcv.slice_list(gt_polys_single, gt_polys_lens_single) - mask_polys = mmcv.slice_list(split_gt_polys, num_polys_per_mask_single) + split_polys = mmcv.slice_list(polys_single, polys_lens_single) + mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) mask_polys_list.append(mask_polys) return mask_polys_list diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py index fe3fc62a819..b8914c1e5d3 100644 --- a/mmdet/models/detectors/__init__.py +++ b/mmdet/models/detectors/__init__.py @@ -1,4 +1,6 @@ from .base import BaseDetector from .rpn import RPN +from .faster_rcnn import FasterRCNN +from .mask_rcnn import MaskRCNN -__all__ = ['BaseDetector', 'RPN'] +__all__ = ['BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN'] diff --git a/mmdet/models/detectors/faster_rcnn.py b/mmdet/models/detectors/faster_rcnn.py index e69de29bb2d..dd31f60c1d8 100644 --- a/mmdet/models/detectors/faster_rcnn.py +++ b/mmdet/models/detectors/faster_rcnn.py @@ -0,0 +1,23 @@ +from .two_stage import TwoStageDetector + + +class FasterRCNN(TwoStageDetector): + + def __init__(self, + backbone, + neck, + rpn_head, + bbox_roi_extractor, + bbox_head, + train_cfg, + test_cfg, + pretrained=None): + super(FasterRCNN, self).__init__( + backbone=backbone, + neck=neck, + rpn_head=rpn_head, + bbox_roi_extractor=bbox_roi_extractor, + bbox_head=bbox_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained) diff --git a/mmdet/models/detectors/mask_rcnn.py b/mmdet/models/detectors/mask_rcnn.py index e69de29bb2d..684598aa013 100644 --- a/mmdet/models/detectors/mask_rcnn.py +++ b/mmdet/models/detectors/mask_rcnn.py @@ -0,0 +1,27 @@ +from .two_stage import TwoStageDetector + + +class MaskRCNN(TwoStageDetector): + + def __init__(self, + backbone, + neck, + rpn_head, + bbox_roi_extractor, + bbox_head, + mask_roi_extractor, + mask_head, + train_cfg, + test_cfg, + pretrained=None): + super(MaskRCNN, self).__init__( + backbone=backbone, + neck=neck, + rpn_head=rpn_head, + bbox_roi_extractor=bbox_roi_extractor, + bbox_head=bbox_head, + mask_roi_extractor=mask_roi_extractor, + mask_head=mask_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained) diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py index db497fd6986..6889c8ab138 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/two_stage.py @@ -1,13 +1,14 @@ import torch import torch.nn as nn -from .base import Detector -from .testing_mixins import RPNTestMixin, BBoxTestMixin 
+from .base import BaseDetector +from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin from .. import builder -from mmdet.core import bbox2roi, bbox2result, sample_proposals +from mmdet.core import bbox2roi, bbox2result, split_combined_polys, multi_apply -class TwoStageDetector(Detector, RPNTestMixin, BBoxTestMixin): +class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, + MaskTestMixin): def __init__(self, backbone, @@ -15,13 +16,16 @@ def __init__(self, rpn_head=None, bbox_roi_extractor=None, bbox_head=None, + mask_roi_extractor=None, + mask_head=None, train_cfg=None, test_cfg=None, pretrained=None): - super(Detector, self).__init__() + super(TwoStageDetector, self).__init__() self.backbone = builder.build_backbone(backbone) self.with_neck = True if neck is not None else False + assert self.with_neck, "TwoStageDetector currently requires a neck (e.g. FPN)." if self.with_neck: self.neck = builder.build_neck(neck) @@ -35,6 +39,12 @@ def __init__(self, bbox_roi_extractor) self.bbox_head = builder.build_bbox_head(bbox_head) + self.with_mask = True if mask_head is not None else False + if self.with_mask: + self.mask_roi_extractor = builder.build_roi_extractor( + mask_roi_extractor) + self.mask_head = builder.build_mask_head(mask_head) + self.train_cfg = train_cfg self.test_cfg = test_cfg @@ -68,6 +78,7 @@ def forward_train(self, gt_bboxes, gt_bboxes_ignore, gt_labels, + gt_masks=None, proposals=None): losses = dict() @@ -80,54 +91,73 @@ def forward_train(self, rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) losses.update(rpn_losses) - proposal_inputs = rpn_outs + (img_meta, self.self.test_cfg.rpn) + proposal_inputs = rpn_outs + (img_meta, self.test_cfg.rpn) proposal_list = self.rpn_head.get_proposals(*proposal_inputs) - else: proposal_list = proposals - (pos_inds, neg_inds, pos_proposals, neg_proposals, - pos_assigned_gt_inds, - pos_gt_bboxes, pos_gt_labels) = sample_proposals( - proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels, - self.train_cfg.rcnn) - - labels, label_weights, bbox_targets, bbox_weights = \ - self.bbox_head.get_bbox_target( - pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, + if self.with_bbox: + rcnn_train_cfg_list = [ + self.train_cfg.rcnn for _ in range(len(proposal_list)) + ] + (pos_proposals, neg_proposals, pos_assigned_gt_inds, pos_gt_bboxes, + pos_gt_labels) = multi_apply( + self.bbox_roi_extractor.sample_proposals, proposal_list, + gt_bboxes, gt_bboxes_ignore, gt_labels, rcnn_train_cfg_list) + labels, label_weights, bbox_targets, bbox_weights = \ + self.bbox_head.get_bbox_target(pos_proposals, neg_proposals, + pos_gt_bboxes, pos_gt_labels, self.train_cfg.rcnn) + + rois = bbox2roi([ + torch.cat([pos, neg], dim=0) + for pos, neg in zip(pos_proposals, neg_proposals) + ]) + # TODO: a more flexible way to configure feat maps + roi_feats = self.bbox_roi_extractor( + x[:self.bbox_roi_extractor.num_inputs], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + + loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, + label_weights, bbox_targets, + bbox_weights) + losses.update(loss_bbox) + + if self.with_mask: + gt_polys = split_combined_polys(**gt_masks) + mask_targets = self.mask_head.get_mask_target( + pos_proposals, pos_assigned_gt_inds, gt_polys, img_meta, self.train_cfg.rcnn) - - rois = bbox2roi([ - torch.cat([pos, neg], dim=0) - for pos, neg in zip(pos_proposals, neg_proposals) - ]) - # TODO: a more flexible way to configurate feat maps - roi_feats = self.bbox_roi_extractor( -
x[:self.bbox_roi_extractor.num_inputs], rois) - cls_score, bbox_pred = self.bbox_head(roi_feats) - - loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, - label_weights, bbox_targets, - bbox_weights) - losses.update(loss_bbox) + pos_rois = bbox2roi(pos_proposals) + mask_feats = self.mask_roi_extractor( + x[:self.mask_roi_extractor.num_inputs], pos_rois) + mask_pred = self.mask_head(mask_feats) + loss_mask = self.mask_head.loss(mask_pred, mask_targets, + torch.cat(pos_gt_labels)) + losses.update(loss_mask) return losses def simple_test(self, img, img_meta, proposals=None, rescale=False): """Test without augmentation.""" + assert proposals is None, "Fast RCNN hasn't been implemented." + assert self.with_bbox, "Bbox head must be implemented." + x = self.extract_feat(img) - if proposals is None: - proposals = self.simple_test_rpn(x, img_meta) - if self.with_bbox: - # BUG proposals shape? - det_bboxes, det_labels = self.simple_test_bboxes( - x, img_meta, [proposals], rescale=rescale) - bbox_result = bbox2result(det_bboxes, det_labels, - self.bbox_head.num_classes) - return bbox_result + + proposal_list = self.simple_test_rpn( + x, img_meta, self.test_cfg.rpn) if proposals is None else proposals + + det_bboxes, det_labels = self.simple_test_bboxes( + x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale) + bbox_results = bbox2result(det_bboxes, det_labels, + self.bbox_head.num_classes) + + if self.with_mask: + segm_results = self.simple_test_mask( + x, img_meta, det_bboxes, det_labels, rescale=rescale) + return bbox_results, segm_results else: - proposals[:, :4] /= img_meta['scale_factor'].float() - return proposals.cpu().numpy() + return bbox_results def aug_test(self, imgs, img_metas, rescale=False): """Test with augmentations. @@ -135,15 +165,28 @@ def aug_test(self, imgs, img_metas, rescale=False): If rescale is False, then returned bboxes and masks will fit the scale of imgs[0].
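multi_apply, which the training path above uses to run sample_proposals once per image, maps a function over per-image argument lists and transposes the resulting tuples into a tuple of lists. A stand-alone sketch of that behavior with made-up inputs (the real helper lives in mmdet/core/utils/misc.py):

    from functools import partial

    def multi_apply_sketch(func, *args, **kwargs):
        # map over the per-image lists, then turn a list of result tuples
        # into a tuple of per-field lists
        pfunc = partial(func, **kwargs) if kwargs else func
        map_results = map(pfunc, *args)
        return tuple(map(list, zip(*map_results)))

    def add_sub(a, b):
        return a + b, a - b

    sums, diffs = multi_apply_sketch(add_sub, [1, 2, 3], [10, 20, 30])
    print(sums, diffs)   # [11, 22, 33] [-9, -18, -27]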
""" - proposals = self.aug_test_rpn( - self.extract_feats(imgs), img_metas, self.rpn_test_cfg) + # recompute self.extract_feats(imgs) because of 'yield' and memory + proposal_list = self.aug_test_rpn( + self.extract_feats(imgs), img_metas, self.test_cfg.rpn) det_bboxes, det_labels = self.aug_test_bboxes( - self.extract_feats(imgs), img_metas, proposals, self.rcnn_test_cfg) + self.extract_feats(imgs), img_metas, proposal_list, + self.test_cfg.rcnn) + if rescale: _det_bboxes = det_bboxes else: _det_bboxes = det_bboxes.clone() - _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1] - bbox_result = bbox2result(_det_bboxes, det_labels, - self.bbox_head.num_classes) - return bbox_result + _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] + bbox_results = bbox2result(_det_bboxes, det_labels, + self.bbox_head.num_classes) + + # det_bboxes always keep the original scale + if self.with_mask: + segm_results = self.aug_test_mask( + self.extract_feats(imgs), + img_metas, + det_bboxes, + det_labels) + return bbox_results, segm_results + else: + return bbox_results diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py index 016c05204bd..fa89ef7ffe3 100644 --- a/mmdet/models/mask_heads/fcn_mask_head.py +++ b/mmdet/models/mask_heads/fcn_mask_head.py @@ -93,11 +93,13 @@ def get_mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks, return mask_targets def loss(self, mask_pred, mask_targets, labels): + loss = dict() loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels) - return loss_mask + loss['loss_mask'] = loss_mask + return loss def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, - ori_scale): + ori_shape): """Get segmentation masks from mask_pred and bboxes Args: mask_pred (Tensor or ndarray): shape (n, #class+1, h, w). 
@@ -108,7 +110,7 @@ def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, det_labels (Tensor): shape (n, ) img_shape (Tensor): shape (3, ) rcnn_test_cfg (dict): rcnn testing config - rescale (bool): whether rescale masks to original image size + ori_shape: original image size Returns: list[list]: encoded masks """ @@ -118,8 +120,8 @@ def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, cls_segms = [[] for _ in range(self.num_classes - 1)] bboxes = det_bboxes.cpu().numpy()[:, :4] labels = det_labels.cpu().numpy() + 1 - img_h = ori_scale[0] - img_w = ori_scale[1] + img_h = ori_shape[0] + img_w = ori_shape[1] for i in range(bboxes.shape[0]): bbox = bboxes[i, :].astype(int) diff --git a/mmdet/models/roi_extractors/single_level.py b/mmdet/models/roi_extractors/single_level.py index 3e37ac83d6f..fa247a520be 100644 --- a/mmdet/models/roi_extractors/single_level.py +++ b/mmdet/models/roi_extractors/single_level.py @@ -4,6 +4,7 @@ import torch.nn as nn from mmdet import ops +from mmdet.core import bbox_assign, bbox_sampling class SingleLevelRoI(nn.Module): @@ -51,6 +52,36 @@ def map_roi_levels(self, rois, num_levels): target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() return target_lvls + def sample_proposals(self, proposals, gt_bboxes, gt_crowds, gt_labels, + cfg): + proposals = proposals[:, :4] + assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ + bbox_assign(proposals, gt_bboxes, gt_crowds, gt_labels, + cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr) + + if cfg.add_gt_as_proposals: + proposals = torch.cat([gt_bboxes, proposals], dim=0) + gt_assign_self = torch.arange( + 1, + len(gt_labels) + 1, + dtype=torch.long, + device=proposals.device) + assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds]) + assigned_labels = torch.cat([gt_labels, assigned_labels]) + + pos_inds, neg_inds = bbox_sampling( + assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, + cfg.neg_pos_ub, cfg.pos_balance_sampling, max_overlaps, + cfg.neg_balance_thr) + + pos_proposals = proposals[pos_inds] + neg_proposals = proposals[neg_inds] + pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1 + pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] + pos_gt_labels = assigned_labels[pos_inds] + + return (pos_proposals, neg_proposals, pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) + def forward(self, feats, rois): """Extract roi features with the roi layer. 
If multiple feature levels are used, then rois are mapped to corresponding levels according to From 2c293e1728c5848e1ab73761777184472052ccd6 Mon Sep 17 00:00:00 2001 From: pangjm Date: Tue, 25 Sep 2018 23:36:21 +0800 Subject: [PATCH 18/81] add simple & aug test for FasterRCNN & Mask RCNN --- mmdet/core/bbox_ops/sampling.py | 35 ---------------- mmdet/core/post_processing/merge_augs.py | 10 ++--- mmdet/core/utils/__init__.py | 15 +++++-- mmdet/core/utils/dist_utils.py | 4 -- mmdet/core/utils/hooks.py | 5 --- mmdet/core/utils/misc.py | 33 +++++++-------- mmdet/models/bbox_heads/bbox_head.py | 2 +- mmdet/models/detectors/rpn.py | 2 +- .../{testing_mixins.py => test_mixins.py} | 40 ++++++++----------- tools/configs/r50_fpn_frcnn_1x.py | 16 ++++++-- tools/configs/r50_fpn_maskrcnn_1x.py | 18 ++++++--- 11 files changed, 76 insertions(+), 104 deletions(-) rename mmdet/models/detectors/{testing_mixins.py => test_mixins.py} (82%) diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox_ops/sampling.py index d751f8ede43..bcee761e10e 100644 --- a/mmdet/core/bbox_ops/sampling.py +++ b/mmdet/core/bbox_ops/sampling.py @@ -255,38 +255,3 @@ def bbox_sampling(assigned_gt_inds, neg_hard_fraction) neg_inds = neg_inds.unique() return pos_inds, neg_inds - - -def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list, - gt_labels_list, cfg): - cfg_list = [cfg for _ in range(len(proposals_list))] - results = map(sample_proposals_single, proposals_list, gt_bboxes_list, - gt_crowds_list, gt_labels_list, cfg_list) - # list of tuple to tuple of list - return tuple(map(list, zip(*results))) - - -def sample_proposals_single(proposals, gt_bboxes, gt_crowds, gt_labels, cfg): - proposals = proposals[:, :4] - assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ - bbox_assign( - proposals, gt_bboxes, gt_crowds, gt_labels, cfg.pos_iou_thr, - cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr) - if cfg.add_gt_as_proposals: - proposals = torch.cat([gt_bboxes, proposals], dim=0) - gt_assign_self = torch.arange( - 1, len(gt_labels) + 1, dtype=torch.long, device=proposals.device) - assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds]) - assigned_labels = torch.cat([gt_labels, assigned_labels]) - - pos_inds, neg_inds = bbox_sampling( - assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub, - cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr) - pos_proposals = proposals[pos_inds] - neg_proposals = proposals[neg_inds] - pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1 - pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] - pos_gt_labels = assigned_labels[pos_inds] - - return (pos_inds, neg_inds, pos_proposals, neg_proposals, - pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py index 0472aaf80fd..2b8d861a674 100644 --- a/mmdet/core/post_processing/merge_augs.py +++ b/mmdet/core/post_processing/merge_augs.py @@ -54,9 +54,9 @@ def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): """ recovered_bboxes = [] for bboxes, img_info in zip(aug_bboxes, img_metas): - img_shape = img_info['img_shape'] - scale_factor = img_info['scale_factor'] - flip = img_info['flip'] + img_shape = img_info[0]['img_shape'] + scale_factor = img_info[0]['scale_factor'] + flip = img_info[0]['flip'] bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) recovered_bboxes.append(bboxes) bboxes = torch.stack(recovered_bboxes).mean(dim=0) @@ -75,7 +75,7 @@ def 
merge_aug_scores(aug_scores): return np.mean(aug_scores, axis=0) -def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None): +def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): """Merge augmented mask prediction. Args: @@ -87,7 +87,7 @@ def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None): tuple: (bboxes, scores) """ recovered_masks = [ - mask if not img_info['flip'][0] else mask[..., ::-1] + mask if not img_info[0]['flip'] else mask[..., ::-1] for mask, img_info in zip(aug_masks, img_metas) ] if weights is None: diff --git a/mmdet/core/utils/__init__.py b/mmdet/core/utils/__init__.py index 2b6e79d62e6..30c9c9e5c83 100644 --- a/mmdet/core/utils/__init__.py +++ b/mmdet/core/utils/__init__.py @@ -1,3 +1,12 @@ -from .dist_utils import * -from .hooks import * -from .misc import * +from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook, + DistSamplerSeedHook) +from .hooks import (EmptyCacheHook, DistEvalHook, DistEvalRecallHook, + CocoDistEvalmAPHook) +from .misc import tensor2imgs, unmap, results2json, multi_apply + +__all__ = [ + 'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook', + 'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook', + 'CocoDistEvalmAPHook', 'tensor2imgs', 'unmap', 'results2json', + 'multi_apply' +] diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index 7ffa7a09348..4bc986ca73f 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -8,10 +8,6 @@ from torch.nn.utils import clip_grad from mmcv.torchpack import Hook, OptimizerHook -__all__ = [ - 'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook' -] - def init_dist(launcher, backend='nccl', **kwargs): if mp.get_start_method(allow_none=True) is None: diff --git a/mmdet/core/utils/hooks.py b/mmdet/core/utils/hooks.py index 8a52d11ba41..05441601ba7 100644 --- a/mmdet/core/utils/hooks.py +++ b/mmdet/core/utils/hooks.py @@ -13,11 +13,6 @@ from ..eval import eval_recalls -__all__ = [ - 'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook', - 'CocoDistEvalmAPHook' -] - class EmptyCacheHook(Hook): diff --git a/mmdet/core/utils/misc.py b/mmdet/core/utils/misc.py index 02d0b40c1e7..d34ff94302c 100644 --- a/mmdet/core/utils/misc.py +++ b/mmdet/core/utils/misc.py @@ -4,9 +4,6 @@ import numpy as np from six.moves import map, zip -__all__ = ['tensor2imgs', 'multi_apply', 'unmap', 'results2json'] - - def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): num_imgs = tensor.size(0) mean = np.array(mean, dtype=np.float32) @@ -48,6 +45,21 @@ def xyxy2xywh(bbox): ] +def proposal2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + bboxes = results[idx] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = 1 + json_results.append(data) + return json_results + + def det2json(dataset, results): json_results = [] for idx in range(len(dataset)): @@ -85,21 +97,6 @@ def segm2json(dataset, results): return json_results -def proposal2json(dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - bboxes = results[idx] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = 1 - json_results.append(data) - return 
json_results - - def results2json(dataset, results, out_file): if isinstance(results[0], list): json_results = det2json(dataset, results) diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py index da923ecf2d0..941903aba54 100644 --- a/mmdet/models/bbox_heads/bbox_head.py +++ b/mmdet/models/bbox_heads/bbox_head.py @@ -109,7 +109,7 @@ def get_det_bboxes(self, # TODO: add clip here if rescale: - bboxes /= scale_factor.float() + bboxes /= scale_factor if nms_cfg is None: return bboxes, scores diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py index 8d3dfd17c6c..29173cce7a7 100644 --- a/mmdet/models/detectors/rpn.py +++ b/mmdet/models/detectors/rpn.py @@ -2,7 +2,7 @@ from mmdet.core import tensor2imgs, bbox_mapping from .base import BaseDetector -from .testing_mixins import RPNTestMixin +from .test_mixins import RPNTestMixin from .. import builder diff --git a/mmdet/models/detectors/testing_mixins.py b/mmdet/models/detectors/test_mixins.py similarity index 82% rename from mmdet/models/detectors/testing_mixins.py rename to mmdet/models/detectors/test_mixins.py index 364fd4e6d1a..2fd3b18d093 100644 --- a/mmdet/models/detectors/testing_mixins.py +++ b/mmdet/models/detectors/test_mixins.py @@ -50,7 +50,7 @@ def simple_test_bboxes(self, nms_cfg=rcnn_test_cfg) return det_bboxes, det_labels - def aug_test_bboxes(self, feats, img_metas, proposals, rcnn_test_cfg): + def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): aug_bboxes = [] aug_scores = [] for x, img_meta in zip(feats, img_metas): @@ -58,8 +58,9 @@ def aug_test_bboxes(self, feats, img_metas, proposals, rcnn_test_cfg): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] - proposals = bbox_mapping(proposals[:, :4], img_shape, scale_factor, - flip) + # TODO more flexible + proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, + scale_factor, flip) rois = bbox2roi([proposals]) # recompute feature maps to save GPU memory roi_feats = self.bbox_roi_extractor( @@ -70,16 +71,17 @@ def aug_test_bboxes(self, feats, img_metas, proposals, rcnn_test_cfg): cls_score, bbox_pred, img_shape, + scale_factor, rescale=False, nms_cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( - aug_bboxes, aug_scores, img_metas, self.rcnn_test_cfg) + aug_bboxes, aug_scores, img_metas, self.test_cfg.rcnn) det_bboxes, det_labels = multiclass_nms( - merged_bboxes, merged_scores, self.rcnn_test_cfg.score_thr, - self.rcnn_test_cfg.nms_thr, self.rcnn_test_cfg.max_per_img) + merged_bboxes, merged_scores, self.test_cfg.rcnn.score_thr, + self.test_cfg.rcnn.nms_thr, self.test_cfg.rcnn.max_per_img) return det_bboxes, det_labels @@ -92,7 +94,7 @@ def simple_test_mask(self, det_labels, rescale=False): # image shape of the first image in the batch (only one) - img_shape = img_meta[0]['img_shape'] + ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] @@ -106,21 +108,11 @@ def simple_test_mask(self, x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head(mask_feats) segm_result = self.mask_head.get_seg_masks( - mask_pred, det_bboxes, det_labels, img_shape, - self.rcnn_test_cfg, rescale) + mask_pred, det_bboxes, det_labels, self.test_cfg.rcnn, + ori_shape) return segm_result 
- def aug_test_mask(self, - feats, - img_metas, - det_bboxes, - det_labels, - rescale=False): - if rescale: - _det_bboxes = det_bboxes - else: - _det_bboxes = det_bboxes.clone() - _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] + def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] else: @@ -139,8 +131,10 @@ def aug_test_mask(self, # convert to numpy array to save memory aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, img_metas, - self.rcnn_test_cfg) + self.test_cfg.rcnn) + + ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head.get_seg_masks( - merged_masks, _det_bboxes, det_labels, - img_metas[0]['shape_scale'][0], self.rcnn_test_cfg, rescale) + merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, + ori_shape) return segm_result diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index 4ce93e623e3..156b8b2aa4e 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -90,7 +90,11 @@ img_scale=(1333, 800), img_norm_cfg=img_norm_cfg, size_divisor=32, - flip_ratio=0.5), + flip_ratio=0.5, + with_mask=False, + with_crowd=True, + with_label=True, + test_mode=False), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', @@ -98,7 +102,10 @@ img_scale=(1333, 800), flip_ratio=0, img_norm_cfg=img_norm_cfg, - size_divisor=32)) + size_divisor=32, + with_mask=False, + with_label=False, + test_mode=True)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) @@ -112,7 +119,7 @@ checkpoint_config = dict(interval=1) # yapf:disable log_config = dict( - interval=50, + interval=20, hooks=[ dict(type='TextLoggerHook'), # dict(type='TensorboardLoggerHook', log_dir=work_dir + '/log') @@ -120,7 +127,8 @@ # yapf:enable # runtime settings total_epochs = 12 -dist_params = dict(backend='nccl') +device_ids = range(8) +dist_params = dict(backend='nccl', port='29500') log_level = 'INFO' work_dir = './work_dirs/fpn_faster_rcnn_r50_1x' load_from = None diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index 931f051b356..5697bca4a58 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -103,7 +103,11 @@ img_scale=(1333, 800), img_norm_cfg=img_norm_cfg, size_divisor=32, - flip_ratio=0.5), + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True, + test_mode=False), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', @@ -111,7 +115,10 @@ img_scale=(1333, 800), flip_ratio=0, img_norm_cfg=img_norm_cfg, - size_divisor=32)) + size_divisor=32, + with_mask=False, + with_label=False, + test_mode=True)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) @@ -120,12 +127,12 @@ policy='step', warmup='linear', warmup_iters=500, - warmup_ratio=0.333, + warmup_ratio=1.0 / 3, step=[8, 11]) checkpoint_config = dict(interval=1) # yapf:disable log_config = dict( - interval=50, + interval=20, hooks=[ dict(type='TextLoggerHook'), # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), @@ -133,7 +140,8 @@ # yapf:enable # runtime settings total_epochs = 12 -dist_params = dict(backend='nccl') +device_ids = range(8) +dist_params = 
dict(backend='nccl', port='29500') log_level = 'INFO' work_dir = './work_dirs/fpn_mask_rcnn_r50_1x' load_from = None From 3fd28528014606363559260afd787592fabb5b6e Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 26 Sep 2018 17:04:37 +0800 Subject: [PATCH 19/81] mmdet.nn.parallel -> mmdet.core.parallel --- mmdet/core/__init__.py | 1 + mmdet/{nn => core}/parallel/__init__.py | 0 mmdet/{nn => core}/parallel/_functions.py | 0 mmdet/{nn => core}/parallel/data_parallel.py | 0 mmdet/{nn => core}/parallel/distributed.py | 0 mmdet/{nn => core}/parallel/scatter_gather.py | 3 ++- mmdet/core/utils/hooks.py | 4 ++-- mmdet/nn/__init__.py | 1 - tools/test.py | 3 +-- tools/train.py | 4 ++-- 10 files changed, 8 insertions(+), 8 deletions(-) rename mmdet/{nn => core}/parallel/__init__.py (100%) rename mmdet/{nn => core}/parallel/_functions.py (100%) rename mmdet/{nn => core}/parallel/data_parallel.py (100%) rename mmdet/{nn => core}/parallel/distributed.py (100%) rename mmdet/{nn => core}/parallel/scatter_gather.py (99%) delete mode 100644 mmdet/nn/__init__.py diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py index 0327750379f..1eb03f76acd 100644 --- a/mmdet/core/__init__.py +++ b/mmdet/core/__init__.py @@ -3,5 +3,6 @@ from .mask_ops import * from .losses import * from .eval import * +from .parallel import * from .post_processing import * from .utils import * diff --git a/mmdet/nn/parallel/__init__.py b/mmdet/core/parallel/__init__.py similarity index 100% rename from mmdet/nn/parallel/__init__.py rename to mmdet/core/parallel/__init__.py diff --git a/mmdet/nn/parallel/_functions.py b/mmdet/core/parallel/_functions.py similarity index 100% rename from mmdet/nn/parallel/_functions.py rename to mmdet/core/parallel/_functions.py diff --git a/mmdet/nn/parallel/data_parallel.py b/mmdet/core/parallel/data_parallel.py similarity index 100% rename from mmdet/nn/parallel/data_parallel.py rename to mmdet/core/parallel/data_parallel.py diff --git a/mmdet/nn/parallel/distributed.py b/mmdet/core/parallel/distributed.py similarity index 100% rename from mmdet/nn/parallel/distributed.py rename to mmdet/core/parallel/distributed.py diff --git a/mmdet/nn/parallel/scatter_gather.py b/mmdet/core/parallel/scatter_gather.py similarity index 99% rename from mmdet/nn/parallel/scatter_gather.py rename to mmdet/core/parallel/scatter_gather.py index f5f7c588f4b..02849dc01bc 100644 --- a/mmdet/nn/parallel/scatter_gather.py +++ b/mmdet/core/parallel/scatter_gather.py @@ -1,6 +1,7 @@ import torch -from ._functions import Scatter from torch.nn.parallel._functions import Scatter as OrigScatter + +from ._functions import Scatter from mmdet.datasets.utils import DataContainer diff --git a/mmdet/core/utils/hooks.py b/mmdet/core/utils/hooks.py index 05441601ba7..9772d4d64f1 100644 --- a/mmdet/core/utils/hooks.py +++ b/mmdet/core/utils/hooks.py @@ -7,11 +7,11 @@ import numpy as np import torch from mmcv.torchpack import Hook -from mmdet.datasets.loader import collate -from mmdet.nn.parallel import scatter from pycocotools.cocoeval import COCOeval from ..eval import eval_recalls +from ..parallel import scatter +from mmdet.datasets.loader import collate class EmptyCacheHook(Hook): diff --git a/mmdet/nn/__init__.py b/mmdet/nn/__init__.py deleted file mode 100644 index 1b627f5e7b8..00000000000 --- a/mmdet/nn/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .parallel import MMDataParallel, MMDistributedDataParallel diff --git a/tools/test.py b/tools/test.py index 773136d4c8a..0a43cdc3165 100644 --- a/tools/test.py +++ b/tools/test.py @@ 
-5,10 +5,9 @@ from mmcv.torchpack import load_checkpoint, parallel_test, obj_from_dict from mmdet import datasets -from mmdet.core import results2json, coco_eval +from mmdet.core import scatter, MMDataParallel, results2json, coco_eval from mmdet.datasets.loader import collate, build_dataloader from mmdet.models import build_detector, detectors -from mmdet.nn.parallel import scatter, MMDataParallel def single_test(model, data_loader, show=False): diff --git a/tools/train.py b/tools/train.py index 8fd43807967..fd47b137562 100644 --- a/tools/train.py +++ b/tools/train.py @@ -8,10 +8,10 @@ from mmcv.torchpack import Runner, obj_from_dict from mmdet import datasets -from mmdet.core import init_dist, DistOptimizerHook, DistSamplerSeedHook +from mmdet.core import (init_dist, DistOptimizerHook, DistSamplerSeedHook, + MMDataParallel, MMDistributedDataParallel) from mmdet.datasets.loader import build_dataloader from mmdet.models import build_detector -from mmdet.nn.parallel import MMDataParallel, MMDistributedDataParallel def parse_losses(losses): From 1d2d355517ac226269359d42a1d4b6c354ef9f13 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 26 Sep 2018 21:13:46 +0800 Subject: [PATCH 20/81] fix eval hooks --- mmdet/core/eval/__init__.py | 11 +- mmdet/core/eval/coco_utils.py | 75 +++++++++ mmdet/core/eval/eval_hooks.py | 168 ++++++++++++++++++++ mmdet/core/utils/__init__.py | 9 +- mmdet/core/utils/dist_utils.py | 4 +- mmdet/core/utils/hooks.py | 229 --------------------------- mmdet/core/utils/misc.py | 75 +-------- tools/configs/r50_fpn_frcnn_1x.py | 18 ++- tools/configs/r50_fpn_maskrcnn_1x.py | 18 ++- tools/configs/r50_fpn_rpn_1x.py | 20 ++- tools/dist_train.sh | 2 +- tools/test.py | 2 +- tools/train.py | 10 +- 13 files changed, 309 insertions(+), 332 deletions(-) create mode 100644 mmdet/core/eval/eval_hooks.py diff --git a/mmdet/core/eval/__init__.py b/mmdet/core/eval/__init__.py index c46d860d4b1..b5df6595a0e 100644 --- a/mmdet/core/eval/__init__.py +++ b/mmdet/core/eval/__init__.py @@ -1,14 +1,17 @@ from .class_names import (voc_classes, imagenet_det_classes, imagenet_vid_classes, coco_classes, dataset_aliases, get_classes) -from .coco_utils import coco_eval +from .coco_utils import coco_eval, results2json +from .eval_hooks import DistEvalHook, DistEvalRecallHook, CocoDistEvalmAPHook from .mean_ap import average_precision, eval_map, print_map_summary from .recall import (eval_recalls, print_recall_summary, plot_num_recall, plot_iou_recall) __all__ = [ 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', - 'coco_classes', 'dataset_aliases', 'get_classes', 'average_precision', - 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', - 'plot_num_recall', 'plot_iou_recall', 'coco_eval' + 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', + 'results2json', 'DistEvalHook', 'DistEvalRecallHook', + 'CocoDistEvalmAPHook', 'average_precision', 'eval_map', + 'print_map_summary', 'eval_recalls', 'print_recall_summary', + 'plot_num_recall', 'plot_iou_recall' ] diff --git a/mmdet/core/eval/coco_utils.py b/mmdet/core/eval/coco_utils.py index cff6f678e4f..719e70a75e0 100644 --- a/mmdet/core/eval/coco_utils.py +++ b/mmdet/core/eval/coco_utils.py @@ -1,4 +1,5 @@ import mmcv +import numpy as np from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval @@ -24,3 +25,77 @@ def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)): cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() + + +def xyxy2xywh(bbox): + _bbox = bbox.tolist() 
+ return [ + _bbox[0], + _bbox[1], + _bbox[2] - _bbox[0] + 1, + _bbox[3] - _bbox[1] + 1, + ] + + +def proposal2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + bboxes = results[idx] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = 1 + json_results.append(data) + return json_results + + +def det2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + result = results[idx] + for label in range(len(result)): + bboxes = result[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + json_results.append(data) + return json_results + + +def segm2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + det, seg = results[idx] + for label in range(len(det)): + bboxes = det[label] + segms = seg[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + segms[i]['counts'] = segms[i]['counts'].decode() + data['segmentation'] = segms[i] + json_results.append(data) + return json_results + + +def results2json(dataset, results, out_file): + if isinstance(results[0], list): + json_results = det2json(dataset, results) + elif isinstance(results[0], tuple): + json_results = segm2json(dataset, results) + elif isinstance(results[0], np.ndarray): + json_results = proposal2json(dataset, results) + else: + raise TypeError('invalid type of results') + mmcv.dump(json_results, out_file) diff --git a/mmdet/core/eval/eval_hooks.py b/mmdet/core/eval/eval_hooks.py new file mode 100644 index 00000000000..2393449bc68 --- /dev/null +++ b/mmdet/core/eval/eval_hooks.py @@ -0,0 +1,168 @@ +import os +import os.path as osp +import shutil +import time + +import mmcv +import numpy as np +import torch +from mmcv.torchpack import Hook, obj_from_dict +from pycocotools.cocoeval import COCOeval +from torch.utils.data import Dataset + +from .coco_utils import results2json +from .recall import eval_recalls +from ..parallel import scatter +from mmdet import datasets +from mmdet.datasets.loader import collate + + +class DistEvalHook(Hook): + + def __init__(self, dataset, interval=1): + if isinstance(dataset, Dataset): + self.dataset = dataset + elif isinstance(dataset, dict): + self.dataset = obj_from_dict(dataset, datasets, + {'test_mode': True}) + else: + raise TypeError( + 'dataset must be a Dataset object or a dict, not {}'.format( + type(dataset))) + self.interval = interval + self.lock_dir = None + + def _barrier(self, rank, world_size): + """Due to some issues with `torch.distributed.barrier()`, we have to + implement this ugly barrier function. 
+ """ + if rank == 0: + for i in range(1, world_size): + tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) + while not (osp.exists(tmp)): + time.sleep(1) + for i in range(1, world_size): + tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) + os.remove(tmp) + else: + tmp = osp.join(self.lock_dir, '{}.pkl'.format(rank)) + mmcv.dump([], tmp) + while osp.exists(tmp): + time.sleep(1) + + def before_run(self, runner): + self.lock_dir = osp.join(runner.work_dir, '.lock_map_hook') + if runner.rank == 0: + if osp.exists(self.lock_dir): + shutil.rmtree(self.lock_dir) + mmcv.mkdir_or_exist(self.lock_dir) + + def after_train_epoch(self, runner): + if not self.every_n_epochs(runner, self.interval): + return + runner.model.eval() + results = [None for _ in range(len(self.dataset))] + prog_bar = mmcv.ProgressBar(len(self.dataset)) + for idx in range(runner.rank, len(self.dataset), runner.world_size): + data = self.dataset[idx] + data_gpu = scatter( + collate([data], samples_per_gpu=1), + [torch.cuda.current_device()])[0] + + # compute output + with torch.no_grad(): + result = runner.model( + **data_gpu, return_loss=False, rescale=True) + results[idx] = result + + batch_size = runner.world_size + for _ in range(batch_size): + prog_bar.update() + + if runner.rank == 0: + print('\n') + self._barrier(runner.rank, runner.world_size) + for i in range(1, runner.world_size): + tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i)) + tmp_results = mmcv.load(tmp_file) + for idx in range(i, len(results), runner.world_size): + results[idx] = tmp_results[idx] + os.remove(tmp_file) + self.evaluate(runner, results) + else: + tmp_file = osp.join(runner.work_dir, + 'temp_{}.pkl'.format(runner.rank)) + mmcv.dump(results, tmp_file) + self._barrier(runner.rank, runner.world_size) + self._barrier(runner.rank, runner.world_size) + + def evaluate(self): + raise NotImplementedError + + +class DistEvalRecallHook(DistEvalHook): + + def __init__(self, + dataset, + proposal_nums=(100, 300, 1000), + iou_thrs=np.arange(0.5, 0.96, 0.05)): + super(DistEvalRecallHook, self).__init__(dataset) + self.proposal_nums = np.array(proposal_nums, dtype=np.int32) + self.iou_thrs = np.array(iou_thrs, dtype=np.float32) + + def evaluate(self, runner, results): + # the official coco evaluation is too slow, here we use our own + # implementation instead, which may get slightly different results + gt_bboxes = [] + for i in range(len(self.dataset)): + img_id = self.dataset.img_ids[i] + ann_ids = self.dataset.coco.getAnnIds(imgIds=img_id) + ann_info = self.dataset.coco.loadAnns(ann_ids) + if len(ann_info) == 0: + gt_bboxes.append(np.zeros((0, 4))) + continue + bboxes = [] + for ann in ann_info: + if ann.get('ignore', False) or ann['iscrowd']: + continue + x1, y1, w, h = ann['bbox'] + bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1]) + bboxes = np.array(bboxes, dtype=np.float32) + if bboxes.shape[0] == 0: + bboxes = np.zeros((0, 4)) + gt_bboxes.append(bboxes) + + recalls = eval_recalls( + gt_bboxes, + results, + self.proposal_nums, + self.iou_thrs, + print_summary=False) + ar = recalls.mean(axis=1) + for i, num in enumerate(self.proposal_nums): + runner.log_buffer.output['AR@{}'.format(num)] = ar[i] + runner.log_buffer.ready = True + + +class CocoDistEvalmAPHook(DistEvalHook): + + def evaluate(self, runner, results): + tmp_file = osp.join(runner.work_dir, 'temp_0.json') + results2json(self.dataset, results, tmp_file) + + res_types = ['bbox', + 'segm'] if runner.model.module.with_mask else ['bbox'] + cocoGt = self.dataset.coco + cocoDt = 
cocoGt.loadRes(tmp_file) + imgIds = cocoGt.getImgIds() + for res_type in res_types: + iou_type = res_type + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + field = '{}_mAP'.format(res_type) + runner.log_buffer.output[field] = cocoEval.stats[0] + runner.log_buffer.ready = True + os.remove(tmp_file) diff --git a/mmdet/core/utils/__init__.py b/mmdet/core/utils/__init__.py index 30c9c9e5c83..e04da6a9a5c 100644 --- a/mmdet/core/utils/__init__.py +++ b/mmdet/core/utils/__init__.py @@ -1,12 +1,9 @@ from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook, DistSamplerSeedHook) -from .hooks import (EmptyCacheHook, DistEvalHook, DistEvalRecallHook, - CocoDistEvalmAPHook) -from .misc import tensor2imgs, unmap, results2json, multi_apply +from .hooks import EmptyCacheHook +from .misc import tensor2imgs, unmap, multi_apply __all__ = [ 'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook', - 'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook', - 'CocoDistEvalmAPHook', 'tensor2imgs', 'unmap', 'results2json', - 'multi_apply' + 'EmptyCacheHook', 'tensor2imgs', 'unmap', 'multi_apply' ] diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index 4bc986ca73f..2a5d7659df7 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -39,7 +39,7 @@ def _init_dist_slurm(backend, **kwargs): # modified from https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9 -def coalesce_all_reduce(tensors): +def all_reduce_coalesced(tensors): buckets = OrderedDict() for tensor in tensors: tp = tensor.type() @@ -64,7 +64,7 @@ def reduce_grads(model, coalesce=True): if param.requires_grad and param.grad is not None ] if coalesce: - coalesce_all_reduce(grads) + all_reduce_coalesced(grads) else: for tensor in grads: dist.all_reduce(tensor) diff --git a/mmdet/core/utils/hooks.py b/mmdet/core/utils/hooks.py index 9772d4d64f1..72eb3438efa 100644 --- a/mmdet/core/utils/hooks.py +++ b/mmdet/core/utils/hooks.py @@ -1,17 +1,5 @@ -import os -import os.path as osp -import shutil -import time - -import mmcv -import numpy as np import torch from mmcv.torchpack import Hook -from pycocotools.cocoeval import COCOeval - -from ..eval import eval_recalls -from ..parallel import scatter -from mmdet.datasets.loader import collate class EmptyCacheHook(Hook): @@ -21,220 +9,3 @@ def before_epoch(self, runner): def after_epoch(self, runner): torch.cuda.empty_cache() - - -class DistEvalHook(Hook): - - def __init__(self, dataset, interval=1): - self.dataset = dataset - self.interval = interval - self.lock_dir = None - - def _barrier(self, rank, world_size): - """Due to some issues with `torch.distributed.barrier()`, we have to - implement this ugly barrier function. 
- """ - if rank == 0: - for i in range(1, world_size): - tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) - while not (osp.exists(tmp)): - time.sleep(1) - for i in range(1, world_size): - tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) - os.remove(tmp) - else: - tmp = osp.join(self.lock_dir, '{}.pkl'.format(rank)) - mmcv.dump([], tmp) - while osp.exists(tmp): - time.sleep(1) - - def before_run(self, runner): - self.lock_dir = osp.join(runner.work_dir, '.lock_map_hook') - if runner.rank == 0: - if osp.exists(self.lock_dir): - shutil.rmtree(self.lock_dir) - mmcv.mkdir_or_exist(self.lock_dir) - - def after_train_epoch(self, runner): - if not self.every_n_epochs(runner, self.interval): - return - runner.model.eval() - results = [None for _ in range(len(self.dataset))] - prog_bar = mmcv.ProgressBar(len(self.dataset)) - for idx in range(runner.rank, len(self.dataset), runner.world_size): - data = self.dataset[idx] - device_id = torch.cuda.current_device() - imgs_data = tuple( - scatter(collate([data], samples_per_gpu=1), [device_id])[0]) - - # compute output - with torch.no_grad(): - result = runner.model( - *imgs_data, - return_loss=False, - return_bboxes=True, - rescale=True) - results[idx] = result - - batch_size = runner.world_size - for _ in range(batch_size): - prog_bar.update() - - if runner.rank == 0: - print('\n') - self._barrier(runner.rank, runner.world_size) - for i in range(1, runner.world_size): - tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i)) - tmp_results = mmcv.load(tmp_file) - for idx in range(i, len(results), runner.world_size): - results[idx] = tmp_results[idx] - os.remove(tmp_file) - self.evaluate(runner, results) - else: - tmp_file = osp.join(runner.work_dir, - 'temp_{}.pkl'.format(runner.rank)) - mmcv.dump(results, tmp_file) - self._barrier(runner.rank, runner.world_size) - self._barrier(runner.rank, runner.world_size) - - def evaluate(self): - raise NotImplementedError - - -class CocoEvalMixin(object): - - def _xyxy2xywh(self, bbox): - _bbox = bbox.tolist() - return [ - _bbox[0], - _bbox[1], - _bbox[2] - _bbox[0] + 1, - _bbox[3] - _bbox[1] + 1, - ] - - def det2json(self, dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - result = results[idx] - for label in range(len(result)): - bboxes = result[label] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = self._xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = dataset.cat_ids[label] - json_results.append(data) - return json_results - - def segm2json(self, dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - det, seg = results[idx] - for label in range(len(det)): - bboxes = det[label] - segms = seg[label] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = self._xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = dataset.cat_ids[label] - segms[i]['counts'] = segms[i]['counts'].decode() - data['segmentation'] = segms[i] - json_results.append(data) - return json_results - - def proposal2json(self, dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - bboxes = results[idx] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = self._xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = 1 - json_results.append(data) - return 
json_results - - def results2json(self, dataset, results, out_file): - if isinstance(results[0], list): - json_results = self.det2json(dataset, results) - elif isinstance(results[0], tuple): - json_results = self.segm2json(dataset, results) - elif isinstance(results[0], np.ndarray): - json_results = self.proposal2json(dataset, results) - else: - raise TypeError('invalid type of results') - mmcv.dump(json_results, out_file, file_format='json') - - -class DistEvalRecallHook(DistEvalHook): - - def __init__(self, - dataset, - proposal_nums=(100, 300, 1000), - iou_thrs=np.arange(0.5, 0.96, 0.05)): - super(DistEvalRecallHook, self).__init__(dataset) - self.proposal_nums = np.array(proposal_nums, dtype=np.int32) - self.iou_thrs = np.array(iou_thrs, dtype=np.float32) - - def evaluate(self, runner, results): - # official coco evaluation is too slow, here we use our own - # implementation, which may get slightly different results - gt_bboxes = [] - for i in range(len(self.dataset)): - img_id = self.dataset.img_ids[i] - ann_ids = self.dataset.coco.getAnnIds(imgIds=img_id) - ann_info = self.dataset.coco.loadAnns(ann_ids) - if len(ann_info) == 0: - gt_bboxes.append(np.zeros((0, 4))) - continue - bboxes = [] - for ann in ann_info: - if ann.get('ignore', False) or ann['iscrowd']: - continue - x1, y1, w, h = ann['bbox'] - bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1]) - bboxes = np.array(bboxes, dtype=np.float32) - if bboxes.shape[0] == 0: - bboxes = np.zeros((0, 4)) - gt_bboxes.append(bboxes) - - recalls = eval_recalls( - gt_bboxes, - results, - self.proposal_nums, - self.iou_thrs, - print_summary=False) - ar = recalls.mean(axis=1) - for i, num in enumerate(self.proposal_nums): - runner.log_buffer.output['AR@{}'.format(num)] = ar[i] - runner.log_buffer.ready = True - - -class CocoDistEvalmAPHook(DistEvalHook, CocoEvalMixin): - - def evaluate(self, runner, results): - tmp_file = osp.join(runner.work_dir, 'temp_0.json') - self.results2json(self.dataset, results, tmp_file) - - res_types = ['bbox', 'segm'] if runner.model.with_mask else ['bbox'] - cocoGt = self.dataset.coco - cocoDt = cocoGt.loadRes(tmp_file) - imgIds = cocoGt.getImgIds() - for res_type in res_types: - iou_type = res_type - cocoEval = COCOeval(cocoGt, cocoDt, iou_type) - cocoEval.params.imgIds = imgIds - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - field = '{}_mAP'.format(res_type) - runner.log_buffer.output[field] = cocoEval.stats[0] - runner.log_buffer.ready = True - os.remove(tmp_file) diff --git a/mmdet/core/utils/misc.py b/mmdet/core/utils/misc.py index d34ff94302c..fd8211ef68d 100644 --- a/mmdet/core/utils/misc.py +++ b/mmdet/core/utils/misc.py @@ -4,6 +4,7 @@ import numpy as np from six.moves import map, zip + def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): num_imgs = tensor.size(0) mean = np.array(mean, dtype=np.float32) @@ -33,77 +34,3 @@ def unmap(data, count, inds, fill=0): ret = data.new_full(new_size, fill) ret[inds, :] = data return ret - - -def xyxy2xywh(bbox): - _bbox = bbox.tolist() - return [ - _bbox[0], - _bbox[1], - _bbox[2] - _bbox[0] + 1, - _bbox[3] - _bbox[1] + 1, - ] - - -def proposal2json(dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - bboxes = results[idx] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = 1 - json_results.append(data) - return json_results - - -def det2json(dataset, 
results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - result = results[idx] - for label in range(len(result)): - bboxes = result[label] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = dataset.cat_ids[label] - json_results.append(data) - return json_results - - -def segm2json(dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - det, seg = results[idx] - for label in range(len(det)): - bboxes = det[label] - segms = seg[label] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = dataset.cat_ids[label] - segms[i]['counts'] = segms[i]['counts'].decode() - data['segmentation'] = segms[i] - json_results.append(data) - return json_results - - -def results2json(dataset, results, out_file): - if isinstance(results[0], list): - json_results = det2json(dataset, results) - elif isinstance(results[0], tuple): - json_results = segm2json(dataset, results) - elif isinstance(results[0], np.ndarray): - json_results = proposal2json(dataset, results) - else: - raise TypeError('invalid type of results') - mmcv.dump(json_results, out_file) diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index 156b8b2aa4e..23903e084e2 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -93,16 +93,26 @@ flip_ratio=0.5, with_mask=False, with_crowd=True, - with_label=True, - test_mode=False), - test=dict( + with_label=True), + val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, flip_ratio=0, + with_mask=False, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), img_norm_cfg=img_norm_cfg, size_divisor=32, + flip_ratio=0, with_mask=False, with_label=False, test_mode=True)) @@ -128,7 +138,7 @@ # runtime settings total_epochs = 12 device_ids = range(8) -dist_params = dict(backend='nccl', port='29500') +dist_params = dict(backend='gloo') log_level = 'INFO' work_dir = './work_dirs/fpn_faster_rcnn_r50_1x' load_from = None diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index 5697bca4a58..41c2a1476dd 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -106,16 +106,26 @@ flip_ratio=0.5, with_mask=True, with_crowd=True, - with_label=True, - test_mode=False), - test=dict( + with_label=True), + val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), img_norm_cfg=img_norm_cfg, size_divisor=32, + flip_ratio=0, with_mask=False, with_label=False, test_mode=True)) @@ -141,7 +151,7 @@ # runtime settings total_epochs = 12 device_ids = range(8) -dist_params = dict(backend='nccl', port='29500') +dist_params = 
dict(backend='gloo') log_level = 'INFO' work_dir = './work_dirs/fpn_mask_rcnn_r50_1x' load_from = None diff --git a/tools/configs/r50_fpn_rpn_1x.py b/tools/configs/r50_fpn_rpn_1x.py index a00cab9de80..1f14f72235b 100644 --- a/tools/configs/r50_fpn_rpn_1x.py +++ b/tools/configs/r50_fpn_rpn_1x.py @@ -65,16 +65,26 @@ flip_ratio=0.5, with_mask=False, with_crowd=False, - with_label=False, - test_mode=False), - test=dict( + with_label=False), + val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, flip_ratio=0, + with_mask=False, + with_crowd=False, + with_label=False), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), img_norm_cfg=img_norm_cfg, size_divisor=32, + flip_ratio=0, with_mask=False, with_label=False, test_mode=True)) @@ -103,5 +113,5 @@ log_level = 'INFO' work_dir = './work_dirs/fpn_rpn_r50_1x' load_from = None -resume_from = None -workflow = [('train', 1)] +resume_from = None +workflow = [('train', 1), ('val', 1)] diff --git a/tools/dist_train.sh b/tools/dist_train.sh index 8b79c6158da..7bb903bd3dc 100755 --- a/tools/dist_train.sh +++ b/tools/dist_train.sh @@ -2,4 +2,4 @@ PYTHON=${PYTHON:-"python"} -$PYTHON -m torch.distributed.launch --nproc_per_node=$2 train.py $1 --launcher pytorch \ No newline at end of file +$PYTHON -m torch.distributed.launch --nproc_per_node=$2 train.py $1 --launcher pytorch $3 diff --git a/tools/test.py b/tools/test.py index 0a43cdc3165..4c87f4eeee7 100644 --- a/tools/test.py +++ b/tools/test.py @@ -59,7 +59,7 @@ def main(): cfg.model.pretrained = None cfg.data.test.test_mode = True - dataset = obj_from_dict(cfg.data.test, datasets) + dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True)) if args.gpus == 1: model = build_detector( cfg.model, train_cfg=None, test_cfg=cfg.test_cfg) diff --git a/tools/train.py b/tools/train.py index fd47b137562..6e1b9d91aaf 100644 --- a/tools/train.py +++ b/tools/train.py @@ -9,9 +9,10 @@ from mmdet import datasets from mmdet.core import (init_dist, DistOptimizerHook, DistSamplerSeedHook, - MMDataParallel, MMDistributedDataParallel) + MMDataParallel, MMDistributedDataParallel, + DistEvalRecallHook, CocoDistEvalmAPHook) from mmdet.datasets.loader import build_dataloader -from mmdet.models import build_detector +from mmdet.models import build_detector, RPN def parse_losses(losses): @@ -109,6 +110,11 @@ def main(): cfg.checkpoint_config, cfg.log_config) if dist: runner.register_hook(DistSamplerSeedHook()) + # register eval hooks + if isinstance(model.module, RPN): + runner.register_hook(DistEvalRecallHook(cfg.data.val)) + elif cfg.data.val.type == 'CocoDataset': + runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) if cfg.resume_from: runner.resume(cfg.resume_from) From 4802ff6ec4e9f37c6c5ca91bbf989e86ba20d2a4 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 26 Sep 2018 23:18:27 +0800 Subject: [PATCH 21/81] remove comments --- mmdet/core/parallel/scatter_gather.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mmdet/core/parallel/scatter_gather.py b/mmdet/core/parallel/scatter_gather.py index 02849dc01bc..5a7d4c146e0 100644 --- a/mmdet/core/parallel/scatter_gather.py +++ b/mmdet/core/parallel/scatter_gather.py @@ -16,7 +16,6 @@ def scatter_map(obj): if isinstance(obj, torch.Tensor): return OrigScatter.apply(target_gpus, None, dim, obj) if isinstance(obj, 
DataContainer): - # print('data container', obj) if obj.cpu_only: return obj.data else: @@ -24,14 +23,10 @@ def scatter_map(obj): if isinstance(obj, tuple) and len(obj) > 0: return list(zip(*map(scatter_map, obj))) if isinstance(obj, list) and len(obj) > 0: - # print('list', obj) out = list(map(list, zip(*map(scatter_map, obj)))) - # print('list out', out) return out if isinstance(obj, dict) and len(obj) > 0: - # print('dict\n', obj) out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) - # print('dict output\n', out) return out return [obj for targets in target_gpus] From ace89f7c610894dbdbf5af129e5faee7c5d70552 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 26 Sep 2018 23:42:48 +0800 Subject: [PATCH 22/81] suppress logging for processes whose rank > 0 --- mmdet/models/backbones/resnet.py | 5 ++++- mmdet/models/detectors/base.py | 10 +++++---- mmdet/models/detectors/rpn.py | 3 +-- mmdet/models/detectors/two_stage.py | 32 ++++++++++++++--------------- tools/configs/r50_fpn_rpn_1x.py | 2 +- tools/dist_train.sh | 2 +- tools/train.py | 21 ++++++++++++++++--- 7 files changed, 46 insertions(+), 29 deletions(-) diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py index 51bacc49970..e1ff4de45ea 100644 --- a/mmdet/models/backbones/resnet.py +++ b/mmdet/models/backbones/resnet.py @@ -1,4 +1,6 @@ +import logging import math + import torch.nn as nn import torch.utils.checkpoint as cp from mmcv.torchpack import load_checkpoint @@ -241,7 +243,8 @@ def __init__(self, def init_weights(self, pretrained=None): if isinstance(pretrained, str): - load_checkpoint(self, pretrained, strict=False) + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: for m in self.modules(): if isinstance(m, nn.Conv2d): diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py index 494f62208b1..3b2040312ee 100644 --- a/mmdet/models/detectors/base.py +++ b/mmdet/models/detectors/base.py @@ -1,3 +1,4 @@ +import logging from abc import ABCMeta, abstractmethod import torch @@ -12,10 +13,6 @@ class BaseDetector(nn.Module): def __init__(self): super(BaseDetector, self).__init__() - @abstractmethod - def init_weights(self): - pass - @abstractmethod def extract_feat(self, imgs): pass @@ -39,6 +36,11 @@ def simple_test(self, img, img_meta, **kwargs): def aug_test(self, imgs, img_metas, **kwargs): pass + def init_weights(self, pretrained=None): + if pretrained is not None: + logger = logging.getLogger() + logger.info('load model from: {}'.format(pretrained)) + def forward_test(self, imgs, img_metas, **kwargs): for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: if not isinstance(var, list): diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py index 29173cce7a7..a291006fdd5 100644 --- a/mmdet/models/detectors/rpn.py +++ b/mmdet/models/detectors/rpn.py @@ -24,8 +24,7 @@ def __init__(self, self.init_weights(pretrained=pretrained) def init_weights(self, pretrained=None): - if pretrained is not None: - print('load model from: {}'.format(pretrained)) + super(RPN, self).init_weights(pretrained) self.backbone.init_weights(pretrained=pretrained) if self.neck is not None: self.neck.init_weights() diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py index 6889c8ab138..4bcb1855a95 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/two_stage.py @@ -24,10 +24,11 @@ def __init__(self, super(TwoStageDetector, self).__init__() 
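The builder.build_backbone call above (and its build_neck / build_*_head siblings) all follow the same config-to-object convention; a hedged sketch with illustrative names, since the actual implementation delegates to mmcv's obj_from_dict:

    def build_from_cfg(cfg, parent_module):
        args = dict(cfg)             # e.g. dict(type='ResNet', depth=50, ...)
        obj_type = args.pop('type')  # class name to look up in the module
        return getattr(parent_module, obj_type)(**args)  # the rest become kwargs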
self.backbone = builder.build_backbone(backbone) - self.with_neck = True if neck is not None else False - assert self.with_neck, "TwoStageDetector must be implemented with FPN now." - if self.with_neck: + if neck is not None: + self.with_neck = True self.neck = builder.build_neck(neck) + else: + raise NotImplementedError self.with_rpn = True if rpn_head is not None else False if self.with_rpn: @@ -51,8 +52,7 @@ def __init__(self, self.init_weights(pretrained=pretrained) def init_weights(self, pretrained=None): - if pretrained is not None: - print('load model from: {}'.format(pretrained)) + super(TwoStageDetector, self).init_weights(pretrained) self.backbone.init_weights(pretrained=pretrained) if self.with_neck: if isinstance(self.neck, nn.Sequential): @@ -104,9 +104,10 @@ def forward_train(self, pos_gt_labels) = multi_apply( self.bbox_roi_extractor.sample_proposals, proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels, rcnn_train_cfg_list) - labels, label_weights, bbox_targets, bbox_weights = \ - self.bbox_head.get_bbox_target(pos_proposals, neg_proposals, - pos_gt_bboxes, pos_gt_labels, self.train_cfg.rcnn) + (labels, label_weights, bbox_targets, + bbox_weights) = self.bbox_head.get_bbox_target( + pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, + self.train_cfg.rcnn) rois = bbox2roi([ torch.cat([pos, neg], dim=0) @@ -139,7 +140,7 @@ def forward_train(self, def simple_test(self, img, img_meta, proposals=None, rescale=False): """Test without augmentation.""" - assert proposals == None, "Fast RCNN hasn't been implemented." + assert proposals is None, "Fast RCNN hasn't been implemented." assert self.with_bbox, "Bbox head must be implemented." x = self.extract_feat(img) @@ -152,12 +153,12 @@ def simple_test(self, img, img_meta, proposals=None, rescale=False): bbox_results = bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) - if self.with_mask: + if not self.with_mask: + return bbox_results + else: segm_results = self.simple_test_mask( x, img_meta, det_bboxes, det_labels, rescale=rescale) return bbox_results, segm_results - else: - return bbox_results def aug_test(self, imgs, img_metas, rescale=False): """Test with augmentations. @@ -165,7 +166,7 @@ def aug_test(self, imgs, img_metas, rescale=False): If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. 
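Rescaling here means mapping detections from the (resized) network input back to original-image coordinates; a minimal sketch of the box half of that mapping, with illustrative names (the real code reads scale_factor from img_meta):

    def rescale_bboxes(det_bboxes, scale_factor):
        # det_bboxes: (n, 5) [x1, y1, x2, y2, score] in network-input scale
        bboxes = det_bboxes.clone()
        bboxes[:, :4] /= scale_factor  # back to original-image coordinates
        return bboxes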
""" - # recompute self.extract_feats(imgs) because of 'yield' and memory + # recompute feats to save memory proposal_list = self.aug_test_rpn( self.extract_feats(imgs), img_metas, self.test_cfg.rpn) det_bboxes, det_labels = self.aug_test_bboxes( @@ -183,10 +184,7 @@ def aug_test(self, imgs, img_metas, rescale=False): # det_bboxes always keep the original scale if self.with_mask: segm_results = self.aug_test_mask( - self.extract_feats(imgs), - img_metas, - det_bboxes, - det_labels) + self.extract_feats(imgs), img_metas, det_bboxes, det_labels) return bbox_results, segm_results else: return bbox_results diff --git a/tools/configs/r50_fpn_rpn_1x.py b/tools/configs/r50_fpn_rpn_1x.py index 1f14f72235b..385b511b9f4 100644 --- a/tools/configs/r50_fpn_rpn_1x.py +++ b/tools/configs/r50_fpn_rpn_1x.py @@ -114,4 +114,4 @@ work_dir = './work_dirs/fpn_rpn_r50_1x' load_from = None resume_from = None -workflow = [('train', 1), ('val', 1)] +workflow = [('train', 1)] diff --git a/tools/dist_train.sh b/tools/dist_train.sh index 7bb903bd3dc..b13cb68a456 100755 --- a/tools/dist_train.sh +++ b/tools/dist_train.sh @@ -2,4 +2,4 @@ PYTHON=${PYTHON:-"python"} -$PYTHON -m torch.distributed.launch --nproc_per_node=$2 train.py $1 --launcher pytorch $3 +$PYTHON -m torch.distributed.launch --nproc_per_node=$2 train.py $1 --launcher pytorch ${@:3} diff --git a/tools/train.py b/tools/train.py index 6e1b9d91aaf..8cf8a2f2082 100644 --- a/tools/train.py +++ b/tools/train.py @@ -1,6 +1,7 @@ from __future__ import division import argparse +import logging from collections import OrderedDict import torch @@ -45,9 +46,17 @@ def batch_processor(model, data, train_mode): return outputs +def get_logger(log_level): + logging.basicConfig( + format='%(asctime)s - %(levelname)s - %(message)s', level=log_level) + logger = logging.getLogger() + return logger + + def parse_args(): parser = argparse.ArgumentParser(description='Train a detector') parser.add_argument('config', help='train config file path') + parser.add_argument('--work_dir', help='the dir to save logs and models') parser.add_argument( '--validate', action='store_true', @@ -69,16 +78,22 @@ def main(): args = parse_args() cfg = Config.fromfile(args.config) - cfg.update(gpus=args.gpus) + if args.work_dir is not None: + cfg.work_dir = args.work_dir + cfg.gpus = args.gpus + + logger = get_logger(cfg.log_level) # init distributed environment if necessary if args.launcher == 'none': dist = False - print('Disabled distributed training.') + logger.info('Disabled distributed training.') else: dist = True - print('Enabled distributed training.') init_dist(args.launcher, **cfg.dist_params) + if torch.distributed.get_rank() != 0: + logger.setLevel('ERROR') + logger.info('Enabled distributed training.') # prepare data loaders train_dataset = obj_from_dict(cfg.data.train, datasets) From 4c1c1a3b7b779d2f2eaa5130ae08c453f19cfc88 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Fri, 28 Sep 2018 00:04:51 +0800 Subject: [PATCH 23/81] add a choice 'proposal_fast' to eval script --- mmdet/core/eval/__init__.py | 11 ++++---- mmdet/core/eval/coco_utils.py | 52 +++++++++++++++++++++++++++++++++-- mmdet/core/eval/eval_hooks.py | 34 ++++------------------- tools/coco_eval.py | 9 ++++-- tools/train.py | 4 +-- 5 files changed, 70 insertions(+), 40 deletions(-) diff --git a/mmdet/core/eval/__init__.py b/mmdet/core/eval/__init__.py index b5df6595a0e..026234fce31 100644 --- a/mmdet/core/eval/__init__.py +++ b/mmdet/core/eval/__init__.py @@ -1,8 +1,9 @@ from .class_names import (voc_classes, 
imagenet_det_classes, imagenet_vid_classes, coco_classes, dataset_aliases, get_classes) -from .coco_utils import coco_eval, results2json -from .eval_hooks import DistEvalHook, DistEvalRecallHook, CocoDistEvalmAPHook +from .coco_utils import coco_eval, fast_eval_recall, results2json +from .eval_hooks import (DistEvalHook, CocoDistEvalRecallHook, + CocoDistEvalmAPHook) from .mean_ap import average_precision, eval_map, print_map_summary from .recall import (eval_recalls, print_recall_summary, plot_num_recall, plot_iou_recall) @@ -10,8 +11,8 @@ __all__ = [ 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', - 'results2json', 'DistEvalHook', 'DistEvalRecallHook', - 'CocoDistEvalmAPHook', 'average_precision', 'eval_map', - 'print_map_summary', 'eval_recalls', 'print_recall_summary', + 'fast_eval_recall', 'results2json', 'DistEvalHook', + 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', + 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 'plot_num_recall', 'plot_iou_recall' ] diff --git a/mmdet/core/eval/coco_utils.py b/mmdet/core/eval/coco_utils.py index 719e70a75e0..e9fdb41649c 100644 --- a/mmdet/core/eval/coco_utils.py +++ b/mmdet/core/eval/coco_utils.py @@ -3,17 +3,28 @@ from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval +from .recall import eval_recalls + def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)): - assert result_file.endswith('.json') for res_type in result_types: - assert res_type in ['proposal', 'bbox', 'segm', 'keypoints'] + assert res_type in [ + 'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints' + ] if mmcv.is_str(coco): coco = COCO(coco) assert isinstance(coco, COCO) + if res_type == 'proposal_fast': + ar = fast_eval_recall(result_file, coco, max_dets) + for i, num in enumerate(max_dets): + print('AR@{}\t= {:.4f}'.format(num, ar[i])) + return + + assert result_file.endswith('.json') coco_dets = coco.loadRes(result_file) + img_ids = coco.getImgIds() for res_type in result_types: iou_type = 'bbox' if res_type == 'proposal' else res_type @@ -27,6 +38,43 @@ def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)): cocoEval.summarize() +def fast_eval_recall(results, + coco, + max_dets, + iou_thrs=np.arange(0.5, 0.96, 0.05)): + if mmcv.is_str(results): + assert results.endswith('.pkl') + results = mmcv.load(results) + elif not isinstance(results, list): + raise TypeError( + 'results must be a list of numpy arrays or a filename, not {}'. 
+ format(type(results))) + + gt_bboxes = [] + img_ids = coco.getImgIds() + for i in range(len(img_ids)): + ann_ids = coco.getAnnIds(imgIds=img_ids[i]) + ann_info = coco.loadAnns(ann_ids) + if len(ann_info) == 0: + gt_bboxes.append(np.zeros((0, 4))) + continue + bboxes = [] + for ann in ann_info: + if ann.get('ignore', False) or ann['iscrowd']: + continue + x1, y1, w, h = ann['bbox'] + bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1]) + bboxes = np.array(bboxes, dtype=np.float32) + if bboxes.shape[0] == 0: + bboxes = np.zeros((0, 4)) + gt_bboxes.append(bboxes) + + recalls = eval_recalls( + gt_bboxes, results, max_dets, iou_thrs, print_summary=False) + ar = recalls.mean(axis=1) + return ar + + def xyxy2xywh(bbox): _bbox = bbox.tolist() return [ diff --git a/mmdet/core/eval/eval_hooks.py b/mmdet/core/eval/eval_hooks.py index 2393449bc68..3439ee0f37f 100644 --- a/mmdet/core/eval/eval_hooks.py +++ b/mmdet/core/eval/eval_hooks.py @@ -10,7 +10,7 @@ from pycocotools.cocoeval import COCOeval from torch.utils.data import Dataset -from .coco_utils import results2json +from .coco_utils import results2json, fast_eval_recall from .recall import eval_recalls from ..parallel import scatter from mmdet import datasets @@ -100,45 +100,21 @@ def evaluate(self): raise NotImplementedError -class DistEvalRecallHook(DistEvalHook): +class CocoDistEvalRecallHook(DistEvalHook): def __init__(self, dataset, proposal_nums=(100, 300, 1000), iou_thrs=np.arange(0.5, 0.96, 0.05)): - super(DistEvalRecallHook, self).__init__(dataset) + super(CocoDistEvalRecallHook, self).__init__(dataset) self.proposal_nums = np.array(proposal_nums, dtype=np.int32) self.iou_thrs = np.array(iou_thrs, dtype=np.float32) def evaluate(self, runner, results): # the official coco evaluation is too slow, here we use our own # implementation instead, which may get slightly different results - gt_bboxes = [] - for i in range(len(self.dataset)): - img_id = self.dataset.img_ids[i] - ann_ids = self.dataset.coco.getAnnIds(imgIds=img_id) - ann_info = self.dataset.coco.loadAnns(ann_ids) - if len(ann_info) == 0: - gt_bboxes.append(np.zeros((0, 4))) - continue - bboxes = [] - for ann in ann_info: - if ann.get('ignore', False) or ann['iscrowd']: - continue - x1, y1, w, h = ann['bbox'] - bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1]) - bboxes = np.array(bboxes, dtype=np.float32) - if bboxes.shape[0] == 0: - bboxes = np.zeros((0, 4)) - gt_bboxes.append(bboxes) - - recalls = eval_recalls( - gt_bboxes, - results, - self.proposal_nums, - self.iou_thrs, - print_summary=False) - ar = recalls.mean(axis=1) + ar = fast_eval_recall(results, self.dataset.coco, self.proposal_nums, + self.iou_thrs) for i, num in enumerate(self.proposal_nums): runner.log_buffer.output['AR@{}'.format(num)] = ar[i] runner.log_buffer.ready = True diff --git a/tools/coco_eval.py b/tools/coco_eval.py index 93554be29a2..65e114ca280 100644 --- a/tools/coco_eval.py +++ b/tools/coco_eval.py @@ -8,13 +8,18 @@ def main(): parser.add_argument('result', help='result file path') parser.add_argument('--ann', help='annotation file path') parser.add_argument( - '--types', type=str, nargs='+', default=['bbox'], help='result types') + '--types', + type=str, + nargs='+', + choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], + default=['bbox'], + help='result types') parser.add_argument( '--max-dets', type=int, nargs='+', default=[100, 300, 1000], - help='result types') + help='proposal numbers, only used for recall evaluation') args = parser.parse_args() coco_eval(args.result, args.types, 
args.ann, args.max_dets) diff --git a/tools/train.py b/tools/train.py index 8cf8a2f2082..f596f5693d2 100644 --- a/tools/train.py +++ b/tools/train.py @@ -11,7 +11,7 @@ from mmdet import datasets from mmdet.core import (init_dist, DistOptimizerHook, DistSamplerSeedHook, MMDataParallel, MMDistributedDataParallel, - DistEvalRecallHook, CocoDistEvalmAPHook) + CocoDistEvalRecallHook, CocoDistEvalmAPHook) from mmdet.datasets.loader import build_dataloader from mmdet.models import build_detector, RPN @@ -127,7 +127,7 @@ def main(): runner.register_hook(DistSamplerSeedHook()) # register eval hooks if isinstance(model.module, RPN): - runner.register_hook(DistEvalRecallHook(cfg.data.val)) + runner.register_hook(CocoDistEvalRecallHook(cfg.data.val)) elif cfg.data.val.type == 'CocoDataset': runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) From 48259049a3ee5cf874aa009c17493978e2e958e7 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Fri, 28 Sep 2018 00:34:54 +0800 Subject: [PATCH 24/81] bug fix for all_reduce when coalesce is False --- mmdet/core/utils/dist_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index 2a5d7659df7..07b1592e738 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -47,11 +47,12 @@ def all_reduce_coalesced(tensors): buckets[tp] = [] buckets[tp].append(tensor) + world_size = dist.get_world_size() for tp in buckets: bucket = buckets[tp] coalesced = _flatten_dense_tensors(bucket) dist.all_reduce(coalesced) - coalesced /= dist.get_world_size() + coalesced.div_(world_size) for buf, synced in zip(bucket, _unflatten_dense_tensors(coalesced, bucket)): @@ -66,8 +67,9 @@ def reduce_grads(model, coalesce=True): if coalesce: all_reduce_coalesced(grads) else: + world_size = dist.get_world_size() for tensor in grads: - dist.all_reduce(tensor) + dist.all_reduce(tensor.div_(world_size)) class DistOptimizerHook(OptimizerHook): From 6a9bf56369c7e606488d96835bbe5dde3c03706e Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Fri, 28 Sep 2018 23:39:34 +0800 Subject: [PATCH 25/81] allow manually setting random seeds --- tools/train.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/train.py b/tools/train.py index f596f5693d2..dde6b06b5c3 100644 --- a/tools/train.py +++ b/tools/train.py @@ -4,6 +4,7 @@ import logging from collections import OrderedDict +import numpy as np import torch from mmcv import Config from mmcv.torchpack import Runner, obj_from_dict @@ -53,6 +54,12 @@ def get_logger(log_level): return logger +def set_random_seed(seed): + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + def parse_args(): parser = argparse.ArgumentParser(description='Train a detector') parser.add_argument('config', help='train config file path') @@ -63,6 +70,7 @@ def parse_args(): help='whether to add a validate phase') parser.add_argument( '--gpus', type=int, default=1, help='number of gpus to use') + parser.add_argument('--seed', type=int, help='random seed') parser.add_argument( '--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'], @@ -84,6 +92,11 @@ def main(): logger = get_logger(cfg.log_level) + # set random seed if specified + if args.seed is not None: + logger.info('Set random seed to {}'.format(args.seed)) + set_random_seed(args.seed) + # init distributed environment if necessary if args.launcher == 'none': dist = False From 6122574f611ec834219bb6f0f3f047d7374db2e1 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 30 
Sep 2018 15:49:03 +0800 Subject: [PATCH 26/81] add git hash to version info --- mmdet/__init__.py | 2 +- mmdet/version.py | 1 - setup.py | 118 ++++++++++++++++++++++++++++++++++++---------- 3 files changed, 94 insertions(+), 27 deletions(-) delete mode 100644 mmdet/version.py diff --git a/mmdet/__init__.py b/mmdet/__init__.py index 58f3ace6c03..8b5e1ac77ad 100644 --- a/mmdet/__init__.py +++ b/mmdet/__init__.py @@ -1 +1 @@ -from .version import __version__ +from .version import __version__, short_version diff --git a/mmdet/version.py b/mmdet/version.py deleted file mode 100644 index 2b8877c5057..00000000000 --- a/mmdet/version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = '0.5.0' diff --git a/setup.py b/setup.py index 759e8ebf37e..02dede74779 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,6 @@ +import os +import subprocess +import time from setuptools import find_packages, setup @@ -7,34 +10,99 @@ def readme(): return content +MAJOR = 0 +MINOR = 5 +PATCH = 0 +SUFFIX = '' +SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) + +version_file = 'mmdet/version.py' + + +def get_git_hash(): + + def _minimal_ext_cmd(cmd): + # construct minimal environment + env = {} + for k in ['SYSTEMROOT', 'PATH', 'HOME']: + v = os.environ.get(k) + if v is not None: + env[k] = v + # LANGUAGE is used on win32 + env['LANGUAGE'] = 'C' + env['LANG'] = 'C' + env['LC_ALL'] = 'C' + out = subprocess.Popen( + cmd, stdout=subprocess.PIPE, env=env).communicate()[0] + return out + + try: + out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) + sha = out.strip().decode('ascii') + except OSError: + sha = 'unknown' + + return sha + + +def get_hash(): + if os.path.exists('.git'): + sha = get_git_hash()[:7] + elif os.path.exists(version_file): + try: + from mmdet.version import __version__ + sha = __version__.split('+')[-1] + except ImportError: + raise ImportError('Unable to get git version') + else: + sha = 'unknown' + + return sha + + +def write_version_py(): + content = """# GENERATED VERSION FILE +# TIME: {} + +__version__ = '{}' +short_version = '{}' +""" + sha = get_hash() + VERSION = SHORT_VERSION + '+' + sha + + with open(version_file, 'w') as f: + f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) + + def get_version(): - version_file = 'mmdet/version.py' with open(version_file, 'r') as f: exec(compile(f.read(), version_file, 'exec')) return locals()['__version__'] -setup( - name='mmdet', - version=get_version(), - description='Open MMLab Detection Toolbox', - long_description=readme(), - keywords='computer vision, object detection', - packages=find_packages(), - classifiers=[ - 'Development Status :: 4 - Beta', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Topic :: Utilities', - ], - license='GPLv3', - setup_requires=['pytest-runner'], - tests_require=['pytest'], - install_requires=['numpy', 'matplotlib', 'six', 'terminaltables'], - zip_safe=False) +if __name__ == '__main__': + write_version_py() + setup( + name='mmdet', + version=get_version(), + description='Open MMLab Detection Toolbox', + long_description=readme(), + keywords='computer vision, object detection', + packages=find_packages(), + classifiers=[ + 'Development Status :: 4 - Beta', + 'License :: OSI Approved :: GNU 
General Public License v3 (GPLv3)', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Topic :: Utilities', + ], + license='GPLv3', + setup_requires=['pytest-runner'], + tests_require=['pytest'], + install_requires=['numpy', 'matplotlib', 'six', 'terminaltables'], + zip_safe=False) From 1ae7b06f97bc7761a20438519051b3b2982e3992 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 30 Sep 2018 16:43:51 +0800 Subject: [PATCH 27/81] save mmdet version in checkpoint as meta info --- .gitignore | 1 + tools/train.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d5ef5f5a1a6..01c47d6e277 100644 --- a/.gitignore +++ b/.gitignore @@ -105,4 +105,5 @@ venv.bak/ # cython generated cpp mmdet/ops/nms/*.cpp +mmdet/version.py data diff --git a/tools/train.py b/tools/train.py index dde6b06b5c3..78f8a553c5f 100644 --- a/tools/train.py +++ b/tools/train.py @@ -9,7 +9,7 @@ from mmcv import Config from mmcv.torchpack import Runner, obj_from_dict -from mmdet import datasets +from mmdet import datasets, __version__ from mmdet.core import (init_dist, DistOptimizerHook, DistSamplerSeedHook, MMDataParallel, MMDistributedDataParallel, CocoDistEvalRecallHook, CocoDistEvalmAPHook) @@ -89,6 +89,8 @@ def main(): if args.work_dir is not None: cfg.work_dir = args.work_dir cfg.gpus = args.gpus + # add mmdet version to checkpoint as meta data + cfg.checkpoint_config.meta = dict(mmdet_version=__version__) logger = get_logger(cfg.log_level) From bdaa8f4294cb525a3f7fd4a1b9ac74ee65446126 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 30 Sep 2018 17:10:15 +0800 Subject: [PATCH 28/81] update training settings --- tools/configs/r50_fpn_frcnn_1x.py | 3 ++- tools/configs/r50_fpn_maskrcnn_1x.py | 3 ++- tools/configs/r50_fpn_rpn_1x.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index 23903e084e2..83ba65b896c 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -46,11 +46,12 @@ pos_balance_sampling=False, neg_pos_ub=256, allowed_border=0, + crowd_thr=1.1, anchor_batch_size=256, pos_iou_thr=0.7, neg_iou_thr=0.3, neg_balance_thr=0, - min_pos_iou=1e-3, + min_pos_iou=0.3, pos_weight=-1, smoothl1_beta=1 / 9.0, debug=False), diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index 41c2a1476dd..2fa719980cf 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -57,11 +57,12 @@ pos_balance_sampling=False, neg_pos_ub=256, allowed_border=0, + crowd_thr=1.1, anchor_batch_size=256, pos_iou_thr=0.7, neg_iou_thr=0.3, neg_balance_thr=0, - min_pos_iou=1e-3, + min_pos_iou=0.3, pos_weight=-1, smoothl1_beta=1 / 9.0, debug=False), diff --git a/tools/configs/r50_fpn_rpn_1x.py b/tools/configs/r50_fpn_rpn_1x.py index 385b511b9f4..91f5f08e887 100644 --- a/tools/configs/r50_fpn_rpn_1x.py +++ b/tools/configs/r50_fpn_rpn_1x.py @@ -31,11 +31,12 @@ pos_balance_sampling=False, neg_pos_ub=256, allowed_border=0, + crowd_thr=1.1, anchor_batch_size=256, pos_iou_thr=0.7, neg_iou_thr=0.3, neg_balance_thr=0, - min_pos_iou=1e-3, + min_pos_iou=0.3, pos_weight=-1, smoothl1_beta=1 / 9.0, debug=False)) From 6028a16e8b5dfd9c8a7bf0752864a5317ffd8014 Mon Sep 17 00:00:00 2001 
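On the min_pos_iou=0.3 setting introduced above: pos_iou_thr and neg_iou_thr split anchors into positives and negatives by overlap, while min_pos_iou is the lowest IoU at which a ground-truth box may still claim its single best-overlapping anchor, so that no object is left without any positive sample. A hedged sketch of that low-quality matching step, with illustrative names:

    import numpy as np

    def force_match_gts(overlaps, assigned, min_pos_iou=0.3):
        # overlaps: (num_gts, num_anchors) IoU matrix
        # assigned: (num_anchors,) assigned gt index per anchor, -1 if none
        for gt in range(overlaps.shape[0]):
            best = overlaps[gt].argmax()
            if overlaps[gt, best] >= min_pos_iou:
                assigned[best] = gt  # the gt keeps its best anchor as a positive
        return assigned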
From: pangjm Date: Mon, 1 Oct 2018 22:21:04 +0800 Subject: [PATCH 29/81] padded mask during maskrcnn test --- mmdet/models/backbones/resnet.py | 13 ++++-- mmdet/models/mask_heads/fcn_mask_head.py | 52 ++++++++++++++++++++---- tools/train.py | 13 +++--- 3 files changed, 61 insertions(+), 17 deletions(-) diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py index e1ff4de45ea..fbb3f09c2de 100644 --- a/mmdet/models/backbones/resnet.py +++ b/mmdet/models/backbones/resnet.py @@ -200,7 +200,8 @@ def __init__(self, frozen_stages=-1, style='fb', sync_bn=False, - with_cp=False): + with_cp=False, + strict_frozen=False): super(ResNet, self).__init__() if not len(layers) == len(strides) == len(dilations): raise ValueError( @@ -241,6 +242,8 @@ def __init__(self, self.feat_dim = block.expansion * 64 * 2**(len(layers) - 1) self.with_cp = with_cp + self.strict_frozen = strict_frozen + def init_weights(self, pretrained=None): if isinstance(pretrained, str): logger = logging.getLogger() @@ -278,6 +281,9 @@ def train(self, mode=True): for m in self.modules(): if isinstance(m, nn.BatchNorm2d): m.eval() + if self.strict_frozen: + for params in m.parameters(): + params.requires_grad = False if mode and self.frozen_stages >= 0: for param in self.conv1.parameters(): param.requires_grad = False @@ -310,7 +316,8 @@ def resnet(depth, frozen_stages=-1, style='fb', sync_bn=False, - with_cp=False): + with_cp=False, + strict_frozen=False): """Constructs a ResNet model. Args: @@ -324,5 +331,5 @@ def resnet(depth, raise KeyError('invalid depth {} for resnet'.format(depth)) block, layers = resnet_cfg[depth] model = ResNet(block, layers[:num_stages], strides, dilations, out_indices, - frozen_stages, style, sync_bn, with_cp) + frozen_stages, style, sync_bn, with_cp, strict_frozen) return model diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py index fa89ef7ffe3..d90188ac772 100644 --- a/mmdet/models/mask_heads/fcn_mask_head.py +++ b/mmdet/models/mask_heads/fcn_mask_head.py @@ -87,7 +87,7 @@ def forward(self, x): return mask_pred def get_mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks, - img_meta, rcnn_train_cfg): + img_meta, rcnn_train_cfg): mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds, gt_masks, img_meta, rcnn_train_cfg) return mask_targets @@ -117,12 +117,19 @@ def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, if isinstance(mask_pred, torch.Tensor): mask_pred = mask_pred.sigmoid().cpu().numpy() assert isinstance(mask_pred, np.ndarray) + cls_segms = [[] for _ in range(self.num_classes - 1)] + mask_size = mask_pred.shape[-1] bboxes = det_bboxes.cpu().numpy()[:, :4] labels = det_labels.cpu().numpy() + 1 img_h = ori_shape[0] img_w = ori_shape[1] + scale = (mask_size + 2.0) / mask_size + bboxes = np.round(self._bbox_scaling(bboxes, scale)).astype(np.int32) + padded_mask = np.zeros( + (mask_size + 2, mask_size + 2), dtype=np.float32) + for i in range(bboxes.shape[0]): bbox = bboxes[i, :].astype(int) label = labels[i] @@ -132,18 +139,45 @@ def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, h = max(h, 1) if not self.class_agnostic: - mask_pred_ = mask_pred[i, label, :, :] + padded_mask[1:-1, 1:-1] = mask_pred[i, label, :, :] else: - mask_pred_ = mask_pred[i, 0, :, :] + padded_mask[1:-1, 1:-1] = mask_pred[i, 0, :, :] + mask = mmcv.imresize(padded_mask, (w, h)) + mask = np.array( + mask > rcnn_test_cfg.mask_thr_binary, dtype=np.uint8) + im_mask = np.zeros((img_h, img_w), 
dtype=np.uint8) + + x0 = max(bbox[0], 0) + x1 = min(bbox[2] + 1, img_w) + y0 = max(bbox[1], 0) + y1 = min(bbox[3] + 1, img_h) - im_mask = np.zeros((img_h, img_w), dtype=np.float32) + im_mask[y0:y1, x0:x1] = mask[(y0 - bbox[1]):(y1 - bbox[1]), ( + x0 - bbox[0]):(x1 - bbox[0])] - im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.imresize( - mask_pred_, (w, h)) - # im_mask = cv2.resize(im_mask, (img_w, img_h)) - im_mask = np.array( - im_mask > rcnn_test_cfg.mask_thr_binary, dtype=np.uint8) rle = mask_util.encode( np.array(im_mask[:, :, np.newaxis], order='F'))[0] cls_segms[label - 1].append(rle) return cls_segms + + def _bbox_scaling(self, bboxes, scale, clip_shape=None): + """Scaling bboxes and clip the boundary(optional) + Args: + bboxes(ndarray): shape(..., 4) + scale(float): scaling factor + clip(None or tuple): (h, w) + Returns: + ndarray: scaled bboxes + """ + if float(scale) == 1.0: + scaled_bboxes = bboxes.copy() + else: + w = bboxes[..., 2] - bboxes[..., 0] + 1 + h = bboxes[..., 3] - bboxes[..., 1] + 1 + dw = (w * (scale - 1)) * 0.5 + dh = (h * (scale - 1)) * 0.5 + scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) + if clip_shape is not None: + return bbox_clip(scaled_bboxes, clip_shape) + else: + return scaled_bboxes diff --git a/tools/train.py b/tools/train.py index 78f8a553c5f..6ca0464e5d2 100644 --- a/tools/train.py +++ b/tools/train.py @@ -116,11 +116,6 @@ def main(): build_dataloader(train_dataset, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, cfg.gpus, dist) ] - if args.validate: - val_dataset = obj_from_dict(cfg.data.val, datasets) - data_loaders.append( - build_dataloader(val_dataset, cfg.data.imgs_per_gpu, - cfg.data.workers_per_gpu, cfg.gpus, dist)) # build model model = build_detector( @@ -133,6 +128,14 @@ def main(): # build runner runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, cfg.log_level) + + if args.validate: + val_dataset = obj_from_dict(cfg.data.test, datasets) + runner.register_hook(CocoDistEvalmAPHook(val_dataset)) + # data_loaders.append( + # build_dataloader(val_dataset, cfg.data.imgs_per_gpu, + # cfg.data.workers_per_gpu, cfg.gpus, dist)) + # register hooks optimizer_config = DistOptimizerHook( **cfg.optimizer_config) if dist else cfg.optimizer_config From 58e82cdfc438eebe9bd4fa11ede120f8abd7e489 Mon Sep 17 00:00:00 2001 From: pangjm Date: Mon, 1 Oct 2018 22:25:53 +0800 Subject: [PATCH 30/81] minor modify --- tools/train.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/train.py b/tools/train.py index 6ca0464e5d2..2aa681afa35 100644 --- a/tools/train.py +++ b/tools/train.py @@ -116,6 +116,11 @@ def main(): build_dataloader(train_dataset, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, cfg.gpus, dist) ] + if args.validate: + val_dataset = obj_from_dict(cfg.data.test, datasets) + data_loaders.append( + build_dataloader(val_dataset, cfg.data.imgs_per_gpu, + cfg.data.workers_per_gpu, cfg.gpus, dist)) # build model model = build_detector( @@ -131,10 +136,9 @@ def main(): if args.validate: val_dataset = obj_from_dict(cfg.data.test, datasets) - runner.register_hook(CocoDistEvalmAPHook(val_dataset)) - # data_loaders.append( - # build_dataloader(val_dataset, cfg.data.imgs_per_gpu, - # cfg.data.workers_per_gpu, cfg.gpus, dist)) + data_loaders.append( + build_dataloader(val_dataset, cfg.data.imgs_per_gpu, + cfg.data.workers_per_gpu, cfg.gpus, dist)) # register hooks optimizer_config = DistOptimizerHook( From c3e4b78d5f10a9e8e5893a3a5afd18a0257fc103 Mon Sep 17 00:00:00 2001 From: pangjm 
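The get_seg_masks rewrite above pads each predicted mask with a one-pixel zero border before resizing: bilinear interpolation then decays to zero at the box edge instead of leaving a hard cut after thresholding, and _bbox_scaling enlarges the box by the matching (m + 2) / m factor. A condensed sketch of the trick:

    import numpy as np
    import mmcv

    def expand_and_resize(mask_pred, w, h):
        # mask_pred: (m, m) float mask from the head, e.g. m = 28
        m = mask_pred.shape[-1]
        padded = np.zeros((m + 2, m + 2), dtype=np.float32)
        padded[1:-1, 1:-1] = mask_pred        # zero border on all four sides
        return mmcv.imresize(padded, (w, h))  # caller scales the bbox by (m + 2) / m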
Date: Mon, 1 Oct 2018 22:29:00 +0800 Subject: [PATCH 31/81] validate api transfer to hook --- tools/train.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tools/train.py b/tools/train.py index 2aa681afa35..3241174da73 100644 --- a/tools/train.py +++ b/tools/train.py @@ -134,12 +134,6 @@ def main(): runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, cfg.log_level) - if args.validate: - val_dataset = obj_from_dict(cfg.data.test, datasets) - data_loaders.append( - build_dataloader(val_dataset, cfg.data.imgs_per_gpu, - cfg.data.workers_per_gpu, cfg.gpus, dist)) - # register hooks optimizer_config = DistOptimizerHook( **cfg.optimizer_config) if dist else cfg.optimizer_config @@ -148,10 +142,11 @@ def main(): if dist: runner.register_hook(DistSamplerSeedHook()) # register eval hooks - if isinstance(model.module, RPN): - runner.register_hook(CocoDistEvalRecallHook(cfg.data.val)) - elif cfg.data.val.type == 'CocoDataset': - runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) + if args.validate: + if isinstance(model.module, RPN): + runner.register_hook(CocoDistEvalRecallHook(cfg.data.val)) + elif cfg.data.val.type == 'CocoDataset': + runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) if cfg.resume_from: runner.resume(cfg.resume_from) From 00ce0b36aa8b12141cb1e8df20c4244c11dec155 Mon Sep 17 00:00:00 2001 From: pangjm Date: Mon, 1 Oct 2018 22:37:54 +0800 Subject: [PATCH 32/81] rm val dataset --- tools/train.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/train.py b/tools/train.py index 3241174da73..03c87cce474 100644 --- a/tools/train.py +++ b/tools/train.py @@ -116,11 +116,6 @@ def main(): build_dataloader(train_dataset, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, cfg.gpus, dist) ] - if args.validate: - val_dataset = obj_from_dict(cfg.data.test, datasets) - data_loaders.append( - build_dataloader(val_dataset, cfg.data.imgs_per_gpu, - cfg.data.workers_per_gpu, cfg.gpus, dist)) # build model model = build_detector( From d9ecb4d6d2c7152f526c48016b8377090f994196 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 3 Oct 2018 17:31:17 +0800 Subject: [PATCH 33/81] add min_pos_iou config field for train_cfg.rcnn --- mmdet/models/roi_extractors/single_level.py | 2 +- tools/configs/r50_fpn_frcnn_1x.py | 1 + tools/configs/r50_fpn_maskrcnn_1x.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mmdet/models/roi_extractors/single_level.py b/mmdet/models/roi_extractors/single_level.py index fa247a520be..b850d6ab680 100644 --- a/mmdet/models/roi_extractors/single_level.py +++ b/mmdet/models/roi_extractors/single_level.py @@ -57,7 +57,7 @@ def sample_proposals(self, proposals, gt_bboxes, gt_crowds, gt_labels, proposals = proposals[:, :4] assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ bbox_assign(proposals, gt_bboxes, gt_crowds, gt_labels, - cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr) + cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou, cfg.crowd_thr) if cfg.add_gt_as_proposals: proposals = torch.cat([gt_bboxes, proposals], dim=0) diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index 83ba65b896c..e15cbdbfec5 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -65,6 +65,7 @@ pos_balance_sampling=False, neg_pos_ub=512, neg_balance_thr=0, + min_pos_iou=1.1, pos_weight=-1, debug=False)) test_cfg = dict( diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index 
2fa719980cf..5ecdaf44da9 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -77,6 +77,7 @@ pos_balance_sampling=False, neg_pos_ub=512, neg_balance_thr=0, + min_pos_iou=1.1, pos_weight=-1, debug=False)) test_cfg = dict( From 698afa7d5fbe10d47becc43e93777f1b92d179bb Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 3 Oct 2018 21:47:22 +0800 Subject: [PATCH 34/81] add default result visualization for base detector --- mmdet/core/eval/class_names.py | 2 +- mmdet/models/detectors/base.py | 39 ++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/mmdet/core/eval/class_names.py b/mmdet/core/eval/class_names.py index b68e9135dca..04f806315b7 100644 --- a/mmdet/core/eval/class_names.py +++ b/mmdet/core/eval/class_names.py @@ -95,7 +95,7 @@ def get_classes(dataset): if mmcv.is_str(dataset): if dataset in alias2name: - labels = eval(alias2name[dataset] + '_labels()') + labels = eval(alias2name[dataset] + '_classes()') else: raise ValueError('Unrecognized dataset: {}'.format(dataset)) else: diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py index 3b2040312ee..93a05c8594e 100644 --- a/mmdet/models/detectors/base.py +++ b/mmdet/models/detectors/base.py @@ -1,9 +1,13 @@ import logging from abc import ABCMeta, abstractmethod +import mmcv +import numpy as np import torch import torch.nn as nn +from mmdet.core import tensor2imgs, get_classes + class BaseDetector(nn.Module): """Base class for detectors""" @@ -66,3 +70,38 @@ def forward(self, img, img_meta, return_loss=True, **kwargs): return self.forward_train(img, img_meta, **kwargs) else: return self.forward_test(img, img_meta, **kwargs) + + def show_result(self, + data, + result, + img_norm_cfg, + dataset='coco', + score_thr=0.3): + img_tensor = data['img'][0] + img_metas = data['img_meta'][0].data[0] + imgs = tensor2imgs(img_tensor, **img_norm_cfg) + assert len(imgs) == len(img_metas) + + if isinstance(dataset, str): + class_names = get_classes(dataset) + elif isinstance(dataset, list): + class_names = dataset + else: + raise TypeError('dataset must be a valid dataset name or a list' + ' of class names, not {}'.format(type(dataset))) + + for img, img_meta in zip(imgs, img_metas): + h, w, _ = img_meta['img_shape'] + img_show = img[:h, :w, :] + labels = [ + np.full(bbox.shape[0], i, dtype=np.int32) + for i, bbox in enumerate(result) + ] + labels = np.concatenate(labels) + bboxes = np.vstack(result) + mmcv.imshow_det_bboxes( + img_show, + bboxes, + labels, + class_names=class_names, + score_thr=score_thr) From a185ba35302a449464199cbd5be71c9542d39b3e Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Thu, 4 Oct 2018 20:26:37 +0800 Subject: [PATCH 35/81] modify mask target computation --- mmdet/core/mask_ops/mask_target.py | 43 ++++--- mmdet/datasets/coco.py | 137 +++++++++++------------ mmdet/datasets/transforms.py | 35 ++++-- mmdet/models/detectors/test_mixins.py | 4 +- mmdet/models/detectors/two_stage.py | 5 +- mmdet/models/mask_heads/fcn_mask_head.py | 73 ++++-------- tools/train.py | 1 + 7 files changed, 141 insertions(+), 157 deletions(-) diff --git a/mmdet/core/mask_ops/mask_target.py b/mmdet/core/mask_ops/mask_target.py index 5f635992aac..be93dfc2893 100644 --- a/mmdet/core/mask_ops/mask_target.py +++ b/mmdet/core/mask_ops/mask_target.py @@ -1,39 +1,36 @@ import torch import numpy as np +import mmcv -from .segms import polys_to_mask_wrt_box - -def mask_target(pos_proposals_list, - pos_assigned_gt_inds_list, - gt_polys_list, - img_meta, +def 
mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, cfg): cfg_list = [cfg for _ in range(len(pos_proposals_list))] mask_targets = map(mask_target_single, pos_proposals_list, - pos_assigned_gt_inds_list, gt_polys_list, img_meta, - cfg_list) - mask_targets = torch.cat(tuple(mask_targets), dim=0) + pos_assigned_gt_inds_list, gt_masks_list, cfg_list) + mask_targets = torch.cat(list(mask_targets)) return mask_targets -def mask_target_single(pos_proposals, - pos_assigned_gt_inds, - gt_polys, - img_meta, - cfg): +def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): mask_size = cfg.mask_size num_pos = pos_proposals.size(0) - mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size)) + mask_targets = [] if num_pos > 0: - pos_proposals = pos_proposals.cpu().numpy() + proposals_np = pos_proposals.cpu().numpy() pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() - scale_factor = img_meta['scale_factor'] for i in range(num_pos): - bbox = pos_proposals[i, :] / scale_factor - polys = gt_polys[pos_assigned_gt_inds[i]] - mask = polys_to_mask_wrt_box(polys, bbox, mask_size) - mask = np.array(mask > 0, dtype=np.float32) - mask_targets[i, ...] = torch.from_numpy(mask).to( - mask_targets.device) + gt_mask = gt_masks[pos_assigned_gt_inds[i]] + bbox = proposals_np[i, :].astype(np.int32) + x1, y1, x2, y2 = bbox + w = np.maximum(x2 - x1 + 1, 1) + h = np.maximum(y2 - y1 + 1, 1) + # mask is uint8 both before and after resizing + target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], + (mask_size, mask_size)) + mask_targets.append(target) + mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( + pos_proposals.device) + else: + mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) return mask_targets diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index b803360072b..63b42b383dc 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -5,71 +5,12 @@ from pycocotools.coco import COCO from torch.utils.data import Dataset -from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform, +from .transforms import (ImageTransform, BboxTransform, MaskTransform, Numpy2Tensor) from .utils import to_tensor, show_ann, random_scale from .utils import DataContainer as DC -def parse_ann_info(ann_info, cat2label, with_mask=True): - """Parse bbox and mask annotation. - - Args: - ann_info (list[dict]): Annotation info of an image. - cat2label (dict): The mapping from category ids to labels. - with_mask (bool): Whether to parse mask annotations. - - Returns: - tuple: gt_bboxes, gt_labels and gt_mask_info - """ - gt_bboxes = [] - gt_labels = [] - gt_bboxes_ignore = [] - # each mask consists of one or several polys, each poly is a list of float. 
- if with_mask: - gt_mask_polys = [] - gt_poly_lens = [] - for i, ann in enumerate(ann_info): - if ann.get('ignore', False): - continue - x1, y1, w, h = ann['bbox'] - if ann['area'] <= 0 or w < 1 or h < 1: - continue - bbox = [x1, y1, x1 + w - 1, y1 + h - 1] - if ann['iscrowd']: - gt_bboxes_ignore.append(bbox) - else: - gt_bboxes.append(bbox) - gt_labels.append(cat2label[ann['category_id']]) - if with_mask: - # Note polys are not resized - mask_polys = [ - p for p in ann['segmentation'] if len(p) >= 6 - ] # valid polygons have >= 3 points (6 coordinates) - poly_lens = [len(p) for p in mask_polys] - gt_mask_polys.append(mask_polys) - gt_poly_lens.extend(poly_lens) - if gt_bboxes: - gt_bboxes = np.array(gt_bboxes, dtype=np.float32) - gt_labels = np.array(gt_labels, dtype=np.int64) - else: - gt_bboxes = np.zeros((0, 4), dtype=np.float32) - gt_labels = np.array([], dtype=np.int64) - - if gt_bboxes_ignore: - gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) - else: - gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) - - ann = dict( - bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) - - if with_mask: - ann['mask_polys'] = gt_mask_polys - ann['poly_lens'] = gt_poly_lens - return ann - - class CocoDataset(Dataset): def __init__(self, @@ -138,7 +79,7 @@ def __init__(self, self.img_transform = ImageTransform( size_divisor=self.size_divisor, **self.img_norm_cfg) self.bbox_transform = BboxTransform() - self.mask_transform = PolyMaskTransform() + self.mask_transform = MaskTransform() self.numpy2tensor = Numpy2Tensor() def __len__(self): @@ -162,6 +103,67 @@ def _load_ann_info(self, idx): ann_info = self.coco.loadAnns(ann_ids) return ann_info + def _parse_ann_info(self, ann_info, with_mask=True): + """Parse bbox and mask annotation. + + Args: + ann_info (list[dict]): Annotation info of an image. + with_mask (bool): Whether to parse mask annotations. + + Returns: + dict: A dict containing the following keys: bboxes, bboxes_ignore, + labels, masks, mask_polys, poly_lens. + """ + gt_bboxes = [] + gt_labels = [] + gt_bboxes_ignore = [] + # each mask consists of one or several polys, each poly is a list of float. 
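The rewritten parser switches from raw polygon lists to full-resolution binary masks via pycocotools, as the gt_masks branch below shows. A short usage sketch of the decoding call, with an illustrative annotation path:

    from pycocotools.coco import COCO

    coco = COCO('annotations/instances_val2017.json')  # illustrative path
    ann_ids = coco.getAnnIds(imgIds=coco.getImgIds()[:1])
    ann = coco.loadAnns(ann_ids)[0]
    mask = coco.annToMask(ann)  # uint8 (img_h, img_w) array, 1 inside the object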
+ if with_mask: + gt_masks = [] + gt_mask_polys = [] + gt_poly_lens = [] + for i, ann in enumerate(ann_info): + if ann.get('ignore', False): + continue + x1, y1, w, h = ann['bbox'] + if ann['area'] <= 0 or w < 1 or h < 1: + continue + bbox = [x1, y1, x1 + w - 1, y1 + h - 1] + if ann['iscrowd']: + gt_bboxes_ignore.append(bbox) + else: + gt_bboxes.append(bbox) + gt_labels.append(self.cat2label[ann['category_id']]) + if with_mask: + gt_masks.append(self.coco.annToMask(ann)) + mask_polys = [ + p for p in ann['segmentation'] if len(p) >= 6 + ] # valid polygons have >= 3 points (6 coordinates) + poly_lens = [len(p) for p in mask_polys] + gt_mask_polys.append(mask_polys) + gt_poly_lens.extend(poly_lens) + if gt_bboxes: + gt_bboxes = np.array(gt_bboxes, dtype=np.float32) + gt_labels = np.array(gt_labels, dtype=np.int64) + else: + gt_bboxes = np.zeros((0, 4), dtype=np.float32) + gt_labels = np.array([], dtype=np.int64) + + if gt_bboxes_ignore: + gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) + else: + gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + + ann = dict( + bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) + + if with_mask: + ann['masks'] = gt_masks + # poly format is not used in the current implementation + ann['mask_polys'] = gt_mask_polys + ann['poly_lens'] = gt_poly_lens + return ann + def _set_group_flag(self): """Set flag according to image aspect ratio. @@ -200,7 +202,7 @@ def __getitem__(self, idx): idx = self._rand_another(idx) continue - ann = parse_ann_info(ann_info, self.cat2label, self.with_mask) + ann = self._parse_ann_info(ann_info, self.with_mask) gt_bboxes = ann['bboxes'] gt_labels = ann['labels'] gt_bboxes_ignore = ann['bboxes_ignore'] @@ -223,10 +225,8 @@ def __getitem__(self, idx): scale_factor, flip) if self.with_mask: - gt_mask_polys, gt_poly_lens, num_polys_per_mask = \ - self.mask_transform( - ann['mask_polys'], ann['poly_lens'], - img_info['height'], img_info['width'], flip) + gt_masks = self.mask_transform(ann['masks'], pad_shape, + scale_factor, flip) ori_shape = (img_info['height'], img_info['width'], 3) img_meta = dict( @@ -247,10 +247,7 @@ def __getitem__(self, idx): if self.with_crowd: data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) if self.with_mask: - data['gt_masks'] = dict( - polys=DC(gt_mask_polys, cpu_only=True), - poly_lens=DC(gt_poly_lens, cpu_only=True), - polys_per_mask=DC(num_polys_per_mask, cpu_only=True)) + data['gt_masks'] = DC(gt_masks, cpu_only=True) return data def prepare_test_img(self, idx): diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index 6cdba4e972e..d9d51c7b28b 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -10,7 +10,8 @@ class ImageTransform(object): - """Preprocess an image + """Preprocess an image. + 1. rescale the image to expected size 2. normalize the image 3. flip the image (if needed) @@ -59,7 +60,8 @@ def bbox_flip(bboxes, img_shape): class BboxTransform(object): - """Preprocess gt bboxes + """Preprocess gt bboxes. + 1. rescale bboxes according to image size 2. flip bboxes (if needed) 3. 
pad the first dimension to `max_num_gts` @@ -84,17 +86,12 @@ def __call__(self, bboxes, img_shape, scale_factor, flip=False): class PolyMaskTransform(object): + """Preprocess polygons.""" def __init__(self): pass def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): - """ - Args: - gt_mask_polys(list): a list of masks, each mask is a list of polys, - each poly is a list of numbers - gt_poly_lens(list): a list of int, indicating the size of each poly - """ if flip: gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w) num_polys_per_mask = np.array( @@ -108,6 +105,28 @@ def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): return gt_mask_polys, gt_poly_lens, num_polys_per_mask +class MaskTransform(object): + """Preprocess masks. + + 1. resize masks to expected size and stack to a single array + 2. flip the masks (if needed) + 3. pad the masks (if needed) + """ + + def __call__(self, masks, pad_shape, scale_factor, flip=False): + masks = [ + mmcv.imrescale(mask, scale_factor, interpolation='nearest') + for mask in masks + ] + if flip: + masks = [mask[:, ::-1] for mask in masks] + padded_masks = [ + mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks + ] + padded_masks = np.stack(padded_masks, axis=0) + return padded_masks + + class Numpy2Tensor(object): def __init__(self): diff --git a/mmdet/models/detectors/test_mixins.py b/mmdet/models/detectors/test_mixins.py index 2fd3b18d093..77ba244f1a3 100644 --- a/mmdet/models/detectors/test_mixins.py +++ b/mmdet/models/detectors/test_mixins.py @@ -108,8 +108,8 @@ def simple_test_mask(self, x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head(mask_feats) segm_result = self.mask_head.get_seg_masks( - mask_pred, det_bboxes, det_labels, self.test_cfg.rcnn, - ori_shape) + mask_pred, _bboxes, det_labels, self.test_cfg.rcnn, ori_shape, + scale_factor, rescale) return segm_result def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py index 4bcb1855a95..c0e81eec40b 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/two_stage.py @@ -4,7 +4,7 @@ from .base import BaseDetector from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin from .. 
import builder -from mmdet.core import bbox2roi, bbox2result, split_combined_polys, multi_apply +from mmdet.core import bbox2roi, bbox2result, multi_apply class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, @@ -124,9 +124,8 @@ def forward_train(self, losses.update(loss_bbox) if self.with_mask: - gt_polys = split_combined_polys(**gt_masks) mask_targets = self.mask_head.get_mask_target( - pos_proposals, pos_assigned_gt_inds, gt_polys, img_meta, + pos_proposals, pos_assigned_gt_inds, gt_masks, self.train_cfg.rcnn) pos_rois = bbox2roi(pos_proposals) mask_feats = self.mask_roi_extractor( diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py index d90188ac772..ba46bea77e1 100644 --- a/mmdet/models/mask_heads/fcn_mask_head.py +++ b/mmdet/models/mask_heads/fcn_mask_head.py @@ -87,9 +87,9 @@ def forward(self, x): return mask_pred def get_mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks, - img_meta, rcnn_train_cfg): + rcnn_train_cfg): mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds, - gt_masks, img_meta, rcnn_train_cfg) + gt_masks, rcnn_train_cfg) return mask_targets def loss(self, mask_pred, mask_targets, labels): @@ -99,8 +99,9 @@ def loss(self, mask_pred, mask_targets, labels): return loss def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, - ori_shape): - """Get segmentation masks from mask_pred and bboxes + ori_shape, scale_factor, rescale): + """Get segmentation masks from mask_pred and bboxes. + Args: mask_pred (Tensor or ndarray): shape (n, #class+1, h, w). For single-scale testing, mask_pred is the direct output of @@ -111,6 +112,7 @@ def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, img_shape (Tensor): shape (3, ) rcnn_test_cfg (dict): rcnn testing config ori_shape: original image size + Returns: list[list]: encoded masks """ @@ -119,65 +121,34 @@ def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, assert isinstance(mask_pred, np.ndarray) cls_segms = [[] for _ in range(self.num_classes - 1)] - mask_size = mask_pred.shape[-1] bboxes = det_bboxes.cpu().numpy()[:, :4] labels = det_labels.cpu().numpy() + 1 - img_h = ori_shape[0] - img_w = ori_shape[1] - scale = (mask_size + 2.0) / mask_size - bboxes = np.round(self._bbox_scaling(bboxes, scale)).astype(np.int32) - padded_mask = np.zeros( - (mask_size + 2, mask_size + 2), dtype=np.float32) + if rescale: + img_h, img_w = ori_shape[:2] + else: + img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32) + img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32) + scale_factor = 1.0 for i in range(bboxes.shape[0]): - bbox = bboxes[i, :].astype(int) + bbox = (bboxes[i, :] / scale_factor).astype(np.int32) label = labels[i] - w = bbox[2] - bbox[0] + 1 - h = bbox[3] - bbox[1] + 1 - w = max(w, 1) - h = max(h, 1) + w = max(bbox[2] - bbox[0] + 1, 1) + h = max(bbox[3] - bbox[1] + 1, 1) if not self.class_agnostic: - padded_mask[1:-1, 1:-1] = mask_pred[i, label, :, :] + mask_pred_ = mask_pred[i, label, :, :] else: - padded_mask[1:-1, 1:-1] = mask_pred[i, 0, :, :] - mask = mmcv.imresize(padded_mask, (w, h)) - mask = np.array( - mask > rcnn_test_cfg.mask_thr_binary, dtype=np.uint8) + mask_pred_ = mask_pred[i, 0, :, :] im_mask = np.zeros((img_h, img_w), dtype=np.uint8) - x0 = max(bbox[0], 0) - x1 = min(bbox[2] + 1, img_w) - y0 = max(bbox[1], 0) - y1 = min(bbox[3] + 1, img_h) - - im_mask[y0:y1, x0:x1] = mask[(y0 - bbox[1]):(y1 - bbox[1]), ( - x0 - bbox[0]):(x1 - bbox[0])] - + bbox_mask = 
mmcv.imresize(mask_pred_, (w, h)) + bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype( + np.uint8) + im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask rle = mask_util.encode( np.array(im_mask[:, :, np.newaxis], order='F'))[0] cls_segms[label - 1].append(rle) - return cls_segms - def _bbox_scaling(self, bboxes, scale, clip_shape=None): - """Scaling bboxes and clip the boundary(optional) - Args: - bboxes(ndarray): shape(..., 4) - scale(float): scaling factor - clip(None or tuple): (h, w) - Returns: - ndarray: scaled bboxes - """ - if float(scale) == 1.0: - scaled_bboxes = bboxes.copy() - else: - w = bboxes[..., 2] - bboxes[..., 0] + 1 - h = bboxes[..., 3] - bboxes[..., 1] + 1 - dw = (w * (scale - 1)) * 0.5 - dh = (h * (scale - 1)) * 0.5 - scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) - if clip_shape is not None: - return bbox_clip(scaled_bboxes, clip_shape) - else: - return scaled_bboxes + return cls_segms diff --git a/tools/train.py b/tools/train.py index 03c87cce474..88890e6607f 100644 --- a/tools/train.py +++ b/tools/train.py @@ -91,6 +91,7 @@ def main(): cfg.gpus = args.gpus # add mmdet version to checkpoint as meta data cfg.checkpoint_config.meta = dict(mmdet_version=__version__) + cfg.checkpoint_config.config = cfg.text logger = get_logger(cfg.log_level) From b54931660c35e4f38482e3559dccf5f75b356a9b Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Thu, 4 Oct 2018 21:05:39 +0800 Subject: [PATCH 36/81] adapt to mmcv api changes --- mmdet/core/eval/eval_hooks.py | 2 +- mmdet/core/utils/dist_utils.py | 2 +- mmdet/core/utils/hooks.py | 2 +- mmdet/core/utils/misc.py | 3 ++- mmdet/datasets/loader/build_loader.py | 2 +- mmdet/datasets/transforms.py | 2 +- mmdet/models/backbones/resnet.py | 2 +- mmdet/models/builder.py | 4 ++-- tools/test.py | 2 +- tools/train.py | 2 +- 10 files changed, 12 insertions(+), 11 deletions(-) diff --git a/mmdet/core/eval/eval_hooks.py b/mmdet/core/eval/eval_hooks.py index 3439ee0f37f..c02aec975c7 100644 --- a/mmdet/core/eval/eval_hooks.py +++ b/mmdet/core/eval/eval_hooks.py @@ -6,7 +6,7 @@ import mmcv import numpy as np import torch -from mmcv.torchpack import Hook, obj_from_dict +from mmcv.runner import Hook, obj_from_dict from pycocotools.cocoeval import COCOeval from torch.utils.data import Dataset diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index 07b1592e738..fc102c60d4e 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -6,7 +6,7 @@ import torch.distributed as dist from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors from torch.nn.utils import clip_grad -from mmcv.torchpack import Hook, OptimizerHook +from mmcv.runner import Hook, OptimizerHook def init_dist(launcher, backend='nccl', **kwargs): diff --git a/mmdet/core/utils/hooks.py b/mmdet/core/utils/hooks.py index 72eb3438efa..7186ad75ccc 100644 --- a/mmdet/core/utils/hooks.py +++ b/mmdet/core/utils/hooks.py @@ -1,5 +1,5 @@ import torch -from mmcv.torchpack import Hook +from mmcv.runner import Hook class EmptyCacheHook(Hook): diff --git a/mmdet/core/utils/misc.py b/mmdet/core/utils/misc.py index fd8211ef68d..262f168e646 100644 --- a/mmdet/core/utils/misc.py +++ b/mmdet/core/utils/misc.py @@ -12,7 +12,8 @@ def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): imgs = [] for img_id in range(num_imgs): img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) - img = mmcv.imdenorm(img, mean, std, to_bgr=to_rgb).astype(np.uint8) + img = mmcv.imdenormalize( + img, mean, std, 
to_bgr=to_rgb).astype(np.uint8) imgs.append(np.ascontiguousarray(img)) return imgs diff --git a/mmdet/datasets/loader/build_loader.py b/mmdet/datasets/loader/build_loader.py index 34fe2d2f0b0..70f439926a9 100644 --- a/mmdet/datasets/loader/build_loader.py +++ b/mmdet/datasets/loader/build_loader.py @@ -1,6 +1,6 @@ from functools import partial -from mmcv.torchpack import get_dist_info +from mmcv.runner import get_dist_info from torch.utils.data import DataLoader from .collate import collate diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index 6cdba4e972e..19bfe05e116 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -31,7 +31,7 @@ def __init__(self, def __call__(self, img, scale, flip=False): img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) img_shape = img.shape - img = mmcv.imnorm(img, self.mean, self.std, self.to_rgb) + img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) if flip: img = mmcv.imflip(img) if self.size_divisor is not None: diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py index fbb3f09c2de..458de92095e 100644 --- a/mmdet/models/backbones/resnet.py +++ b/mmdet/models/backbones/resnet.py @@ -3,7 +3,7 @@ import torch.nn as nn import torch.utils.checkpoint as cp -from mmcv.torchpack import load_checkpoint +from mmcv.runner import load_checkpoint def conv3x3(in_planes, out_planes, stride=1, dilation=1): diff --git a/mmdet/models/builder.py b/mmdet/models/builder.py index 4bbc94aa41b..bdf0ac3d16f 100644 --- a/mmdet/models/builder.py +++ b/mmdet/models/builder.py @@ -1,4 +1,4 @@ -from mmcv import torchpack as tp +from mmcv.runner import obj_from_dict from torch import nn from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads, @@ -11,7 +11,7 @@ def _build_module(cfg, parrent=None, default_args=None): - return cfg if isinstance(cfg, nn.Module) else tp.obj_from_dict( + return cfg if isinstance(cfg, nn.Module) else obj_from_dict( cfg, parrent, default_args) diff --git a/tools/test.py b/tools/test.py index 4c87f4eeee7..f1fb9cda91e 100644 --- a/tools/test.py +++ b/tools/test.py @@ -2,7 +2,7 @@ import torch import mmcv -from mmcv.torchpack import load_checkpoint, parallel_test, obj_from_dict +from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict from mmdet import datasets from mmdet.core import scatter, MMDataParallel, results2json, coco_eval diff --git a/tools/train.py b/tools/train.py index 03c87cce474..41b66f354d2 100644 --- a/tools/train.py +++ b/tools/train.py @@ -7,7 +7,7 @@ import numpy as np import torch from mmcv import Config -from mmcv.torchpack import Runner, obj_from_dict +from mmcv.runner import Runner, obj_from_dict from mmdet import datasets, __version__ from mmdet.core import (init_dist, DistOptimizerHook, DistSamplerSeedHook, From 80d95e7674a3d23d4c9a2f64326a6cf86f2e7739 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Fri, 5 Oct 2018 14:06:47 +0800 Subject: [PATCH 37/81] rename resnet style from fb/msra to pytorch/caffe --- mmdet/models/backbones/resnet.py | 30 +++++++++++++++++----------- tools/configs/r50_fpn_frcnn_1x.py | 2 +- tools/configs/r50_fpn_maskrcnn_1x.py | 2 +- tools/configs/r50_fpn_rpn_1x.py | 2 +- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py index 458de92095e..371f4f59fec 100644 --- a/mmdet/models/backbones/resnet.py +++ b/mmdet/models/backbones/resnet.py @@ -27,7 +27,7 @@ def __init__(self, stride=1, dilation=1, downsample=None, 
- style='fb'): + style='pytorch'): super(BasicBlock, self).__init__() self.conv1 = conv3x3(inplanes, planes, stride, dilation) self.bn1 = nn.BatchNorm2d(planes) @@ -66,15 +66,16 @@ def __init__(self, stride=1, dilation=1, downsample=None, - style='fb', + style='pytorch', with_cp=False): - """Bottleneck block - if style is "fb", the stride-two layer is the 3x3 conv layer, - if style is "msra", the stride-two layer is the first 1x1 conv layer + """Bottleneck block. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, + if it is "caffe", the stride-two layer is the first 1x1 conv layer. """ super(Bottleneck, self).__init__() - assert style in ['fb', 'msra'] - if style == 'fb': + assert style in ['pytorch', 'caffe'] + if style == 'pytorch': conv1_stride = 1 conv2_stride = stride else: @@ -141,7 +142,7 @@ def make_res_layer(block, blocks, stride=1, dilation=1, - style='fb', + style='pytorch', with_cp=False): downsample = None if stride != 1 or inplanes != planes * block.expansion: @@ -175,7 +176,12 @@ def make_res_layer(block, class ResHead(nn.Module): - def __init__(self, block, num_blocks, stride=2, dilation=1, style='fb'): + def __init__(self, + block, + num_blocks, + stride=2, + dilation=1, + style='pytorch'): self.layer4 = make_res_layer( block, 1024, @@ -198,7 +204,7 @@ def __init__(self, dilations=(1, 1, 1, 1), out_indices=(0, 1, 2, 3), frozen_stages=-1, - style='fb', + style='pytorch', sync_bn=False, with_cp=False, strict_frozen=False): @@ -237,7 +243,7 @@ def __init__(self, style=self.style, with_cp=with_cp) self.inplanes = planes * block.expansion - setattr(self, layer_name, res_layer) + self.add_module(layer_name, res_layer) self.res_layers.append(layer_name) self.feat_dim = block.expansion * 64 * 2**(len(layers) - 1) self.with_cp = with_cp @@ -314,7 +320,7 @@ def resnet(depth, dilations=(1, 1, 1, 1), out_indices=(2, ), frozen_stages=-1, - style='fb', + style='pytorch', sync_bn=False, with_cp=False, strict_frozen=False): diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index e15cbdbfec5..82082df0087 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -8,7 +8,7 @@ num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, - style='fb'), + style='pytorch'), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index 5ecdaf44da9..ad618573299 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -8,7 +8,7 @@ num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, - style='fb'), + style='pytorch'), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], diff --git a/tools/configs/r50_fpn_rpn_1x.py b/tools/configs/r50_fpn_rpn_1x.py index 91f5f08e887..dfed976a249 100644 --- a/tools/configs/r50_fpn_rpn_1x.py +++ b/tools/configs/r50_fpn_rpn_1x.py @@ -8,7 +8,7 @@ num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, - style='fb'), + style='pytorch'), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], From a88bba42c32cb36c566d264d65ea2728834878a1 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Fri, 5 Oct 2018 15:51:47 +0800 Subject: [PATCH 38/81] some renaming --- mmdet/core/bbox_ops/__init__.py | 11 ++--- mmdet/core/bbox_ops/bbox_target.py | 53 ++++++++------------- mmdet/core/bbox_ops/sampling.py | 52 +++++++++++--------- mmdet/core/bbox_ops/transforms.py | 33 ++++++++----- mmdet/core/eval/eval_hooks.py | 1 - mmdet/core/eval/mean_ap.py | 52 
++++++++++---------- mmdet/core/losses/losses.py | 4 +- mmdet/core/rpn_ops/anchor_target.py | 6 +-- mmdet/models/bbox_heads/bbox_head.py | 7 ++- mmdet/models/roi_extractors/single_level.py | 20 ++++---- mmdet/models/rpn_heads/rpn_head.py | 7 ++- 11 files changed, 125 insertions(+), 121 deletions(-) diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox_ops/__init__.py index 885dab67c5a..22163f75ef5 100644 --- a/mmdet/core/bbox_ops/__init__.py +++ b/mmdet/core/bbox_ops/__init__.py @@ -1,15 +1,14 @@ from .geometry import bbox_overlaps -from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps, +from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps, bbox_sampling, sample_positives, sample_negatives) -from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip, - bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox, - bbox2result) +from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, + bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) from .bbox_target import bbox_target __all__ = [ 'bbox_overlaps', 'random_choice', 'bbox_assign', - 'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives', - 'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip', + 'bbox_assign_wrt_overlaps', 'bbox_sampling', 'sample_positives', + 'sample_negatives', 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target' ] diff --git a/mmdet/core/bbox_ops/bbox_target.py b/mmdet/core/bbox_ops/bbox_target.py index ce1f885e184..2e205c3850c 100644 --- a/mmdet/core/bbox_ops/bbox_target.py +++ b/mmdet/core/bbox_ops/bbox_target.py @@ -1,8 +1,7 @@ -import mmcv import torch -from .geometry import bbox_overlaps -from .transforms import bbox_transform, bbox_transform_inv +from .transforms import bbox2delta +from ..utils import multi_apply def bbox_target(pos_proposals_list, @@ -13,33 +12,23 @@ def bbox_target(pos_proposals_list, reg_num_classes=1, target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], - return_list=False): - img_per_gpu = len(pos_proposals_list) - all_labels = [] - all_label_weights = [] - all_bbox_targets = [] - all_bbox_weights = [] - for img_id in range(img_per_gpu): - pos_proposals = pos_proposals_list[img_id] - neg_proposals = neg_proposals_list[img_id] - pos_gt_bboxes = pos_gt_bboxes_list[img_id] - pos_gt_labels = pos_gt_labels_list[img_id] - debug_img = debug_imgs[img_id] if cfg.debug else None - labels, label_weights, bbox_targets, bbox_weights = proposal_target_single( - pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, - reg_num_classes, cfg, target_means, target_stds) - all_labels.append(labels) - all_label_weights.append(label_weights) - all_bbox_targets.append(bbox_targets) - all_bbox_weights.append(bbox_weights) + concat=True): + labels, label_weights, bbox_targets, bbox_weights = multi_apply( + proposal_target_single, + pos_proposals_list, + neg_proposals_list, + pos_gt_bboxes_list, + pos_gt_labels_list, + cfg=cfg, + reg_num_classes=reg_num_classes, + target_means=target_means, + target_stds=target_stds) - if return_list: - return all_labels, all_label_weights, all_bbox_targets, all_bbox_weights - - labels = torch.cat(all_labels, 0) - label_weights = torch.cat(all_label_weights, 0) - bbox_targets = torch.cat(all_bbox_targets, 0) - bbox_weights = torch.cat(all_bbox_weights, 0) + if concat: + labels = torch.cat(labels, 0) + label_weights = torch.cat(label_weights, 0) + bbox_targets = torch.cat(bbox_targets, 0) 
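+        # Illustrative shapes only: with concat=True the per-image targets
+        # are merged, e.g. 2 images with 512 sampled RoIs each give `labels`
+        # of shape (1024,) and `bbox_targets` of shape (1024, 4).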
+ bbox_weights = torch.cat(bbox_weights, 0) return labels, label_weights, bbox_targets, bbox_weights @@ -47,8 +36,8 @@ def proposal_target_single(pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, - reg_num_classes, cfg, + reg_num_classes=1, target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0]): num_pos = pos_proposals.size(0) @@ -62,8 +51,8 @@ def proposal_target_single(pos_proposals, labels[:num_pos] = pos_gt_labels pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight label_weights[:num_pos] = pos_weight - pos_bbox_targets = bbox_transform(pos_proposals, pos_gt_bboxes, - target_means, target_stds) + pos_bbox_targets = bbox2delta(pos_proposals, pos_gt_bboxes, + target_means, target_stds) bbox_targets[:num_pos, :] = pos_bbox_targets bbox_weights[:num_pos, :] = 1 if num_neg > 0: diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox_ops/sampling.py index bcee761e10e..28043182acf 100644 --- a/mmdet/core/bbox_ops/sampling.py +++ b/mmdet/core/bbox_ops/sampling.py @@ -20,30 +20,36 @@ def random_choice(gallery, num): def bbox_assign(proposals, gt_bboxes, - gt_crowd_bboxes=None, + gt_bboxes_ignore=None, gt_labels=None, pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=.0, crowd_thr=-1): - """Assign a corresponding gt bbox or background to each proposal/anchor - This function assign a gt bbox to every proposal, each proposals will be - assigned with -1, 0, or a positive number. -1 means don't care, 0 means - negative sample, positive number is the index (1-based) of assigned gt. - If gt_crowd_bboxes is not None, proposals which have iof(intersection over foreground) - with crowd bboxes over crowd_thr will be ignored + """Assign a corresponding gt bbox or background to each proposal/anchor. + + Each proposals will be assigned with `-1`, `0`, or a positive integer. + + - -1: don't care + - 0: negative sample, no assigned gt + - positive integer: positive sample, index (1-based) of assigned gt + + If `gt_bboxes_ignore` is specified, bboxes which have iof (intersection + over foreground) with `gt_bboxes_ignore` above `crowd_thr` will be ignored. + Args: - proposals(Tensor): proposals or RPN anchors, shape (n, 4) - gt_bboxes(Tensor): shape (k, 4) - gt_crowd_bboxes(Tensor): shape(m, 4) - gt_labels(Tensor, optional): shape (k, ) - pos_iou_thr(float): iou threshold for positive bboxes - neg_iou_thr(float or tuple): iou threshold for negative bboxes - min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, - for RPN, it is usually set as 0, for Fast R-CNN, - it is usually set as pos_iou_thr - crowd_thr: ignore proposals which have iof(intersection over foreground) with - crowd bboxes over crowd_thr + proposals (Tensor): Proposals or RPN anchors, shape (n, 4). + gt_bboxes (Tensor): Ground truth bboxes, shape (k, 4). + gt_bboxes_ignore (Tensor, optional): shape(m, 4). + gt_labels (Tensor, optional): shape (k, ). + pos_iou_thr (float): IoU threshold for positive bboxes. + neg_iou_thr (float or tuple): IoU threshold for negative bboxes. + min_pos_iou (float): Minimum iou for a bbox to be considered as a + positive bbox. For RPN, it is usually set as 0.3, for Fast R-CNN, + it is usually set as pos_iou_thr + crowd_thr (float): IoF threshold for ignoring bboxes. Negative value + for not ignoring any bboxes. 
+ Returns: tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) """ @@ -54,20 +60,20 @@ def bbox_assign(proposals, raise ValueError('No gt bbox or proposals') # ignore proposals according to crowd bboxes - if (crowd_thr > 0) and (gt_crowd_bboxes is - not None) and (gt_crowd_bboxes.numel() > 0): - crowd_overlaps = bbox_overlaps(proposals, gt_crowd_bboxes, mode='iof') + if (crowd_thr > 0) and (gt_bboxes_ignore is + not None) and (gt_bboxes_ignore.numel() > 0): + crowd_overlaps = bbox_overlaps(proposals, gt_bboxes_ignore, mode='iof') crowd_max_overlaps, _ = crowd_overlaps.max(dim=1) crowd_bboxes_inds = torch.nonzero( crowd_max_overlaps > crowd_thr).long() if crowd_bboxes_inds.numel() > 0: overlaps[crowd_bboxes_inds, :] = -1 - return bbox_assign_via_overlaps(overlaps, gt_labels, pos_iou_thr, + return bbox_assign_wrt_overlaps(overlaps, gt_labels, pos_iou_thr, neg_iou_thr, min_pos_iou) -def bbox_assign_via_overlaps(overlaps, +def bbox_assign_wrt_overlaps(overlaps, gt_labels=None, pos_iou_thr=0.5, neg_iou_thr=0.5, diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox_ops/transforms.py index ca45d157dce..0d8f6f44f20 100644 --- a/mmdet/core/bbox_ops/transforms.py +++ b/mmdet/core/bbox_ops/transforms.py @@ -3,7 +3,7 @@ import torch -def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): +def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): assert proposals.size() == gt.size() proposals = proposals.float() @@ -31,12 +31,12 @@ def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): return deltas -def bbox_transform_inv(rois, - deltas, - means=[0, 0, 0, 0], - stds=[1, 1, 1, 1], - max_shape=None, - wh_ratio_clip=16 / 1000): +def delta2bbox(rois, + deltas, + means=[0, 0, 0, 0], + stds=[1, 1, 1, 1], + max_shape=None, + wh_ratio_clip=16 / 1000): means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4) stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4) denorm_deltas = deltas * stds + means @@ -69,10 +69,14 @@ def bbox_transform_inv(rois, def bbox_flip(bboxes, img_shape): - """Flip bboxes horizontally + """Flip bboxes horizontally. + Args: - bboxes(Tensor): shape (..., 4*k) - img_shape(Tensor): image shape + bboxes(Tensor or ndarray): Shape (..., 4*k) + img_shape(tuple): Image shape. + + Returns: + Same type as `bboxes`: Flipped bboxes. """ if isinstance(bboxes, torch.Tensor): assert bboxes.shape[-1] % 4 == 0 @@ -101,8 +105,11 @@ def bbox_mapping_back(bboxes, img_shape, scale_factor, flip): def bbox2roi(bbox_list): """Convert a list of bboxes to roi format. + Args: - bbox_list (Tensor): a list of bboxes corresponding to a list of images + bbox_list (list[Tensor]): a list of bboxes corresponding to a batch + of images. + Returns: Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2] """ @@ -129,11 +136,13 @@ def roi2bbox(rois): def bbox2result(bboxes, labels, num_classes): - """Convert detection results to a list of numpy arrays + """Convert detection results to a list of numpy arrays. 
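+    For example (hypothetical numbers), a detector trained on 80 classes
+    (num_classes=81 including background) returns a list of 80 arrays, where
+    the i-th array has shape (k_i, 5) with rows [x1, y1, x2, y2, score] for
+    class i.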
+ Args: bboxes (Tensor): shape (n, 5) labels (Tensor): shape (n, ) num_classes (int): class number, including background class + Returns: list(ndarray): bbox results of each class """ diff --git a/mmdet/core/eval/eval_hooks.py b/mmdet/core/eval/eval_hooks.py index c02aec975c7..870830ef396 100644 --- a/mmdet/core/eval/eval_hooks.py +++ b/mmdet/core/eval/eval_hooks.py @@ -11,7 +11,6 @@ from torch.utils.data import Dataset from .coco_utils import results2json, fast_eval_recall -from .recall import eval_recalls from ..parallel import scatter from mmdet import datasets from mmdet.datasets.loader import collate diff --git a/mmdet/core/eval/mean_ap.py b/mmdet/core/eval/mean_ap.py index 9a33f764040..5f47c1368af 100644 --- a/mmdet/core/eval/mean_ap.py +++ b/mmdet/core/eval/mean_ap.py @@ -9,9 +9,9 @@ def average_precision(recalls, precisions, mode='area'): """Calculate average precision (for single or multiple scales). Args: - recalls(ndarray): shape (num_scales, num_dets) or (num_dets, ) - precisions(ndarray): shape (num_scales, num_dets) or (num_dets, ) - mode(str): 'area' or '11points', 'area' means calculating the area + recalls (ndarray): shape (num_scales, num_dets) or (num_dets, ) + precisions (ndarray): shape (num_scales, num_dets) or (num_dets, ) + mode (str): 'area' or '11points', 'area' means calculating the area under precision-recall curve, '11points' means calculating the average precision of recalls at [0, 0.1, ..., 1] @@ -60,11 +60,11 @@ def tpfp_imagenet(det_bboxes, """Check if detected bboxes are true positive or false positive. Args: - det_bbox(ndarray): the detected bbox - gt_bboxes(ndarray): ground truth bboxes of this image - gt_ignore(ndarray): indicate if gts are ignored for evaluation or not - default_iou_thr(float): the iou thresholds for medium and large bboxes - area_ranges(list or None): gt bbox area ranges + det_bbox (ndarray): the detected bbox + gt_bboxes (ndarray): ground truth bboxes of this image + gt_ignore (ndarray): indicate if gts are ignored for evaluation or not + default_iou_thr (float): the iou thresholds for medium and large bboxes + area_ranges (list or None): gt bbox area ranges Returns: tuple: two arrays (tp, fp) whose elements are 0 and 1 @@ -115,10 +115,10 @@ def tpfp_imagenet(det_bboxes, max_iou = ious[i, j] matched_gt = j # there are 4 cases for a det bbox: - # 1. this det bbox matches a gt, tp = 1, fp = 0 - # 2. this det bbox matches an ignored gt, tp = 0, fp = 0 - # 3. this det bbox matches no gt and within area range, tp = 0, fp = 1 - # 4. this det bbox matches no gt but is beyond area range, tp = 0, fp = 0 + # 1. it matches a gt, tp = 1, fp = 0 + # 2. it matches an ignored gt, tp = 0, fp = 0 + # 3. it matches no gt and within area range, tp = 0, fp = 1 + # 4. it matches no gt but is beyond area range, tp = 0, fp = 0 if matched_gt >= 0: gt_covered[matched_gt] = 1 if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]): @@ -137,10 +137,10 @@ def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=None): """Check if detected bboxes are true positive or false positive. 
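+    Roughly, detections are matched greedily in descending score order; each
+    gt can be claimed by at most one detection, and further detections that
+    overlap an already matched gt count as false positives.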
Args: - det_bbox(ndarray): the detected bbox - gt_bboxes(ndarray): ground truth bboxes of this image - gt_ignore(ndarray): indicate if gts are ignored for evaluation or not - iou_thr(float): the iou thresholds + det_bbox (ndarray): the detected bbox + gt_bboxes (ndarray): ground truth bboxes of this image + gt_ignore (ndarray): indicate if gts are ignored for evaluation or not + iou_thr (float): the iou thresholds Returns: tuple: (tp, fp), two arrays whose elements are 0 and 1 @@ -227,15 +227,16 @@ def eval_map(det_results, """Evaluate mAP of a dataset. Args: - det_results(list): a list of list, [[cls1_det, cls2_det, ...], ...] - gt_bboxes(list): ground truth bboxes of each image, a list of K*4 array - gt_labels(list): ground truth labels of each image, a list of K array - gt_ignore(list): gt ignore indicators of each image, a list of K array - scale_ranges(list, optional): [(min1, max1), (min2, max2), ...] - iou_thr(float): IoU threshold - dataset(None or str): dataset name, there are minor differences in + det_results (list): a list of list, [[cls1_det, cls2_det, ...], ...] + gt_bboxes (list): ground truth bboxes of each image, a list of K*4 + array. + gt_labels (list): ground truth labels of each image, a list of K array + gt_ignore (list): gt ignore indicators of each image, a list of K array + scale_ranges (list, optional): [(min1, max1), (min2, max2), ...] + iou_thr (float): IoU threshold + dataset (None or str): dataset name, there are minor differences in metrics for different datsets, e.g. "voc07", "imagenet_det", etc. - print_summary(bool): whether to print the mAP summary + print_summary (bool): whether to print the mAP summary Returns: tuple: (mAP, [dict, dict, ...]) @@ -265,7 +266,8 @@ def eval_map(det_results, area_ranges) for j in range(len(cls_dets)) ] tp, fp = tuple(zip(*tpfp)) - # calculate gt number of each scale, gts ignored or beyond scale are not counted + # calculate gt number of each scale, gts ignored or beyond scale + # are not counted num_gts = np.zeros(num_scales, dtype=int) for j, bbox in enumerate(cls_gts): if area_ranges is None: diff --git a/mmdet/core/losses/losses.py b/mmdet/core/losses/losses.py index 4f183e13d8a..d0e642f807c 100644 --- a/mmdet/core/losses/losses.py +++ b/mmdet/core/losses/losses.py @@ -30,13 +30,13 @@ def sigmoid_focal_loss(pred, weight, gamma=2.0, alpha=0.25, - size_average=True): + reduction='elementwise_mean'): pred_sigmoid = pred.sigmoid() pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) weight = (alpha * target + (1 - alpha) * (1 - target)) * weight weight = weight * pt.pow(gamma) return F.binary_cross_entropy_with_logits( - pred, target, weight, size_average=size_average) + pred, target, weight, size_average=reduction) def weighted_sigmoid_focal_loss(pred, diff --git a/mmdet/core/rpn_ops/anchor_target.py b/mmdet/core/rpn_ops/anchor_target.py index 3cf651b5c46..f449507499e 100644 --- a/mmdet/core/rpn_ops/anchor_target.py +++ b/mmdet/core/rpn_ops/anchor_target.py @@ -1,6 +1,6 @@ import torch -from ..bbox_ops import bbox_assign, bbox_transform, bbox_sampling +from ..bbox_ops import bbox_assign, bbox2delta, bbox_sampling from ..utils import multi_apply @@ -99,8 +99,8 @@ def anchor_target_single(flat_anchors, valid_flags, gt_bboxes, img_meta, if len(pos_inds) > 0: pos_anchors = anchors[pos_inds, :] pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :] - pos_bbox_targets = bbox_transform(pos_anchors, pos_gt_bbox, - target_means, target_stds) + pos_bbox_targets = bbox2delta(pos_anchors, pos_gt_bbox, target_means, + 
target_stds) bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 labels[pos_inds] = 1 diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py index 941903aba54..67dba039592 100644 --- a/mmdet/models/bbox_heads/bbox_head.py +++ b/mmdet/models/bbox_heads/bbox_head.py @@ -1,7 +1,7 @@ import torch.nn as nn import torch.nn.functional as F -from mmdet.core import (bbox_transform_inv, multiclass_nms, bbox_target, +from mmdet.core import (delta2bbox, multiclass_nms, bbox_target, weighted_cross_entropy, weighted_smoothl1, accuracy) @@ -101,9 +101,8 @@ def get_det_bboxes(self, scores = F.softmax(cls_score, dim=1) if cls_score is not None else None if bbox_pred is not None: - bboxes = bbox_transform_inv(rois[:, 1:], bbox_pred, - self.target_means, self.target_stds, - img_shape) + bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means, + self.target_stds, img_shape) else: bboxes = rois[:, 1:] # TODO: add clip here diff --git a/mmdet/models/roi_extractors/single_level.py b/mmdet/models/roi_extractors/single_level.py index b850d6ab680..6aa29e598e5 100644 --- a/mmdet/models/roi_extractors/single_level.py +++ b/mmdet/models/roi_extractors/single_level.py @@ -41,10 +41,10 @@ def build_roi_layers(self, layer_cfg, featmap_strides): def map_roi_levels(self, rois, num_levels): """Map rois to corresponding feature levels (0-based) by scales. - scale < finest_scale: level 0 - finest_scale <= scale < finest_scale * 2: level 1 - finest_scale * 2 <= scale < finest_scale * 4: level 2 - scale >= finest_scale * 4: level 3 + - scale < finest_scale: level 0 + - finest_scale <= scale < finest_scale * 2: level 1 + - finest_scale * 2 <= scale < finest_scale * 4: level 2 + - scale >= finest_scale * 4: level 3 """ scale = torch.sqrt( (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) @@ -52,12 +52,13 @@ def map_roi_levels(self, rois, num_levels): target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() return target_lvls - def sample_proposals(self, proposals, gt_bboxes, gt_crowds, gt_labels, - cfg): + def sample_proposals(self, proposals, gt_bboxes, gt_bboxes_ignore, + gt_labels, cfg): proposals = proposals[:, :4] assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ - bbox_assign(proposals, gt_bboxes, gt_crowds, gt_labels, - cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou, cfg.crowd_thr) + bbox_assign(proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, + cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou, + cfg.crowd_thr) if cfg.add_gt_as_proposals: proposals = torch.cat([gt_bboxes, proposals], dim=0) @@ -80,7 +81,8 @@ def sample_proposals(self, proposals, gt_bboxes, gt_crowds, gt_labels, pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] pos_gt_labels = assigned_labels[pos_inds] - return (pos_proposals, neg_proposals, pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) + return (pos_proposals, neg_proposals, pos_assigned_gt_inds, + pos_gt_bboxes, pos_gt_labels) def forward(self, feats, rois): """Extract roi features with the roi layer. 
If multiple feature levels diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py index 68a81833e09..e67d7ae973f 100644 --- a/mmdet/models/rpn_heads/rpn_head.py +++ b/mmdet/models/rpn_heads/rpn_head.py @@ -5,7 +5,7 @@ import torch.nn as nn import torch.nn.functional as F -from mmdet.core import (AnchorGenerator, anchor_target, bbox_transform_inv, +from mmdet.core import (AnchorGenerator, anchor_target, delta2bbox, multi_apply, weighted_cross_entropy, weighted_smoothl1, weighted_binary_cross_entropy) from mmdet.ops import nms @@ -225,9 +225,8 @@ def _get_proposals_single(self, rpn_cls_scores, rpn_bbox_preds, rpn_bbox_pred = rpn_bbox_pred[order, :] anchors = anchors[order, :] scores = scores[order] - proposals = bbox_transform_inv(anchors, rpn_bbox_pred, - self.target_means, self.target_stds, - img_shape) + proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means, + self.target_stds, img_shape) w = proposals[:, 2] - proposals[:, 0] + 1 h = proposals[:, 3] - proposals[:, 1] + 1 valid_inds = torch.nonzero((w >= cfg.min_bbox_size) & From d543084c03df443cc9487be7109b920f86cf79dc Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Fri, 5 Oct 2018 16:43:15 +0800 Subject: [PATCH 39/81] add with_* decorators for detector --- mmdet/models/detectors/base.py | 12 ++++++++++++ mmdet/models/detectors/rpn.py | 4 ++-- mmdet/models/detectors/two_stage.py | 14 +++++++------- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py index 93a05c8594e..d1b0fce1283 100644 --- a/mmdet/models/detectors/base.py +++ b/mmdet/models/detectors/base.py @@ -17,6 +17,18 @@ class BaseDetector(nn.Module): def __init__(self): super(BaseDetector, self).__init__() + @property + def with_neck(self): + return hasattr(self, 'neck') and self.neck is not None + + @property + def with_bbox(self): + return hasattr(self, 'bbox_head') and self.bbox_head is not None + + @property + def with_mask(self): + return hasattr(self, 'mask_head') and self.mask_head is not None + @abstractmethod def extract_feat(self, imgs): pass diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py index a291006fdd5..9d700fe3e3c 100644 --- a/mmdet/models/detectors/rpn.py +++ b/mmdet/models/detectors/rpn.py @@ -26,13 +26,13 @@ def __init__(self, def init_weights(self, pretrained=None): super(RPN, self).init_weights(pretrained) self.backbone.init_weights(pretrained=pretrained) - if self.neck is not None: + if self.with_neck: self.neck.init_weights() self.rpn_head.init_weights() def extract_feat(self, img): x = self.backbone(img) - if self.neck is not None: + if self.with_neck: x = self.neck(x) return x diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py index 4bcb1855a95..ad78a8254ca 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/two_stage.py @@ -25,23 +25,19 @@ def __init__(self, self.backbone = builder.build_backbone(backbone) if neck is not None: - self.with_neck = True self.neck = builder.build_neck(neck) else: raise NotImplementedError - self.with_rpn = True if rpn_head is not None else False - if self.with_rpn: + if rpn_head is not None: self.rpn_head = builder.build_rpn_head(rpn_head) - self.with_bbox = True if bbox_head is not None else False - if self.with_bbox: + if bbox_head is not None: self.bbox_roi_extractor = builder.build_roi_extractor( bbox_roi_extractor) self.bbox_head = builder.build_bbox_head(bbox_head) - self.with_mask = True if mask_head is not None else 
False - if self.with_mask: + if mask_head is not None: self.mask_roi_extractor = builder.build_roi_extractor( mask_roi_extractor) self.mask_head = builder.build_mask_head(mask_head) @@ -51,6 +47,10 @@ def __init__(self, self.init_weights(pretrained=pretrained) + @property + def with_rpn(self): + return hasattr(self, 'rpn_head') and self.rpn_head is not None + def init_weights(self, pretrained=None): super(TwoStageDetector, self).init_weights(pretrained) self.backbone.init_weights(pretrained=pretrained) From 1d5754fa84abee266f86d91d1bf1d0b668e7b257 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Fri, 5 Oct 2018 17:13:28 +0800 Subject: [PATCH 40/81] remove useless loss --- mmdet/core/losses/losses.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/mmdet/core/losses/losses.py b/mmdet/core/losses/losses.py index d0e642f807c..14b49f5cb90 100644 --- a/mmdet/core/losses/losses.py +++ b/mmdet/core/losses/losses.py @@ -36,7 +36,7 @@ def sigmoid_focal_loss(pred, weight = (alpha * target + (1 - alpha) * (1 - target)) * weight weight = weight * pt.pow(gamma) return F.binary_cross_entropy_with_logits( - pred, target, weight, size_average=reduction) + pred, target, weight, reduction=reduction) def weighted_sigmoid_focal_loss(pred, @@ -61,16 +61,6 @@ def mask_cross_entropy(pred, target, label): pred_slice, target, reduction='elementwise_mean')[None] -def weighted_mask_cross_entropy(pred, target, weight, label): - num_rois = pred.size()[0] - num_samples = torch.sum(weight > 0).float().item() + 1e-6 - assert num_samples >= 1 - inds = torch.arange(0, num_rois).long().cuda() - pred_slice = pred[inds, label].squeeze(1) - return F.binary_cross_entropy_with_logits( - pred_slice, target, weight, size_average=False)[None] / num_samples - - def smooth_l1_loss(pred, target, beta=1.0, reduction='elementwise_mean'): assert beta > 0 assert pred.size() == target.size() and target.numel() > 0 From 3570ecd3cc0ab2f703ca4661eed6033e81905491 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Fri, 5 Oct 2018 17:17:48 +0800 Subject: [PATCH 41/81] bug fix --- mmdet/core/losses/__init__.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mmdet/core/losses/__init__.py b/mmdet/core/losses/__init__.py index 3e4447ff0a6..661f0d64266 100644 --- a/mmdet/core/losses/__init__.py +++ b/mmdet/core/losses/__init__.py @@ -1,12 +1,11 @@ -from .losses import ( - weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy, - sigmoid_focal_loss, weighted_sigmoid_focal_loss, mask_cross_entropy, - weighted_mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy) +from .losses import (weighted_nll_loss, weighted_cross_entropy, + weighted_binary_cross_entropy, sigmoid_focal_loss, + weighted_sigmoid_focal_loss, mask_cross_entropy, + smooth_l1_loss, weighted_smoothl1, accuracy) __all__ = [ 'weighted_nll_loss', 'weighted_cross_entropy', 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', - 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', - 'weighted_mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', - 'accuracy' + 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss', + 'weighted_smoothl1', 'accuracy' ] From b9ade8c69707a18dd72a7a28cda63fcf0e28964e Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Fri, 5 Oct 2018 20:23:50 +0800 Subject: [PATCH 42/81] vis results for Mask RCNN and update test thr from 0.001 to 0.05 --- mmdet/models/detectors/mask_rcnn.py | 27 +++++++++++++++++---------- tools/configs/r50_fpn_frcnn_1x.py | 2 +- 
tools/configs/r50_fpn_maskrcnn_1x.py | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/mmdet/models/detectors/mask_rcnn.py b/mmdet/models/detectors/mask_rcnn.py index 684598aa013..25a363e398f 100644 --- a/mmdet/models/detectors/mask_rcnn.py +++ b/mmdet/models/detectors/mask_rcnn.py @@ -15,13 +15,20 @@ def __init__(self, test_cfg, pretrained=None): super(MaskRCNN, self).__init__( - backbone=backbone, - neck=neck, - rpn_head=rpn_head, - bbox_roi_extractor=bbox_roi_extractor, - bbox_head=bbox_head, - mask_roi_extractor=mask_roi_extractor, - mask_head=mask_head, - train_cfg=train_cfg, - test_cfg=test_cfg, - pretrained=pretrained) + backbone=backbone, + neck=neck, + rpn_head=rpn_head, + bbox_roi_extractor=bbox_roi_extractor, + bbox_head=bbox_head, + mask_roi_extractor=mask_roi_extractor, + mask_head=mask_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained) + + def show_result(self, data, result, img_norm_cfg, **kwargs): + # TODO: show segmentation masks + assert isinstance(result, tuple) + assert len(result) == 2 # (bbox_results, segm_results) + super(MaskRCNN, self).show_result(data, result[0], img_norm_cfg, + **kwargs) diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index 82082df0087..6ab3dbc3617 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -76,7 +76,7 @@ max_num=2000, nms_thr=0.7, min_bbox_size=0), - rcnn=dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5)) + rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5)) # dataset settings dataset_type = 'CocoDataset' data_root = '../data/coco/' diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index ad618573299..677176c56b6 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -89,7 +89,7 @@ nms_thr=0.7, min_bbox_size=0), rcnn=dict( - score_thr=1e-3, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) + score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) # dataset settings dataset_type = 'CocoDataset' data_root = '../data/coco/' From c086822d92203fc56021b16855289f06f96fe40e Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 7 Oct 2018 14:19:00 +0800 Subject: [PATCH 43/81] minor fix --- tools/configs/r50_fpn_frcnn_1x.py | 2 +- tools/configs/r50_fpn_maskrcnn_1x.py | 4 ++-- tools/train.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index 6ab3dbc3617..09167dd09b1 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -131,7 +131,7 @@ checkpoint_config = dict(interval=1) # yapf:disable log_config = dict( - interval=20, + interval=50, hooks=[ dict(type='TextLoggerHook'), # dict(type='TensorboardLoggerHook', log_dir=work_dir + '/log') diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index 677176c56b6..35dab5633c9 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -144,10 +144,10 @@ checkpoint_config = dict(interval=1) # yapf:disable log_config = dict( - interval=20, + interval=50, hooks=[ dict(type='TextLoggerHook'), - # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + # dict(type='TensorboardLoggerHook', log_dir=work_dir + '/log') ]) # yapf:enable # runtime settings diff --git a/tools/train.py b/tools/train.py index 1c573515acb..f60b5c0bffd 100644 --- a/tools/train.py +++ b/tools/train.py @@ -90,8 +90,8 @@ def 
main(): cfg.work_dir = args.work_dir cfg.gpus = args.gpus # add mmdet version to checkpoint as meta data - cfg.checkpoint_config.meta = dict(mmdet_version=__version__) - cfg.checkpoint_config.config = cfg.text + cfg.checkpoint_config.meta = dict( + mmdet_version=__version__, config=cfg.text) logger = get_logger(cfg.log_level) From f2705100c8f9e83b7155fb838cf27be9f0ba1889 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 7 Oct 2018 15:36:02 +0800 Subject: [PATCH 44/81] renaming and refactoring for bbox methods --- mmdet/core/bbox_ops/__init__.py | 11 +- mmdet/core/bbox_ops/sampling.py | 129 ++++++++++++++++---- mmdet/models/detectors/two_stage.py | 13 +- mmdet/models/roi_extractors/__init__.py | 4 +- mmdet/models/roi_extractors/single_level.py | 64 ++++------ tools/configs/r50_fpn_frcnn_1x.py | 2 +- tools/configs/r50_fpn_maskrcnn_1x.py | 4 +- 7 files changed, 143 insertions(+), 84 deletions(-) diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox_ops/__init__.py index 22163f75ef5..a5c21dce52f 100644 --- a/mmdet/core/bbox_ops/__init__.py +++ b/mmdet/core/bbox_ops/__init__.py @@ -1,14 +1,15 @@ from .geometry import bbox_overlaps from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps, - bbox_sampling, sample_positives, sample_negatives) + bbox_sampling, bbox_sampling_pos, bbox_sampling_neg, + sample_bboxes) from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) from .bbox_target import bbox_target __all__ = [ 'bbox_overlaps', 'random_choice', 'bbox_assign', - 'bbox_assign_wrt_overlaps', 'bbox_sampling', 'sample_positives', - 'sample_negatives', 'bbox2delta', 'delta2bbox', 'bbox_flip', - 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', - 'bbox_target' + 'bbox_assign_wrt_overlaps', 'bbox_sampling', 'bbox_sampling_pos', + 'bbox_sampling_neg', 'sample_bboxes', 'bbox2delta', 'delta2bbox', + 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', + 'bbox2result', 'bbox_target' ] diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox_ops/sampling.py index 28043182acf..80f8c8207cc 100644 --- a/mmdet/core/bbox_ops/sampling.py +++ b/mmdet/core/bbox_ops/sampling.py @@ -78,27 +78,32 @@ def bbox_assign_wrt_overlaps(overlaps, pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=.0): - """Assign a corresponding gt bbox or background to each proposal/anchor - This function assign a gt bbox to every proposal, each proposals will be + """Assign a corresponding gt bbox or background to each proposal/anchor. + + This method assign a gt bbox to every proposal, each proposals will be assigned with -1, 0, or a positive number. -1 means don't care, 0 means negative sample, positive number is the index (1-based) of assigned gt. The assignment is done in following steps, the order matters: + 1. assign every anchor to -1 2. assign proposals whose iou with all gts < neg_iou_thr to 0 3. for each anchor, if the iou with its nearest gt >= pos_iou_thr, assign it to that bbox 4. 
for each gt bbox, assign its nearest proposals (may be more than one) to itself
+
+    Args:
+        overlaps (Tensor): Overlaps between n proposals and k gt_bboxes,
+            shape (n, k).
+        gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
+        pos_iou_thr (float): IoU threshold for positive bboxes.
+        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
+        min_pos_iou (float): Minimum IoU for a bbox to be considered as a
+            positive bbox. This argument only affects the 4th step.
+
+    Returns:
+        tuple: (assigned_gt_inds, [assigned_labels], argmax_overlaps,
+            max_overlaps), shape (n, )
     """
     num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
     # 1. assign -1 by default
@@ -144,8 +149,9 @@ def bbox_assign_wrt_overlaps(overlaps,
     return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps
 
 
-def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
-    """Balance sampling for positive bboxes/anchors
+def bbox_sampling_pos(assigned_gt_inds, num_expected, balance_sampling=True):
+    """Balance sampling for positive bboxes/anchors.
+
     1. calculate average positive num for each gt: num_per_gt
     2. sample at most num_per_gt positives for each gt
     3. random sampling from rest anchors if not enough fg
@@ -186,15 +192,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
     return sampled_inds
 
 
-def sample_negatives(assigned_gt_inds,
-                     num_expected,
-                     max_overlaps=None,
-                     balance_thr=0,
-                     hard_fraction=0.5):
-    """Balance sampling for negative bboxes/anchors
-    negative samples are split into 2 set: hard(balance_thr <= iou < neg_iou_thr)
-    and easy(iou < balance_thr), around equal number of bg are sampled
-    from each set.
+def bbox_sampling_neg(assigned_gt_inds,
+                      num_expected,
+                      max_overlaps=None,
+                      balance_thr=0,
+                      hard_fraction=0.5):
+    """Balance sampling for negative bboxes/anchors.
+
+    Negative samples are split into two sets: hard (balance_thr <= iou <
+    neg_iou_thr) and easy (iou < balance_thr). The sampling ratio is
+    controlled by `hard_fraction`.
     """
     neg_inds = torch.nonzero(assigned_gt_inds == 0)
     if neg_inds.numel() != 0:
@@ -247,17 +254,87 @@ def bbox_sampling(assigned_gt_inds,
                   max_overlaps=None,
                   neg_balance_thr=0,
                   neg_hard_fraction=0.5):
+    """Sample positive and negative bboxes given assigned results.
+
+    Args:
+        assigned_gt_inds (Tensor): Assigned gt indices for each bbox.
+        num_expected (int): Expected total samples (pos and neg).
+        pos_fraction (float): Positive sample fraction.
+        neg_pos_ub (float): Negative/Positive upper bound.
+        pos_balance_sampling (bool): Whether to sample positive samples
+            around each gt bbox evenly.
+        max_overlaps (Tensor, optional): For each bbox, the max IoU of all gts.
+            Used for negative balance sampling only.
+        neg_balance_thr (float, optional): IoU threshold for simple/hard
+            negative balance sampling.
+        neg_hard_fraction (float, optional): Fraction of hard negative samples
+            for negative balance sampling.
+
+    Returns:
+        tuple[Tensor]: positive bbox indices, negative bbox indices.
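+
+    Example (illustrative values only; negative sampling is random):
+        >>> # 6 candidate bboxes assigned to 2 gts: indices 0 and 2 are
+        >>> # positive, 1/3/5 are negative and 4 is ignored
+        >>> assigned_gt_inds = torch.tensor([1, 0, 2, 0, -1, 0])
+        >>> pos_inds, neg_inds = bbox_sampling(
+        ...     assigned_gt_inds, num_expected=4, pos_fraction=0.5,
+        ...     neg_pos_ub=3, pos_balance_sampling=False)
+        >>> # pos_inds == tensor([0, 2]); neg_inds holds 2 of {1, 3, 5}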
+ """ num_expected_pos = int(num_expected * pos_fraction) - pos_inds = sample_positives(assigned_gt_inds, num_expected_pos, - pos_balance_sampling) + pos_inds = bbox_sampling_pos(assigned_gt_inds, num_expected_pos, + pos_balance_sampling) + # We found that sampled indices have duplicated items occasionally. + # (mab be a bug of PyTorch) pos_inds = pos_inds.unique() num_sampled_pos = pos_inds.numel() num_neg_max = int( neg_pos_ub * num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub) num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos) - neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg, - max_overlaps, neg_balance_thr, - neg_hard_fraction) + neg_inds = bbox_sampling_neg(assigned_gt_inds, num_expected_neg, + max_overlaps, neg_balance_thr, + neg_hard_fraction) neg_inds = neg_inds.unique() return pos_inds, neg_inds + + +def sample_bboxes(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): + """Sample positive and negative bboxes. + + This is a simple implementation of bbox sampling given candidates and + ground truth bboxes, which includes 3 steps. + + 1. Assign gt to each bbox. + 2. Add gt bboxes to the sampling pool (optional). + 3. Perform positive and negative sampling. + + Args: + bboxes (Tensor): Boxes to be sampled from. + gt_bboxes (Tensor): Ground truth bboxes. + gt_bboxes_ignore (Tensor): Ignored ground truth bboxes. In MS COCO, + `crowd` bboxes are considered as ignored. + gt_labels (Tensor): Class labels of ground truth bboxes. + cfg (dict): Sampling configs. + + Returns: + tuple[Tensor]: pos_bboxes, neg_bboxes, pos_assigned_gt_inds, + pos_gt_bboxes, pos_gt_labels + """ + bboxes = bboxes[:, :4] + assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ + bbox_assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, + cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou, + cfg.crowd_thr) + + if cfg.add_gt_as_proposals: + bboxes = torch.cat([gt_bboxes, bboxes], dim=0) + gt_assign_self = torch.arange( + 1, len(gt_labels) + 1, dtype=torch.long, device=bboxes.device) + assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds]) + assigned_labels = torch.cat([gt_labels, assigned_labels]) + + pos_inds, neg_inds = bbox_sampling( + assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub, + cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr) + + pos_bboxes = bboxes[pos_inds] + neg_bboxes = bboxes[neg_inds] + pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1 + pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] + pos_gt_labels = assigned_labels[pos_inds] + + return (pos_bboxes, neg_bboxes, pos_assigned_gt_inds, pos_gt_bboxes, + pos_gt_labels) diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py index f69db22ced9..8573d83215f 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/two_stage.py @@ -4,7 +4,7 @@ from .base import BaseDetector from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin from .. 
import builder -from mmdet.core import bbox2roi, bbox2result, multi_apply +from mmdet.core import sample_bboxes, bbox2roi, bbox2result, multi_apply class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, @@ -97,13 +97,14 @@ def forward_train(self, proposal_list = proposals if self.with_bbox: - rcnn_train_cfg_list = [ - self.train_cfg.rcnn for _ in range(len(proposal_list)) - ] (pos_proposals, neg_proposals, pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) = multi_apply( - self.bbox_roi_extractor.sample_proposals, proposal_list, - gt_bboxes, gt_bboxes_ignore, gt_labels, rcnn_train_cfg_list) + sample_bboxes, + proposal_list, + gt_bboxes, + gt_bboxes_ignore, + gt_labels, + cfg=self.train_cfg.rcnn) (labels, label_weights, bbox_targets, bbox_weights) = self.bbox_head.get_bbox_target( pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, diff --git a/mmdet/models/roi_extractors/__init__.py b/mmdet/models/roi_extractors/__init__.py index e76e689753f..9161708ce13 100644 --- a/mmdet/models/roi_extractors/__init__.py +++ b/mmdet/models/roi_extractors/__init__.py @@ -1,3 +1,3 @@ -from .single_level import SingleLevelRoI +from .single_level import SingleRoIExtractor -__all__ = ['SingleLevelRoI'] +__all__ = ['SingleRoIExtractor'] diff --git a/mmdet/models/roi_extractors/single_level.py b/mmdet/models/roi_extractors/single_level.py index 6aa29e598e5..3f97a631f98 100644 --- a/mmdet/models/roi_extractors/single_level.py +++ b/mmdet/models/roi_extractors/single_level.py @@ -4,19 +4,27 @@ import torch.nn as nn from mmdet import ops -from mmdet.core import bbox_assign, bbox_sampling -class SingleLevelRoI(nn.Module): - """Extract RoI features from a single level feature map. Each RoI is - mapped to a level according to its scale.""" +class SingleRoIExtractor(nn.Module): + """Extract RoI features from a single level feature map. + + If there are mulitple input feature levels, each RoI is mapped to a level + according to its scale. + + Args: + roi_layer (dict): Specify RoI layer type and arguments. + out_channels (int): Output channels of RoI layers. + featmap_strides (int): Strides of input feature maps. + finest_scale (int): Scale threshold of mapping to level 0. + """ def __init__(self, roi_layer, out_channels, featmap_strides, finest_scale=56): - super(SingleLevelRoI, self).__init__() + super(SingleRoIExtractor, self).__init__() self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) self.out_channels = out_channels self.featmap_strides = featmap_strides @@ -24,6 +32,7 @@ def __init__(self, @property def num_inputs(self): + """int: Input feature map levels.""" return len(self.featmap_strides) def init_weights(self): @@ -39,12 +48,19 @@ def build_roi_layers(self, layer_cfg, featmap_strides): return roi_layers def map_roi_levels(self, rois, num_levels): - """Map rois to corresponding feature levels (0-based) by scales. + """Map rois to corresponding feature levels by scales. - scale < finest_scale: level 0 - finest_scale <= scale < finest_scale * 2: level 1 - finest_scale * 2 <= scale < finest_scale * 4: level 2 - scale >= finest_scale * 4: level 3 + + Args: + rois (Tensor): Input RoIs, shape (k, 5). + num_levels (int): Total level number. 
+ + Returns: + Tensor: Level index (0-based) of each RoI, shape (k, ) """ scale = torch.sqrt( (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) @@ -52,43 +68,7 @@ def map_roi_levels(self, rois, num_levels): target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() return target_lvls - def sample_proposals(self, proposals, gt_bboxes, gt_bboxes_ignore, - gt_labels, cfg): - proposals = proposals[:, :4] - assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ - bbox_assign(proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, - cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou, - cfg.crowd_thr) - - if cfg.add_gt_as_proposals: - proposals = torch.cat([gt_bboxes, proposals], dim=0) - gt_assign_self = torch.arange( - 1, - len(gt_labels) + 1, - dtype=torch.long, - device=proposals.device) - assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds]) - assigned_labels = torch.cat([gt_labels, assigned_labels]) - - pos_inds, neg_inds = bbox_sampling( - assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, - cfg.neg_pos_ub, cfg.pos_balance_sampling, max_overlaps, - cfg.neg_balance_thr) - - pos_proposals = proposals[pos_inds] - neg_proposals = proposals[neg_inds] - pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1 - pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] - pos_gt_labels = assigned_labels[pos_inds] - - return (pos_proposals, neg_proposals, pos_assigned_gt_inds, - pos_gt_bboxes, pos_gt_labels) - def forward(self, feats, rois): - """Extract roi features with the roi layer. If multiple feature levels - are used, then rois are mapped to corresponding levels according to - their scales. - """ if len(feats) == 1: return self.roi_layers[0](feats[0], rois) diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/tools/configs/r50_fpn_frcnn_1x.py index 09167dd09b1..044c654ffa2 100644 --- a/tools/configs/r50_fpn_frcnn_1x.py +++ b/tools/configs/r50_fpn_frcnn_1x.py @@ -25,7 +25,7 @@ target_stds=[1.0, 1.0, 1.0, 1.0], use_sigmoid_cls=True), bbox_roi_extractor=dict( - type='SingleLevelRoI', + type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), out_channels=256, featmap_strides=[4, 8, 16, 32]), diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/tools/configs/r50_fpn_maskrcnn_1x.py index 35dab5633c9..881a7498f62 100644 --- a/tools/configs/r50_fpn_maskrcnn_1x.py +++ b/tools/configs/r50_fpn_maskrcnn_1x.py @@ -25,7 +25,7 @@ target_stds=[1.0, 1.0, 1.0, 1.0], use_sigmoid_cls=True), bbox_roi_extractor=dict( - type='SingleLevelRoI', + type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), out_channels=256, featmap_strides=[4, 8, 16, 32]), @@ -40,7 +40,7 @@ target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=False), mask_roi_extractor=dict( - type='SingleLevelRoI', + type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), out_channels=256, featmap_strides=[4, 8, 16, 32]), From 630486ff99225fea61bcd95bc47def7e76bcf685 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 7 Oct 2018 16:00:30 +0800 Subject: [PATCH 45/81] fix setup.py to include .so files --- setup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 02dede74779..81dd749f14b 100644 --- a/setup.py +++ b/setup.py @@ -88,7 +88,9 @@ def get_version(): description='Open MMLab Detection Toolbox', long_description=readme(), keywords='computer vision, object detection', + url='https://github.com/open-mmlab/mmdetection', packages=find_packages(), + package_data={'mmdet.ops': 
['*/*.so']}, classifiers=[ 'Development Status :: 4 - Beta', 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', @@ -99,10 +101,11 @@ def get_version(): 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', - 'Topic :: Utilities', ], license='GPLv3', setup_requires=['pytest-runner'], tests_require=['pytest'], - install_requires=['numpy', 'matplotlib', 'six', 'terminaltables'], + install_requires=[ + 'numpy', 'matplotlib', 'six', 'terminaltables', 'pycocotools' + ], zip_safe=False) From 1a70c3a5cd0e77c6777f34b625c737994fcbf735 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 7 Oct 2018 17:08:04 +0800 Subject: [PATCH 46/81] fix flake8 error --- mmdet/__init__.py | 2 ++ mmdet/core/__init__.py | 16 ++++++++-------- mmdet/core/mask_ops/segms.py | 1 + mmdet/core/rpn_ops/__init__.py | 6 ++++-- mmdet/core/utils/dist_utils.py | 3 ++- mmdet/datasets/__init__.py | 9 ++++++++- mmdet/datasets/coco.py | 5 ++++- mmdet/datasets/utils/__init__.py | 4 +++- mmdet/models/__init__.py | 11 +++++++++-- mmdet/models/backbones/__init__.py | 2 ++ mmdet/models/bbox_heads/convfc_bbox_head.py | 16 ++++++++++------ mmdet/models/necks/fpn.py | 3 ++- mmdet/models/utils/__init__.py | 7 +++++-- mmdet/ops/__init__.py | 2 ++ mmdet/ops/nms/__init__.py | 2 ++ mmdet/ops/roi_align/__init__.py | 2 ++ mmdet/ops/roi_align/gradcheck.py | 2 +- mmdet/ops/roi_pool/__init__.py | 2 ++ mmdet/ops/roi_pool/gradcheck.py | 2 +- setup.py | 2 +- tools/test.py | 2 +- tools/train.py | 2 +- 22 files changed, 73 insertions(+), 30 deletions(-) diff --git a/mmdet/__init__.py b/mmdet/__init__.py index 8b5e1ac77ad..1c4f7e8fcc5 100644 --- a/mmdet/__init__.py +++ b/mmdet/__init__.py @@ -1 +1,3 @@ from .version import __version__, short_version + +__all__ = ['__version__', 'short_version'] diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py index 1eb03f76acd..81ee7311bcd 100644 --- a/mmdet/core/__init__.py +++ b/mmdet/core/__init__.py @@ -1,8 +1,8 @@ -from .rpn_ops import * -from .bbox_ops import * -from .mask_ops import * -from .losses import * -from .eval import * -from .parallel import * -from .post_processing import * -from .utils import * +from .rpn_ops import * # noqa: F401, F403 +from .bbox_ops import * # noqa: F401, F403 +from .mask_ops import * # noqa: F401, F403 +from .losses import * # noqa: F401, F403 +from .eval import * # noqa: F401, F403 +from .parallel import * # noqa: F401, F403 +from .post_processing import * # noqa: F401, F403 +from .utils import * # noqa: F401, F403 diff --git a/mmdet/core/mask_ops/segms.py b/mmdet/core/mask_ops/segms.py index b2ae6b69a1f..9809aae3a27 100644 --- a/mmdet/core/mask_ops/segms.py +++ b/mmdet/core/mask_ops/segms.py @@ -1,3 +1,4 @@ +# flake8: noqa # This file is copied from Detectron. # Copyright (c) 2017-present, Facebook, Inc. 
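An aside on the pattern this patch applies across the package: flake8 reports star re-exports as F403 and the names they pull in as unused imports (F401), so each __init__.py either marks the wildcard import as deliberate with a noqa comment or switches to named imports plus an explicit __all__. A minimal sketch of the two variants of such an __init__.py; some_module, SomeClass and some_function are hypothetical placeholders:

# Variant 1: keep the wildcard re-export, but tell flake8 it is deliberate
# (F401: imported but unused, F403: `from module import *` used).
from .some_module import *  # noqa: F401, F403

# Variant 2: re-export explicitly, so flake8 and human readers both see
# exactly which names the package exposes.
from .some_module import SomeClass, some_function

__all__ = ['SomeClass', 'some_function']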
diff --git a/mmdet/core/rpn_ops/__init__.py b/mmdet/core/rpn_ops/__init__.py index 4d5f9244dde..0ff430a4be1 100644 --- a/mmdet/core/rpn_ops/__init__.py +++ b/mmdet/core/rpn_ops/__init__.py @@ -1,2 +1,4 @@ -from .anchor_generator import * -from .anchor_target import * +from .anchor_generator import AnchorGenerator +from .anchor_target import anchor_target + +__all__ = ['AnchorGenerator', 'anchor_target'] diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index fc102c60d4e..e0361f9e41a 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -38,7 +38,8 @@ def _init_dist_slurm(backend, **kwargs): raise NotImplementedError -# modified from https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9 +# modified from +# https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9 def all_reduce_coalesced(tensors): buckets = OrderedDict() for tensor in tensors: diff --git a/mmdet/datasets/__init__.py b/mmdet/datasets/__init__.py index c5ec4e8f9f9..6c3c8e4d77c 100644 --- a/mmdet/datasets/__init__.py +++ b/mmdet/datasets/__init__.py @@ -1,3 +1,10 @@ from .coco import CocoDataset +from .loader import (collate, GroupSampler, DistributedGroupSampler, + build_dataloader) +from .utils import DataContainer, to_tensor, random_scale, show_ann -__all__ = ['CocoDataset'] +__all__ = [ + 'CocoDataset', 'collate', 'GroupSampler', 'DistributedGroupSampler', + 'build_dataloader', 'DataContainer', 'to_tensor', 'random_scale', + 'show_ann' +] diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index 63b42b383dc..f5463873e1e 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -117,7 +117,10 @@ def _parse_ann_info(self, ann_info, with_mask=True): gt_bboxes = [] gt_labels = [] gt_bboxes_ignore = [] - # each mask consists of one or several polys, each poly is a list of float. + # Two formats are provided. + # 1. mask: a binary map of the same size as the image. + # 2. polys: each mask consists of one or several polys, each poly is a + # list of floats.
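+ # For instance (illustrative values): a poly is a flat list of x, y + # pixel coordinates, e.g. [x0, y0, x1, y1, x2, y2] for a triangle, + # while a mask is a {0, 1} array of shape (img_h, img_w).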
if with_mask: gt_masks = [] gt_mask_polys = [] diff --git a/mmdet/datasets/utils/__init__.py b/mmdet/datasets/utils/__init__.py index de3ea43bdf4..4a46a9f97d2 100644 --- a/mmdet/datasets/utils/__init__.py +++ b/mmdet/datasets/utils/__init__.py @@ -1,2 +1,4 @@ from .data_container import DataContainer -from .misc import * +from .misc import to_tensor, random_scale, show_ann + +__all__ = ['DataContainer', 'to_tensor', 'random_scale', 'show_ann'] diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py index 07930688e53..aca6399e45e 100644 --- a/mmdet/models/__init__.py +++ b/mmdet/models/__init__.py @@ -1,2 +1,9 @@ -from .detectors import * -from .builder import * +from .detectors import BaseDetector, RPN, FasterRCNN, MaskRCNN +from .builder import (build_backbone, build_neck, build_rpn_head, + build_roi_extractor, build_bbox_head, build_mask_head, + build_detector) + +__all__ = [ + 'BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 'build_backbone', + 'build_neck', 'build_rpn_head', 'build_roi_extractor', 'build_bbox_head', + 'build_mask_head', 'build_detector' +] diff --git a/mmdet/models/backbones/__init__.py b/mmdet/models/backbones/__init__.py index f9e21e83d14..107507ceaf6 100644 --- a/mmdet/models/backbones/__init__.py +++ b/mmdet/models/backbones/__init__.py @@ -1 +1,3 @@ from .resnet import resnet + +__all__ = ['resnet'] diff --git a/mmdet/models/bbox_heads/convfc_bbox_head.py b/mmdet/models/bbox_heads/convfc_bbox_head.py index 02e2a6b6d85..f7bd7f80a9f 100644 --- a/mmdet/models/bbox_heads/convfc_bbox_head.py +++ b/mmdet/models/bbox_heads/convfc_bbox_head.py @@ -43,17 +43,21 @@ def __init__(self, self.fc_out_channels = fc_out_channels # add shared convs and fcs - self.shared_convs, self.shared_fcs, last_layer_dim = self._add_conv_fc_branch( - self.num_shared_convs, self.num_shared_fcs, self.in_channels, True) + self.shared_convs, self.shared_fcs, last_layer_dim = \ + self._add_conv_fc_branch( + self.num_shared_convs, self.num_shared_fcs, self.in_channels, + True) self.shared_out_channels = last_layer_dim # add cls specific branch - self.cls_convs, self.cls_fcs, self.cls_last_dim = self._add_conv_fc_branch( - self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels) + self.cls_convs, self.cls_fcs, self.cls_last_dim = \ + self._add_conv_fc_branch( + self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels) # add reg specific branch - self.reg_convs, self.reg_fcs, self.reg_last_dim = self._add_conv_fc_branch( - self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels) + self.reg_convs, self.reg_fcs, self.reg_last_dim = \ + self._add_conv_fc_branch( + self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels) if self.num_shared_fcs == 0 and not self.with_avg_pool: if self.num_cls_fcs == 0: diff --git a/mmdet/models/necks/fpn.py b/mmdet/models/necks/fpn.py index b4e21864bff..6a256cae364 100644 --- a/mmdet/models/necks/fpn.py +++ b/mmdet/models/necks/fpn.py @@ -111,7 +111,8 @@ def forward(self, inputs): ] # part 2: add extra levels if self.num_outs > len(outs): - # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN) + # use max pool to get more levels on top of outputs + # (e.g., Faster R-CNN, Mask R-CNN) if not self.add_extra_convs: for i in range(self.num_outs - used_backbone_levels): outs.append(F.max_pool2d(outs[-1], 1, stride=2)) diff --git a/mmdet/models/utils/__init__.py b/mmdet/models/utils/__init__.py index f9c0dac6f53..c759ca9aba1 100644 --- a/mmdet/models/utils/__init__.py +++ b/mmdet/models/utils/__init__.py @@ -1,5 +1,8 @@ from .conv_module 
import ConvModule from .norm import build_norm_layer -from .weight_init import * +from .weight_init import xavier_init, normal_init, uniform_init, kaiming_init -__all__ = ['ConvModule', 'build_norm_layer'] +__all__ = [ + 'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init', + 'uniform_init', 'kaiming_init' +] diff --git a/mmdet/ops/__init__.py b/mmdet/ops/__init__.py index 52e5808016c..5b63224c347 100644 --- a/mmdet/ops/__init__.py +++ b/mmdet/ops/__init__.py @@ -1,3 +1,5 @@ from .nms import nms, soft_nms from .roi_align import RoIAlign, roi_align from .roi_pool import RoIPool, roi_pool + +__all__ = ['nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool'] diff --git a/mmdet/ops/nms/__init__.py b/mmdet/ops/nms/__init__.py index 1cf8569b97b..c4407041ad7 100644 --- a/mmdet/ops/nms/__init__.py +++ b/mmdet/ops/nms/__init__.py @@ -1 +1,3 @@ from .nms_wrapper import nms, soft_nms + +__all__ = ['nms', 'soft_nms'] diff --git a/mmdet/ops/roi_align/__init__.py b/mmdet/ops/roi_align/__init__.py index ae27e21d6c7..4cb037904a2 100644 --- a/mmdet/ops/roi_align/__init__.py +++ b/mmdet/ops/roi_align/__init__.py @@ -1,2 +1,4 @@ from .functions.roi_align import roi_align from .modules.roi_align import RoIAlign + +__all__ = ['roi_align', 'RoIAlign'] diff --git a/mmdet/ops/roi_align/gradcheck.py b/mmdet/ops/roi_align/gradcheck.py index e2c51e64bb7..394cd69c506 100644 --- a/mmdet/ops/roi_align/gradcheck.py +++ b/mmdet/ops/roi_align/gradcheck.py @@ -5,7 +5,7 @@ import os.path as osp import sys sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -from roi_align import RoIAlign +from roi_align import RoIAlign # noqa: E402 feat_size = 15 spatial_scale = 1.0 / 8 diff --git a/mmdet/ops/roi_pool/__init__.py b/mmdet/ops/roi_pool/__init__.py index 9c8506d319d..eb2c57eabd6 100644 --- a/mmdet/ops/roi_pool/__init__.py +++ b/mmdet/ops/roi_pool/__init__.py @@ -1,2 +1,4 @@ from .functions.roi_pool import roi_pool from .modules.roi_pool import RoIPool + +__all__ = ['roi_pool', 'RoIPool'] diff --git a/mmdet/ops/roi_pool/gradcheck.py b/mmdet/ops/roi_pool/gradcheck.py index c27d317a03b..c39616086a2 100644 --- a/mmdet/ops/roi_pool/gradcheck.py +++ b/mmdet/ops/roi_pool/gradcheck.py @@ -4,7 +4,7 @@ import os.path as osp import sys sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -from roi_pool import RoIPool +from roi_pool import RoIPool # noqa: E402 feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], diff --git a/setup.py b/setup.py index 81dd749f14b..7cb44e538e3 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ def get_hash(): def write_version_py(): - content = """# GENERATED VERSION FILE + content = """# GENERATED VERSION FILE # TIME: {} __version__ = '{}' diff --git a/tools/test.py b/tools/test.py index f1fb9cda91e..4e2ecd2fd09 100644 --- a/tools/test.py +++ b/tools/test.py @@ -6,7 +6,7 @@ from mmdet import datasets from mmdet.core import scatter, MMDataParallel, results2json, coco_eval -from mmdet.datasets.loader import collate, build_dataloader +from mmdet.datasets import collate, build_dataloader from mmdet.models import build_detector, detectors diff --git a/tools/train.py b/tools/train.py index f60b5c0bffd..b72adebbc19 100644 --- a/tools/train.py +++ b/tools/train.py @@ -13,7 +13,7 @@ from mmdet.core import (init_dist, DistOptimizerHook, DistSamplerSeedHook, MMDataParallel, MMDistributedDataParallel, CocoDistEvalRecallHook, CocoDistEvalmAPHook) -from mmdet.datasets.loader import build_dataloader +from 
mmdet.datasets import build_dataloader from mmdet.models import build_detector, RPN From d92be9dcaa819023a6179570a512a2e3856499dc Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Sun, 7 Oct 2018 19:37:13 +0800 Subject: [PATCH 47/81] some renaming --- mmdet/core/__init__.py | 2 +- mmdet/core/{rpn_ops => anchor}/__init__.py | 0 mmdet/core/{rpn_ops => anchor}/anchor_generator.py | 0 mmdet/core/{rpn_ops => anchor}/anchor_target.py | 0 mmdet/core/utils/__init__.py | 3 +-- mmdet/core/utils/hooks.py | 11 ----------- 6 files changed, 2 insertions(+), 14 deletions(-) rename mmdet/core/{rpn_ops => anchor}/__init__.py (100%) rename mmdet/core/{rpn_ops => anchor}/anchor_generator.py (100%) rename mmdet/core/{rpn_ops => anchor}/anchor_target.py (100%) delete mode 100644 mmdet/core/utils/hooks.py diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py index 81ee7311bcd..20e124bd21f 100644 --- a/mmdet/core/__init__.py +++ b/mmdet/core/__init__.py @@ -1,4 +1,4 @@ -from .rpn_ops import * # noqa: F401, F403 +from .anchor import * # noqa: F401, F403 from .bbox_ops import * # noqa: F401, F403 from .mask_ops import * # noqa: F401, F403 from .losses import * # noqa: F401, F403 diff --git a/mmdet/core/rpn_ops/__init__.py b/mmdet/core/anchor/__init__.py similarity index 100% rename from mmdet/core/rpn_ops/__init__.py rename to mmdet/core/anchor/__init__.py diff --git a/mmdet/core/rpn_ops/anchor_generator.py b/mmdet/core/anchor/anchor_generator.py similarity index 100% rename from mmdet/core/rpn_ops/anchor_generator.py rename to mmdet/core/anchor/anchor_generator.py diff --git a/mmdet/core/rpn_ops/anchor_target.py b/mmdet/core/anchor/anchor_target.py similarity index 100% rename from mmdet/core/rpn_ops/anchor_target.py rename to mmdet/core/anchor/anchor_target.py diff --git a/mmdet/core/utils/__init__.py b/mmdet/core/utils/__init__.py index e04da6a9a5c..9e67c7f47c9 100644 --- a/mmdet/core/utils/__init__.py +++ b/mmdet/core/utils/__init__.py @@ -1,9 +1,8 @@ from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook, DistSamplerSeedHook) -from .hooks import EmptyCacheHook from .misc import tensor2imgs, unmap, multi_apply __all__ = [ 'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook', - 'EmptyCacheHook', 'tensor2imgs', 'unmap', 'multi_apply' + 'tensor2imgs', 'unmap', 'multi_apply' ] diff --git a/mmdet/core/utils/hooks.py b/mmdet/core/utils/hooks.py deleted file mode 100644 index 7186ad75ccc..00000000000 --- a/mmdet/core/utils/hooks.py +++ /dev/null @@ -1,11 +0,0 @@ -import torch -from mmcv.runner import Hook - - -class EmptyCacheHook(Hook): - - def before_epoch(self, runner): - torch.cuda.empty_cache() - - def after_epoch(self, runner): - torch.cuda.empty_cache() From 24990bca738e8e46dcd891833bfb33f5bf4dd218 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 13:26:13 +0800 Subject: [PATCH 48/81] rename bbox_ops/mask_ops to bbox/mask --- mmdet/core/__init__.py | 4 ++-- mmdet/core/anchor/anchor_target.py | 2 +- mmdet/core/{bbox_ops => bbox}/__init__.py | 0 mmdet/core/{bbox_ops => bbox}/bbox_target.py | 0 mmdet/core/{bbox_ops => bbox}/geometry.py | 0 mmdet/core/{bbox_ops => bbox}/sampling.py | 9 ++++++--- mmdet/core/{bbox_ops => bbox}/transforms.py | 0 mmdet/core/{mask_ops => mask}/__init__.py | 0 mmdet/core/{mask_ops => mask}/mask_target.py | 0 mmdet/core/{mask_ops => mask}/segms.py | 0 mmdet/core/{mask_ops => mask}/utils.py | 0 mmdet/core/post_processing/merge_augs.py | 2 +- mmdet/datasets/transforms.py | 2 +- 13 files changed, 11 insertions(+), 8 deletions(-) rename 
mmdet/core/{bbox_ops => bbox}/__init__.py (100%) rename mmdet/core/{bbox_ops => bbox}/bbox_target.py (100%) rename mmdet/core/{bbox_ops => bbox}/geometry.py (100%) rename mmdet/core/{bbox_ops => bbox}/sampling.py (98%) rename mmdet/core/{bbox_ops => bbox}/transforms.py (100%) rename mmdet/core/{mask_ops => mask}/__init__.py (100%) rename mmdet/core/{mask_ops => mask}/mask_target.py (100%) rename mmdet/core/{mask_ops => mask}/segms.py (100%) rename mmdet/core/{mask_ops => mask}/utils.py (100%) diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py index 20e124bd21f..05788f1eb91 100644 --- a/mmdet/core/__init__.py +++ b/mmdet/core/__init__.py @@ -1,6 +1,6 @@ from .anchor import * # noqa: F401, F403 -from .bbox_ops import * # noqa: F401, F403 -from .mask_ops import * # noqa: F401, F403 +from .bbox import * # noqa: F401, F403 +from .mask import * # noqa: F401, F403 from .losses import * # noqa: F401, F403 from .eval import * # noqa: F401, F403 from .parallel import * # noqa: F401, F403 diff --git a/mmdet/core/anchor/anchor_target.py b/mmdet/core/anchor/anchor_target.py index f449507499e..ad81e390e6d 100644 --- a/mmdet/core/anchor/anchor_target.py +++ b/mmdet/core/anchor/anchor_target.py @@ -1,6 +1,6 @@ import torch -from ..bbox_ops import bbox_assign, bbox2delta, bbox_sampling +from ..bbox import bbox_assign, bbox2delta, bbox_sampling from ..utils import multi_apply diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox/__init__.py similarity index 100% rename from mmdet/core/bbox_ops/__init__.py rename to mmdet/core/bbox/__init__.py diff --git a/mmdet/core/bbox_ops/bbox_target.py b/mmdet/core/bbox/bbox_target.py similarity index 100% rename from mmdet/core/bbox_ops/bbox_target.py rename to mmdet/core/bbox/bbox_target.py diff --git a/mmdet/core/bbox_ops/geometry.py b/mmdet/core/bbox/geometry.py similarity index 100% rename from mmdet/core/bbox_ops/geometry.py rename to mmdet/core/bbox/geometry.py diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox/sampling.py similarity index 98% rename from mmdet/core/bbox_ops/sampling.py rename to mmdet/core/bbox/sampling.py index 80f8c8207cc..976cd9507f2 100644 --- a/mmdet/core/bbox_ops/sampling.py +++ b/mmdet/core/bbox/sampling.py @@ -5,6 +5,11 @@ def random_choice(gallery, num): + """Randomly select some elements from the gallery. + + It seems that PyTorch's implementation is slower than numpy's, so we use + numpy to randperm the indices.
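+ + Example (for illustration; the sampled output is random): + >>> random_choice(torch.arange(6), 3) + tensor([2, 5, 0])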
+ """ assert len(gallery) >= num if isinstance(gallery, list): gallery = np.array(gallery) @@ -12,9 +17,7 @@ def random_choice(gallery, num): np.random.shuffle(cands) rand_inds = cands[:num] if not isinstance(gallery, np.ndarray): - rand_inds = torch.from_numpy(rand_inds).long() - if gallery.is_cuda: - rand_inds = rand_inds.cuda(gallery.get_device()) + rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) return gallery[rand_inds] diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox/transforms.py similarity index 100% rename from mmdet/core/bbox_ops/transforms.py rename to mmdet/core/bbox/transforms.py diff --git a/mmdet/core/mask_ops/__init__.py b/mmdet/core/mask/__init__.py similarity index 100% rename from mmdet/core/mask_ops/__init__.py rename to mmdet/core/mask/__init__.py diff --git a/mmdet/core/mask_ops/mask_target.py b/mmdet/core/mask/mask_target.py similarity index 100% rename from mmdet/core/mask_ops/mask_target.py rename to mmdet/core/mask/mask_target.py diff --git a/mmdet/core/mask_ops/segms.py b/mmdet/core/mask/segms.py similarity index 100% rename from mmdet/core/mask_ops/segms.py rename to mmdet/core/mask/segms.py diff --git a/mmdet/core/mask_ops/utils.py b/mmdet/core/mask/utils.py similarity index 100% rename from mmdet/core/mask_ops/utils.py rename to mmdet/core/mask/utils.py diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py index 2b8d861a674..00f65b049cc 100644 --- a/mmdet/core/post_processing/merge_augs.py +++ b/mmdet/core/post_processing/merge_augs.py @@ -3,7 +3,7 @@ import numpy as np from mmdet.ops import nms -from ..bbox_ops import bbox_mapping_back +from ..bbox import bbox_mapping_back def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index a7e72e0ee48..d2daad15046 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -2,7 +2,7 @@ import numpy as np import torch -from mmdet.core.mask_ops import segms +from mmdet.core.mask import segms __all__ = [ 'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor' From d6e08c8a3f72b5f0fdc0ac6f657f210c260ca6e8 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 13:32:13 +0800 Subject: [PATCH 49/81] delete segms.py --- mmdet/core/mask/__init__.py | 9 +- mmdet/core/mask/segms.py | 272 ----------------------------------- mmdet/datasets/transforms.py | 28 +--- 3 files changed, 3 insertions(+), 306 deletions(-) delete mode 100644 mmdet/core/mask/segms.py diff --git a/mmdet/core/mask/__init__.py b/mmdet/core/mask/__init__.py index ea6179c5c38..b703b55d3eb 100644 --- a/mmdet/core/mask/__init__.py +++ b/mmdet/core/mask/__init__.py @@ -1,11 +1,4 @@ -from .segms import (flip_segms, polys_to_mask, mask_to_bbox, - polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting, - rle_mask_nms, rle_masks_to_boxes) from .utils import split_combined_polys from .mask_target import mask_target -__all__ = [ - 'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box', - 'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes', - 'split_combined_polys', 'mask_target' -] +__all__ = ['split_combined_polys', 'mask_target'] diff --git a/mmdet/core/mask/segms.py b/mmdet/core/mask/segms.py deleted file mode 100644 index 9809aae3a27..00000000000 --- a/mmdet/core/mask/segms.py +++ /dev/null @@ -1,272 +0,0 @@ -# flake8: noqa -# This file is copied from Detectron. - -# Copyright (c) 2017-present, Facebook, Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -############################################################################## -"""Functions for interacting with segmentation masks in the COCO format. -The following terms are used in this module - mask: a binary mask encoded as a 2D numpy array - segm: a segmentation mask in one of the two COCO formats (polygon or RLE) - polygon: COCO's polygon format - RLE: COCO's run length encoding format -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import pycocotools.mask as mask_util - - -def flip_segms(segms, height, width): - """Left/right flip each mask in a list of masks.""" - - def _flip_poly(poly, width): - flipped_poly = np.array(poly) - flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 - return flipped_poly.tolist() - - def _flip_rle(rle, height, width): - if 'counts' in rle and type(rle['counts']) == list: - # Magic RLE format handling painfully discovered by looking at the - # COCO API showAnns function. - rle = mask_util.frPyObjects([rle], height, width) - mask = mask_util.decode(rle) - mask = mask[:, ::-1, :] - rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) - return rle - - flipped_segms = [] - for segm in segms: - if type(segm) == list: - # Polygon format - flipped_segms.append([_flip_poly(poly, width) for poly in segm]) - else: - # RLE format - assert type(segm) == dict - flipped_segms.append(_flip_rle(segm, height, width)) - return flipped_segms - - -def polys_to_mask(polygons, height, width): - """Convert from the COCO polygon segmentation format to a binary mask - encoded as a 2D array of data type numpy.float32. The polygon segmentation - is understood to be enclosed inside a height x width image. The resulting - mask is therefore of shape (height, width). - """ - rle = mask_util.frPyObjects(polygons, height, width) - mask = np.array(mask_util.decode(rle), dtype=np.float32) - # Flatten in case polygons was a list - mask = np.sum(mask, axis=2) - mask = np.array(mask > 0, dtype=np.float32) - return mask - - -def mask_to_bbox(mask): - """Compute the tight bounding box of a binary mask.""" - xs = np.where(np.sum(mask, axis=0) > 0)[0] - ys = np.where(np.sum(mask, axis=1) > 0)[0] - - if len(xs) == 0 or len(ys) == 0: - return None - - x0 = xs[0] - x1 = xs[-1] - y0 = ys[0] - y1 = ys[-1] - return np.array((x0, y0, x1, y1), dtype=np.float32) - - -def polys_to_mask_wrt_box(polygons, box, M): - """Convert from the COCO polygon segmentation format to a binary mask - encoded as a 2D array of data type numpy.float32. The polygon segmentation - is understood to be enclosed in the given box and rasterized to an M x M - mask. The resulting mask is therefore of shape (M, M). 
- """ - w = box[2] - box[0] - h = box[3] - box[1] - - w = np.maximum(w, 1) - h = np.maximum(h, 1) - - polygons_norm = [] - for poly in polygons: - p = np.array(poly, dtype=np.float32) - p[0::2] = (p[0::2] - box[0]) * M / w - p[1::2] = (p[1::2] - box[1]) * M / h - polygons_norm.append(p) - - rle = mask_util.frPyObjects(polygons_norm, M, M) - mask = np.array(mask_util.decode(rle), dtype=np.float32) - # Flatten in case polygons was a list - mask = np.sum(mask, axis=2) - mask = np.array(mask > 0, dtype=np.float32) - return mask - - -def polys_to_boxes(polys): - """Convert a list of polygons into an array of tight bounding boxes.""" - boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) - for i in range(len(polys)): - poly = polys[i] - x0 = min(min(p[::2]) for p in poly) - x1 = max(max(p[::2]) for p in poly) - y0 = min(min(p[1::2]) for p in poly) - y1 = max(max(p[1::2]) for p in poly) - boxes_from_polys[i, :] = [x0, y0, x1, y1] - - return boxes_from_polys - - -def rle_mask_voting(top_masks, - all_masks, - all_dets, - iou_thresh, - binarize_thresh, - method='AVG'): - """Returns new masks (in correspondence with `top_masks`) by combining - multiple overlapping masks coming from the pool of `all_masks`. Two methods - for combining masks are supported: 'AVG' uses a weighted average of - overlapping mask pixels; 'UNION' takes the union of all mask pixels. - """ - if len(top_masks) == 0: - return - - all_not_crowd = [False] * len(all_masks) - top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd) - decoded_all_masks = [ - np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks - ] - decoded_top_masks = [ - np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks - ] - all_boxes = all_dets[:, :4].astype(np.int32) - all_scores = all_dets[:, 4] - - # Fill box support with weights - mask_shape = decoded_all_masks[0].shape - mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1])) - for k in range(len(all_masks)): - ref_box = all_boxes[k] - x_0 = max(ref_box[0], 0) - x_1 = min(ref_box[2] + 1, mask_shape[1]) - y_0 = max(ref_box[1], 0) - y_1 = min(ref_box[3] + 1, mask_shape[0]) - mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k] - mask_weights = np.maximum(mask_weights, 1e-5) - - top_segms_out = [] - for k in range(len(top_masks)): - # Corner case of empty mask - if decoded_top_masks[k].sum() == 0: - top_segms_out.append(top_masks[k]) - continue - - inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0] - # Only matches itself - if len(inds_to_vote) == 1: - top_segms_out.append(top_masks[k]) - continue - - masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote] - if method == 'AVG': - ws = mask_weights[inds_to_vote] - soft_mask = np.average(masks_to_vote, axis=0, weights=ws) - mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8) - elif method == 'UNION': - # Any pixel that's on joins the mask - soft_mask = np.sum(masks_to_vote, axis=0) - mask = np.array(soft_mask > 1e-5, dtype=np.uint8) - else: - raise NotImplementedError('Method {} is unknown'.format(method)) - rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0] - top_segms_out.append(rle) - - return top_segms_out - - -def rle_mask_nms(masks, dets, thresh, mode='IOU'): - """Performs greedy non-maximum suppression based on an overlap measurement - between masks. The type of measurement is determined by `mode` and can be - either 'IOU' (standard intersection over union) or 'IOMA' (intersection over - mininum area). 
- """ - if len(masks) == 0: - return [] - if len(masks) == 1: - return [0] - - if mode == 'IOU': - # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2)) - all_not_crowds = [False] * len(masks) - ious = mask_util.iou(masks, masks, all_not_crowds) - elif mode == 'IOMA': - # Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2)) - all_crowds = [True] * len(masks) - # ious[m1, m2] = area(intersect(m1, m2)) / area(m2) - ious = mask_util.iou(masks, masks, all_crowds) - # ... = max(area(intersect(m1, m2)) / area(m2), - # area(intersect(m2, m1)) / area(m1)) - ious = np.maximum(ious, ious.transpose()) - elif mode == 'CONTAINMENT': - # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2) - # Which measures how much m2 is contained inside m1 - all_crowds = [True] * len(masks) - ious = mask_util.iou(masks, masks, all_crowds) - else: - raise NotImplementedError('Mode {} is unknown'.format(mode)) - - scores = dets[:, 4] - order = np.argsort(-scores) - - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - ovr = ious[i, order[1:]] - inds_to_keep = np.where(ovr <= thresh)[0] - order = order[inds_to_keep + 1] - - return keep - - -def rle_masks_to_boxes(masks): - """Computes the bounding box of each mask in a list of RLE encoded masks.""" - if len(masks) == 0: - return [] - - decoded_masks = [ - np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks - ] - - def get_bounds(flat_mask): - inds = np.where(flat_mask > 0)[0] - return inds.min(), inds.max() - - boxes = np.zeros((len(decoded_masks), 4)) - keep = [True] * len(decoded_masks) - for i, mask in enumerate(decoded_masks): - if mask.sum() == 0: - keep[i] = False - continue - flat_mask = mask.sum(axis=0) - x0, x1 = get_bounds(flat_mask) - flat_mask = mask.sum(axis=1) - y0, y1 = get_bounds(flat_mask) - boxes[i, :] = (x0, y0, x1, y1) - - return boxes, np.where(keep)[0] diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index d2daad15046..09b4f1c6614 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -2,11 +2,7 @@ import numpy as np import torch -from mmdet.core.mask import segms - -__all__ = [ - 'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor' -] +__all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor'] class ImageTransform(object): @@ -85,26 +81,6 @@ def __call__(self, bboxes, img_shape, scale_factor, flip=False): return padded_bboxes -class PolyMaskTransform(object): - """Preprocess polygons.""" - - def __init__(self): - pass - - def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): - if flip: - gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w) - num_polys_per_mask = np.array( - [len(mask_polys) for mask_polys in gt_mask_polys], dtype=np.int64) - gt_poly_lens = np.array(gt_poly_lens, dtype=np.int64) - gt_mask_polys = [ - np.concatenate(mask_polys).astype(np.float32) - for mask_polys in gt_mask_polys - ] - gt_mask_polys = np.concatenate(gt_mask_polys) - return gt_mask_polys, gt_poly_lens, num_polys_per_mask - - class MaskTransform(object): """Preprocess masks. 
@@ -119,7 +95,7 @@ def __call__(self, masks, pad_shape, scale_factor, flip=False): for mask in masks ] if flip: - masks = [mask[:, ::-1] for mask in masks] + masks = [mmcv.imflip(mask) for mask in masks] padded_masks = [ mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks ] From 6eb52aed82d0b56f3f27efe22de4e0f580d55fe8 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 13:41:08 +0800 Subject: [PATCH 50/81] move config dir out of tools --- {tools/configs => configs}/r50_fpn_frcnn_1x.py | 0 {tools/configs => configs}/r50_fpn_maskrcnn_1x.py | 0 {tools/configs => configs}/r50_fpn_rpn_1x.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {tools/configs => configs}/r50_fpn_frcnn_1x.py (100%) rename {tools/configs => configs}/r50_fpn_maskrcnn_1x.py (100%) rename {tools/configs => configs}/r50_fpn_rpn_1x.py (100%) diff --git a/tools/configs/r50_fpn_frcnn_1x.py b/configs/r50_fpn_frcnn_1x.py similarity index 100% rename from tools/configs/r50_fpn_frcnn_1x.py rename to configs/r50_fpn_frcnn_1x.py diff --git a/tools/configs/r50_fpn_maskrcnn_1x.py b/configs/r50_fpn_maskrcnn_1x.py similarity index 100% rename from tools/configs/r50_fpn_maskrcnn_1x.py rename to configs/r50_fpn_maskrcnn_1x.py diff --git a/tools/configs/r50_fpn_rpn_1x.py b/configs/r50_fpn_rpn_1x.py similarity index 100% rename from tools/configs/r50_fpn_rpn_1x.py rename to configs/r50_fpn_rpn_1x.py From 854ed4055ac1b0aa5c980465e6e5dfc73243b2a3 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 13:46:19 +0800 Subject: [PATCH 51/81] minor updates for train/test scripts --- tools/test.py | 7 +++---- tools/train.py | 8 +++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/test.py b/tools/test.py index 4e2ecd2fd09..c0bfd2558e4 100644 --- a/tools/test.py +++ b/tools/test.py @@ -44,17 +44,16 @@ def parse_args(): '--eval', type=str, nargs='+', - choices=['proposal', 'bbox', 'segm', 'keypoints'], + choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'], help='eval types') parser.add_argument('--show', action='store_true', help='show results') args = parser.parse_args() return args -args = parse_args() - - def main(): + args = parse_args() + cfg = mmcv.Config.fromfile(args.config) cfg.model.pretrained = None cfg.data.test.test_mode = True diff --git a/tools/train.py b/tools/train.py index b72adebbc19..07a918d6903 100644 --- a/tools/train.py +++ b/tools/train.py @@ -2,6 +2,7 @@ import argparse import logging +import random from collections import OrderedDict import numpy as np @@ -55,6 +56,7 @@ def get_logger(log_level): def set_random_seed(seed): + random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) @@ -89,7 +91,7 @@ def main(): if args.work_dir is not None: cfg.work_dir = args.work_dir cfg.gpus = args.gpus - # add mmdet version to checkpoint as meta data + # save mmdet version in checkpoint as meta data cfg.checkpoint_config.meta = dict( mmdet_version=__version__, config=cfg.text) @@ -103,13 +105,13 @@ def main(): # init distributed environment if necessary if args.launcher == 'none': dist = False - logger.info('Disabled distributed training.') + logger.info('Non-distributed training.') else: dist = True init_dist(args.launcher, **cfg.dist_params) if torch.distributed.get_rank() != 0: logger.setLevel('ERROR') - logger.info('Enabled distributed training.') + logger.info('Distributed training.') # prepare data loaders train_dataset = obj_from_dict(cfg.data.train, datasets) From 72a3dddca15fbd70376350a361d738aa9ca5f6e7 Mon Sep 17 
00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 14:14:04 +0800 Subject: [PATCH 52/81] fix path issues --- configs/r50_fpn_frcnn_1x.py | 2 +- configs/r50_fpn_maskrcnn_1x.py | 2 +- configs/r50_fpn_rpn_1x.py | 2 +- tools/dist_train.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/r50_fpn_frcnn_1x.py b/configs/r50_fpn_frcnn_1x.py index 044c654ffa2..a4b9838fc4d 100644 --- a/configs/r50_fpn_frcnn_1x.py +++ b/configs/r50_fpn_frcnn_1x.py @@ -79,7 +79,7 @@ rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5)) # dataset settings dataset_type = 'CocoDataset' -data_root = '../data/coco/' +data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) data = dict( diff --git a/configs/r50_fpn_maskrcnn_1x.py b/configs/r50_fpn_maskrcnn_1x.py index 881a7498f62..fa88de7ae31 100644 --- a/configs/r50_fpn_maskrcnn_1x.py +++ b/configs/r50_fpn_maskrcnn_1x.py @@ -92,7 +92,7 @@ score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) # dataset settings dataset_type = 'CocoDataset' -data_root = '../data/coco/' +data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) data = dict( diff --git a/configs/r50_fpn_rpn_1x.py b/configs/r50_fpn_rpn_1x.py index dfed976a249..4c0bb41dba4 100644 --- a/configs/r50_fpn_rpn_1x.py +++ b/configs/r50_fpn_rpn_1x.py @@ -50,7 +50,7 @@ min_bbox_size=0)) # dataset settings dataset_type = 'CocoDataset' -data_root = '../data/coco/' +data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) data = dict( diff --git a/tools/dist_train.sh b/tools/dist_train.sh index b13cb68a456..fa68297226b 100755 --- a/tools/dist_train.sh +++ b/tools/dist_train.sh @@ -2,4 +2,4 @@ PYTHON=${PYTHON:-"python"} -$PYTHON -m torch.distributed.launch --nproc_per_node=$2 train.py $1 --launcher pytorch ${@:3} +$PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3} From 86137cda5d237ec272e2f55ec94c499035f1cfa0 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 16:26:01 +0800 Subject: [PATCH 53/81] support chunk when reducing grads --- mmdet/core/utils/__init__.py | 7 ++-- mmdet/core/utils/dist_utils.py | 71 +++++++++++++++------------------- 2 files changed, 35 insertions(+), 43 deletions(-) diff --git a/mmdet/core/utils/__init__.py b/mmdet/core/utils/__init__.py index 9e67c7f47c9..981dab7fb0d 100644 --- a/mmdet/core/utils/__init__.py +++ b/mmdet/core/utils/__init__.py @@ -1,8 +1,7 @@ -from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook, - DistSamplerSeedHook) +from .dist_utils import init_dist, allreduce_grads, DistOptimizerHook from .misc import tensor2imgs, unmap, multi_apply __all__ = [ - 'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook', - 'tensor2imgs', 'unmap', 'multi_apply' + 'init_dist', 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', + 'unmap', 'multi_apply' ] diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index e0361f9e41a..c7748db661f 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -4,9 +4,9 @@ import torch import torch.multiprocessing as mp import torch.distributed as dist -from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors -from torch.nn.utils import clip_grad -from mmcv.runner import Hook, OptimizerHook +from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, + 
_take_tensors) +from mmcv.runner import OptimizerHook def init_dist(launcher, backend='nccl', **kwargs): @@ -38,59 +38,52 @@ def _init_dist_slurm(backend, **kwargs): raise NotImplementedError -# modified from -# https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9 -def all_reduce_coalesced(tensors): - buckets = OrderedDict() - for tensor in tensors: - tp = tensor.type() - if tp not in buckets: - buckets[tp] = [] - buckets[tp].append(tensor) - - world_size = dist.get_world_size() - for tp in buckets: - bucket = buckets[tp] - coalesced = _flatten_dense_tensors(bucket) - dist.all_reduce(coalesced) - coalesced.div_(world_size) - - for buf, synced in zip(bucket, - _unflatten_dense_tensors(coalesced, bucket)): - buf.copy_(synced) - - -def reduce_grads(model, coalesce=True): +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): + tensor.copy_(synced) + + +def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): grads = [ param.grad.data for param in model.parameters() if param.requires_grad and param.grad is not None ] + world_size = dist.get_world_size() if coalesce: - all_reduce_coalesced(grads) + _allreduce_coalesced(grads, world_size, bucket_size_mb) else: - world_size = dist.get_world_size() for tensor in grads: dist.all_reduce(tensor.div_(world_size)) class DistOptimizerHook(OptimizerHook): - def __init__(self, grad_clip=None, coalesce=True): + def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): self.grad_clip = grad_clip self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb def after_train_iter(self, runner): runner.optimizer.zero_grad() runner.outputs['loss'].backward() - reduce_grads(runner.model, self.coalesce) + allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) if self.grad_clip is not None: - clip_grad.clip_grad_norm_( - filter(lambda p: p.requires_grad, runner.model.parameters()), - **self.grad_clip) + self.clip_grads(runner.model.parameters()) runner.optimizer.step() - - -class DistSamplerSeedHook(Hook): - - def before_epoch(self, runner): - runner.data_loader.sampler.set_epoch(runner.epoch) From 9028eb27d9f48f665efe0327a80e2014d9db68ad Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 16:26:13 +0800 Subject: [PATCH 54/81] minor fix --- mmdet/datasets/transforms.py | 2 +- tools/train.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index 09b4f1c6614..ddb2fb2c2f4 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -95,7 +95,7 @@ def __call__(self, masks, pad_shape, scale_factor, flip=False): for mask in masks ] if flip: - masks = [mmcv.imflip(mask) for mask in masks] + masks = [mask[:, ::-1] for mask in masks] padded_masks = [ mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks ] diff --git a/tools/train.py b/tools/train.py index 07a918d6903..f778a777285 100644 --- a/tools/train.py +++ b/tools/train.py @@ -8,12 +8,12 @@ import numpy as np 
import torch from mmcv import Config -from mmcv.runner import Runner, obj_from_dict +from mmcv.runner import Runner, obj_from_dict, DistSamplerSeedHook from mmdet import datasets, __version__ -from mmdet.core import (init_dist, DistOptimizerHook, DistSamplerSeedHook, - MMDataParallel, MMDistributedDataParallel, - CocoDistEvalRecallHook, CocoDistEvalmAPHook) +from mmdet.core import (init_dist, DistOptimizerHook, MMDataParallel, + MMDistributedDataParallel, CocoDistEvalRecallHook, + CocoDistEvalmAPHook) from mmdet.datasets import build_dataloader from mmdet.models import build_detector, RPN From 2f3b541841348e822905cc4e3adfeeaff95b7525 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 19:29:51 +0800 Subject: [PATCH 55/81] use nccl as the default comm backend --- configs/r50_fpn_frcnn_1x.py | 2 +- configs/r50_fpn_maskrcnn_1x.py | 2 +- configs/r50_fpn_rpn_1x.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/r50_fpn_frcnn_1x.py b/configs/r50_fpn_frcnn_1x.py index a4b9838fc4d..d5547ee1001 100644 --- a/configs/r50_fpn_frcnn_1x.py +++ b/configs/r50_fpn_frcnn_1x.py @@ -140,7 +140,7 @@ # runtime settings total_epochs = 12 device_ids = range(8) -dist_params = dict(backend='gloo') +dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = './work_dirs/fpn_faster_rcnn_r50_1x' load_from = None diff --git a/configs/r50_fpn_maskrcnn_1x.py b/configs/r50_fpn_maskrcnn_1x.py index fa88de7ae31..09d2fb92231 100644 --- a/configs/r50_fpn_maskrcnn_1x.py +++ b/configs/r50_fpn_maskrcnn_1x.py @@ -153,7 +153,7 @@ # runtime settings total_epochs = 12 device_ids = range(8) -dist_params = dict(backend='gloo') +dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = './work_dirs/fpn_mask_rcnn_r50_1x' load_from = None diff --git a/configs/r50_fpn_rpn_1x.py b/configs/r50_fpn_rpn_1x.py index 4c0bb41dba4..d2f28c2dfe6 100644 --- a/configs/r50_fpn_rpn_1x.py +++ b/configs/r50_fpn_rpn_1x.py @@ -110,7 +110,7 @@ # yapf:enable # runtime settings total_epochs = 12 -dist_params = dict(backend='gloo') +dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = './work_dirs/fpn_rpn_r50_1x' load_from = None From e4d7e11062b39dac98a7d4302e28ce4ba406440f Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 20:49:12 +0800 Subject: [PATCH 56/81] rename eval to evaluation, losses to loss --- mmdet/core/{eval => evaluation}/__init__.py | 0 mmdet/core/{eval => evaluation}/bbox_overlaps.py | 0 mmdet/core/{eval => evaluation}/class_names.py | 0 mmdet/core/{eval => evaluation}/coco_utils.py | 0 mmdet/core/{eval => evaluation}/eval_hooks.py | 3 +-- mmdet/core/{eval => evaluation}/mean_ap.py | 0 mmdet/core/{eval => evaluation}/recall.py | 0 mmdet/core/{losses => loss}/__init__.py | 0 mmdet/core/{losses => loss}/losses.py | 0 9 files changed, 1 insertion(+), 2 deletions(-) rename mmdet/core/{eval => evaluation}/__init__.py (100%) rename mmdet/core/{eval => evaluation}/bbox_overlaps.py (100%) rename mmdet/core/{eval => evaluation}/class_names.py (100%) rename mmdet/core/{eval => evaluation}/coco_utils.py (100%) rename mmdet/core/{eval => evaluation}/eval_hooks.py (98%) rename mmdet/core/{eval => evaluation}/mean_ap.py (100%) rename mmdet/core/{eval => evaluation}/recall.py (100%) rename mmdet/core/{losses => loss}/__init__.py (100%) rename mmdet/core/{losses => loss}/losses.py (100%) diff --git a/mmdet/core/eval/__init__.py b/mmdet/core/evaluation/__init__.py similarity index 100% rename from mmdet/core/eval/__init__.py rename to mmdet/core/evaluation/__init__.py diff --git 
a/mmdet/core/eval/bbox_overlaps.py b/mmdet/core/evaluation/bbox_overlaps.py similarity index 100% rename from mmdet/core/eval/bbox_overlaps.py rename to mmdet/core/evaluation/bbox_overlaps.py diff --git a/mmdet/core/eval/class_names.py b/mmdet/core/evaluation/class_names.py similarity index 100% rename from mmdet/core/eval/class_names.py rename to mmdet/core/evaluation/class_names.py diff --git a/mmdet/core/eval/coco_utils.py b/mmdet/core/evaluation/coco_utils.py similarity index 100% rename from mmdet/core/eval/coco_utils.py rename to mmdet/core/evaluation/coco_utils.py diff --git a/mmdet/core/eval/eval_hooks.py b/mmdet/core/evaluation/eval_hooks.py similarity index 98% rename from mmdet/core/eval/eval_hooks.py rename to mmdet/core/evaluation/eval_hooks.py index 870830ef396..a83b80dbfe7 100644 --- a/mmdet/core/eval/eval_hooks.py +++ b/mmdet/core/evaluation/eval_hooks.py @@ -7,13 +7,12 @@ import numpy as np import torch from mmcv.runner import Hook, obj_from_dict +from mmcv.parallel import scatter, collate from pycocotools.cocoeval import COCOeval from torch.utils.data import Dataset from .coco_utils import results2json, fast_eval_recall -from ..parallel import scatter from mmdet import datasets -from mmdet.datasets.loader import collate class DistEvalHook(Hook): diff --git a/mmdet/core/eval/mean_ap.py b/mmdet/core/evaluation/mean_ap.py similarity index 100% rename from mmdet/core/eval/mean_ap.py rename to mmdet/core/evaluation/mean_ap.py diff --git a/mmdet/core/eval/recall.py b/mmdet/core/evaluation/recall.py similarity index 100% rename from mmdet/core/eval/recall.py rename to mmdet/core/evaluation/recall.py diff --git a/mmdet/core/losses/__init__.py b/mmdet/core/loss/__init__.py similarity index 100% rename from mmdet/core/losses/__init__.py rename to mmdet/core/loss/__init__.py diff --git a/mmdet/core/losses/losses.py b/mmdet/core/loss/losses.py similarity index 100% rename from mmdet/core/losses/losses.py rename to mmdet/core/loss/losses.py From 12a857016598b7c67d033e4799fc1b038d1546e3 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 21:28:59 +0800 Subject: [PATCH 57/81] move parallel module to mmcv --- mmdet/core/__init__.py | 5 +- mmdet/core/parallel/__init__.py | 7 -- mmdet/core/parallel/_functions.py | 74 ---------------------- mmdet/core/parallel/data_parallel.py | 9 --- mmdet/core/parallel/distributed.py | 46 -------------- mmdet/core/parallel/scatter_gather.py | 54 ---------------- mmdet/datasets/__init__.py | 10 ++- mmdet/datasets/coco.py | 2 +- mmdet/datasets/loader/__init__.py | 3 +- mmdet/datasets/loader/build_loader.py | 7 +- mmdet/datasets/loader/collate.py | 70 -------------------- mmdet/datasets/{utils/misc.py => utils.py} | 17 ----- mmdet/datasets/utils/__init__.py | 4 -- mmdet/datasets/utils/data_container.py | 58 ----------------- tools/test.py | 3 +- tools/train.py | 4 +- 16 files changed, 18 insertions(+), 355 deletions(-) delete mode 100644 mmdet/core/parallel/__init__.py delete mode 100644 mmdet/core/parallel/_functions.py delete mode 100644 mmdet/core/parallel/data_parallel.py delete mode 100644 mmdet/core/parallel/distributed.py delete mode 100644 mmdet/core/parallel/scatter_gather.py delete mode 100644 mmdet/datasets/loader/collate.py rename mmdet/datasets/{utils/misc.py => utils.py} (75%) delete mode 100644 mmdet/datasets/utils/__init__.py delete mode 100644 mmdet/datasets/utils/data_container.py diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py index 05788f1eb91..645d5be29c0 100644 --- a/mmdet/core/__init__.py +++ 
b/mmdet/core/__init__.py @@ -1,8 +1,7 @@ from .anchor import * # noqa: F401, F403 from .bbox import * # noqa: F401, F403 from .mask import * # noqa: F401, F403 -from .losses import * # noqa: F401, F403 -from .eval import * # noqa: F401, F403 -from .parallel import * # noqa: F401, F403 +from .loss import * # noqa: F401, F403 +from .evaluation import * # noqa: F401, F403 from .post_processing import * # noqa: F401, F403 from .utils import * # noqa: F401, F403 diff --git a/mmdet/core/parallel/__init__.py b/mmdet/core/parallel/__init__.py deleted file mode 100644 index 0ea0a58e4a5..00000000000 --- a/mmdet/core/parallel/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .data_parallel import MMDataParallel -from .distributed import MMDistributedDataParallel -from .scatter_gather import scatter, scatter_kwargs - -__all__ = [ - 'MMDataParallel', 'MMDistributedDataParallel', 'scatter', 'scatter_kwargs' -] diff --git a/mmdet/core/parallel/_functions.py b/mmdet/core/parallel/_functions.py deleted file mode 100644 index 75bb954dce4..00000000000 --- a/mmdet/core/parallel/_functions.py +++ /dev/null @@ -1,74 +0,0 @@ -import torch -from torch.nn.parallel._functions import _get_stream - - -def scatter(input, devices, streams=None): - """Scatters tensor across multiple GPUs. - """ - if streams is None: - streams = [None] * len(devices) - - if isinstance(input, list): - chunk_size = (len(input) - 1) // len(devices) + 1 - outputs = [ - scatter(input[i], [devices[i // chunk_size]], - [streams[i // chunk_size]]) for i in range(len(input)) - ] - return outputs - elif isinstance(input, torch.Tensor): - output = input.contiguous() - # TODO: copy to a pinned buffer first (if copying from CPU) - stream = streams[0] if output.numel() > 0 else None - with torch.cuda.device(devices[0]), torch.cuda.stream(stream): - output = output.cuda(devices[0], non_blocking=True) - return output - else: - raise Exception('Unknown type {}.'.format(type(input))) - - -def synchronize_stream(output, devices, streams): - if isinstance(output, list): - chunk_size = len(output) // len(devices) - for i in range(len(devices)): - for j in range(chunk_size): - synchronize_stream(output[i * chunk_size + j], [devices[i]], - [streams[i]]) - elif isinstance(output, torch.Tensor): - if output.numel() != 0: - with torch.cuda.device(devices[0]): - main_stream = torch.cuda.current_stream() - main_stream.wait_stream(streams[0]) - output.record_stream(main_stream) - else: - raise Exception('Unknown type {}.'.format(type(output))) - - -def get_input_device(input): - if isinstance(input, list): - for item in input: - input_device = get_input_device(item) - if input_device != -1: - return input_device - return -1 - elif isinstance(input, torch.Tensor): - return input.get_device() if input.is_cuda else -1 - else: - raise Exception('Unknown type {}.'.format(type(input))) - - -class Scatter(object): - - @staticmethod - def forward(target_gpus, input): - input_device = get_input_device(input) - streams = None - if input_device == -1: - # Perform CPU to GPU copies in a background stream - streams = [_get_stream(device) for device in target_gpus] - - outputs = scatter(input, target_gpus, streams) - # Synchronize with the copy stream - if streams is not None: - synchronize_stream(outputs, target_gpus, streams) - - return tuple(outputs) diff --git a/mmdet/core/parallel/data_parallel.py b/mmdet/core/parallel/data_parallel.py deleted file mode 100644 index 6735cb4afb7..00000000000 --- a/mmdet/core/parallel/data_parallel.py +++ /dev/null @@ -1,9 +0,0 @@ -from 
torch.nn.parallel import DataParallel - -from .scatter_gather import scatter_kwargs - - -class MMDataParallel(DataParallel): - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) diff --git a/mmdet/core/parallel/distributed.py b/mmdet/core/parallel/distributed.py deleted file mode 100644 index a2e1d557b3e..00000000000 --- a/mmdet/core/parallel/distributed.py +++ /dev/null @@ -1,46 +0,0 @@ -import torch -import torch.distributed as dist -import torch.nn as nn -from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, - _take_tensors) - -from .scatter_gather import scatter_kwargs - - -class MMDistributedDataParallel(nn.Module): - - def __init__(self, module, dim=0, broadcast_buffers=True): - super(MMDistributedDataParallel, self).__init__() - self.module = module - self.dim = dim - self.broadcast_buffers = broadcast_buffers - - self.broadcast_bucket_size = 32 * 1024 * 1024 - self._sync_params() - - def _dist_broadcast_coalesced(self, tensors, buffer_size): - for tensors in _take_tensors(tensors, buffer_size): - flat_tensors = _flatten_dense_tensors(tensors) - dist.broadcast(flat_tensors, 0) - for tensor, synced in zip( - tensors, _unflatten_dense_tensors(flat_tensors, tensors)): - tensor.copy_(synced) - - def _sync_params(self): - module_states = list(self.module.state_dict().values()) - if len(module_states) > 0: - self._dist_broadcast_coalesced(module_states, - self.broadcast_bucket_size) - if self.broadcast_buffers: - buffers = [b.data for b in self.module._all_buffers()] - if len(buffers) > 0: - self._dist_broadcast_coalesced(buffers, - self.broadcast_bucket_size) - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) - - def forward(self, *inputs, **kwargs): - inputs, kwargs = self.scatter(inputs, kwargs, - [torch.cuda.current_device()]) - return self.module(*inputs[0], **kwargs[0]) diff --git a/mmdet/core/parallel/scatter_gather.py b/mmdet/core/parallel/scatter_gather.py deleted file mode 100644 index 5a7d4c146e0..00000000000 --- a/mmdet/core/parallel/scatter_gather.py +++ /dev/null @@ -1,54 +0,0 @@ -import torch -from torch.nn.parallel._functions import Scatter as OrigScatter - -from ._functions import Scatter -from mmdet.datasets.utils import DataContainer - - -def scatter(inputs, target_gpus, dim=0): - """Scatter inputs to target gpus. - - The only difference from original :func:`scatter` is to add support for - :type:`~mmdet.DataContainer`. - """ - - def scatter_map(obj): - if isinstance(obj, torch.Tensor): - return OrigScatter.apply(target_gpus, None, dim, obj) - if isinstance(obj, DataContainer): - if obj.cpu_only: - return obj.data - else: - return Scatter.forward(target_gpus, obj.data) - if isinstance(obj, tuple) and len(obj) > 0: - return list(zip(*map(scatter_map, obj))) - if isinstance(obj, list) and len(obj) > 0: - out = list(map(list, zip(*map(scatter_map, obj)))) - return out - if isinstance(obj, dict) and len(obj) > 0: - out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) - return out - return [obj for targets in target_gpus] - - # After scatter_map is called, a scatter_map cell will exist. This cell - # has a reference to the actual function scatter_map, which has references - # to a closure that has a reference to the scatter_map cell (because the - # fn is recursive). 
To avoid this reference cycle, we set the function to - # None, clearing the cell - try: - return scatter_map(inputs) - finally: - scatter_map = None - - -def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): - """Scatter with support for kwargs dictionary""" - inputs = scatter(inputs, target_gpus, dim) if inputs else [] - kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] - if len(inputs) < len(kwargs): - inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) - elif len(kwargs) < len(inputs): - kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) - inputs = tuple(inputs) - kwargs = tuple(kwargs) - return inputs, kwargs diff --git a/mmdet/datasets/__init__.py b/mmdet/datasets/__init__.py index 6c3c8e4d77c..425ea72535a 100644 --- a/mmdet/datasets/__init__.py +++ b/mmdet/datasets/__init__.py @@ -1,10 +1,8 @@ from .coco import CocoDataset -from .loader import (collate, GroupSampler, DistributedGroupSampler, - build_dataloader) -from .utils import DataContainer, to_tensor, random_scale, show_ann +from .loader import GroupSampler, DistributedGroupSampler, build_dataloader +from .utils import to_tensor, random_scale, show_ann __all__ = [ - 'CocoDataset', 'collate', 'GroupSampler', 'DistributedGroupSampler', - 'build_dataloader', 'DataContainer', 'to_tensor', 'random_scale', - 'show_ann' + 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler', + 'build_dataloader', 'to_tensor', 'random_scale', 'show_ann' ] diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index f5463873e1e..3cd0a6d5ca2 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -2,13 +2,13 @@ import mmcv import numpy as np +from mmcv.parallel import DataContainer as DC from pycocotools.coco import COCO from torch.utils.data import Dataset from .transforms import (ImageTransform, BboxTransform, MaskTransform, Numpy2Tensor) from .utils import to_tensor, show_ann, random_scale -from .utils import DataContainer as DC class CocoDataset(Dataset): diff --git a/mmdet/datasets/loader/__init__.py b/mmdet/datasets/loader/__init__.py index 27796d0e9de..a3d4fdd2cbb 100644 --- a/mmdet/datasets/loader/__init__.py +++ b/mmdet/datasets/loader/__init__.py @@ -1,7 +1,6 @@ from .build_loader import build_dataloader -from .collate import collate from .sampler import GroupSampler, DistributedGroupSampler __all__ = [ - 'collate', 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' + 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' ] diff --git a/mmdet/datasets/loader/build_loader.py b/mmdet/datasets/loader/build_loader.py index 70f439926a9..d3b342b32b8 100644 --- a/mmdet/datasets/loader/build_loader.py +++ b/mmdet/datasets/loader/build_loader.py @@ -1,11 +1,16 @@ from functools import partial from mmcv.runner import get_dist_info +from mmcv.parallel import collate from torch.utils.data import DataLoader -from .collate import collate from .sampler import GroupSampler, DistributedGroupSampler +# https://github.com/pytorch/pytorch/issues/973 +import resource +rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) +resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) + def build_dataloader(dataset, imgs_per_gpu, diff --git a/mmdet/datasets/loader/collate.py b/mmdet/datasets/loader/collate.py deleted file mode 100644 index fa1335ca75a..00000000000 --- a/mmdet/datasets/loader/collate.py +++ /dev/null @@ -1,70 +0,0 @@ -import collections - -import torch -import torch.nn.functional as F -from torch.utils.data.dataloader import default_collate - -from ..utils import DataContainer - 
-# https://github.com/pytorch/pytorch/issues/973 -import resource -rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) -resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) - - -def collate(batch, samples_per_gpu=1): - """Puts each data field into a tensor/DataContainer with outer dimension - batch size. - - Extend default_collate to add support for :type:`~mmdet.DataContainer`. - There are 3 cases for data containers. - 1. cpu_only = True, e.g., meta data - 2. cpu_only = False, stack = True, e.g., images tensors - 3. cpu_only = False, stack = False, e.g., gt bboxes - """ - - if not isinstance(batch, collections.Sequence): - raise TypeError("{} is not supported.".format(batch.dtype)) - - if isinstance(batch[0], DataContainer): - assert len(batch) % samples_per_gpu == 0 - stacked = [] - if batch[0].cpu_only: - for i in range(0, len(batch), samples_per_gpu): - stacked.append( - [sample.data for sample in batch[i:i + samples_per_gpu]]) - return DataContainer( - stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) - elif batch[0].stack: - for i in range(0, len(batch), samples_per_gpu): - assert isinstance(batch[i].data, torch.Tensor) - # TODO: handle tensors other than 3d - assert batch[i].dim() == 3 - c, h, w = batch[0].size() - for sample in batch[i:i + samples_per_gpu]: - assert c == sample.size(0) - h = max(h, sample.size(1)) - w = max(w, sample.size(2)) - padded_samples = [ - F.pad( - sample.data, - (0, w - sample.size(2), 0, h - sample.size(1)), - value=sample.padding_value) - for sample in batch[i:i + samples_per_gpu] - ] - stacked.append(default_collate(padded_samples)) - else: - for i in range(0, len(batch), samples_per_gpu): - stacked.append( - [sample.data for sample in batch[i:i + samples_per_gpu]]) - return DataContainer(stacked, batch[0].stack, batch[0].padding_value) - elif isinstance(batch[0], collections.Sequence): - transposed = zip(*batch) - return [collate(samples, samples_per_gpu) for samples in transposed] - elif isinstance(batch[0], collections.Mapping): - return { - key: collate([d[key] for d in batch], samples_per_gpu) - for key in batch[0] - } - else: - return default_collate(batch) diff --git a/mmdet/datasets/utils/misc.py b/mmdet/datasets/utils.py similarity index 75% rename from mmdet/datasets/utils/misc.py rename to mmdet/datasets/utils.py index 22f67a1c35c..5a248ef6890 100644 --- a/mmdet/datasets/utils/misc.py +++ b/mmdet/datasets/utils.py @@ -5,7 +5,6 @@ import matplotlib.pyplot as plt import numpy as np -import pycocotools.mask as maskUtils def to_tensor(data): @@ -68,19 +67,3 @@ def show_ann(coco, img, ann_info): plt.axis('off') coco.showAnns(ann_info) plt.show() - - -def draw_bbox_and_segm(img, results, dataset, score_thr=0.5): - bbox_results, segm_results = results - hi_bboxes = [] - for cls_bboxes, cls_segms in zip(bbox_results, segm_results): - if len(cls_bboxes) == 0: - hi_bboxes.append(cls_bboxes) - continue - inds = np.where(cls_bboxes[:, -1] > score_thr)[0] - hi_bboxes.append(cls_bboxes[inds, :]) - color_mask = np.random.random((1, 3)) - for i in inds: - mask = maskUtils.decode(cls_segms[i]).astype(np.bool) - img[mask] = img[mask] * 0.5 + color_mask * 0.5 - mmcv.draw_bboxes_with_label(np.ascontiguousarray(img), hi_bboxes, dataset) diff --git a/mmdet/datasets/utils/__init__.py b/mmdet/datasets/utils/__init__.py deleted file mode 100644 index 4a46a9f97d2..00000000000 --- a/mmdet/datasets/utils/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .data_container import DataContainer -from .misc import to_tensor, random_scale, show_ann - 
-__all__ = ['DataContainer', 'to_tensor', 'random_scale', 'show_ann'] diff --git a/mmdet/datasets/utils/data_container.py b/mmdet/datasets/utils/data_container.py deleted file mode 100644 index d690f6798ce..00000000000 --- a/mmdet/datasets/utils/data_container.py +++ /dev/null @@ -1,58 +0,0 @@ -import functools - -import torch - - -def assert_tensor_type(func): - - @functools.wraps(func) - def wrapper(*args, **kwargs): - if not isinstance(args[0].data, torch.Tensor): - raise AttributeError('{} has no attribute {} for type {}'.format( - args[0].__class__.__name__, func.__name__, args[0].datatype)) - return func(*args, **kwargs) - - return wrapper - - -class DataContainer(object): - - def __init__(self, data, stack=False, padding_value=0, cpu_only=False): - self._data = data - self._cpu_only = cpu_only - self._stack = stack - self._padding_value = padding_value - - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, repr(self.data)) - - @property - def data(self): - return self._data - - @property - def datatype(self): - if isinstance(self.data, torch.Tensor): - return self.data.type() - else: - return type(self.data) - - @property - def cpu_only(self): - return self._cpu_only - - @property - def stack(self): - return self._stack - - @property - def padding_value(self): - return self._padding_value - - @assert_tensor_type - def size(self, *args, **kwargs): - return self.data.size(*args, **kwargs) - - @assert_tensor_type - def dim(self): - return self.data.dim() diff --git a/tools/test.py b/tools/test.py index c0bfd2558e4..3b1ce2d2e04 100644 --- a/tools/test.py +++ b/tools/test.py @@ -3,9 +3,10 @@ import torch import mmcv from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict +from mmcv.parallel import scatter, MMDataParallel from mmdet import datasets -from mmdet.core import scatter, MMDataParallel, results2json, coco_eval +from mmdet.core import results2json, coco_eval from mmdet.datasets import collate, build_dataloader from mmdet.models import build_detector, detectors diff --git a/tools/train.py b/tools/train.py index f778a777285..237ec2b21f5 100644 --- a/tools/train.py +++ b/tools/train.py @@ -9,10 +9,10 @@ import torch from mmcv import Config from mmcv.runner import Runner, obj_from_dict, DistSamplerSeedHook +from mmcv.parallel import MMDataParallel, MMDistributedDataParallel from mmdet import datasets, __version__ -from mmdet.core import (init_dist, DistOptimizerHook, MMDataParallel, - MMDistributedDataParallel, CocoDistEvalRecallHook, +from mmdet.core import (init_dist, DistOptimizerHook, CocoDistEvalRecallHook, CocoDistEvalmAPHook) from mmdet.datasets import build_dataloader from mmdet.models import build_detector, RPN From 61340ba2d597e79ebc7d1b53e424fa5ae88ea64d Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Mon, 8 Oct 2018 23:49:08 +0800 Subject: [PATCH 58/81] rename config files --- configs/{r50_fpn_frcnn_1x.py => faster_rcnn_r50_fpn_1x.py} | 4 ++-- configs/{r50_fpn_maskrcnn_1x.py => mask_rcnn_r50_fpn_1x.py} | 4 ++-- configs/{r50_fpn_rpn_1x.py => rpn_r50_fpn_1x.py} | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) rename configs/{r50_fpn_frcnn_1x.py => faster_rcnn_r50_fpn_1x.py} (97%) rename configs/{r50_fpn_maskrcnn_1x.py => mask_rcnn_r50_fpn_1x.py} (97%) rename configs/{r50_fpn_rpn_1x.py => rpn_r50_fpn_1x.py} (96%) diff --git a/configs/r50_fpn_frcnn_1x.py b/configs/faster_rcnn_r50_fpn_1x.py similarity index 97% rename from configs/r50_fpn_frcnn_1x.py rename to configs/faster_rcnn_r50_fpn_1x.py index d5547ee1001..f4803f0b045 100644 --- 
a/configs/r50_fpn_frcnn_1x.py +++ b/configs/faster_rcnn_r50_fpn_1x.py @@ -134,7 +134,7 @@ interval=50, hooks=[ dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook', log_dir=work_dir + '/log') + # dict(type='TensorboardLoggerHook') ]) # yapf:enable # runtime settings @@ -142,7 +142,7 @@ device_ids = range(8) dist_params = dict(backend='nccl') log_level = 'INFO' -work_dir = './work_dirs/fpn_faster_rcnn_r50_1x' +work_dir = './work_dirs/faster_rcnn_r50_fpn_1x' load_from = None resume_from = None workflow = [('train', 1)] diff --git a/configs/r50_fpn_maskrcnn_1x.py b/configs/mask_rcnn_r50_fpn_1x.py similarity index 97% rename from configs/r50_fpn_maskrcnn_1x.py rename to configs/mask_rcnn_r50_fpn_1x.py index 09d2fb92231..4760821e244 100644 --- a/configs/r50_fpn_maskrcnn_1x.py +++ b/configs/mask_rcnn_r50_fpn_1x.py @@ -147,7 +147,7 @@ interval=50, hooks=[ dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook', log_dir=work_dir + '/log') + # dict(type='TensorboardLoggerHook') ]) # yapf:enable # runtime settings @@ -155,7 +155,7 @@ device_ids = range(8) dist_params = dict(backend='nccl') log_level = 'INFO' -work_dir = './work_dirs/fpn_mask_rcnn_r50_1x' +work_dir = './work_dirs/mask_rcnn_r50_fpn_1x' load_from = None resume_from = None workflow = [('train', 1)] diff --git a/configs/r50_fpn_rpn_1x.py b/configs/rpn_r50_fpn_1x.py similarity index 96% rename from configs/r50_fpn_rpn_1x.py rename to configs/rpn_r50_fpn_1x.py index d2f28c2dfe6..4e45eb9e41b 100644 --- a/configs/r50_fpn_rpn_1x.py +++ b/configs/rpn_r50_fpn_1x.py @@ -105,14 +105,14 @@ interval=50, hooks=[ dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook', log_dir=work_dir + '/log') + # dict(type='TensorboardLoggerHook') ]) # yapf:enable # runtime settings total_epochs = 12 dist_params = dict(backend='nccl') log_level = 'INFO' -work_dir = './work_dirs/fpn_rpn_r50_1x' +work_dir = './work_dirs/rpn_r50_fpn_1x' load_from = None resume_from = None workflow = [('train', 1)] From 64f812ecd7271bb1b1afe0b8c435c2d2d4c42727 Mon Sep 17 00:00:00 2001 From: pangjm Date: Wed, 10 Oct 2018 14:05:59 +0800 Subject: [PATCH 59/81] fix fast rcnn bugs --- mmdet/datasets/coco.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index 3cd0a6d5ca2..9049f1af970 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -53,8 +53,14 @@ def __init__(self, # color channel order and normalize configs self.img_norm_cfg = img_norm_cfg # proposals - self.proposals = mmcv.load( - proposal_file) if proposal_file is not None else None + # TODO: revise _filter_imgs to be more flexible + if proposal_file is not None: + self.proposals = mmcv.load(proposal_file) + ori_ids = self.coco.getImgIds() + sorted_idx = [ori_ids.index(id) for id in self.img_ids] + self.proposals = [self.proposals[idx] for idx in sorted_idx] + else: + self.proposals = None self.num_max_proposals = num_max_proposals # flip ratio self.flip_ratio = flip_ratio @@ -271,7 +277,8 @@ def prepare_single(img, scale, flip, proposal=None): scale_factor=scale_factor, flip=flip) if proposal is not None: - _proposal = self.bbox_transform(proposal, scale_factor, flip) + _proposal = self.bbox_transform(proposal, img_shape, + scale_factor, flip) _proposal = to_tensor(_proposal) else: _proposal = None From 06125a48527e8270663263aae7a65ecc142c681e Mon Sep 17 00:00:00 2001 From: pangjm Date: Wed, 10 Oct 2018 14:06:27 +0800 Subject: [PATCH 60/81] fix test tools bugs --- tools/test.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/test.py b/tools/test.py index 3b1ce2d2e04..2552e7af787 100644 --- a/tools/test.py +++ b/tools/test.py @@ -3,11 +3,11 @@ import torch import mmcv from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict -from mmcv.parallel import scatter, MMDataParallel +from mmcv.parallel import scatter, collate, MMDataParallel from mmdet import datasets from mmdet.core import results2json, coco_eval -from mmdet.datasets import collate, build_dataloader +from mmdet.datasets import build_dataloader from mmdet.models import build_detector, detectors From 2988e04d80dc16f10a4d6945cd8e188d75800bea Mon Sep 17 00:00:00 2001 From: pangjm Date: Wed, 10 Oct 2018 14:09:13 +0800 Subject: [PATCH 61/81] add fast rcnn api & fix minor bugs --- mmdet/models/detectors/__init__.py | 3 ++- mmdet/models/detectors/fast_rcnn.py | 25 +++++++++++++++++++++++++ mmdet/models/detectors/two_stage.py | 3 ++- 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 mmdet/models/detectors/fast_rcnn.py diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py index b8914c1e5d3..29a64dd09b6 100644 --- a/mmdet/models/detectors/__init__.py +++ b/mmdet/models/detectors/__init__.py @@ -1,6 +1,7 @@ from .base import BaseDetector from .rpn import RPN +from .faster_rcnn import FastRCNN from .faster_rcnn import FasterRCNN from .mask_rcnn import MaskRCNN -__all__ = ['BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN'] +__all__ = ['BaseDetector', 'RPN', 'FastRCNN', 'FasterRCNN', 'MaskRCNN'] diff --git a/mmdet/models/detectors/fast_rcnn.py b/mmdet/models/detectors/fast_rcnn.py new file mode 100644 index 00000000000..0dbf17a9ab9 --- /dev/null +++ b/mmdet/models/detectors/fast_rcnn.py @@ -0,0 +1,25 @@ +from .two_stage import TwoStageDetector + + +class FastRCNN(TwoStageDetector): + + def __init__(self, + backbone, + neck, + bbox_roi_extractor, + bbox_head, + train_cfg, + test_cfg, + mask_roi_extractor=None, + mask_head=None, + pretrained=None): + super(FastRCNN, self).__init__( + backbone=backbone, + neck=neck, + bbox_roi_extractor=bbox_roi_extractor, + bbox_head=bbox_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + mask_roi_extractor=mask_roi_extractor, + mask_head=mask_head, + pretrained=pretrained) diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py index 8573d83215f..3cd68388790 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/two_stage.py @@ -146,7 +146,8 @@ def simple_test(self, img, img_meta, proposals=None, rescale=False): x = self.extract_feat(img) proposal_list = self.simple_test_rpn( - x, img_meta, self.test_cfg.rpn) if proposals is None else proposals + x, img_meta, + self.test_cfg.rpn) if proposals is None else proposals[0] det_bboxes, det_labels = self.simple_test_bboxes( x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale) From 1c49e6728085a2078663f8ee978e60ef3029e6c7 Mon Sep 17 00:00:00 2001 From: pangjm Date: Wed, 10 Oct 2018 14:14:59 +0800 Subject: [PATCH 62/81] minor fix --- mmdet/models/detectors/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py index 29a64dd09b6..c911d1723d1 100644 --- a/mmdet/models/detectors/__init__.py +++ b/mmdet/models/detectors/__init__.py @@ -1,6 +1,6 @@ from .base import BaseDetector from .rpn import RPN -from .faster_rcnn import FastRCNN +from .fast_rcnn import FastRCNN from .faster_rcnn import FasterRCNN from .mask_rcnn
import MaskRCNN From 1b79f2c1e7f7e530d26041e1700b9aee5ae19505 Mon Sep 17 00:00:00 2001 From: pangjm Date: Wed, 10 Oct 2018 14:47:08 +0800 Subject: [PATCH 63/81] revise fast test & fix aug test bug --- mmdet/models/detectors/fast_rcnn.py | 39 ++++++++++++++++++++------- mmdet/models/detectors/test_mixins.py | 9 +++++-- mmdet/models/detectors/two_stage.py | 2 +- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/mmdet/models/detectors/fast_rcnn.py b/mmdet/models/detectors/fast_rcnn.py index 0dbf17a9ab9..fd80a87f69d 100644 --- a/mmdet/models/detectors/fast_rcnn.py +++ b/mmdet/models/detectors/fast_rcnn.py @@ -14,12 +14,33 @@ def __init__(self, mask_head=None, pretrained=None): super(FastRCNN, self).__init__( - backbone=backbone, - neck=neck, - bbox_roi_extractor=bbox_roi_extractor, - bbox_head=bbox_head, - train_cfg=train_cfg, - test_cfg=test_cfg, - mask_roi_extractor=mask_roi_extractor, - mask_head=mask_head, - pretrained=pretrained) + backbone=backbone, + neck=neck, + bbox_roi_extractor=bbox_roi_extractor, + bbox_head=bbox_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + mask_roi_extractor=mask_roi_extractor, + mask_head=mask_head, + pretrained=pretrained) + + def forward_test(self, imgs, img_metas, proposals, **kwargs): + for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: + if not isinstance(var, list): + raise TypeError('{} must be a list, but got {}'.format( + name, type(var))) + + num_augs = len(imgs) + if num_augs != len(img_metas): + raise ValueError( + 'num of augmentations ({}) != num of image meta ({})'.format( + len(imgs), len(img_metas))) + # TODO: remove the restriction of imgs_per_gpu == 1 when prepared + imgs_per_gpu = imgs[0].size(0) + assert imgs_per_gpu == 1 + + if num_augs == 1: + return self.simple_test(imgs[0], img_metas[0], proposals[0], + **kwargs) + else: + return self.aug_test(imgs, img_metas, proposals, **kwargs) diff --git a/mmdet/models/detectors/test_mixins.py b/mmdet/models/detectors/test_mixins.py index 77ba244f1a3..38136f47545 100644 --- a/mmdet/models/detectors/test_mixins.py +++ b/mmdet/models/detectors/test_mixins.py @@ -135,6 +135,11 @@ def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head.get_seg_masks( - merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, - ori_shape) + merged_masks, + det_bboxes, + det_labels, + self.test_cfg.rcnn, + ori_shape, + scale_factor=1.0, + rescale=False) return segm_result diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py index 3cd68388790..b2f2839f93c 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/two_stage.py @@ -147,7 +147,7 @@ def simple_test(self, img, img_meta, proposals=None, rescale=False): proposal_list = self.simple_test_rpn( x, img_meta, - self.test_cfg.rpn) if proposals is None else proposals[0] + self.test_cfg.rpn) if proposals is None else proposals det_bboxes, det_labels = self.simple_test_bboxes( x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale) From 2819a74b23c2b844be8140fc6b75a2eb1a07b30b Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 10 Oct 2018 20:05:11 +0800 Subject: [PATCH 64/81] update resnet backbone --- configs/faster_rcnn_r50_fpn_1x.py | 2 +- configs/mask_rcnn_r50_fpn_1x.py | 2 +- configs/rpn_r50_fpn_1x.py | 2 +- mmdet/models/backbones/__init__.py | 4 +- mmdet/models/backbones/resnet.py | 144 ++++++++++++----------------- 5 files changed, 63 insertions(+), 91 deletions(-) diff --git 
a/configs/faster_rcnn_r50_fpn_1x.py b/configs/faster_rcnn_r50_fpn_1x.py index f4803f0b045..b15405e0997 100644 --- a/configs/faster_rcnn_r50_fpn_1x.py +++ b/configs/faster_rcnn_r50_fpn_1x.py @@ -3,7 +3,7 @@ type='FasterRCNN', pretrained='modelzoo://resnet50', backbone=dict( - type='resnet', + type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), diff --git a/configs/mask_rcnn_r50_fpn_1x.py b/configs/mask_rcnn_r50_fpn_1x.py index 4760821e244..e2d47217cc4 100644 --- a/configs/mask_rcnn_r50_fpn_1x.py +++ b/configs/mask_rcnn_r50_fpn_1x.py @@ -3,7 +3,7 @@ type='MaskRCNN', pretrained='modelzoo://resnet50', backbone=dict( - type='resnet', + type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), diff --git a/configs/rpn_r50_fpn_1x.py b/configs/rpn_r50_fpn_1x.py index 4e45eb9e41b..7f1b6d0ca39 100644 --- a/configs/rpn_r50_fpn_1x.py +++ b/configs/rpn_r50_fpn_1x.py @@ -3,7 +3,7 @@ type='RPN', pretrained='modelzoo://resnet50', backbone=dict( - type='resnet', + type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), diff --git a/mmdet/models/backbones/__init__.py b/mmdet/models/backbones/__init__.py index 107507ceaf6..0f82f92aad1 100644 --- a/mmdet/models/backbones/__init__.py +++ b/mmdet/models/backbones/__init__.py @@ -1,3 +1,3 @@ -from .resnet import resnet +from .resnet import ResNet -__all__ = ['resnet'] +__all__ = ['ResNet'] diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py index 371f4f59fec..66684b154b5 100644 --- a/mmdet/models/backbones/resnet.py +++ b/mmdet/models/backbones/resnet.py @@ -1,8 +1,9 @@ import logging -import math import torch.nn as nn import torch.utils.checkpoint as cp + +from mmcv.cnn import constant_init, kaiming_init from mmcv.runner import load_checkpoint @@ -27,7 +28,8 @@ def __init__(self, stride=1, dilation=1, downsample=None, - style='pytorch'): + style='pytorch', + with_cp=False): super(BasicBlock, self).__init__() self.conv1 = conv3x3(inplanes, planes, stride, dilation) self.bn1 = nn.BatchNorm2d(planes) @@ -37,6 +39,7 @@ def __init__(self, self.downsample = downsample self.stride = stride self.dilation = dilation + assert not with_cp def forward(self, x): residual = x @@ -69,7 +72,6 @@ def __init__(self, style='pytorch', with_cp=False): """Bottleneck block. - If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is "caffe", the stride-two layer is the first 1x1 conv layer. """ @@ -174,64 +176,73 @@ def make_res_layer(block, return nn.Sequential(*layers) -class ResHead(nn.Module): - - def __init__(self, - block, - num_blocks, - stride=2, - dilation=1, - style='pytorch'): - self.layer4 = make_res_layer( - block, - 1024, - 512, - num_blocks, - stride=stride, - dilation=dilation, - style=style) - - def forward(self, x): - return self.layer4(x) +class ResNet(nn.Module): + """ResNet backbone. + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers to eval mode, namely, freeze + running stats (mean and var). 
+ bn_frozen (bool): Whether to freeze weight and bias of BN layers. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + """ -class ResNet(nn.Module): + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } def __init__(self, - block, - layers, + depth, + num_stages=4, strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1), out_indices=(0, 1, 2, 3), - frozen_stages=-1, style='pytorch', - sync_bn=False, - with_cp=False, - strict_frozen=False): + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + with_cp=False): super(ResNet, self).__init__() - if not len(layers) == len(strides) == len(dilations): - raise ValueError( - 'The number of layers, strides and dilations must be equal, ' - 'but found have {} layers, {} strides and {} dilations'.format( - len(layers), len(strides), len(dilations))) - assert max(out_indices) < len(layers) + if depth not in self.arch_settings: + raise KeyError('invalid depth {} for resnet'.format(depth)) + assert num_stages >= 1 and num_stages <= 4 + block, stage_blocks = self.arch_settings[depth] + stage_blocks = stage_blocks[:num_stages] + assert len(strides) == len(dilations) == num_stages + assert max(out_indices) < num_stages + self.out_indices = out_indices - self.frozen_stages = frozen_stages self.style = style - self.sync_bn = sync_bn + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + self.with_cp = with_cp + self.inplanes = 64 self.conv1 = nn.Conv2d( 3, 64, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.res_layers = [] - for i, num_blocks in enumerate(layers): + self.res_layers = [] + for i, num_blocks in enumerate(stage_blocks): stride = strides[i] dilation = dilations[i] - - layer_name = 'layer{}'.format(i + 1) planes = 64 * 2**i res_layer = make_res_layer( block, @@ -243,12 +254,11 @@ def __init__(self, style=self.style, with_cp=with_cp) self.inplanes = planes * block.expansion + layer_name = 'layer{}'.format(i + 1) self.add_module(layer_name, res_layer) self.res_layers.append(layer_name) - self.feat_dim = block.expansion * 64 * 2**(len(layers) - 1) - self.with_cp = with_cp - self.strict_frozen = strict_frozen + self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1) def init_weights(self, pretrained=None): if isinstance(pretrained, str): @@ -257,11 +267,9 @@ def init_weights(self, pretrained=None): elif pretrained is None: for m in self.modules(): if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - nn.init.normal_(m.weight, 0, math.sqrt(2. 
/ n)) + kaiming_init(m) elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) + constant_init(m, 1) else: raise TypeError('pretrained must be a str or None') @@ -283,11 +291,11 @@ def forward(self, x): def train(self, mode=True): super(ResNet, self).train(mode) - if not self.sync_bn: + if self.bn_eval: for m in self.modules(): if isinstance(m, nn.BatchNorm2d): m.eval() - if self.strict_frozen: + if self.bn_frozen: for params in m.parameters(): params.requires_grad = False if mode and self.frozen_stages >= 0: @@ -303,39 +311,3 @@ def train(self, mode=True): mod.eval() for param in mod.parameters(): param.requires_grad = False - - -resnet_cfg = { - 18: (BasicBlock, (2, 2, 2, 2)), - 34: (BasicBlock, (3, 4, 6, 3)), - 50: (Bottleneck, (3, 4, 6, 3)), - 101: (Bottleneck, (3, 4, 23, 3)), - 152: (Bottleneck, (3, 8, 36, 3)) -} - - -def resnet(depth, - num_stages=4, - strides=(1, 2, 2, 2), - dilations=(1, 1, 1, 1), - out_indices=(2, ), - frozen_stages=-1, - style='pytorch', - sync_bn=False, - with_cp=False, - strict_frozen=False): - """Constructs a ResNet model. - - Args: - depth (int): depth of resnet, from {18, 34, 50, 101, 152} - num_stages (int): num of resnet stages, normally 4 - strides (list): strides of the first block of each stage - dilations (list): dilation of each stage - out_indices (list): output from which stages - """ - if depth not in resnet_cfg: - raise KeyError('invalid depth {} for resnet'.format(depth)) - block, layers = resnet_cfg[depth] - model = ResNet(block, layers[:num_stages], strides, dilations, out_indices, - frozen_stages, style, sync_bn, with_cp, strict_frozen) - return model
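Usage sketch (not part of the patch series): after this patch the backbone is built from a depth instead of a block/layers pair. The input size and the modelzoo URI below are illustrative, taken from the configs in this series.

    import torch
    from mmdet.models.backbones import ResNet

    # depth indexes arch_settings; out_indices picks which stages are returned
    backbone = ResNet(depth=50, num_stages=4, out_indices=(0, 1, 2, 3),
                      frozen_stages=1, style='pytorch')
    backbone.init_weights(pretrained='modelzoo://resnet50')
    feats = backbone(torch.randn(1, 3, 224, 224))
    # one feature map per entry in out_indices, with 256/512/1024/2048 channels,
    # matching the in_channels of the FPN neck in the configs
    assert [f.size(1) for f in feats] == [256, 512, 1024, 2048]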
From cb1525144c766c1362f1cf65421890e25f8f859c Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 10 Oct 2018 20:05:53 +0800 Subject: [PATCH 65/81] import FastRCNN to higher level, update hooks --- mmdet/core/evaluation/eval_hooks.py | 4 ++++ mmdet/models/__init__.py | 10 ++++++---- mmdet/models/detectors/__init__.py | 6 +++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/mmdet/core/evaluation/eval_hooks.py b/mmdet/core/evaluation/eval_hooks.py index a83b80dbfe7..bec25eff6d7 100644 --- a/mmdet/core/evaluation/eval_hooks.py +++ b/mmdet/core/evaluation/eval_hooks.py @@ -55,6 +55,10 @@ def before_run(self, runner): shutil.rmtree(self.lock_dir) mmcv.mkdir_or_exist(self.lock_dir) + def after_run(self, runner): + if runner.rank == 0: + shutil.rmtree(self.lock_dir) + def after_train_epoch(self, runner): if not self.every_n_epochs(runner, self.interval): return diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py index aca6399e45e..8232fda616c 100644 --- a/mmdet/models/__init__.py +++ b/mmdet/models/__init__.py @@ -1,9 +1,11 @@ -from .detectors import BaseDetector, RPN, FasterRCNN, MaskRCNN +from .detectors import (BaseDetector, TwoStageDetector, RPN, FastRCNN, + FasterRCNN, MaskRCNN) from .builder import (build_neck, build_rpn_head, build_roi_extractor, build_bbox_head, build_mask_head, build_detector) __all__ = [ - 'BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 'build_backbone', - 'build_neck', 'build_rpn_head', 'build_roi_extractor', 'build_bbox_head', - 'build_mask_head', 'build_detector' + 'BaseDetector', 'TwoStageDetector', 'RPN', 'FastRCNN', 'FasterRCNN', + 'MaskRCNN', 'build_backbone', 'build_neck', 'build_rpn_head', + 'build_roi_extractor', 'build_bbox_head', 'build_mask_head', + 'build_detector' ] diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py index c911d1723d1..a784d5f3456 100644 --- a/mmdet/models/detectors/__init__.py +++ b/mmdet/models/detectors/__init__.py @@ -1,7 +1,11 @@ from .base import BaseDetector +from .two_stage import TwoStageDetector from .rpn import RPN from .fast_rcnn import FastRCNN from .faster_rcnn import FasterRCNN from .mask_rcnn import MaskRCNN -__all__ = ['BaseDetector', 'RPN', 'FastRCNN', 'FasterRCNN', 'MaskRCNN'] +__all__ = [ + 'BaseDetector', 'TwoStageDetector', 'RPN', 'FastRCNN', 'FasterRCNN', + 'MaskRCNN' +] From 8418887ef1b26b167063034f559e163cfd77cb20 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 10 Oct 2018 20:16:04 +0800 Subject: [PATCH 66/81] setup travis --- .travis.yml | 13 +++++++++++++ setup.py | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000000..dd089151133 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,13 @@ +dist: trusty +language: python + +install: + - pip install flake8 + +python: + - "2.7" + - "3.5" + - "3.6" + +script: + - flake8 \ No newline at end of file diff --git a/setup.py b/setup.py index 7cb44e538e3..1ebe317956f 100644 --- a/setup.py +++ b/setup.py @@ -106,6 +106,7 @@ def get_version(): setup_requires=['pytest-runner'], tests_require=['pytest'], install_requires=[ - 'numpy', 'matplotlib', 'six', 'terminaltables', 'pycocotools' + 'mmcv', 'numpy', 'matplotlib', 'six', 'terminaltables', + 'pycocotools' ], zip_safe=False) From 470b4d4ce579114de07d2a4ac1cd33cb9a17d463 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 10 Oct 2018 20:34:08 +0800 Subject: [PATCH 67/81] fix flake8 error in python 2 --- mmdet/core/evaluation/eval_hooks.py | 2 +- mmdet/models/detectors/base.py | 8 +++----- tools/test.py | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/mmdet/core/evaluation/eval_hooks.py b/mmdet/core/evaluation/eval_hooks.py index bec25eff6d7..1402f7f3c73 100644 --- a/mmdet/core/evaluation/eval_hooks.py +++ b/mmdet/core/evaluation/eval_hooks.py @@ -74,7 +74,7 @@ def after_train_epoch(self, runner): # compute output with torch.no_grad(): result = runner.model( - **data_gpu, return_loss=False, rescale=True) + return_loss=False, rescale=True, **data_gpu) results[idx] = result batch_size = runner.world_size diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py index d1b0fce1283..e617b0e306d 100644 --- a/mmdet/models/detectors/base.py +++ b/mmdet/models/detectors/base.py @@ -34,11 +34,9 @@ def extract_feat(self, imgs): pass def extract_feats(self, imgs): - if isinstance(imgs, torch.Tensor): - return self.extract_feat(imgs) - elif isinstance(imgs, list): - for img in imgs: - yield self.extract_feat(img) + assert isinstance(imgs, list) + for img in imgs: + yield self.extract_feat(img) @abstractmethod def forward_train(self, imgs, img_metas, **kwargs): diff --git a/tools/test.py b/tools/test.py index 2552e7af787..e1552e58209 100644 --- a/tools/test.py +++ b/tools/test.py @@ -17,7 +17,7 @@ def single_test(model, data_loader, show=False): prog_bar = mmcv.ProgressBar(len(data_loader.dataset)) for i, data in enumerate(data_loader): with torch.no_grad(): - result = model(**data, return_loss=False, rescale=not show) + result = model(return_loss=False, rescale=not show, **data) results.append(result) if show: From 31dfdebb34633a5cc758ba6274ae257f5089b208 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 10 Oct 2018 20:37:06 +0800 Subject: [PATCH 68/81] minor fix --- mmdet/models/detectors/base.py | 1 - tools/test.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git
a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py index e617b0e306d..6d26dc3a5ab 100644 --- a/mmdet/models/detectors/base.py +++ b/mmdet/models/detectors/base.py @@ -3,7 +3,6 @@ import mmcv import numpy as np -import torch import torch.nn as nn from mmdet.core import tensor2imgs, get_classes diff --git a/tools/test.py b/tools/test.py index e1552e58209..b322bb203f4 100644 --- a/tools/test.py +++ b/tools/test.py @@ -32,7 +32,7 @@ def single_test(model, data_loader, show=False): def _data_func(data, device_id): data = scatter(collate([data], samples_per_gpu=1), [device_id])[0] - return dict(**data, return_loss=False, rescale=True) + return dict(return_loss=False, rescale=True, **data) def parse_args(): From 164562ee503a9d9eea7d32b877a627924f7dfb44 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 10 Oct 2018 21:44:55 +0800 Subject: [PATCH 69/81] bug fix for proposal evaluation --- mmdet/core/evaluation/coco_utils.py | 4 ++-- tools/test.py | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/mmdet/core/evaluation/coco_utils.py b/mmdet/core/evaluation/coco_utils.py index e9fdb41649c..0ed056b2e6d 100644 --- a/mmdet/core/evaluation/coco_utils.py +++ b/mmdet/core/evaluation/coco_utils.py @@ -16,8 +16,8 @@ def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)): coco = COCO(coco) assert isinstance(coco, COCO) - if res_type == 'proposal_fast': - ar = fast_eval_recall(result_file, coco, max_dets) + if result_types == ['proposal_fast']: + ar = fast_eval_recall(result_file, coco, np.array(max_dets)) for i, num in enumerate(max_dets): print('AR@{}\t= {:.4f}'.format(num, ar[i])) return diff --git a/tools/test.py b/tools/test.py index b322bb203f4..8552561b623 100644 --- a/tools/test.py +++ b/tools/test.py @@ -55,6 +55,9 @@ def parse_args(): def main(): args = parse_args() + if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): + raise ValueError('The output file must be a pkl file.') + cfg = mmcv.Config.fromfile(args.config) cfg.model.pretrained = None cfg.data.test.test_mode = True @@ -82,11 +85,17 @@ def main(): dataset, _data_func, range(args.gpus)) if args.out: + print('writing results to {}'.format(args.out)) mmcv.dump(outputs, args.out) - if args.eval: - json_file = args.out + '.json' - results2json(dataset, outputs, json_file) - coco_eval(json_file, args.eval, dataset.coco) + eval_types = args.eval + if eval_types: + print('Starting to evaluate {}'.format(' and '.join(eval_types))) + if eval_types == ['proposal_fast']: + result_file = args.out + else: + result_file = args.out + '.json' + results2json(dataset, outputs, result_file) + coco_eval(result_file, eval_types, dataset.coco) if __name__ == '__main__':
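Usage sketch (not part of the patch series): after this fix, offline evaluation is a two-step flow: dump detections with tools/test.py --out, then convert and score them. The equivalent calls in Python, with placeholder file names:

    import mmcv
    from mmcv.runner import obj_from_dict

    from mmdet import datasets
    from mmdet.core import coco_eval, results2json

    cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')
    dataset = obj_from_dict(cfg.data.test, datasets)  # the test dict sets test_mode=True
    outputs = mmcv.load('results.pkl')  # written by tools/test.py --out results.pkl

    results2json(dataset, outputs, 'results.pkl.json')
    coco_eval('results.pkl.json', ['bbox'], dataset.coco)
    # when testing an RPN, ['proposal_fast'] is scored directly on the raw .pkl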
From 3ce9f1222a092977ad2d37c21b2f00d6be9ead39 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Wed, 10 Oct 2018 21:50:57 +0800 Subject: [PATCH 70/81] add an argument to specify processes per gpu --- tools/test.py | 18 +++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/test.py b/tools/test.py index 8552561b623..dc8dc5e85ce 100644 --- a/tools/test.py +++ b/tools/test.py @@ -39,7 +39,13 @@ def parse_args(): parser = argparse.ArgumentParser(description='MMDet test detector') parser.add_argument('config', help='test config file path') parser.add_argument('checkpoint', help='checkpoint file') - parser.add_argument('--gpus', default=1, type=int) + parser.add_argument( + '--gpus', default=1, type=int, help='number of GPUs used for testing') + parser.add_argument( + '--proc_per_gpu', + default=1, + type=int, + help='Number of processes per GPU') parser.add_argument('--out', help='output result file') parser.add_argument( '--eval', @@ -81,8 +87,14 @@ def main(): model_args = cfg.model.copy() model_args.update(train_cfg=None, test_cfg=cfg.test_cfg) model_type = getattr(detectors, model_args.pop('type')) - outputs = parallel_test(model_type, model_args, args.checkpoint, - dataset, _data_func, range(args.gpus)) + outputs = parallel_test( + model_type, + model_args, + args.checkpoint, + dataset, + _data_func, + range(args.gpus), + workers_per_gpu=args.proc_per_gpu) if args.out: print('writing results to {}'.format(args.out)) mmcv.dump(outputs, args.out) From 52f7840ec046153cac284f84f519f9b8d759a637 Mon Sep 17 00:00:00 2001 From: pangjm Date: Thu, 11 Oct 2018 16:02:34 +0800 Subject: [PATCH 71/81] update fast rcnn configs --- configs/fast_mask_rcnn_r50_fpn_1x.py | 132 +++++++++++++++++++++ configs/fast_rcnn_r50_fpn_1x.py | 118 ++++++++++++++++++++++++ configs/faster_rcnn_r50_fpn_1x.py | 3 +- configs/mask_rcnn_r50_fpn_1x.py | 3 +- 4 files changed, 252 insertions(+), 4 deletions(-) create mode 100644 configs/fast_mask_rcnn_r50_fpn_1x.py create mode 100644 configs/fast_rcnn_r50_fpn_1x.py diff --git a/configs/fast_mask_rcnn_r50_fpn_1x.py b/configs/fast_mask_rcnn_r50_fpn_1x.py new file mode 100644 index 00000000000..1878a2762a4 --- /dev/null +++ b/configs/fast_mask_rcnn_r50_fpn_1x.py @@ -0,0 +1,132 @@ +# model settings +model = dict( + type='FastRCNN', + pretrained='modelzoo://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCRoIHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=81)) +# model training and testing settings +train_cfg = dict( + rcnn=dict( + mask_size=28, + pos_iou_thr=0.5, + neg_iou_thr=0.5, + crowd_thr=1.1, + roi_batch_size=512, + add_gt_as_proposals=True, + pos_fraction=0.25, + pos_balance_sampling=False, + neg_pos_ub=512, + neg_balance_thr=0, + min_pos_iou=0.5, + pos_weight=-1, + debug=False)) +test_cfg = dict( + rcnn=dict( + score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + proposal_file=data_root + 'proposals/train2017_r50_fpn_rpn_1x.pkl', + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root +
'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=20, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/fast_mask_rcnn_r50_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/fast_rcnn_r50_fpn_1x.py b/configs/fast_rcnn_r50_fpn_1x.py new file mode 100644 index 00000000000..bdff0523c8d --- /dev/null +++ b/configs/fast_rcnn_r50_fpn_1x.py @@ -0,0 +1,118 @@ +# model settings +model = dict( + type='FastRCNN', + pretrained='modelzoo://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCRoIHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False)) +# model training and testing settings +train_cfg = dict( + rcnn=dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + crowd_thr=1.1, + roi_batch_size=512, + add_gt_as_proposals=True, + pos_fraction=0.25, + pos_balance_sampling=False, + neg_pos_ub=512, + neg_balance_thr=0, + min_pos_iou=0.5, + pos_weight=-1, + debug=False)) +test_cfg = dict(rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + proposal_file=data_root + 'proposals/train2017_r50_fpn_rpn_1x.pkl', + flip_ratio=0.5, + with_mask=False, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + 
ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=20, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/fast_rcnn_r50_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/faster_rcnn_r50_fpn_1x.py b/configs/faster_rcnn_r50_fpn_1x.py index b15405e0997..1c06c4cca7e 100644 --- a/configs/faster_rcnn_r50_fpn_1x.py +++ b/configs/faster_rcnn_r50_fpn_1x.py @@ -65,7 +65,7 @@ pos_balance_sampling=False, neg_pos_ub=512, neg_balance_thr=0, - min_pos_iou=1.1, + min_pos_iou=0.5, pos_weight=-1, debug=False)) test_cfg = dict( @@ -139,7 +139,6 @@ # yapf:enable # runtime settings total_epochs = 12 -device_ids = range(8) dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = './work_dirs/faster_rcnn_r50_fpn_1x' diff --git a/configs/mask_rcnn_r50_fpn_1x.py b/configs/mask_rcnn_r50_fpn_1x.py index e2d47217cc4..8868cf6ebd9 100644 --- a/configs/mask_rcnn_r50_fpn_1x.py +++ b/configs/mask_rcnn_r50_fpn_1x.py @@ -77,7 +77,7 @@ pos_balance_sampling=False, neg_pos_ub=512, neg_balance_thr=0, - min_pos_iou=1.1, + min_pos_iou=0.5, pos_weight=-1, debug=False)) test_cfg = dict( @@ -152,7 +152,6 @@ # yapf:enable # runtime settings total_epochs = 12 -device_ids = range(8) dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = './work_dirs/mask_rcnn_r50_fpn_1x' From 003ffbe3dcb9d604e8aeb1b9e066f145e2a99a4a Mon Sep 17 00:00:00 2001 From: pangjm Date: Thu, 11 Oct 2018 16:31:48 +0800 Subject: [PATCH 72/81] minor revision --- configs/fast_mask_rcnn_r50_fpn_1x.py | 2 +- configs/fast_rcnn_r50_fpn_1x.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/fast_mask_rcnn_r50_fpn_1x.py b/configs/fast_mask_rcnn_r50_fpn_1x.py index 1878a2762a4..4281c161813 100644 --- a/configs/fast_mask_rcnn_r50_fpn_1x.py +++ b/configs/fast_mask_rcnn_r50_fpn_1x.py @@ -116,7 +116,7 @@ checkpoint_config = dict(interval=1) # yapf:disable log_config = dict( - interval=20, + interval=50, hooks=[ dict(type='TextLoggerHook'), # dict(type='TensorboardLoggerHook') diff --git a/configs/fast_rcnn_r50_fpn_1x.py b/configs/fast_rcnn_r50_fpn_1x.py index bdff0523c8d..47ec415cf0a 100644 --- a/configs/fast_rcnn_r50_fpn_1x.py +++ b/configs/fast_rcnn_r50_fpn_1x.py @@ -102,7 +102,7 @@ checkpoint_config = dict(interval=1) # yapf:disable log_config = dict( - interval=20, + interval=50, hooks=[ dict(type='TextLoggerHook'), # dict(type='TensorboardLoggerHook') From bce7c8c88ba6365b02126713cbd4e8f9140120e3 Mon Sep 17 00:00:00 2001 From: myownskyW7 <727032989@qq.com> Date: Thu, 11 Oct 2018 23:30:00 +0800 Subject: [PATCH 73/81] add high level api --- mmdet/api/__init__.py | 4 ++ mmdet/api/inference.py | 54 ++++++++++++++++++ mmdet/api/train.py | 120 +++++++++++++++++++++++++++++++++++++++ tools/train.py | 125 
++++------------------------------------- 4 files changed, 188 insertions(+), 115 deletions(-) create mode 100644 mmdet/api/__init__.py create mode 100644 mmdet/api/inference.py create mode 100644 mmdet/api/train.py diff --git a/mmdet/api/__init__.py b/mmdet/api/__init__.py new file mode 100644 index 00000000000..970492f6306 --- /dev/null +++ b/mmdet/api/__init__.py @@ -0,0 +1,4 @@ +from .train import train_detector +from .inference import inference_detector + +__all__ = ['train_detector', 'inference_detector'] diff --git a/mmdet/api/inference.py b/mmdet/api/inference.py new file mode 100644 index 00000000000..47b7de39b37 --- /dev/null +++ b/mmdet/api/inference.py @@ -0,0 +1,54 @@ +import mmcv +import numpy as np +import torch + +from mmdet.datasets import to_tensor +from mmdet.datasets.transforms import ImageTransform +from mmdet.core import get_classes + + +def _prepare_data(img, img_transform, cfg, device): + ori_shape = img.shape + img, img_shape, pad_shape, scale_factor = img_transform( + img, scale=cfg.data.test.img_scale) + img = to_tensor(img).to(device).unsqueeze(0) + img_meta = [ + dict( + ori_shape=ori_shape, + img_shape=img_shape, + pad_shape=pad_shape, + scale_factor=scale_factor, + flip=False) + ] + return dict(img=[img], img_meta=[img_meta]) + + +def inference_detector(model, imgs, cfg, device='cuda:0'): + + imgs = imgs if isinstance(imgs, list) else [imgs] + img_transform = ImageTransform( + **cfg.img_norm_cfg, size_divisor=cfg.data.test.size_divisor) + model = model.to(device) + model.eval() + for img in imgs: + img = mmcv.imread(img) + data = _prepare_data(img, img_transform, cfg, device) + with torch.no_grad(): + result = model(**data, return_loss=False, rescale=True) + yield result + + +def show_result(img, result, dataset='coco', score_thr=0.3): + class_names = get_classes(dataset) + labels = [ + np.full(bbox.shape[0], i, dtype=np.int32) + for i, bbox in enumerate(result) + ] + labels = np.concatenate(labels) + bboxes = np.vstack(result) + mmcv.imshow_det_bboxes( + img.copy(), + bboxes, + labels, + class_names=class_names, + score_thr=score_thr) diff --git a/mmdet/api/train.py b/mmdet/api/train.py new file mode 100644 index 00000000000..28469a200b2 --- /dev/null +++ b/mmdet/api/train.py @@ -0,0 +1,120 @@ +from __future__ import division + +import logging +import random +from collections import OrderedDict + +import numpy as np +import torch +from mmcv.runner import Runner, DistSamplerSeedHook +from mmcv.parallel import MMDataParallel, MMDistributedDataParallel + +from mmdet import __version__ +from mmdet.core import (init_dist, DistOptimizerHook, CocoDistEvalRecallHook, + CocoDistEvalmAPHook) +from mmdet.datasets import build_dataloader +from mmdet.models import RPN + + +def parse_losses(losses): + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError( + '{} is not a tensor or list of tensors'.format(loss_name)) + + loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) + + log_vars['loss'] = loss + for name in log_vars: + log_vars[name] = log_vars[name].item() + + return loss, log_vars + + +def batch_processor(model, data, train_mode): + losses = model(**data) + loss, log_vars = parse_losses(losses) + + outputs = dict( + loss=loss, log_vars=log_vars, num_samples=len(data['img'].data)) + + return outputs + + +def 
get_logger(log_level): + logging.basicConfig( + format='%(asctime)s - %(levelname)s - %(message)s', level=log_level) + logger = logging.getLogger() + return logger + + +def set_random_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + +def train_detector(model, dataset, cfg): + # save mmdet version in checkpoint as meta data + cfg.checkpoint_config.meta = dict( + mmdet_version=__version__, config=cfg.text) + + logger = get_logger(cfg.log_level) + + # set random seed if specified + if cfg.seed is not None: + logger.info('Set random seed to {}'.format(cfg.seed)) + set_random_seed(cfg.seed) + + # init distributed environment if necessary + if cfg.launcher == 'none': + dist = False + logger.info('Non-distributed training.') + else: + dist = True + init_dist(cfg.launcher, **cfg.dist_params) + if torch.distributed.get_rank() != 0: + logger.setLevel('ERROR') + logger.info('Distributed training.') + + # prepare data loaders + data_loaders = [ + build_dataloader(dataset, cfg.data.imgs_per_gpu, + cfg.data.workers_per_gpu, cfg.gpus, dist) + ] + + # put model on gpus + if dist: + model = MMDistributedDataParallel(model.cuda()) + else: + model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() + + # build runner + runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, + cfg.log_level) + + # register hooks + optimizer_config = DistOptimizerHook( + **cfg.optimizer_config) if dist else cfg.optimizer_config + runner.register_training_hooks(cfg.lr_config, optimizer_config, + cfg.checkpoint_config, cfg.log_config) + if dist: + runner.register_hook(DistSamplerSeedHook()) + # register eval hooks + if cfg.validate: + if isinstance(model.module, RPN): + runner.register_hook(CocoDistEvalRecallHook(cfg.data.val)) + elif cfg.data.val.type == 'CocoDataset': + runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) + + if cfg.resume_from: + runner.resume(cfg.resume_from) + elif cfg.load_from: + runner.load_checkpoint(cfg.load_from) + runner.run(data_loaders, cfg.workflow, cfg.total_epochs) \ No newline at end of file diff --git a/tools/train.py b/tools/train.py index 237ec2b21f5..839f27c85fb 100644 --- a/tools/train.py +++ b/tools/train.py @@ -1,65 +1,12 @@ from __future__ import division import argparse -import logging -import random -from collections import OrderedDict - -import numpy as np -import torch from mmcv import Config -from mmcv.runner import Runner, obj_from_dict, DistSamplerSeedHook -from mmcv.parallel import MMDataParallel, MMDistributedDataParallel - -from mmdet import datasets, __version__ -from mmdet.core import (init_dist, DistOptimizerHook, CocoDistEvalRecallHook, - CocoDistEvalmAPHook) -from mmdet.datasets import build_dataloader -from mmdet.models import build_detector, RPN - - -def parse_losses(losses): - log_vars = OrderedDict() - for loss_name, loss_value in losses.items(): - if isinstance(loss_value, torch.Tensor): - log_vars[loss_name] = loss_value.mean() - elif isinstance(loss_value, list): - log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) - else: - raise TypeError( - '{} is not a tensor or list of tensors'.format(loss_name)) - - loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) - - log_vars['loss'] = loss - for name in log_vars: - log_vars[name] = log_vars[name].item() - - return loss, log_vars - - -def batch_processor(model, data, train_mode): - losses = model(**data) - loss, log_vars = parse_losses(losses) - - outputs = dict( - loss=loss, log_vars=log_vars, 
num_samples=len(data['img'].data)) - - return outputs - - -def get_logger(log_level): - logging.basicConfig( - format='%(asctime)s - %(levelname)s - %(message)s', level=log_level) - logger = logging.getLogger() - return logger +from mmcv.runner import obj_from_dict - -def set_random_seed(seed): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) +from mmdet import datasets +from mmdet.api import train_detector +from mmdet.models import build_detector def parse_args(): @@ -86,71 +33,19 @@ def parse_args(): def main(): args = parse_args() - cfg = Config.fromfile(args.config) if args.work_dir is not None: cfg.work_dir = args.work_dir + cfg.validate = args.validate cfg.gpus = args.gpus - # save mmdet version in checkpoint as meta data - cfg.checkpoint_config.meta = dict( - mmdet_version=__version__, config=cfg.text) - - logger = get_logger(cfg.log_level) - - # set random seed if specified - if args.seed is not None: - logger.info('Set random seed to {}'.format(args.seed)) - set_random_seed(args.seed) - - # init distributed environment if necessary - if args.launcher == 'none': - dist = False - logger.info('Non-distributed training.') - else: - dist = True - init_dist(args.launcher, **cfg.dist_params) - if torch.distributed.get_rank() != 0: - logger.setLevel('ERROR') - logger.info('Distributed training.') - - # prepare data loaders - train_dataset = obj_from_dict(cfg.data.train, datasets) - data_loaders = [ - build_dataloader(train_dataset, cfg.data.imgs_per_gpu, - cfg.data.workers_per_gpu, cfg.gpus, dist) - ] - + cfg.seed = args.seed + cfg.launcher = args.launcher + cfg.local_rank = args.local_rank # build model model = build_detector( cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) - if dist: - model = MMDistributedDataParallel(model.cuda()) - else: - model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() - - # build runner - runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, - cfg.log_level) - - # register hooks - optimizer_config = DistOptimizerHook( - **cfg.optimizer_config) if dist else cfg.optimizer_config - runner.register_training_hooks(cfg.lr_config, optimizer_config, - cfg.checkpoint_config, cfg.log_config) - if dist: - runner.register_hook(DistSamplerSeedHook()) - # register eval hooks - if args.validate: - if isinstance(model.module, RPN): - runner.register_hook(CocoDistEvalRecallHook(cfg.data.val)) - elif cfg.data.val.type == 'CocoDataset': - runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) - - if cfg.resume_from: - runner.resume(cfg.resume_from) - elif cfg.load_from: - runner.load_checkpoint(cfg.load_from) - runner.run(data_loaders, cfg.workflow, cfg.total_epochs) + train_dataset = obj_from_dict(cfg.data.train, datasets) + train_detector(model, train_dataset, cfg) if __name__ == '__main__':
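Usage sketch (not part of the patch series): the new mmdet.api package makes inference scriptable without the tools/ entry points. The checkpoint and image paths below are placeholders.

    import mmcv
    from mmcv.runner import load_checkpoint

    from mmdet.api.inference import inference_detector, show_result
    from mmdet.models import build_detector

    cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')
    cfg.model.pretrained = None
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    load_checkpoint(model, 'work_dirs/faster_rcnn_r50_fpn_1x/latest.pth')

    imgs = ['demo1.jpg', 'demo2.jpg']
    # inference_detector is a generator yielding one result per image
    for img, result in zip(imgs, inference_detector(model, imgs, cfg)):
        show_result(mmcv.imread(img), result, dataset='coco', score_thr=0.3)

train_detector(model, train_dataset, cfg), as used by the rewritten tools/train.py above, is the matching training entry point.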
From e2ba8badf154cef53ca1db765b196e225ea013f4 Mon Sep 17 00:00:00 2001
From: myownskyW7 <727032989@qq.com>
Date: Thu, 11 Oct 2018 23:35:46 +0800
Subject: [PATCH 74/81] high level api minor bugs fix

---
 mmdet/api/inference.py | 2 +-
 mmdet/api/train.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mmdet/api/inference.py b/mmdet/api/inference.py
index 47b7de39b37..d452c665a9f 100644
--- a/mmdet/api/inference.py
+++ b/mmdet/api/inference.py
@@ -27,7 +27,7 @@ def inference_detector(model, imgs, cfg, device='cuda:0'):
 
     imgs = imgs if isinstance(imgs, list) else [imgs]
     img_transform = ImageTransform(
-        **cfg.img_norm_cfg, size_divisor=cfg.data.test.size_divisor)
+        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
     model = model.to(device)
     model.eval()
     for img in imgs:
diff --git a/mmdet/api/train.py b/mmdet/api/train.py
index 28469a200b2..0084cb1f732 100644
--- a/mmdet/api/train.py
+++ b/mmdet/api/train.py
@@ -117,4 +117,4 @@ def train_detector(model, dataset, cfg):
         runner.resume(cfg.resume_from)
     elif cfg.load_from:
         runner.load_checkpoint(cfg.load_from)
-    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
\ No newline at end of file
+    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)

From 383ec3fb2753cf24662206468d65911f30d05543 Mon Sep 17 00:00:00 2001
From: myownskyW7 <727032989@qq.com>
Date: Thu, 11 Oct 2018 23:40:40 +0800
Subject: [PATCH 75/81] high level api minor bugs fix again

---
 mmdet/api/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mmdet/api/inference.py b/mmdet/api/inference.py
index d452c665a9f..0addd598e50 100644
--- a/mmdet/api/inference.py
+++ b/mmdet/api/inference.py
@@ -34,7 +34,7 @@ def inference_detector(model, imgs, cfg, device='cuda:0'):
         img = mmcv.imread(img)
         data = _prepare_data(img, img_transform, cfg, device)
         with torch.no_grad():
-            result = model(**data, return_loss=False, rescale=True)
+            result = model(return_loss=False, rescale=True, **data)
         yield result
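Both of these one-line fixes move the `**` unpacking to the end of the call, presumably for compatibility with Python versions before 3.5, where a keyword argument may not follow a `**` unpacking in a call expression (PEP 448 lifted that restriction). A minimal illustration:

    # sketch: why the argument order matters on older Pythons
    def f(a=None, b=None):
        return a, b

    d = dict(a=1)
    print(f(b=2, **d))  # (1, 2) on every Python version
    # f(**d, b=2)       # SyntaxError before Python 3.5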
From 7200bfcb042d71e5eeaf64b4ec8eb71e325c18d6 Mon Sep 17 00:00:00 2001
From: Kai Chen
Date: Thu, 11 Oct 2018 23:49:04 +0800
Subject: [PATCH 76/81] minor fix for fast rcnn

---
 configs/fast_mask_rcnn_r50_fpn_1x.py | 6 +++---
 configs/fast_rcnn_r50_fpn_1x.py      | 6 +++---
 mmdet/models/detectors/two_stage.py  | 1 -
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/configs/fast_mask_rcnn_r50_fpn_1x.py b/configs/fast_mask_rcnn_r50_fpn_1x.py
index 4281c161813..af2070f5d90 100644
--- a/configs/fast_mask_rcnn_r50_fpn_1x.py
+++ b/configs/fast_mask_rcnn_r50_fpn_1x.py
@@ -74,7 +74,7 @@
         img_scale=(1333, 800),
         img_norm_cfg=img_norm_cfg,
         size_divisor=32,
-        proposal_file=data_root + 'proposals/train2017_r50_fpn_rpn_1x.pkl',
+        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',
         flip_ratio=0.5,
         with_mask=True,
         with_crowd=True,
@@ -85,7 +85,7 @@
         img_prefix=data_root + 'val2017/',
         img_scale=(1333, 800),
         img_norm_cfg=img_norm_cfg,
-        proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl',
+        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',
         size_divisor=32,
         flip_ratio=0,
         with_mask=True,
@@ -97,7 +97,7 @@
         img_prefix=data_root + 'val2017/',
         img_scale=(1333, 800),
         img_norm_cfg=img_norm_cfg,
-        proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl',
+        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',
         size_divisor=32,
         flip_ratio=0,
         with_mask=False,
diff --git a/configs/fast_rcnn_r50_fpn_1x.py b/configs/fast_rcnn_r50_fpn_1x.py
index 47ec415cf0a..397ab431e61 100644
--- a/configs/fast_rcnn_r50_fpn_1x.py
+++ b/configs/fast_rcnn_r50_fpn_1x.py
@@ -60,7 +60,7 @@
         img_scale=(1333, 800),
         img_norm_cfg=img_norm_cfg,
         size_divisor=32,
-        proposal_file=data_root + 'proposals/train2017_r50_fpn_rpn_1x.pkl',
+        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',
         flip_ratio=0.5,
         with_mask=False,
         with_crowd=True,
@@ -71,7 +71,7 @@
         img_prefix=data_root + 'val2017/',
         img_scale=(1333, 800),
         img_norm_cfg=img_norm_cfg,
-        proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl',
+        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',
         size_divisor=32,
         flip_ratio=0,
         with_mask=False,
@@ -83,7 +83,7 @@
         img_prefix=data_root + 'val2017/',
         img_scale=(1333, 800),
         img_norm_cfg=img_norm_cfg,
-        proposal_file=data_root + 'proposals/val2017_r50_fpn_rpn_1x.pkl',
+        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',
         size_divisor=32,
         flip_ratio=0,
         with_mask=False,
diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py
index b2f2839f93c..48a818d164c 100644
--- a/mmdet/models/detectors/two_stage.py
+++ b/mmdet/models/detectors/two_stage.py
@@ -140,7 +140,6 @@ def forward_train(self,
 
     def simple_test(self, img, img_meta, proposals=None, rescale=False):
         """Test without augmentation."""
-        assert proposals is None, "Fast RCNN hasn't been implemented."
         assert self.with_bbox, "Bbox head must be implemented."
 
         x = self.extract_feat(img)

From 641a4c0e844b641a1e7b2d0f63be6a9c943fa10d Mon Sep 17 00:00:00 2001
From: Kai Chen
Date: Fri, 12 Oct 2018 02:45:09 +0800
Subject: [PATCH 77/81] reorganize the training api

---
 mmdet/api/__init__.py                 |   6 +-
 mmdet/api/env.py                      |  57 ++++++++++++++
 mmdet/api/train.py                    | 104 ++++++++++++++------------
 mmdet/core/utils/__init__.py          |   6 +-
 mmdet/core/utils/dist_utils.py        |  32 --------
 mmdet/datasets/loader/build_loader.py |   2 +-
 tools/train.py                        |  51 ++++++++++---
 7 files changed, 161 insertions(+), 97 deletions(-)
 create mode 100644 mmdet/api/env.py

diff --git a/mmdet/api/__init__.py b/mmdet/api/__init__.py
index 970492f6306..39544f2bf41 100644
--- a/mmdet/api/__init__.py
+++ b/mmdet/api/__init__.py
@@ -1,4 +1,8 @@
+from .env import init_dist, get_root_logger, set_random_seed
 from .train import train_detector
 from .inference import inference_detector
 
-__all__ = ['train_detector', 'inference_detector']
+__all__ = [
+    'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector',
+    'inference_detector'
+]
diff --git a/mmdet/api/env.py b/mmdet/api/env.py
new file mode 100644
index 00000000000..20cd26dee8f
--- /dev/null
+++ b/mmdet/api/env.py
@@ -0,0 +1,57 @@
+import logging
+import os
+import random
+
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.multiprocessing as mp
+from mmcv.runner import get_dist_info
+
+
+def init_dist(launcher, backend='nccl', **kwargs):
+    if mp.get_start_method(allow_none=True) is None:
+        mp.set_start_method('spawn')
+    if launcher == 'pytorch':
+        _init_dist_pytorch(backend, **kwargs)
+    elif launcher == 'mpi':
+        _init_dist_mpi(backend, **kwargs)
+    elif launcher == 'slurm':
+        _init_dist_slurm(backend, **kwargs)
+    else:
+        raise ValueError('Invalid launcher type: {}'.format(launcher))
+
+
+def _init_dist_pytorch(backend, **kwargs):
+    # TODO: use local_rank instead of rank % num_gpus
+    rank = int(os.environ['RANK'])
+    num_gpus = torch.cuda.device_count()
+    torch.cuda.set_device(rank % num_gpus)
+    dist.init_process_group(backend=backend, **kwargs)
+
+
+def _init_dist_mpi(backend, **kwargs):
+    raise NotImplementedError
+
+
+def _init_dist_slurm(backend, **kwargs):
+    raise NotImplementedError
+
+
+def set_random_seed(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+
+
+def get_root_logger(log_level=logging.INFO):
+    logger = logging.getLogger()
+    if not logger.hasHandlers():
+        logging.basicConfig(
+            format='%(asctime)s - %(levelname)s - %(message)s',
+            level=log_level)
+    rank, _ = get_dist_info()
+    if rank != 0:
+        logger.setLevel('ERROR')
+    return logger
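The new `env` module centralizes process-level setup that was previously scattered across `train.py` and `dist_utils.py`. A sketch of how the helpers combine, with illustrative values; note that `init_dist('pytorch', ...)` only works under a launcher that exports `RANK` (and the usual process-group variables), which `_init_dist_pytorch` reads to pick a GPU:

    # sketch: typical use of the env helpers added above (values illustrative)
    from mmdet.api import get_root_logger, init_dist, set_random_seed

    set_random_seed(0)          # seeds python's random, numpy and torch (all GPUs)
    logger = get_root_logger()  # processes with rank != 0 are raised to ERROR level
    # under e.g. `python -m torch.distributed.launch`, which sets RANK:
    # init_dist('pytorch', backend='nccl')
    logger.info('environment ready')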
diff --git a/mmdet/api/train.py b/mmdet/api/train.py
index 0084cb1f732..4c6e1bad23a 100644
--- a/mmdet/api/train.py
+++ b/mmdet/api/train.py
@@ -1,6 +1,5 @@
 from __future__ import division
 
-import logging
 import random
 from collections import OrderedDict
 
@@ -9,11 +8,11 @@
 from mmcv.runner import Runner, DistSamplerSeedHook
 from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
 
-from mmdet import __version__
-from mmdet.core import (init_dist, DistOptimizerHook, CocoDistEvalRecallHook,
+from mmdet.core import (DistOptimizerHook, CocoDistEvalRecallHook,
                         CocoDistEvalmAPHook)
 from mmdet.datasets import build_dataloader
 from mmdet.models import RPN
+from .env import get_root_logger
 
 
 def parse_losses(losses):
@@ -46,13 +45,6 @@ def batch_processor(model, data, train_mode):
     return outputs
 
 
-def get_logger(log_level):
-    logging.basicConfig(
-        format='%(asctime)s - %(levelname)s - %(message)s', level=log_level)
-    logger = logging.getLogger()
-    return logger
-
-
 def set_random_seed(seed):
     random.seed(seed)
     np.random.seed(seed)
@@ -60,58 +52,72 @@ def set_random_seed(seed):
     torch.cuda.manual_seed_all(seed)
 
 
-def train_detector(model, dataset, cfg):
-    # save mmdet version in checkpoint as meta data
-    cfg.checkpoint_config.meta = dict(
-        mmdet_version=__version__, config=cfg.text)
-
-    logger = get_logger(cfg.log_level)
-
-    # set random seed if specified
-    if cfg.seed is not None:
-        logger.info('Set random seed to {}'.format(cfg.seed))
-        set_random_seed(cfg.seed)
+def train_detector(model,
+                   dataset,
+                   cfg,
+                   distributed=False,
+                   validate=False,
+                   logger=None):
+    if logger is None:
+        logger = get_root_logger(cfg.log_level)
 
-    # init distributed environment if necessary
-    if cfg.launcher == 'none':
-        dist = False
-        logger.info('Non-distributed training.')
+    # start training
+    if distributed:
+        _dist_train(model, dataset, cfg, validate=validate)
     else:
-        dist = True
-        init_dist(cfg.launcher, **cfg.dist_params)
-        if torch.distributed.get_rank() != 0:
-            logger.setLevel('ERROR')
-        logger.info('Distributed training.')
+        _non_dist_train(model, dataset, cfg, validate=validate)
+
+
+def _dist_train(model, dataset, cfg, validate=False):
     # prepare data loaders
     data_loaders = [
-        build_dataloader(dataset, cfg.data.imgs_per_gpu,
-                         cfg.data.workers_per_gpu, cfg.gpus, dist)
+        build_dataloader(
+            dataset,
+            cfg.data.imgs_per_gpu,
+            cfg.data.workers_per_gpu,
+            dist=True)
     ]
-
     # put model on gpus
-    if dist:
-        model = MMDistributedDataParallel(model.cuda())
-    else:
-        model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
-
+    model = MMDistributedDataParallel(model.cuda())
     # build runner
     runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                     cfg.log_level)
-
     # register hooks
-    optimizer_config = DistOptimizerHook(
-        **cfg.optimizer_config) if dist else cfg.optimizer_config
+    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
     runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                    cfg.checkpoint_config, cfg.log_config)
-    if dist:
-        runner.register_hook(DistSamplerSeedHook())
-    # register eval hooks
-    if cfg.validate:
-        if isinstance(model.module, RPN):
-            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
-        elif cfg.data.val.type == 'CocoDataset':
-            runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
+    runner.register_hook(DistSamplerSeedHook())
+    # register eval hooks
+    if validate:
+        if isinstance(model.module, RPN):
+            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
+        elif cfg.data.val.type == 'CocoDataset':
+            runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
+
+    if cfg.resume_from:
+        runner.resume(cfg.resume_from)
+    elif cfg.load_from:
+        runner.load_checkpoint(cfg.load_from)
+    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
+
+
+def _non_dist_train(model, dataset, cfg, validate=False):
+    # prepare data loaders
+    data_loaders = [
+        build_dataloader(
+            dataset,
+            cfg.data.imgs_per_gpu,
+            cfg.data.workers_per_gpu,
+            cfg.gpus,
+            dist=False)
+    ]
+    # put model on gpus
+    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
+    # build runner
+    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
+                    cfg.log_level)
+    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
+                                   cfg.checkpoint_config, cfg.log_config)
 
     if cfg.resume_from:
         runner.resume(cfg.resume_from)
diff --git a/mmdet/core/utils/__init__.py b/mmdet/core/utils/__init__.py
index 981dab7fb0d..89e952ee5f9 100644
--- a/mmdet/core/utils/__init__.py
+++ b/mmdet/core/utils/__init__.py
@@ -1,7 +1,7 @@
-from .dist_utils import init_dist, allreduce_grads, DistOptimizerHook
+from .dist_utils import allreduce_grads, DistOptimizerHook
 from .misc import tensor2imgs, unmap, multi_apply
 
 __all__ = [
-    'init_dist', 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs',
-    'unmap', 'multi_apply'
+    'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
+    'multi_apply'
 ]
diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py
index c7748db661f..ec84bb48693 100644
--- a/mmdet/core/utils/dist_utils.py
+++ b/mmdet/core/utils/dist_utils.py
@@ -1,43 +1,11 @@
-import os
 from collections import OrderedDict
 
-import torch
-import torch.multiprocessing as mp
 import torch.distributed as dist
 from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
                           _take_tensors)
 from mmcv.runner import OptimizerHook
 
 
-def init_dist(launcher, backend='nccl', **kwargs):
-    if mp.get_start_method(allow_none=True) is None:
-        mp.set_start_method('spawn')
-    if launcher == 'pytorch':
-        _init_dist_pytorch(backend, **kwargs)
-    elif launcher == 'mpi':
-        _init_dist_mpi(backend, **kwargs)
-    elif launcher == 'slurm':
-        _init_dist_slurm(backend, **kwargs)
-    else:
-        raise ValueError('Invalid launcher type: {}'.format(launcher))
-
-
-def _init_dist_pytorch(backend, **kwargs):
-    # TODO: use local_rank instead of rank % num_gpus
-    rank = int(os.environ['RANK'])
-    num_gpus = torch.cuda.device_count()
-    torch.cuda.set_device(rank % num_gpus)
-    dist.init_process_group(backend=backend, **kwargs)
-
-
-def _init_dist_mpi(backend, **kwargs):
-    raise NotImplementedError
-
-
-def _init_dist_slurm(backend, **kwargs):
-    raise NotImplementedError
-
-
 def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
     if bucket_size_mb > 0:
         bucket_size_bytes = bucket_size_mb * 1024 * 1024
diff --git a/mmdet/datasets/loader/build_loader.py b/mmdet/datasets/loader/build_loader.py
index d3b342b32b8..761d9aea188 100644
--- a/mmdet/datasets/loader/build_loader.py
+++ b/mmdet/datasets/loader/build_loader.py
@@ -15,7 +15,7 @@
 def build_dataloader(dataset,
                      imgs_per_gpu,
                      workers_per_gpu,
-                     num_gpus,
+                     num_gpus=1,
                      dist=True,
                      **kwargs):
     if dist:
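With `num_gpus` now defaulting to 1, the two training paths split above call the loader differently. A sketch of the two call patterns (`dataset` is assumed to be built elsewhere, e.g. via `obj_from_dict(cfg.data.train, datasets)`; the batch and worker counts are illustrative):

    # sketch: the two build_dataloader call patterns after this change
    from mmdet.datasets import build_dataloader

    # distributed: one loader per process; sampler is re-seeded each epoch
    # by DistSamplerSeedHook, and num_gpus is no longer needed
    loader = build_dataloader(dataset, imgs_per_gpu=2, workers_per_gpu=2,
                              dist=True)

    # non-distributed: one loader feeding cfg.gpus GPUs on a single machine
    loader = build_dataloader(dataset, imgs_per_gpu=2, workers_per_gpu=2,
                              num_gpus=4, dist=False)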
        '--gpus', type=int, default=1, help='number of gpus to use')
-    parser.add_argument('--seed', type=int, help='random seed')
+        '--gpus',
+        type=int,
+        default=1,
+        help='number of gpus to use '
+        '(only applicable to non-distributed training)')
+    parser.add_argument('--seed', type=int, default=None, help='random seed')
     parser.add_argument(
         '--launcher',
         choices=['none', 'pytorch', 'slurm', 'mpi'],
@@ -33,19 +38,43 @@ def main():
     args = parse_args()
+
     cfg = Config.fromfile(args.config)
+    # update configs according to CLI args
     if args.work_dir is not None:
         cfg.work_dir = args.work_dir
-    cfg.validate = args.validate
     cfg.gpus = args.gpus
-    cfg.seed = args.seed
-    cfg.launcher = args.launcher
-    cfg.local_rank = args.local_rank
-    # build model
+    if cfg.checkpoint_config is not None:
+        # save mmdet version in checkpoints as meta data
+        cfg.checkpoint_config.meta = dict(
+            mmdet_version=__version__, config=cfg.text)
+
+    # init distributed env first, since logger depends on the dist info.
+    if args.launcher == 'none':
+        distributed = False
+    else:
+        distributed = True
+        init_dist(args.launcher, **cfg.dist_params)
+
+    # init logger before other steps
+    logger = get_root_logger(cfg.log_level)
+    logger.info('Distributed training: {}'.format(distributed))
+
+    # set random seeds
+    if args.seed is not None:
+        logger.info('Set random seed to {}'.format(args.seed))
+        set_random_seed(args.seed)
+
     model = build_detector(
         cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
     train_dataset = obj_from_dict(cfg.data.train, datasets)
-    train_detector(model, train_dataset, cfg)
+    train_detector(
+        model,
+        train_dataset,
+        cfg,
+        distributed=distributed,
+        validate=args.validate,
+        logger=logger)
 
 
 if __name__ == '__main__':

From 5d4da5e214de0979ffeb38cdae8b46a6fcb77fec Mon Sep 17 00:00:00 2001
From: Kai Chen
Date: Fri, 12 Oct 2018 02:49:43 +0800
Subject: [PATCH 78/81] remove useless method

---
 mmdet/api/train.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/mmdet/api/train.py b/mmdet/api/train.py
index 4c6e1bad23a..2a589722a70 100644
--- a/mmdet/api/train.py
+++ b/mmdet/api/train.py
@@ -1,9 +1,7 @@
 from __future__ import division
 
-import random
 from collections import OrderedDict
 
-import numpy as np
 import torch
 from mmcv.runner import Runner, DistSamplerSeedHook
 from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
@@ -45,13 +43,6 @@ def batch_processor(model, data, train_mode):
     return outputs
 
 
-def set_random_seed(seed):
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-
-
 def train_detector(model,
                    dataset,
                    cfg,
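Note the ordering that patch 77 establishes in `main()` and that patch 78 leaves intact: the distributed environment is initialized before the logger, because `get_root_logger` consults `get_dist_info()` to silence non-zero ranks. An illustrative distributed launch (config path and GPU count assumed):

    # sketch: distributed launch; torch.distributed.launch exports RANK,
    # which _init_dist_pytorch reads (values illustrative)
    #
    #   python -m torch.distributed.launch --nproc_per_node=8 \
    #       tools/train.py configs/faster_rcnn_r50_fpn_1x.py --launcher pytorch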
rename from mmdet/api/inference.py
rename to mmdet/apis/inference.py
diff --git a/mmdet/api/train.py b/mmdet/apis/train.py
similarity index 100%
rename from mmdet/api/train.py
rename to mmdet/apis/train.py
diff --git a/tools/train.py b/tools/train.py
index 52f302c3f0e..8e03628db5e 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -5,8 +5,8 @@
 from mmcv import Config
 from mmcv.runner import obj_from_dict
 
 from mmdet import datasets, __version__
-from mmdet.api import (train_detector, init_dist, get_root_logger,
-                       set_random_seed)
+from mmdet.apis import (train_detector, init_dist, get_root_logger,
+                        set_random_seed)
 from mmdet.models import build_detector

From c9f7dc5cb75046d6aefbd5a2d6b4c382a1bb3d97 Mon Sep 17 00:00:00 2001
From: Kai Chen
Date: Fri, 12 Oct 2018 15:27:33 +0800
Subject: [PATCH 80/81] update inference api

---
 mmdet/apis/__init__.py  |  4 ++--
 mmdet/apis/inference.py | 27 +++++++++++++++++--------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/mmdet/apis/__init__.py b/mmdet/apis/__init__.py
index 39544f2bf41..030b7de4102 100644
--- a/mmdet/apis/__init__.py
+++ b/mmdet/apis/__init__.py
@@ -1,8 +1,8 @@
 from .env import init_dist, get_root_logger, set_random_seed
 from .train import train_detector
-from .inference import inference_detector
+from .inference import inference_detector, show_result
 
 __all__ = [
     'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector',
-    'inference_detector'
+    'inference_detector', 'show_result'
 ]
diff --git a/mmdet/apis/inference.py b/mmdet/apis/inference.py
index 0addd598e50..a87323cee1a 100644
--- a/mmdet/apis/inference.py
+++ b/mmdet/apis/inference.py
@@ -23,19 +23,29 @@ def _prepare_data(img, img_transform, cfg, device):
     return dict(img=[img], img_meta=[img_meta])
 
 
-def inference_detector(model, imgs, cfg, device='cuda:0'):
+def _inference_single(model, img, img_transform, cfg, device):
+    img = mmcv.imread(img)
+    data = _prepare_data(img, img_transform, cfg, device)
+    with torch.no_grad():
+        result = model(return_loss=False, rescale=True, **data)
+    return result
+
+
+def _inference_generator(model, imgs, img_transform, cfg, device):
+    for img in imgs:
+        yield _inference_single(model, img, img_transform, cfg, device)
 
-    imgs = imgs if isinstance(imgs, list) else [imgs]
+
+def inference_detector(model, imgs, cfg, device='cuda:0'):
     img_transform = ImageTransform(
         size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
     model = model.to(device)
     model.eval()
-    for img in imgs:
-        img = mmcv.imread(img)
-        data = _prepare_data(img, img_transform, cfg, device)
-        with torch.no_grad():
-            result = model(return_loss=False, rescale=True, **data)
-        yield result
+
+    if not isinstance(imgs, list):
+        return _inference_single(model, imgs, img_transform, cfg, device)
+    else:
+        return _inference_generator(model, imgs, img_transform, cfg, device)
 
 
 def show_result(img, result, dataset='coco', score_thr=0.3):
@@ -46,6 +56,7 @@ def show_result(img, result, dataset='coco', score_thr=0.3):
     ]
     labels = np.concatenate(labels)
     bboxes = np.vstack(result)
+    img = mmcv.imread(img)
     mmcv.imshow_det_bboxes(
         img.copy(),
         bboxes,
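With this change `inference_detector` returns a single result for one image and a lazy generator for a list, instead of always yielding. A usage sketch; the config path, checkpoint file and image names are assumptions for illustration:

    # sketch: using the updated inference API (file names illustrative)
    import mmcv
    from mmcv.runner import load_checkpoint
    from mmdet.apis import inference_detector, show_result
    from mmdet.models import build_detector

    cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')
    model = build_detector(cfg.model, test_cfg=cfg.test_cfg)
    load_checkpoint(model, 'checkpoint.pth')  # assumed checkpoint path

    result = inference_detector(model, 'demo.jpg', cfg)  # single image -> result
    show_result('demo.jpg', result)  # show_result now re-reads the file itself

    for res in inference_detector(model, ['a.jpg', 'b.jpg'], cfg):  # list -> generator
        print(len(res))  # one result per image, computed lazily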
@@ -2,7 +2,7 @@
 from torch import nn
 
 from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads,
-               mask_heads, detectors)
+               mask_heads)
 
 __all__ = [
     'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor',
@@ -48,4 +48,5 @@ def build_mask_head(cfg):
 
 
 def build_detector(cfg, train_cfg=None, test_cfg=None):
+    from . import detectors
     return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg))
diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py
index e67d7ae973f..61e6e199ac0 100644
--- a/mmdet/models/rpn_heads/rpn_head.py
+++ b/mmdet/models/rpn_heads/rpn_head.py
@@ -48,8 +48,8 @@ def __init__(self,
         self.anchor_scales = anchor_scales
         self.anchor_ratios = anchor_ratios
         self.anchor_strides = anchor_strides
-        self.anchor_base_sizes = anchor_strides.copy(
-        ) if anchor_base_sizes is None else anchor_base_sizes
+        self.anchor_base_sizes = list(
+            anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
         self.target_means = target_means
         self.target_stds = target_stds
         self.use_sigmoid_cls = use_sigmoid_cls
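Two small points worth noting about this last patch. Moving the `detectors` import inside `build_detector` defers it to call time, apparently to avoid a circular import when the `detectors` modules themselves import from the builder. And `list(anchor_strides)` copies the value whether the config supplies a list or a tuple, whereas `.copy()` does not exist on tuples (nor on lists under Python 2). A minimal illustration with toy values:

    # sketch: why list(...) is the safer copy (values illustrative)
    anchor_strides = (4, 8, 16, 32, 64)       # configs may use a tuple
    anchor_base_sizes = list(anchor_strides)  # works for list or tuple
    # anchor_strides.copy()                   # AttributeError on a tuple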