forked from burke86/deepdisc
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
2,731 additions
and
146 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from omegaconf import OmegaConf | ||
|
||
import detectron2.data.transforms as T | ||
from detectron2.config import LazyCall as L | ||
from detectron2.data import ( | ||
DatasetMapper, | ||
build_detection_test_loader, | ||
build_detection_train_loader, | ||
get_detection_dataset_dicts, | ||
) | ||
from detectron2.evaluation import COCOEvaluator | ||
|
||
# Root config node; the train/test loaders and the evaluator are attached to it below. | ||
dataloader = OmegaConf.create() | ||
|
||
# Training loader. L (LazyCall) stores the callable and its keyword arguments as | ||
# config entries; nothing is built when this module is loaded. | ||
dataloader.train = L(build_detection_train_loader)( | ||
dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), | ||
mapper=L(DatasetMapper)( | ||
is_train=True, | ||
augmentations=[ | ||
# Multi-scale resize: with sample_style="choice" one short-edge length is | ||
# picked from the listed values; max_size is set to 1333. | ||
L(T.ResizeShortestEdge)( | ||
short_edge_length=(640, 672, 704, 736, 768, 800), | ||
sample_style="choice", | ||
max_size=1333, | ||
), | ||
L(T.RandomFlip)(horizontal=True), | ||
], | ||
# dataloader.test.mapper.image_format refers back to this value by interpolation. | ||
image_format="BGR", | ||
use_instance_mask=True, | ||
), | ||
total_batch_size=16, | ||
num_workers=4, | ||
) | ||
|
||
# Evaluation loader: a single fixed resize, and filter_empty=False is passed to the | ||
# dataset builder for the validation split. | ||
dataloader.test = L(build_detection_test_loader)( | ||
dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), | ||
mapper=L(DatasetMapper)( | ||
is_train=False, | ||
augmentations=[ | ||
L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333), | ||
], | ||
# OmegaConf relative interpolation: the three dots climb from this mapper node | ||
# up to `dataloader`, so the value is dataloader.train.mapper.image_format. | ||
image_format="${...train.mapper.image_format}", | ||
), | ||
num_workers=4, | ||
) | ||
|
||
# Evaluator for the test split. The two-dot relative interpolation climbs from this | ||
# evaluator node to `dataloader`, so dataset_name follows dataloader.test.dataset.names. | ||
dataloader.evaluator = L(COCOEvaluator)( | ||
dataset_name="${..test.dataset.names}", | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from detectron2.data.detection_utils import create_keypoint_hflip_indices | ||
|
||
from .coco import dataloader | ||
|
||
# Keypoint variant of the imported COCO dataloader: switch both splits to the | ||
# keypoint datasets and pass min_keypoints=1 to the training dataset builder. | ||
dataloader.train.dataset.min_keypoints = 1 | ||
dataloader.train.dataset.names = "keypoints_coco_2017_train" | ||
dataloader.test.dataset.names = "keypoints_coco_2017_val" | ||
|
||
# Turn off instance masks and turn on keypoints in the training mapper. | ||
# NOTE: create_keypoint_hflip_indices is not wrapped in L, so it is called right here | ||
# when the module is loaded, using the training dataset name assigned just above. | ||
dataloader.train.mapper.update( | ||
use_instance_mask=False, | ||
use_keypoint=True, | ||
keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names), | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from detectron2.config import LazyCall as L | ||
from detectron2.evaluation import ( | ||
COCOEvaluator, | ||
COCOPanopticEvaluator, | ||
DatasetEvaluators, | ||
SemSegEvaluator, | ||
) | ||
|
||
from .coco import dataloader | ||
|
||
# Panoptic variant of the imported COCO dataloader: point both splits at the | ||
# "panoptic_separated" datasets and set filter_empty=False on the training dataset. | ||
dataloader.train.dataset.names = "coco_2017_train_panoptic_separated" | ||
dataloader.train.dataset.filter_empty = False | ||
dataloader.test.dataset.names = "coco_2017_val_panoptic_separated" | ||
|
||
|
||
# Replace the evaluator with a list of three evaluators that all read the same | ||
# test dataset name. Each interpolation uses three dots: one level for the list | ||
# entry, one for the list itself, then `dataloader`. | ||
dataloader.evaluator = [ | ||
L(COCOEvaluator)( | ||
dataset_name="${...test.dataset.names}", | ||
), | ||
L(SemSegEvaluator)( | ||
dataset_name="${...test.dataset.names}", | ||
), | ||
L(COCOPanopticEvaluator)( | ||
dataset_name="${...test.dataset.names}", | ||
), | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Shared per-channel normalization constants, keyed by channel order (rgb / bgr). | ||
# The bgr mean is the rgb mean with the channel order reversed. | ||
# NOTE(review): the "256" in the key names presumably denotes a 0-255 pixel scale — confirm. | ||
constants = dict( | ||
imagenet_rgb256_mean=[123.675, 116.28, 103.53], | ||
imagenet_rgb256_std=[58.395, 57.12, 57.375], | ||
imagenet_bgr256_mean=[103.530, 116.280, 123.675], | ||
# When using pre-trained models in Detectron1 or any MSRA models, | ||
# std has been absorbed into its conv1 weights, so the std needs to be set 1. | ||
# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) | ||
imagenet_bgr256_std=[1.0, 1.0, 1.0], | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
from detectron2.config import LazyCall as L | ||
from detectron2.layers import ShapeSpec | ||
from detectron2.modeling.box_regression import Box2BoxTransform | ||
from detectron2.modeling.matcher import Matcher | ||
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads | ||
|
||
from .mask_rcnn_fpn import model | ||
|
||
# arguments that don't exist for Cascade R-CNN | ||
[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] | ||
|
||
# Re-target the ROI heads at CascadeROIHeads and supply one box head, one box | ||
# predictor and one proposal matcher per stage (three of each). | ||
model.roi_heads.update( | ||
_target_=CascadeROIHeads, | ||
box_heads=[ | ||
L(FastRCNNConvFCHead)( | ||
input_shape=ShapeSpec(channels=256, height=7, width=7), | ||
conv_dims=[], | ||
fc_dims=[1024, 1024], | ||
) | ||
# Three identically configured heads; the loop variable is unused. | ||
for k in range(3) | ||
], | ||
box_predictors=[ | ||
L(FastRCNNOutputLayers)( | ||
input_shape=ShapeSpec(channels=1024), | ||
test_score_thresh=0.05, | ||
box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), | ||
cls_agnostic_bbox_reg=True, | ||
# Three dots: list entry -> box_predictors list -> roi_heads, i.e. roi_heads.num_classes. | ||
num_classes="${...num_classes}", | ||
) | ||
# Box-regression weights grow from stage to stage. | ||
for (w1, w2) in [(10, 5), (20, 10), (30, 15)] | ||
], | ||
proposal_matchers=[ | ||
L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) | ||
# One matcher per stage, with a threshold that rises from 0.5 to 0.7. | ||
for th in [0.5, 0.6, 0.7] | ||
], | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from detectron2.modeling.meta_arch.fcos import FCOS, FCOSHead | ||
|
||
from .retinanet import model | ||
|
||
# Reuse the imported RetinaNet config but build FCOS instead. | ||
model._target_ = FCOS | ||
|
||
# Drop the anchor-related entries and input_format from the inherited config. | ||
# NOTE(review): presumably these are arguments FCOS does not accept — confirm. | ||
del model.anchor_generator | ||
del model.box2box_transform | ||
del model.anchor_matcher | ||
del model.input_format | ||
|
||
# Use P5 instead of C5 to compute P6/P7 | ||
# (Sec 2.2 of https://arxiv.org/abs/2006.09214) | ||
model.backbone.top_block.in_feature = "p5" | ||
model.backbone.top_block.in_channels = 256 | ||
|
||
# New score threshold determined based on sqrt(cls_score * centerness) | ||
model.test_score_thresh = 0.2 | ||
model.test_nms_thresh = 0.6 | ||
|
||
# Swap the head for FCOSHead, remove its num_anchors entry, and set norm to "GN". | ||
model.head._target_ = FCOSHead | ||
del model.head.num_anchors | ||
model.head.norm = "GN" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from detectron2.config import LazyCall as L | ||
from detectron2.layers import ShapeSpec | ||
from detectron2.modeling.poolers import ROIPooler | ||
from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead | ||
|
||
from .mask_rcnn_fpn import model | ||
|
||
[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]] | ||
|
||
# Add a keypoint branch to the ROI heads and set num_classes to 1. | ||
model.roi_heads.update( | ||
num_classes=1, | ||
keypoint_in_features=["p2", "p3", "p4", "p5"], | ||
# Four scales, one per entry of keypoint_in_features. | ||
keypoint_pooler=L(ROIPooler)( | ||
output_size=14, | ||
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), | ||
sampling_ratio=0, | ||
pooler_type="ROIAlignV2", | ||
), | ||
# The 14x14 input shape equals keypoint_pooler.output_size; conv_dims is eight entries of 512. | ||
keypoint_head=L(KRCNNConvDeconvUpsampleHead)( | ||
input_shape=ShapeSpec(channels=256, width=14, height=14), | ||
num_keypoints=17, | ||
conv_dims=[512] * 8, | ||
loss_normalizer="visible", | ||
), | ||
) | ||
|
||
# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. | ||
# 1000 proposals per-image is found to hurt box AP. | ||
# Therefore we increase it to 1500 per-image. | ||
model.proposal_generator.post_nms_topk = (1500, 1000) | ||
|
||
# Keypoint AP degrades (though box AP improves) when using plain L1 loss | ||
model.roi_heads.box_predictor.smooth_l1_beta = 0.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
from detectron2.config import LazyCall as L | ||
from detectron2.layers import ShapeSpec | ||
from detectron2.modeling.meta_arch import GeneralizedRCNN | ||
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator | ||
from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet | ||
from detectron2.modeling.box_regression import Box2BoxTransform | ||
from detectron2.modeling.matcher import Matcher | ||
from detectron2.modeling.poolers import ROIPooler | ||
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead | ||
from detectron2.modeling.roi_heads import ( | ||
FastRCNNOutputLayers, | ||
MaskRCNNConvUpsampleHead, | ||
Res5ROIHeads, | ||
) | ||
|
||
from ..data.constants import constants | ||
|
||
# Mask R-CNN on a depth-50 ResNet that exposes a single feature map (res4), | ||
# written as a LazyCall config tree: nothing is instantiated in this module. | ||
model = L(GeneralizedRCNN)( | ||
backbone=L(ResNet)( | ||
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), | ||
stages=L(ResNet.make_default_stages)( | ||
depth=50, | ||
stride_in_1x1=True, | ||
norm="FrozenBN", | ||
), | ||
# Only res4 is exposed; a res5 stage is configured inside roi_heads below. | ||
out_features=["res4"], | ||
), | ||
proposal_generator=L(RPN)( | ||
in_features=["res4"], | ||
# 15 anchors = 5 sizes x 3 aspect ratios from the anchor generator below. | ||
head=L(StandardRPNHead)(in_channels=1024, num_anchors=15), | ||
anchor_generator=L(DefaultAnchorGenerator)( | ||
sizes=[[32, 64, 128, 256, 512]], | ||
aspect_ratios=[0.5, 1.0, 2.0], | ||
strides=[16], | ||
offset=0.0, | ||
), | ||
# NOTE(review): thresholds/labels presumably map IoU < 0.3 -> 0, 0.3-0.7 -> -1, | ||
# >= 0.7 -> 1 — confirm against the Matcher documentation. | ||
anchor_matcher=L(Matcher)( | ||
thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True | ||
), | ||
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), | ||
batch_size_per_image=256, | ||
positive_fraction=0.5, | ||
# NOTE(review): the two-element tuples are presumably (training, inference) values — confirm. | ||
pre_nms_topk=(12000, 6000), | ||
post_nms_topk=(2000, 1000), | ||
nms_thresh=0.7, | ||
), | ||
roi_heads=L(Res5ROIHeads)( | ||
# Read by box_predictor and mask_head through "${..num_classes}". | ||
num_classes=80, | ||
batch_size_per_image=512, | ||
positive_fraction=0.25, | ||
proposal_matcher=L(Matcher)( | ||
thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False | ||
), | ||
in_features=["res4"], | ||
# A single scale of 1/16, matching the anchor stride of 16 above. | ||
pooler=L(ROIPooler)( | ||
output_size=14, | ||
scales=(1.0 / 16,), | ||
sampling_ratio=0, | ||
pooler_type="ROIAlignV2", | ||
), | ||
# res5 stage configured via ResNet.make_stage; its out_channels (2048) is | ||
# read by both heads below through interpolation. | ||
res5=L(ResNet.make_stage)( | ||
block_class=BottleneckBlock, | ||
num_blocks=3, | ||
stride_per_block=[2, 1, 1], | ||
in_channels=1024, | ||
bottleneck_channels=512, | ||
out_channels=2048, | ||
norm="FrozenBN", | ||
stride_in_1x1=True, | ||
), | ||
box_predictor=L(FastRCNNOutputLayers)( | ||
# Three dots: input_shape -> box_predictor -> roi_heads, i.e. roi_heads.res5.out_channels. | ||
# ShapeSpec is wrapped in L here, presumably because channels is an interpolation | ||
# string rather than a literal. | ||
input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1), | ||
test_score_thresh=0.05, | ||
box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), | ||
num_classes="${..num_classes}", | ||
), | ||
mask_head=L(MaskRCNNConvUpsampleHead)( | ||
# Channels come from res5.out_channels; width and height from pooler.output_size. | ||
input_shape=L(ShapeSpec)( | ||
channels="${...res5.out_channels}", | ||
width="${...pooler.output_size}", | ||
height="${...pooler.output_size}", | ||
), | ||
num_classes="${..num_classes}", | ||
conv_dims=[256], | ||
), | ||
), | ||
# BGR mean/std from the shared constants, consistent with input_format="BGR". | ||
pixel_mean=constants.imagenet_bgr256_mean, | ||
pixel_std=constants.imagenet_bgr256_std, | ||
input_format="BGR", | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
from detectron2.config import LazyCall as L | ||
from detectron2.layers import ShapeSpec | ||
from detectron2.modeling.meta_arch import GeneralizedRCNN | ||
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator | ||
from detectron2.modeling.backbone.fpn import LastLevelMaxPool | ||
from detectron2.modeling.backbone import BasicStem, FPN, ResNet | ||
from detectron2.modeling.box_regression import Box2BoxTransform | ||
from detectron2.modeling.matcher import Matcher | ||
from detectron2.modeling.poolers import ROIPooler | ||
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead | ||
from detectron2.modeling.roi_heads import ( | ||
StandardROIHeads, | ||
FastRCNNOutputLayers, | ||
MaskRCNNConvUpsampleHead, | ||
FastRCNNConvFCHead, | ||
) | ||
|
||
from ..data.constants import constants | ||
|
||
# Mask R-CNN on a depth-50 ResNet wrapped in an FPN, written as a LazyCall | ||
# config tree: nothing is instantiated in this module. | ||
model = L(GeneralizedRCNN)( | ||
backbone=L(FPN)( | ||
bottom_up=L(ResNet)( | ||
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), | ||
stages=L(ResNet.make_default_stages)( | ||
depth=50, | ||
stride_in_1x1=True, | ||
norm="FrozenBN", | ||
), | ||
out_features=["res2", "res3", "res4", "res5"], | ||
), | ||
# Single-dot relative interpolation: refers to bottom_up.out_features within this FPN node. | ||
in_features="${.bottom_up.out_features}", | ||
out_channels=256, | ||
top_block=L(LastLevelMaxPool)(), | ||
), | ||
proposal_generator=L(RPN)( | ||
# Five feature levels, matched by five size lists and five strides below. | ||
in_features=["p2", "p3", "p4", "p5", "p6"], | ||
# 3 anchors = 1 size per level x 3 aspect ratios. | ||
head=L(StandardRPNHead)(in_channels=256, num_anchors=3), | ||
anchor_generator=L(DefaultAnchorGenerator)( | ||
sizes=[[32], [64], [128], [256], [512]], | ||
aspect_ratios=[0.5, 1.0, 2.0], | ||
strides=[4, 8, 16, 32, 64], | ||
offset=0.0, | ||
), | ||
# NOTE(review): thresholds/labels presumably map IoU < 0.3 -> 0, 0.3-0.7 -> -1, | ||
# >= 0.7 -> 1 — confirm against the Matcher documentation. | ||
anchor_matcher=L(Matcher)( | ||
thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True | ||
), | ||
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), | ||
batch_size_per_image=256, | ||
positive_fraction=0.5, | ||
# NOTE(review): the two-element tuples are presumably (training, inference) values — confirm. | ||
pre_nms_topk=(2000, 1000), | ||
post_nms_topk=(1000, 1000), | ||
nms_thresh=0.7, | ||
), | ||
roi_heads=L(StandardROIHeads)( | ||
# Read by box_predictor and mask_head through "${..num_classes}". | ||
num_classes=80, | ||
batch_size_per_image=512, | ||
positive_fraction=0.25, | ||
proposal_matcher=L(Matcher)( | ||
thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False | ||
), | ||
# Box branch: four feature levels with four matching pooler scales. | ||
box_in_features=["p2", "p3", "p4", "p5"], | ||
box_pooler=L(ROIPooler)( | ||
output_size=7, | ||
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), | ||
sampling_ratio=0, | ||
pooler_type="ROIAlignV2", | ||
), | ||
# Input shape 256x7x7 equals the FPN out_channels and box_pooler.output_size above. | ||
box_head=L(FastRCNNConvFCHead)( | ||
input_shape=ShapeSpec(channels=256, height=7, width=7), | ||
conv_dims=[], | ||
fc_dims=[1024, 1024], | ||
), | ||
# channels=1024 equals the last entry of box_head.fc_dims. | ||
box_predictor=L(FastRCNNOutputLayers)( | ||
input_shape=ShapeSpec(channels=1024), | ||
test_score_thresh=0.05, | ||
box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), | ||
num_classes="${..num_classes}", | ||
), | ||
# Mask branch: same feature levels and scales as the box branch, pooled to 14x14. | ||
mask_in_features=["p2", "p3", "p4", "p5"], | ||
mask_pooler=L(ROIPooler)( | ||
output_size=14, | ||
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), | ||
sampling_ratio=0, | ||
pooler_type="ROIAlignV2", | ||
), | ||
# Input shape 256x14x14 equals the FPN out_channels and mask_pooler.output_size above. | ||
mask_head=L(MaskRCNNConvUpsampleHead)( | ||
input_shape=ShapeSpec(channels=256, width=14, height=14), | ||
num_classes="${..num_classes}", | ||
conv_dims=[256, 256, 256, 256, 256], | ||
), | ||
), | ||
# BGR mean/std from the shared constants, consistent with input_format="BGR". | ||
pixel_mean=constants.imagenet_bgr256_mean, | ||
pixel_std=constants.imagenet_bgr256_std, | ||
input_format="BGR", | ||
) |
Oops, something went wrong.