
Commit

Merge commit '4981ff68c2ae05ccfe83e340d528bb4c3c130740' into pr/temp
FreyWang committed Sep 2, 2021
2 parents bf48690 + 4981ff6 commit 980e3bf
Showing 17 changed files with 52 additions and 32 deletions.
6 changes: 3 additions & 3 deletions mmseg/datasets/pipelines/formating.py
@@ -249,9 +249,9 @@ class Collect(object):
keys (Sequence[str]): Keys of results to be collected in ``data``.
meta_keys (Sequence[str], optional): Meta keys to be converted to
``mmcv.DataContainer`` and collected in ``data[img_metas]``.
- Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape',
- 'pad_shape', 'scale_factor', 'flip', 'flip_direction',
- 'img_norm_cfg')``
+ Default: (``filename``, ``ori_filename``, ``ori_shape``,
+ ``img_shape``, ``pad_shape``, ``scale_factor``, ``flip``,
+ ``flip_direction``, ``img_norm_cfg``)
"""

def __init__(self,
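
For orientation, a minimal sketch of how ``Collect`` consumes these ``meta_keys`` (the toy values and shapes below are illustrative assumptions, not part of this commit):

import numpy as np
from mmseg.datasets.pipelines import Collect

# Toy `results` dict standing in for what earlier pipeline stages produce.
results = dict(
    img=np.zeros((8, 8, 3), dtype=np.float32),
    gt_semantic_seg=np.zeros((8, 8), dtype=np.uint8),
    filename='demo.png', ori_filename='demo.png',
    ori_shape=(8, 8, 3), img_shape=(8, 8, 3), pad_shape=(8, 8, 3),
    scale_factor=1.0, flip=False, flip_direction=None,
    img_norm_cfg=dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True))

collect = Collect(keys=['img', 'gt_semantic_seg'])
data = collect(results)
# data['img_metas'] is a DataContainer wrapping exactly the default meta_keys.
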
4 changes: 2 additions & 2 deletions mmseg/models/backbones/cgnet.py
@@ -187,8 +187,8 @@ def forward(self, x):
class CGNet(BaseModule):
"""CGNet backbone.
- A Light-weight Context Guided Network for Semantic Segmentation
- arXiv: https://arxiv.org/abs/1811.08201
+ This backbone is the implementation of `A Light-weight Context Guided
+ Network for Semantic Segmentation <https://arxiv.org/abs/1811.08201>`_.
Args:
in_channels (int): Number of input image channels. Normally 3.
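
A minimal forward-pass sketch with the default constructor (shapes and defaults are illustrative assumptions, not part of this commit):

import torch
from mmseg.models.backbones import CGNet

backbone = CGNet(in_channels=3)  # remaining arguments left at their defaults
backbone.eval()
with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 64, 64))  # one feature map per stage
print([f.shape for f in feats])
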
3 changes: 3 additions & 0 deletions mmseg/models/backbones/fast_scnn.py
@@ -272,6 +272,9 @@ def forward(self, higher_res_feature, lower_res_feature):
class FastSCNN(BaseModule):
"""Fast-SCNN Backbone.
+ This backbone is the implementation of `Fast-SCNN: Fast Semantic
+ Segmentation Network <https://arxiv.org/abs/1902.04502>`_.
Args:
in_channels (int): Number of input image channels. Default: 3.
downsample_dw_channels (tuple[int]): Number of output channels after
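
A minimal sketch of the default constructor; the input size must be divisible by the overall downsample rate (the values below are illustrative):

import torch
from mmseg.models.backbones import FastSCNN

backbone = FastSCNN()  # defaults assumed, in_channels=3
backbone.eval()
with torch.no_grad():
    outs = backbone(torch.randn(1, 3, 128, 256))
# higher-res features, lower-res features and the fused output
print([o.shape for o in outs])
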
4 changes: 2 additions & 2 deletions mmseg/models/backbones/hrnet.py
@@ -218,8 +218,8 @@ def forward(self, x):
class HRNet(BaseModule):
"""HRNet backbone.
- `High-Resolution Representations for Labeling Pixels and Regions
- arXiv: <https://arxiv.org/abs/1904.04514>`_.
+ This backbone is the implementation of `High-Resolution Representations
+ for Labeling Pixels and Regions <https://arxiv.org/abs/1904.04514>`_.
Args:
extra (dict): Detailed configuration for each stage of HRNet.
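
Because ``extra`` carries the whole per-stage layout, a sketch with HRNetV2-W18-style settings may help (the numbers below are assumptions for illustration, not taken from this commit):

import torch
from mmseg.models.backbones import HRNet

extra = dict(
    stage1=dict(num_modules=1, num_branches=1, block='BOTTLENECK',
                num_blocks=(4, ), num_channels=(64, )),
    stage2=dict(num_modules=1, num_branches=2, block='BASIC',
                num_blocks=(4, 4), num_channels=(18, 36)),
    stage3=dict(num_modules=4, num_branches=3, block='BASIC',
                num_blocks=(4, 4, 4), num_channels=(18, 36, 72)),
    stage4=dict(num_modules=3, num_branches=4, block='BASIC',
                num_blocks=(4, 4, 4, 4), num_channels=(18, 36, 72, 144)))
backbone = HRNet(extra=extra)
backbone.eval()
with torch.no_grad():
    outs = backbone(torch.randn(1, 3, 64, 64))  # one feature map per branch
print([o.shape for o in outs])
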
6 changes: 3 additions & 3 deletions mmseg/models/backbones/mit.py
@@ -246,9 +246,9 @@ def forward(self, x, hw_shape):
class MixVisionTransformer(BaseModule):
"""The backbone of Segformer.
- A PyTorch implement of : `SegFormer: Simple and Efficient Design for
- Semantic Segmentation with Transformers` -
- https://arxiv.org/pdf/2105.15203.pdf
+ This backbone is the implementation of `SegFormer: Simple and
+ Efficient Design for Semantic Segmentation with
+ Transformers <https://arxiv.org/abs/2105.15203>`_.
Args:
in_channels (int): Number of input channels. Default: 3.
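
A brief forward sketch with the default (MiT-B0-like) settings; shapes are illustrative:

import torch
from mmseg.models.backbones import MixVisionTransformer

backbone = MixVisionTransformer(in_channels=3)  # defaults assumed
backbone.eval()
with torch.no_grad():
    outs = backbone(torch.randn(1, 3, 64, 64))  # four stage outputs
print([o.shape for o in outs])
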
4 changes: 4 additions & 0 deletions mmseg/models/backbones/mobilenet_v2.py
@@ -14,6 +14,10 @@
class MobileNetV2(BaseModule):
"""MobileNetV2 backbone.
+ This backbone is the implementation of
+ `MobileNetV2: Inverted Residuals and Linear Bottlenecks
+ <https://arxiv.org/abs/1801.04381>`_.
Args:
widen_factor (float): Width multiplier, multiply number of
channels in each layer by this amount. Default: 1.0.
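
A quick forward sketch (default output indices assumed; shapes are illustrative):

import torch
from mmseg.models.backbones import MobileNetV2

backbone = MobileNetV2(widen_factor=1.0)
backbone.eval()
with torch.no_grad():
    outs = backbone(torch.randn(1, 3, 64, 64))  # features at the default out_indices
print([o.shape for o in outs])
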
3 changes: 3 additions & 0 deletions mmseg/models/backbones/resnest.py
@@ -271,6 +271,9 @@ def _inner_forward(x):
class ResNeSt(ResNetV1d):
"""ResNeSt backbone.
+ This backbone is the implementation of `ResNeSt:
+ Split-Attention Networks <https://arxiv.org/abs/2004.08955>`_.
Args:
groups (int): Number of groups of Bottleneck. Default: 1
base_width (int): Base width of Bottleneck. Default: 4
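
A direct-construction sketch (ResNeSt takes the usual ResNet ``depth`` plus the split-attention arguments; the values are illustrative):

import torch
from mmseg.models.backbones import ResNeSt

backbone = ResNeSt(depth=50, groups=1, base_width=4)
backbone.eval()
with torch.no_grad():
    outs = backbone(torch.randn(1, 3, 64, 64))  # one feature map per stage
print([o.shape for o in outs])
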
12 changes: 7 additions & 5 deletions mmseg/models/backbones/resnet.py
@@ -311,6 +311,9 @@ def _inner_forward(x):
class ResNet(BaseModule):
"""ResNet backbone.
+ This backbone is the improved implementation of `Deep Residual Learning
+ for Image Recognition <https://arxiv.org/abs/1512.03385>`_.
Args:
depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
in_channels (int): Number of input image channels. Default: 3.
@@ -686,11 +689,10 @@ def train(self, mode=True):
class ResNetV1c(ResNet):
"""ResNetV1c variant described in [1]_.
- Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv
- in the input stem with three 3x3 convs.
- References:
- .. [1] https://arxiv.org/pdf/1812.01187.pdf
+ Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv in
+ the input stem with three 3x3 convs. For more details please refer to `Bag
+ of Tricks for Image Classification with Convolutional Neural Networks
+ <https://arxiv.org/abs/1812.01187>`_.
"""

def __init__(self, **kwargs):
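
A sketch contrasting the two variants mentioned above (standard constructor arguments; illustrative only):

import torch
from mmseg.models.backbones import ResNet, ResNetV1c

plain = ResNet(depth=50)   # default stem: a single 7x7 conv
v1c = ResNetV1c(depth=50)  # stem replaced by three 3x3 convs
for backbone in (plain, v1c):
    backbone.eval()
    with torch.no_grad():
        outs = backbone(torch.randn(1, 3, 64, 64))
    print([o.shape for o in outs])
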
4 changes: 4 additions & 0 deletions mmseg/models/backbones/resnext.py
@@ -88,6 +88,10 @@ def __init__(self,
class ResNeXt(ResNet):
"""ResNeXt backbone.
+ This backbone is the implementation of `Aggregated
+ Residual Transformations for Deep Neural
+ Networks <https://arxiv.org/abs/1611.05431>`_.
Args:
depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
in_channels (int): Number of input image channels. Normally 3.
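
A brief sketch with a common ResNeXt-50 32x4d setting (illustrative):

import torch
from mmseg.models.backbones import ResNeXt

backbone = ResNeXt(depth=50, groups=32, base_width=4)
backbone.eval()
with torch.no_grad():
    outs = backbone(torch.randn(1, 3, 64, 64))
print([o.shape for o in outs])
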
11 changes: 5 additions & 6 deletions mmseg/models/backbones/swin.py
@@ -522,13 +522,12 @@ def forward(self, x, hw_shape):

@BACKBONES.register_module()
class SwinTransformer(BaseModule):
- """ Swin Transformer
- A PyTorch implement of : `Swin Transformer:
- Hierarchical Vision Transformer using Shifted Windows` -
- https://arxiv.org/abs/2103.14030
+ """Swin Transformer backbone.
- Inspiration from
- https://github.com/microsoft/Swin-Transformer
+ This backbone is the implementation of `Swin Transformer:
+ Hierarchical Vision Transformer using Shifted
+ Windows <https://arxiv.org/abs/2103.14030>`_.
+ Inspiration from https://github.com/microsoft/Swin-Transformer.

Args:
pretrain_img_size (int | tuple[int]): The size of input image when
6 changes: 3 additions & 3 deletions mmseg/models/backbones/unet.py
@@ -224,8 +224,9 @@ def forward(self, x):
@BACKBONES.register_module()
class UNet(BaseModule):
"""UNet backbone.
- U-Net: Convolutional Networks for Biomedical Image Segmentation.
- https://arxiv.org/pdf/1505.04597.pdf
+ This backbone is the implementation of `U-Net: Convolutional Networks
+ for Biomedical Image Segmentation <https://arxiv.org/abs/1505.04597>`_.
Args:
in_channels (int): Number of input image channels. Default: 3.
@@ -277,7 +278,6 @@ class UNet(BaseModule):
The input image size should be divisible by the whole downsample rate
of the encoder. More detail of the whole downsample rate can be found
in UNet._check_input_divisible.
"""

def __init__(self,
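
As the note above says, the input must be divisible by the encoder's whole downsample rate (16 with the default five-stage setup); a minimal sketch:

import torch
from mmseg.models.backbones import UNet

backbone = UNet(in_channels=3)  # default five-stage encoder assumed
backbone.eval()
with torch.no_grad():
    outs = backbone(torch.randn(1, 3, 64, 64))  # 64 is divisible by 16
print([o.shape for o in outs])
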
6 changes: 3 additions & 3 deletions mmseg/models/backbones/vit.py
@@ -98,9 +98,9 @@ def forward(self, x):
class VisionTransformer(BaseModule):
"""Vision Transformer.
- A PyTorch implement of : `An Image is Worth 16x16 Words:
- Transformers for Image Recognition at Scale` -
- https://arxiv.org/abs/2010.11929
+ This backbone is the implementation of `An Image is Worth 16x16 Words:
+ Transformers for Image Recognition at
+ Scale <https://arxiv.org/abs/2010.11929>`_.
Args:
img_size (int | tuple): Input image size. Default: 224.
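
A brief forward sketch with ViT-Base-like defaults (illustrative):

import torch
from mmseg.models.backbones import VisionTransformer

backbone = VisionTransformer(img_size=224)  # ViT-Base-like defaults assumed
backbone.eval()
with torch.no_grad():
    outs = backbone(torch.randn(1, 3, 224, 224))
print([o.shape for o in outs])  # one map per entry in out_indices
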
2 changes: 2 additions & 0 deletions mmseg/models/decode_heads/point_head.py
@@ -36,6 +36,8 @@ def calculate_uncertainty(seg_logits):
class PointHead(BaseCascadeDecodeHead):
"""A mask point head used in PointRend.
+ This head is the implementation of `PointRend: Image Segmentation as
+ Rendering <https://arxiv.org/abs/1912.08193>`_.
``PointHead`` uses a shared multi-layer perceptron (equivalent to
nn.Conv1d) to predict the logit of input points. The fine-grained feature
and coarse feature will be concatenated together for prediction.
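
For context, an illustrative decode-head entry in a PointRend-style config (all field values below are assumptions, not taken from this commit):

# Hypothetical PointRend-style head entry; values are assumptions.
point_head = dict(
    type='PointHead',
    in_channels=[256],
    in_index=[0],
    channels=256,
    num_fcs=3,
    coarse_pred_each_layer=True,
    num_classes=19,
    align_corners=False,
    loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False,
                     loss_weight=1.0))
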
4 changes: 3 additions & 1 deletion mmseg/models/decode_heads/sep_fcn_head.py
@@ -10,7 +10,9 @@ class DepthwiseSeparableFCNHead(FCNHead):
"""Depthwise-Separable Fully Convolutional Network for Semantic
Segmentation.
- This head is implemented according to Fast-SCNN paper.
+ This head is implemented according to `Fast-SCNN: Fast Semantic
+ Segmentation Network <https://arxiv.org/abs/1902.04502>`_.
Args:
in_channels(int): Number of output channels of FFM.
channels(int): Number of middle-stage channels in the decode head.
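
A hedged construction sketch (the argument values mirror a typical Fast-SCNN setup and are assumptions, not part of this commit):

from mmseg.models.decode_heads import DepthwiseSeparableFCNHead

head = DepthwiseSeparableFCNHead(
    in_channels=128,   # output channels of the FFM
    channels=128,
    concat_input=False,
    num_classes=19,
    in_index=-1)
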
4 changes: 2 additions & 2 deletions mmseg/models/necks/fpn.py
@@ -12,8 +12,8 @@
class FPN(BaseModule):
"""Feature Pyramid Network.
- This is an implementation of - Feature Pyramid Networks for Object
- Detection (https://arxiv.org/abs/1612.03144)
+ This neck is the implementation of `Feature Pyramid Networks for Object
+ Detection <https://arxiv.org/abs/1612.03144>`_.
Args:
in_channels (List[int]): Number of input channels per scale.
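
A small forward sketch (channel counts and spatial sizes are illustrative):

import torch
from mmseg.models.necks import FPN

neck = FPN(in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4)
neck.eval()
feats = [torch.randn(1, c, 32 // 2**i, 32 // 2**i)
         for i, c in enumerate([256, 512, 1024, 2048])]
with torch.no_grad():
    outs = neck(feats)  # num_outs maps, each with out_channels channels
print([o.shape for o in outs])
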
4 changes: 2 additions & 2 deletions mmseg/models/necks/mla_neck.py
@@ -63,8 +63,8 @@ def forward(self, inputs):
class MLANeck(nn.Module):
"""Multi-level Feature Aggregation.
- The Multi-level Feature Aggregation construction of SETR:
- https://arxiv.org/pdf/2012.15840.pdf
+ This neck is `The Multi-level Feature Aggregation construction of
+ SETR <https://arxiv.org/abs/2012.15840>`_.
Args:
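
An illustrative config-style entry for wiring this neck into a SETR-MLA model (field values are assumptions, not taken from this commit):

# Hypothetical SETR-MLA neck entry; values are assumptions.
neck = dict(
    type='MLANeck',
    in_channels=[1024, 1024, 1024, 1024],
    out_channels=256,
    norm_cfg=dict(type='LN', eps=1e-6),
    act_cfg=dict(type='ReLU'))
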
1 change: 1 addition & 0 deletions mmseg/models/necks/multilevel_neck.py
@@ -11,6 +11,7 @@ class MultiLevelNeck(nn.Module):
"""MultiLevelNeck.
A neck structure that connects the ViT backbone and decoder heads.
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale).
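
A small forward sketch (channel counts and scales below are illustrative assumptions):

import torch
from mmseg.models.necks import MultiLevelNeck

neck = MultiLevelNeck(in_channels=[768, 768, 768, 768],
                      out_channels=768,
                      scales=[4, 2, 1, 0.5])
neck.eval()
feats = [torch.randn(1, 768, 14, 14) for _ in range(4)]
with torch.no_grad():
    outs = neck(feats)  # each input is rescaled by its scale factor
print([o.shape for o in outs])
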
