diff --git a/mmseg/datasets/pipelines/formating.py b/mmseg/datasets/pipelines/formating.py
index 45824fc405..4e057c1b81 100644
--- a/mmseg/datasets/pipelines/formating.py
+++ b/mmseg/datasets/pipelines/formating.py
@@ -249,9 +249,9 @@ class Collect(object):
         keys (Sequence[str]): Keys of results to be collected in ``data``.
         meta_keys (Sequence[str], optional): Meta keys to be converted to
             ``mmcv.DataContainer`` and collected in ``data[img_metas]``.
-            Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape',
-            'pad_shape', 'scale_factor', 'flip', 'flip_direction',
-            'img_norm_cfg')``
+            Default: (``filename``, ``ori_filename``, ``ori_shape``,
+            ``img_shape``, ``pad_shape``, ``scale_factor``, ``flip``,
+            ``flip_direction``, ``img_norm_cfg``)
     """
 
     def __init__(self,
diff --git a/mmseg/models/backbones/cgnet.py b/mmseg/models/backbones/cgnet.py
index 67c06717ba..168194c106 100644
--- a/mmseg/models/backbones/cgnet.py
+++ b/mmseg/models/backbones/cgnet.py
@@ -187,8 +187,8 @@ def forward(self, x):
 class CGNet(BaseModule):
     """CGNet backbone.
 
-    A Light-weight Context Guided Network for Semantic Segmentation
-    arXiv: https://arxiv.org/abs/1811.08201
+    This backbone is the implementation of `A Light-weight Context Guided
+    Network for Semantic Segmentation <https://arxiv.org/abs/1811.08201>`_.
 
     Args:
         in_channels (int): Number of input image channels. Normally 3.
diff --git a/mmseg/models/backbones/fast_scnn.py b/mmseg/models/backbones/fast_scnn.py
index 95a434413b..cbfbcaf4f3 100644
--- a/mmseg/models/backbones/fast_scnn.py
+++ b/mmseg/models/backbones/fast_scnn.py
@@ -272,6 +272,9 @@ def forward(self, higher_res_feature, lower_res_feature):
 class FastSCNN(BaseModule):
     """Fast-SCNN Backbone.
 
+    This backbone is the implementation of `Fast-SCNN: Fast Semantic
+    Segmentation Network <https://arxiv.org/abs/1902.04502>`_.
+
     Args:
         in_channels (int): Number of input image channels. Default: 3.
         downsample_dw_channels (tuple[int]): Number of output channels after
diff --git a/mmseg/models/backbones/hrnet.py b/mmseg/models/backbones/hrnet.py
index a0b1e47cde..90feadcf62 100644
--- a/mmseg/models/backbones/hrnet.py
+++ b/mmseg/models/backbones/hrnet.py
@@ -218,8 +218,8 @@ def forward(self, x):
 class HRNet(BaseModule):
     """HRNet backbone.
 
-    `High-Resolution Representations for Labeling Pixels and Regions
-    arXiv: <https://arxiv.org/abs/1904.04514>`_.
+    This backbone is the implementation of `High-Resolution Representations
+    for Labeling Pixels and Regions <https://arxiv.org/abs/1904.04514>`_.
 
     Args:
         extra (dict): Detailed configuration for each stage of HRNet.
diff --git a/mmseg/models/backbones/mit.py b/mmseg/models/backbones/mit.py
index 90abfe539b..ee8bbfab45 100644
--- a/mmseg/models/backbones/mit.py
+++ b/mmseg/models/backbones/mit.py
@@ -246,9 +246,9 @@ def forward(self, x, hw_shape):
 class MixVisionTransformer(BaseModule):
     """The backbone of Segformer.
 
-    A PyTorch implement of : `SegFormer: Simple and Efficient Design for
-    Semantic Segmentation with Transformers` -
-        https://arxiv.org/pdf/2105.15203.pdf
+    This backbone is the implementation of `SegFormer: Simple and
+    Efficient Design for Semantic Segmentation with
+    Transformers <https://arxiv.org/abs/2105.15203>`_.
 
     Args:
         in_channels (int): Number of input channels. Default: 3.
diff --git a/mmseg/models/backbones/mobilenet_v2.py b/mmseg/models/backbones/mobilenet_v2.py
index 988e29cdea..cbb9c6cd01 100644
--- a/mmseg/models/backbones/mobilenet_v2.py
+++ b/mmseg/models/backbones/mobilenet_v2.py
@@ -14,6 +14,10 @@
 class MobileNetV2(BaseModule):
     """MobileNetV2 backbone.
 
+    This backbone is the implementation of
+    `MobileNetV2: Inverted Residuals and Linear Bottlenecks
+    <https://arxiv.org/abs/1801.04381>`_.
+
     Args:
         widen_factor (float): Width multiplier, multiply number of channels
             in each layer by this amount. Default: 1.0.
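The hunks above are docstring-only, so runtime behaviour should be unchanged. As a quick smoke test of one of the documented classes, here is a minimal sketch, assuming ``mmseg`` and ``torch`` are installed and that ``MobileNetV2`` is exported from ``mmseg.models.backbones`` as in this revision:

```python
# Sanity check (not part of the diff): the MobileNetV2 backbone documented
# above should still build and run after the docstring-only change.
import torch

from mmseg.models.backbones import MobileNetV2

model = MobileNetV2(widen_factor=1.0)  # widen_factor as documented above
model.eval()
with torch.no_grad():
    feats = model(torch.randn(1, 3, 224, 224))  # NCHW, 3-channel input
for feat in feats:
    print(feat.shape)  # one feature map per configured out_index
```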
diff --git a/mmseg/models/backbones/resnest.py b/mmseg/models/backbones/resnest.py
index f47adb5302..91952c2caf 100644
--- a/mmseg/models/backbones/resnest.py
+++ b/mmseg/models/backbones/resnest.py
@@ -271,6 +271,9 @@ def _inner_forward(x):
 class ResNeSt(ResNetV1d):
     """ResNeSt backbone.
 
+    This backbone is the implementation of `ResNeSt:
+    Split-Attention Networks <https://arxiv.org/abs/2004.08955>`_.
+
     Args:
         groups (int): Number of groups of Bottleneck. Default: 1
         base_width (int): Base width of Bottleneck. Default: 4
diff --git a/mmseg/models/backbones/resnet.py b/mmseg/models/backbones/resnet.py
index f9a1ceb4e0..e8b961d5fa 100644
--- a/mmseg/models/backbones/resnet.py
+++ b/mmseg/models/backbones/resnet.py
@@ -311,6 +311,9 @@ def _inner_forward(x):
 class ResNet(BaseModule):
     """ResNet backbone.
 
+    This backbone is the improved implementation of `Deep Residual Learning
+    for Image Recognition <https://arxiv.org/abs/1512.03385>`_.
+
     Args:
         depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
         in_channels (int): Number of input image channels. Default: 3.
@@ -686,11 +689,10 @@ def train(self, mode=True):
 class ResNetV1c(ResNet):
     """ResNetV1c variant described in [1]_.
 
-    Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv
-    in the input stem with three 3x3 convs.
-
-    References:
-        .. [1] https://arxiv.org/pdf/1812.01187.pdf
+    Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv in
+    the input stem with three 3x3 convs. For more details please refer to `Bag
+    of Tricks for Image Classification with Convolutional Neural Networks
+    <https://arxiv.org/abs/1812.01187>`_.
     """
 
     def __init__(self, **kwargs):
diff --git a/mmseg/models/backbones/resnext.py b/mmseg/models/backbones/resnext.py
index 450b77bb76..805c27bf33 100644
--- a/mmseg/models/backbones/resnext.py
+++ b/mmseg/models/backbones/resnext.py
@@ -88,6 +88,10 @@ def __init__(self,
 class ResNeXt(ResNet):
     """ResNeXt backbone.
 
+    This backbone is the implementation of `Aggregated
+    Residual Transformations for Deep Neural
+    Networks <https://arxiv.org/abs/1611.05431>`_.
+
     Args:
         depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
         in_channels (int): Number of input image channels. Normally 3.
diff --git a/mmseg/models/backbones/swin.py b/mmseg/models/backbones/swin.py
index e3e835a032..424c456cb3 100644
--- a/mmseg/models/backbones/swin.py
+++ b/mmseg/models/backbones/swin.py
@@ -522,13 +522,12 @@ def forward(self, x, hw_shape):
 
 @BACKBONES.register_module()
 class SwinTransformer(BaseModule):
-    """ Swin Transformer
-    A PyTorch implement of : `Swin Transformer:
-    Hierarchical Vision Transformer using Shifted Windows` -
-        https://arxiv.org/abs/2103.14030
+    """Swin Transformer backbone.
 
-    Inspiration from
-    https://github.com/microsoft/Swin-Transformer
+    This backbone is the implementation of `Swin Transformer:
+    Hierarchical Vision Transformer using Shifted
+    Windows <https://arxiv.org/abs/2103.14030>`_.
+    Inspiration from https://github.com/microsoft/Swin-Transformer.
 
     Args:
         pretrain_img_size (int | tuple[int]): The size of input image when
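The reworded ResNetV1c docstring may read clearer next to a concrete comparison. A sketch, again assuming ``mmseg`` and ``torch`` are installed, contrasting the default ResNet stem with the three-3x3-conv stem the docstring describes:

```python
# Illustration (not part of the diff): ResNetV1c vs. plain ResNet stems.
import torch

from mmseg.models.backbones import ResNet, ResNetV1c

v1b = ResNet(depth=50)     # default stem: a single 7x7 conv
v1c = ResNetV1c(depth=50)  # deep stem: three stacked 3x3 convs

x = torch.randn(1, 3, 224, 224)
for model in (v1b, v1c):
    model.eval()
    with torch.no_grad():
        outs = model(x)
    print([o.shape for o in outs])  # same output strides for both variants
```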
diff --git a/mmseg/models/backbones/unet.py b/mmseg/models/backbones/unet.py
index 680c79e320..c2d33667f8 100644
--- a/mmseg/models/backbones/unet.py
+++ b/mmseg/models/backbones/unet.py
@@ -224,8 +224,9 @@ def forward(self, x):
 @BACKBONES.register_module()
 class UNet(BaseModule):
     """UNet backbone.
-    U-Net: Convolutional Networks for Biomedical Image Segmentation.
-    https://arxiv.org/pdf/1505.04597.pdf
+
+    This backbone is the implementation of `U-Net: Convolutional Networks
+    for Biomedical Image Segmentation <https://arxiv.org/abs/1505.04597>`_.
 
     Args:
         in_channels (int): Number of input image channels. Default: 3.
@@ -277,7 +278,6 @@ class UNet(BaseModule):
         The input image size should be divisible by the whole downsample rate
         of the encoder. More detail of the whole downsample rate can be found
         in UNet._check_input_divisible.
-
     """
 
     def __init__(self,
diff --git a/mmseg/models/backbones/vit.py b/mmseg/models/backbones/vit.py
index 003fa537e6..668d278992 100644
--- a/mmseg/models/backbones/vit.py
+++ b/mmseg/models/backbones/vit.py
@@ -98,9 +98,9 @@ def forward(self, x):
 class VisionTransformer(BaseModule):
     """Vision Transformer.
 
-    A PyTorch implement of : `An Image is Worth 16x16 Words:
-    Transformers for Image Recognition at Scale` -
-        https://arxiv.org/abs/2010.11929
+    This backbone is the implementation of `An Image is Worth 16x16 Words:
+    Transformers for Image Recognition at
+    Scale <https://arxiv.org/abs/2010.11929>`_.
 
     Args:
         img_size (int | tuple): Input image size. Default: 224.
diff --git a/mmseg/models/decode_heads/point_head.py b/mmseg/models/decode_heads/point_head.py
index 4bc388cbc0..4470571144 100644
--- a/mmseg/models/decode_heads/point_head.py
+++ b/mmseg/models/decode_heads/point_head.py
@@ -36,6 +36,8 @@ def calculate_uncertainty(seg_logits):
 class PointHead(BaseCascadeDecodeHead):
     """A mask point head use in PointRend.
 
+    This head is the implementation of `PointRend: Image Segmentation as
+    Rendering <https://arxiv.org/abs/1912.08193>`_.
     ``PointHead`` use shared multi-layer perceptron (equivalent to
     nn.Conv1d) to predict the logit of input points. The fine-grained feature
     and coarse feature will be concatenate together for predication.
diff --git a/mmseg/models/decode_heads/sep_fcn_head.py b/mmseg/models/decode_heads/sep_fcn_head.py
index 5e22a66f7c..7f9658e08f 100644
--- a/mmseg/models/decode_heads/sep_fcn_head.py
+++ b/mmseg/models/decode_heads/sep_fcn_head.py
@@ -10,7 +10,9 @@ class DepthwiseSeparableFCNHead(FCNHead):
     """Depthwise-Separable Fully Convolutional Network for Semantic
     Segmentation.
 
-    This head is implemented according to Fast-SCNN paper.
+    This head is implemented according to `Fast-SCNN: Fast Semantic
+    Segmentation Network <https://arxiv.org/abs/1902.04502>`_.
+
     Args:
         in_channels(int): Number of output channels of FFM.
         channels(int): Number of middle-stage channels in the decode head.
diff --git a/mmseg/models/necks/fpn.py b/mmseg/models/necks/fpn.py
index 8461a75e49..bc237428e9 100644
--- a/mmseg/models/necks/fpn.py
+++ b/mmseg/models/necks/fpn.py
@@ -12,8 +12,8 @@
 class FPN(BaseModule):
     """Feature Pyramid Network.
 
-    This is an implementation of - Feature Pyramid Networks for Object
-    Detection (https://arxiv.org/abs/1612.03144)
+    This neck is the implementation of `Feature Pyramid Networks for Object
+    Detection <https://arxiv.org/abs/1612.03144>`_.
 
     Args:
         in_channels (List[int]): Number of input channels per scale.
diff --git a/mmseg/models/necks/mla_neck.py b/mmseg/models/necks/mla_neck.py
index 5fc3b98b0b..1513e296da 100644
--- a/mmseg/models/necks/mla_neck.py
+++ b/mmseg/models/necks/mla_neck.py
@@ -63,8 +63,8 @@ def forward(self, inputs):
 
 class MLANeck(nn.Module):
     """Multi-level Feature Aggregation.
 
-    The Multi-level Feature Aggregation construction of SETR:
-    https://arxiv.org/pdf/2012.15840.pdf
+    This neck is the `Multi-level Feature Aggregation construction of
+    SETR <https://arxiv.org/abs/2012.15840>`_.
 
     Args:
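The UNet hunk keeps the note that the input size must be divisible by the encoder's whole downsample rate. A short check of that rule, as a sketch assuming ``mmseg``/``torch`` and the library's default five-stage configuration:

```python
# Checking the divisibility rule from the UNet docstring (not part of the
# diff). With the assumed defaults of five stages and four downsampling
# stages, the whole downsample rate is 16, so H and W must be multiples of 16.
import torch

from mmseg.models.backbones import UNet

model = UNet()  # library defaults assumed: num_stages=5, four downsamples
model.eval()
with torch.no_grad():
    outs = model(torch.randn(1, 3, 64, 64))  # 64 % 16 == 0, so this is valid
print([o.shape for o in outs])
# A 65x65 input would be rejected by UNet._check_input_divisible.
```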
diff --git a/mmseg/models/necks/multilevel_neck.py b/mmseg/models/necks/multilevel_neck.py
index cbf4b01176..5151f8762d 100644
--- a/mmseg/models/necks/multilevel_neck.py
+++ b/mmseg/models/necks/multilevel_neck.py
@@ -11,6 +11,7 @@ class MultiLevelNeck(nn.Module):
     """MultiLevelNeck.
 
     A neck structure connect vit backbone and decoder_heads.
+
     Args:
         in_channels (List[int]): Number of input channels per scale.
         out_channels (int): Number of output channels (used at each scale).
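Since the MultiLevelNeck docstring only gains a blank line here, a usage sketch may still help readers. The channel count and scales below are illustrative assumptions, not values taken from this diff:

```python
# Hypothetical wiring of MultiLevelNeck between a ViT-style backbone and a
# decode head (not part of the diff); 768 channels and these scales are
# assumptions for illustration only.
import torch

from mmseg.models.necks import MultiLevelNeck

neck = MultiLevelNeck(
    in_channels=[768, 768, 768, 768],  # ViT emits same-size feature maps
    out_channels=768,
    scales=[4, 2, 1, 0.5])  # resample the single scale into a pyramid
feats = [torch.randn(1, 768, 32, 32) for _ in range(4)]
outs = neck(feats)
print([o.shape for o in outs])  # spatial sizes 128, 64, 32, 16
```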