diff --git a/configs/bottom_up/higherhrnet/coco/higher_hrnet32_coco_512x512.py b/configs/bottom_up/higherhrnet/coco/higher_hrnet32_coco_512x512.py index 08966d7ffe..7a3a4be366 100644 --- a/configs/bottom_up/higherhrnet/coco/higher_hrnet32_coco_512x512.py +++ b/configs/bottom_up/higherhrnet/coco/higher_hrnet32_coco_512x512.py @@ -198,14 +198,3 @@ data_cfg=data_cfg, pipeline=val_pipeline), ) - -loss = dict( - type='MultiLossFactory', - num_stages=2, - ae_loss_type='exp', - with_ae_loss=[True, False], - push_loss_factor=[0.001, 0.001], - pull_loss_factor=[0.001, 0.001], - with_heatmaps_loss=[True, True], - heatmaps_loss_factor=[1.0, 1.0], -) diff --git a/configs/bottom_up/higherhrnet/coco/higher_hrnet32_coco_640x640.py b/configs/bottom_up/higherhrnet/coco/higher_hrnet32_coco_640x640.py index 6ce95f2594..192005a6cc 100755 --- a/configs/bottom_up/higherhrnet/coco/higher_hrnet32_coco_640x640.py +++ b/configs/bottom_up/higherhrnet/coco/higher_hrnet32_coco_640x640.py @@ -199,14 +199,3 @@ data_cfg=data_cfg, pipeline=val_pipeline), ) - -loss = dict( - type='MultiLossFactory', - num_stages=2, - ae_loss_type='exp', - with_ae_loss=[True, False], - push_loss_factor=[0.001, 0.001], - pull_loss_factor=[0.001, 0.001], - with_heatmaps_loss=[True, True], - heatmaps_loss_factor=[1.0, 1.0], -) diff --git a/configs/bottom_up/higherhrnet/coco/higher_hrnet48_coco_512x512.py b/configs/bottom_up/higherhrnet/coco/higher_hrnet48_coco_512x512.py index 575b8b4b12..3a53b8cd7d 100755 --- a/configs/bottom_up/higherhrnet/coco/higher_hrnet48_coco_512x512.py +++ b/configs/bottom_up/higherhrnet/coco/higher_hrnet48_coco_512x512.py @@ -199,14 +199,3 @@ data_cfg=data_cfg, pipeline=val_pipeline), ) - -loss = dict( - type='MultiLossFactory', - num_stages=2, - ae_loss_type='exp', - with_ae_loss=[True, False], - push_loss_factor=[0.001, 0.001], - pull_loss_factor=[0.001, 0.001], - with_heatmaps_loss=[True, True], - heatmaps_loss_factor=[1.0, 1.0], -) diff --git a/configs/bottom_up/hrnet/README.md b/configs/bottom_up/hrnet/README.md new file mode 100644 index 0000000000..7a844590ac --- /dev/null +++ b/configs/bottom_up/hrnet/README.md @@ -0,0 +1,35 @@ +# Associative Embedding (AE) + HRNet + +## Introduction +``` +@inproceedings{newell2017associative, + title={Associative embedding: End-to-end learning for joint detection and grouping}, + author={Newell, Alejandro and Huang, Zhiao and Deng, Jia}, + booktitle={Advances in neural information processing systems}, + pages={2277--2287}, + year={2017} +} +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +## Results and models + +### Results on COCO val2017 without multi-scale test + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | +| [HRNet-w32](/configs/bottom_up/hrnet/coco/hrnet_w32_coco_512x512.py) | 512x512 | 0.654 | 0.863 | 0.720 | 0.710 | 0.892 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/hrnet_w32_coco_512x512_20200816.log.json) | +| [HRNet-w48](/configs/bottom_up/hrnet/coco/hrnet_w48_coco_512x512.py) | 512x512 | 0.665 | 0.860 | 0.727 | 0.716 | 
0.889 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/hrnet_w48_coco_512x512-cf72fcdf_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/hrnet_w48_coco_512x512_20200816.log.json) | + +### Results on COCO val2017 with multi-scale test with scales [2, 1, 0.5]. + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | +| [HRNet-w32](/configs/bottom_up/hrnet/coco/hrnet_w32_coco_512x512.py) | 512x512 | 0.698 | 0.877 | 0.760 | 0.748 | 0.907 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/hrnet_w32_coco_512x512_20200816.log.json) | +| [HRNet-w48](/configs/bottom_up/hrnet/coco/hrnet_w48_coco_512x512.py) | 512x512 | 0.712 | 0.880 | 0.771 | 0.757 | 0.909 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/hrnet_w48_coco_512x512-cf72fcdf_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/hrnet_w48_coco_512x512_20200816.log.json) | diff --git a/configs/bottom_up/hrnet/coco/hrnet_w32_coco_512x512.py b/configs/bottom_up/hrnet/coco/hrnet_w32_coco_512x512.py new file mode 100644 index 0000000000..7907bd0d9c --- /dev/null +++ b/configs/bottom_up/hrnet/coco/hrnet_w32_coco_512x512.py @@ -0,0 +1,196 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=512, + base_size=256, + base_sigma=2, + heatmap_size=[128], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/hrnet_w32-36af842e.pth', + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + ), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=32, + num_joints=17, + num_deconv_layers=0, + tag_per_joint=True, + with_ae_loss=[True], + extra=dict(final_conv_kernel=1, )), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + 
num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/hrnet/coco/hrnet_w32_coco_640x640.py b/configs/bottom_up/hrnet/coco/hrnet_w32_coco_640x640.py new file mode 100644 index 0000000000..ba63fde5c1 --- /dev/null +++ b/configs/bottom_up/hrnet/coco/hrnet_w32_coco_640x640.py @@ -0,0 +1,196 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=640, + base_size=320, + base_sigma=2, + heatmap_size=[160], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + 
inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/hrnet_w32-36af842e.pth', + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + ), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=32, + num_joints=17, + num_deconv_layers=0, + tag_per_joint=True, + with_ae_loss=[True], + extra=dict(final_conv_kernel=1, )), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=16, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/hrnet/coco/hrnet_w48_coco_512x512.py b/configs/bottom_up/hrnet/coco/hrnet_w48_coco_512x512.py new file mode 100644 index 0000000000..7dc03c0e2b --- /dev/null 
+++ b/configs/bottom_up/hrnet/coco/hrnet_w48_coco_512x512.py @@ -0,0 +1,196 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=512, + base_size=256, + base_sigma=2, + heatmap_size=[128], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/hrnet_w48-8ef0771d.pth', + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + ), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=48, + num_joints=17, + num_deconv_layers=0, + tag_per_joint=True, + with_ae_loss=[True], + extra=dict(final_conv_kernel=1, )), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + 
type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=16, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/hrnet/coco/hrnet_w48_coco_640x640.py b/configs/bottom_up/hrnet/coco/hrnet_w48_coco_640x640.py new file mode 100644 index 0000000000..e881ca7ee0 --- /dev/null +++ b/configs/bottom_up/hrnet/coco/hrnet_w48_coco_640x640.py @@ -0,0 +1,196 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=640, + base_size=320, + base_sigma=2, + heatmap_size=[160], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/hrnet_w48-8ef0771d.pth', + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + ), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=48, + num_joints=17, + num_deconv_layers=0, + tag_per_joint=True, + with_ae_loss=[True], + extra=dict(final_conv_kernel=1, )), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + 
loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=8, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/mobilenet/README.md b/configs/bottom_up/mobilenet/README.md new file mode 100644 index 0000000000..d93f308da1 --- /dev/null +++ b/configs/bottom_up/mobilenet/README.md @@ -0,0 +1,33 @@ +# Associative Embedding (AE) + Mobilenetv2 + +## Introduction +``` +@inproceedings{newell2017associative, + title={Associative embedding: End-to-end learning for joint detection and grouping}, + author={Newell, Alejandro and Huang, Zhiao and Deng, Jia}, + booktitle={Advances in neural information processing systems}, + pages={2277--2287}, + year={2017} +} +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +## Results and models + +### Results on COCO val2017 without multi-scale test + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | +| [pose_mobilenetv2](/configs/bottom_up/mobilenet/coco/mobilenetv2_coco_512x512.py) | 512x512 | 0.380 | 0.671 | 0.368 | 0.473 | 0.741 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/mobilenetv2_coco_512x512-4d96e309_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/mobilenetv2_coco_512x512_20200816.log.json) | + +### Results on COCO val2017 with multi-scale test 
with scales [2, 1, 0.5]. + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | +| [pose_mobilenetv2](/configs/bottom_up/mobilenet/coco/mobilenetv2_coco_512x512.py) | 512x512 | 0.442 | 0.696 | 0.422 | 0.517 | 0.766 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/mobilenetv2_coco_512x512-4d96e309_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/mobilenetv2_coco_512x512_20200816.log.json) | diff --git a/configs/bottom_up/mobilenet/coco/mobilenetv2_coco_512x512.py b/configs/bottom_up/mobilenet/coco/mobilenetv2_coco_512x512.py new file mode 100644 index 0000000000..04237e4011 --- /dev/null +++ b/configs/bottom_up/mobilenet/coco/mobilenetv2_coco_512x512.py @@ -0,0 +1,167 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=512, + base_size=256, + base_sigma=2, + heatmap_size=[128], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/' + 'mobilenet_v2_batch256_20200708-3b2dc3af.pth', + backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=1280, + num_joints=17, + tag_per_joint=True, + with_ae_loss=[True]), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [
dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/resnet/README.md b/configs/bottom_up/resnet/README.md new file mode 100644 index 0000000000..b80e17aca2 --- /dev/null +++ b/configs/bottom_up/resnet/README.md @@ -0,0 +1,35 @@ +# Associative Embedding (AE) + ResNet + +## Introduction +``` +@inproceedings{newell2017associative, + title={Associative embedding: End-to-end learning for joint detection and grouping}, + author={Newell, Alejandro and Huang, Zhiao and Deng, Jia}, + booktitle={Advances in neural information processing systems}, + pages={2277--2287}, + year={2017} +} +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +## Results and models + +### Results on COCO val2017 without multi-scale test + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | +| [pose_resnet_50](/configs/bottom_up/resnet/coco/res50_coco_512x512.py) | 512x512 | 0.466 | 0.742 | 0.479 | 0.552 | 0.797 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/res50_coco_512x512-5521bead_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/res50_coco_512x512_20200816.log.json) | +| [pose_resnet_101](/configs/bottom_up/resnet/coco/res101_coco_512x512.py) | 512x512 | 0.554 | 0.807 | 0.599 | 0.622 | 0.841 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/res101_coco_512x512-e0c95157_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/res101_coco_512x512_20200816.log.json) | + +### Results on COCO val2017 with multi-scale test with scales [2, 1, 0.5]. 
+ +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | +| [pose_resnet_50](/configs/bottom_up/resnet/coco/res50_coco_512x512.py) | 512x512 | 0.503 | 0.765 | 0.521 | 0.591 | 0.821 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/res50_coco_512x512-5521bead_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/res50_coco_512x512_20200816.log.json) | +| [pose_resnet_101](/configs/bottom_up/resnet/coco/res101_coco_512x512.py) | 512x512 | 0.603 | 0.831 | 0.641 | 0.668 | 0.870 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/res101_coco_512x512-e0c95157_20200816.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/bottom_up/res101_coco_512x512_20200816.log.json) | diff --git a/configs/bottom_up/resnet/coco/res101_coco_512x512.py b/configs/bottom_up/resnet/coco/res101_coco_512x512.py new file mode 100644 index 0000000000..9be26fa556 --- /dev/null +++ b/configs/bottom_up/resnet/coco/res101_coco_512x512.py @@ -0,0 +1,166 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=512, + base_size=256, + base_sigma=2, + heatmap_size=[128], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/resnet101-5d3b4d8f.pth', + backbone=dict(type='ResNet', depth=101), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=2048, + num_joints=17, + tag_per_joint=True, + with_ae_loss=[True]), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 
0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=16, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/resnet/coco/res101_coco_640x640.py b/configs/bottom_up/resnet/coco/res101_coco_640x640.py new file mode 100644 index 0000000000..9001715ff2 --- /dev/null +++ b/configs/bottom_up/resnet/coco/res101_coco_640x640.py @@ -0,0 +1,166 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=640, + base_size=320, + base_sigma=2, + heatmap_size=[160], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/resnet101-5d3b4d8f.pth', + backbone=dict(type='ResNet', depth=101), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=2048, + num_joints=17, + tag_per_joint=True, + with_ae_loss=[True]), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + 
num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=16, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/resnet/coco/res152_coco_512x512.py b/configs/bottom_up/resnet/coco/res152_coco_512x512.py new file mode 100644 index 0000000000..42bca05e43 --- /dev/null +++ b/configs/bottom_up/resnet/coco/res152_coco_512x512.py @@ -0,0 +1,166 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=512, + base_size=256, + base_sigma=2, + heatmap_size=[128], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/resnet152-b121ed2d.pth', + backbone=dict(type='ResNet', depth=152), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=2048, + num_joints=17, + tag_per_joint=True, + with_ae_loss=[True]), + train_cfg=dict( + 
num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=16, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/resnet/coco/res152_coco_640x640.py b/configs/bottom_up/resnet/coco/res152_coco_640x640.py new file mode 100644 index 0000000000..c144940400 --- /dev/null +++ b/configs/bottom_up/resnet/coco/res152_coco_640x640.py @@ -0,0 +1,166 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=640, + base_size=320, + base_sigma=2, + heatmap_size=[160], + 
num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/resnet152-b121ed2d.pth', + backbone=dict(type='ResNet', depth=152), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=2048, + num_joints=17, + tag_per_joint=True, + with_ae_loss=[True]), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=16, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/resnet/coco/res50_coco_512x512.py b/configs/bottom_up/resnet/coco/res50_coco_512x512.py new file mode 100644 index 0000000000..463fbf7b26 --- /dev/null +++ b/configs/bottom_up/resnet/coco/res50_coco_512x512.py @@ -0,0 +1,166 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + 
warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=512, + base_size=256, + base_sigma=2, + heatmap_size=[128], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/resnet50-19c8e357.pth', + backbone=dict(type='ResNet', depth=50), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=2048, + num_joints=17, + tag_per_joint=True, + with_ae_loss=[True]), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/bottom_up/resnet/coco/res50_coco_640x640.py b/configs/bottom_up/resnet/coco/res50_coco_640x640.py new file mode 100644 index 0000000000..c2ccb86598 --- 
/dev/null +++ b/configs/bottom_up/resnet/coco/res50_coco_640x640.py @@ -0,0 +1,166 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=10) +evaluation = dict(interval=100, metric='mAP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=640, + base_size=320, + base_sigma=2, + heatmap_size=[160], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='models/pytorch/imagenet/resnet50-19c8e357.pth', + backbone=dict(type='ResNet', depth=50), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=2048, + num_joints=17, + tag_per_joint=True, + with_ae_loss=[True]), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True), + loss_pose=dict( + type='MultiLossFactory', + num_joints=17, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0], + ), +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=1, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + 
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/mmpose/models/keypoint_heads/bottom_up_simple_head.py b/mmpose/models/keypoint_heads/bottom_up_simple_head.py index 5e03768cd1..69154c33ac 100644 --- a/mmpose/models/keypoint_heads/bottom_up_simple_head.py +++ b/mmpose/models/keypoint_heads/bottom_up_simple_head.py @@ -1,5 +1,6 @@ import torch.nn as nn -from mmcv.cnn import build_conv_layer, normal_init +from mmcv.cnn import (build_conv_layer, build_upsample_layer, constant_init, + normal_init) from ..registry import HEADS @@ -11,6 +12,12 @@ class BottomUpSimpleHead(nn.Module): Args: in_channels (int): Number of input channels. num_joints (int): Number of joints. + num_deconv_layers (int): Number of deconv layers. + num_deconv_layers should be >= 0. Note that 0 means + no deconv layers. + num_deconv_filters (list|tuple): Number of filters. + If num_deconv_layers > 0, the length should equal num_deconv_layers. + num_deconv_kernels (list|tuple): Kernel sizes. tag_per_joint (bool): If tag_per_joint is True, the dimension of tags equals to num_joints, else the dimension of tags is 1. Default: True @@ -20,6 +27,9 @@ def __init__(self, in_channels, num_joints, + num_deconv_layers=3, + num_deconv_filters=(256, 256, 256), + num_deconv_kernels=(4, 4, 4), + tag_per_joint=True, with_ae_loss=None, extra=None): @@ -35,6 +45,18 @@ def __init__(self, if extra is not None and not isinstance(extra, dict): raise TypeError('extra should be dict or None.') + if num_deconv_layers > 0: + self.deconv_layers = self._make_deconv_layer( + num_deconv_layers, + num_deconv_filters, + num_deconv_kernels, + ) + elif num_deconv_layers == 0: + self.deconv_layers = nn.Identity() + else: + raise ValueError( + f'num_deconv_layers ({num_deconv_layers}) should be >= 0.') + if extra is not None and 'final_conv_kernel' in extra: assert extra['final_conv_kernel'] in [1, 3] if extra['final_conv_kernel'] == 3: @@ -46,9 +68,10 @@ def __init__(self, kernel_size = 1 padding = 0 - self.final_layers = build_conv_layer( + self.final_layer = build_conv_layer( cfg=dict(type='Conv2d'), - in_channels=in_channels, + in_channels=num_deconv_filters[-1] + if num_deconv_layers > 0 else in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, @@ -59,12 +82,67 @@ def forward(self, x): if isinstance(x, list): x = x[0] final_outputs = [] - y = self.final_layers(x) + x = self.deconv_layers(x) + y = self.final_layer(x) final_outputs.append(y) return final_outputs + def _make_deconv_layer(self, num_layers, num_filters, num_kernels): + """Make deconv layers.""" + if num_layers != len(num_filters): + error_msg = f'num_layers({num_layers}) ' \ + f'!= length of num_filters({len(num_filters)})' + raise ValueError(error_msg) + if num_layers != len(num_kernels): + error_msg = f'num_layers({num_layers}) ' \ + f'!= length of num_kernels({len(num_kernels)})' + raise ValueError(error_msg) + + layers = [] + for i in range(num_layers): + kernel, padding, output_padding = \ + self._get_deconv_cfg(num_kernels[i]) + + planes = num_filters[i] + layers.append( + build_upsample_layer( + dict(type='deconv'), + in_channels=self.in_channels, + out_channels=planes, + kernel_size=kernel, + stride=2, + padding=padding, + output_padding=output_padding,
+ bias=False)) + layers.append(nn.BatchNorm2d(planes)) + layers.append(nn.ReLU(inplace=True)) + self.in_channels = planes + + return nn.Sequential(*layers) + + def _get_deconv_cfg(self, deconv_kernel): + """Get configurations for deconv layers.""" + if deconv_kernel == 4: + padding = 1 + output_padding = 0 + elif deconv_kernel == 3: + padding = 1 + output_padding = 1 + elif deconv_kernel == 2: + padding = 0 + output_padding = 0 + else: + raise ValueError(f'Not supported num_kernels ({deconv_kernel}).') + + return deconv_kernel, padding, output_padding + def init_weights(self): """Initialize model weights.""" - for m in self.final_layers.modules(): + for name, m in self.deconv_layers.named_modules(): + if isinstance(m, nn.ConvTranspose2d): + normal_init(m, std=0.001) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + for m in self.final_layer.modules(): if isinstance(m, nn.Conv2d): normal_init(m, std=0.001, bias=0) diff --git a/tests/test_model/test_bottom_up_forward.py b/tests/test_model/test_bottom_up_forward.py index 61d1d027a5..6207104ccc 100644 --- a/tests/test_model/test_bottom_up_forward.py +++ b/tests/test_model/test_bottom_up_forward.py @@ -13,8 +13,10 @@ def test_bottomup_forward(): type='BottomUpSimpleHead', in_channels=512, num_joints=17, + num_deconv_layers=0, + tag_per_joint=True, with_ae_loss=[True], - extra={'final_conv_kernel': 3}), + extra=dict(final_conv_kernel=1, )), train_cfg=dict(), test_cfg=dict( num_joints=17, diff --git a/tests/test_model/test_bottom_up_head.py b/tests/test_model/test_bottom_up_head.py index e96abd31ba..dc2b95b4fe 100644 --- a/tests/test_model/test_bottom_up_head.py +++ b/tests/test_model/test_bottom_up_head.py @@ -25,22 +25,23 @@ def test_bottom_up_simple_head(): with_ae_loss=[True], extra={'final_conv_kernel': 3}) head.init_weights() - assert head.final_layers.padding == (1, 1) + assert head.final_layer.padding == (1, 1) head = BottomUpSimpleHead( in_channels=512, num_joints=17, with_ae_loss=[True], extra={'final_conv_kernel': 1}) head.init_weights() - assert head.final_layers.padding == (0, 0) + assert head.final_layer.padding == (0, 0) head = BottomUpSimpleHead( in_channels=512, num_joints=17, with_ae_loss=[True]) head.init_weights() - assert head.final_layers.padding == (0, 0) + assert head.final_layer.padding == (0, 0) # test with_ae_loss head = BottomUpSimpleHead( in_channels=512, num_joints=17, + num_deconv_layers=0, with_ae_loss=[True], extra={'final_conv_kernel': 3}) head.init_weights() @@ -51,6 +52,7 @@ def test_bottom_up_simple_head(): head = BottomUpSimpleHead( in_channels=512, num_joints=17, + num_deconv_layers=0, with_ae_loss=[False], extra={'final_conv_kernel': 3}) head.init_weights() @@ -62,6 +64,7 @@ def test_bottom_up_simple_head(): head = BottomUpSimpleHead( in_channels=512, num_joints=17, + num_deconv_layers=0, tag_per_joint=False, with_ae_loss=[False], extra={'final_conv_kernel': 3}) @@ -73,6 +76,7 @@ def test_bottom_up_simple_head(): head = BottomUpSimpleHead( in_channels=512, num_joints=17, + num_deconv_layers=0, tag_per_joint=False, with_ae_loss=[True], extra={'final_conv_kernel': 3}) @@ -84,6 +88,7 @@ def test_bottom_up_simple_head(): head = BottomUpSimpleHead( in_channels=512, num_joints=17, + num_deconv_layers=0, tag_per_joint=False, with_ae_loss=[True], extra={'final_conv_kernel': 3})
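A quick sanity check on the `(kernel, padding, output_padding)` triples returned by `_get_deconv_cfg`: with `stride=2`, each supported kernel size (4, 3, or 2) yields an exact 2x spatial upsampling per deconv layer, following the transposed-convolution size formula `out = (in - 1) * stride - 2 * padding + kernel + output_padding`. Below is a minimal standalone PyTorch sketch of that check (not part of this patch; the channel count and feature-map size are arbitrary):

```python
import torch
import torch.nn as nn

# (padding, output_padding) per supported kernel size, mirroring _get_deconv_cfg.
deconv_cfgs = {4: (1, 0), 3: (1, 1), 2: (0, 0)}

x = torch.randn(1, 32, 128, 128)  # arbitrary N x C x H x W feature map
for kernel, (padding, output_padding) in deconv_cfgs.items():
    deconv = nn.ConvTranspose2d(
        in_channels=32,
        out_channels=32,
        kernel_size=kernel,
        stride=2,
        padding=padding,
        output_padding=output_padding,
        bias=False)
    # out = (128 - 1) * 2 - 2 * padding + kernel + output_padding = 256
    assert deconv(x).shape[-2:] == (256, 256)
```

This is also why the configs above differ in `num_deconv_layers`: the HRNet configs set `num_deconv_layers=0` because the backbone already outputs features at 1/4 of the input size, matching `heatmap_size` directly, while the ResNet and MobileNetV2 configs keep the default of three deconv layers to upsample the 1/32-resolution backbone output by 2^3 to the same 1/4 scale.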