From 05c58e27c61fb7dd7a8a9b2ebdc2ab7c5bf03c17 Mon Sep 17 00:00:00 2001
From: Tau <674106399@qq.com>
Date: Mon, 26 Sep 2022 17:00:55 +0800
Subject: [PATCH] [Docs] Refine docs (#1656)
---
.../coco/ipr_res50_8xb64-210e_coco-256x256.py | 8 +-
...pr_res50_debias-8xb64-210e_coco-256x256.py | 8 +-
.../ipr_res50_dsnt-8xb64-210e_coco-256x256.py | 8 +-
.../coco/resnet_debias_coco.md | 6 +-
.../coco/resnet_dsnt_coco.md | 8 +-
.../coco/resnet_ipr_coco.md | 15 +-
.../simcc/coco/mobilenetv2_coco.md | 55 +++
.../simcc/coco/resnet_coco.md | 7 +-
...netv2_wo-deconv-8xb64-210e_coco-256x192.py | 127 ++++++
...=> simcc_res50_8xb32-140e_coco-384x288.py} | 16 +-
.../simcc_res50_8xb64-210e_coco-256x192.py | 117 +++++
...mcc_vipnas-mbv3_8xb64-210e_coco-256x192.py | 122 ++++++
.../simcc/coco/vipnas_coco.md | 54 +++
...res50_wo-deconv-8xb64-210e_mpii-256x256.py | 123 ++++++
.../coco/mobilenetv2_rle_coco.md | 74 ++++
.../topdown_regression/coco/resnet_coco.md | 2 +-
.../coco/resnet_rle_coco.md | 4 +-
..._rle-pretrained-8xb64-210e_coco-256x192.py | 128 ++++++
.../td-reg_res101_8xb64-210e_coco-256x192.py | 8 +-
...-reg_res101_rle-8xb64-210e_coco-256x192.py | 8 +-
.../td-reg_res152_8xb64-210e_coco-256x192.py | 8 +-
...-reg_res152_rle-8xb64-210e_coco-256x192.py | 8 +-
...-reg_res152_rle-8xb64-210e_coco-384x288.py | 8 +-
.../td-reg_res50_8xb64-210e_coco-256x192.py | 8 +-
...d-reg_res50_rle-8xb64-210e_coco-256x192.py | 8 +-
..._rle-pretrained-8xb64-210e_coco-256x192.py | 8 +-
.../td-reg_res101_8xb64-210e_mpii-256x256.py | 12 +-
.../td-reg_res152_8xb64-210e_mpii-256x256.py | 12 +-
.../td-reg_res50_8xb64-210e_mpii-256x256.py | 12 +-
...d-reg_res50_rle-8xb64-210e_mpii-256x256.py | 12 +-
docs/en/migration.md | 18 +-
docs/en/overview.md | 2 +-
docs/en/user_guides/configs.md | 20 +
docs/src/papers/algorithms/dsnt.md | 2 +-
docs/src/papers/algorithms/ipr.md | 11 +-
docs/zh_cn/migration.md | 14 +-
docs/zh_cn/overview.md | 6 +-
docs/zh_cn/user_guides/configs.md | 20 +
mmpose/codecs/simcc_label.py | 44 +-
mmpose/models/heads/heatmap_heads/mix_head.py | 408 ++++++++++++++++++
.../models/heads/heatmap_heads/simcc_head.py | 73 +++-
.../heads/regression_heads/dsnt_head.py | 2 +-
mmpose/models/losses/classification_loss.py | 9 +-
mmpose/models/losses/loss_wrappers.py | 2 -
tests/test_codecs/test_simcc_label.py | 13 +-
.../test_heatmap_heads/test_simcc_head.py | 6 +-
46 files changed, 1496 insertions(+), 148 deletions(-)
create mode 100644 configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md
create mode 100644 configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py
rename configs/body_2d_keypoint/simcc/coco/{simcc_res50_8xb64-140e_coco-384x288.py => simcc_res50_8xb32-140e_coco-384x288.py} (93%)
create mode 100644 configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py
create mode 100644 configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py
create mode 100644 configs/body_2d_keypoint/simcc/coco/vipnas_coco.md
create mode 100644 configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py
create mode 100644 configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md
create mode 100644 configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py
create mode 100644 mmpose/models/heads/heatmap_heads/mix_head.py
diff --git a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py
index 995dcc1243..3fd5b06d88 100644
--- a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py
+++ b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(
type='IntegralRegressionLabel',
@@ -132,6 +129,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py
index c18dab391a..78f7c34f60 100644
--- a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py
+++ b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(
type='IntegralRegressionLabel',
@@ -131,6 +128,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py
index 247fae2388..e7b300f206 100644
--- a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py
+++ b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(
type='IntegralRegressionLabel',
@@ -128,6 +125,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md b/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md
index 847227c6f3..0820fdd296 100644
--- a/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md
+++ b/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md
@@ -52,6 +52,6 @@
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
-| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
-| :------------------------------------------------------------------------------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :--------: | :-------: |
-| [debias-ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias--8xb64-210e_coco-256x256.py) | 256x256 | 0.633 | 0.860 | 0.703 | 0.730 | 0.919 | [ckpt](<>) | [log](<>) |
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [debias-ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py) | 256x256 | 0.675 | 0.872 | 0.740 | 0.765 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.log.json) |
diff --git a/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md b/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md
index a9791240cb..2e79338b99 100644
--- a/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md
+++ b/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md
@@ -1,7 +1,7 @@
-DSNT (ECCV'2018)
+DSNT (2018)
```bibtex
@article{nibali2018numerical,
@@ -51,6 +51,6 @@
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
-| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
-| :----------------------------------------------------------------------------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :--------: | :-------: |
-| [ipr_resnet_50_dsnt](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py) | 256x256 | 0.674 | 0.870 | 0.744 | 0.764 | 0.928 | [ckpt](<>) | [log](<>) |
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [ipr_resnet_50_dsnt](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py) | 256x256 | 0.674 | 0.870 | 0.744 | 0.764 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.log.json) |
diff --git a/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md b/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md
index c6cc2dcbfc..ce4fbae501 100644
--- a/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md
+++ b/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md
@@ -4,10 +4,11 @@
IPR (ECCV'2018)
```bibtex
-@article{sun2018integral,
- title={An Integral Pose Regression System for the ECCV2018 PoseTrack Challenge},
- author={Sun, Xiao and Li, Chuankang and Lin, Stephen},
- journal={arXiv preprint arXiv:1809.06079},
+@inproceedings{sun2018integral,
+ title={Integral human pose regression},
+ author={Sun, Xiao and Xiao, Bin and Wei, Fangyin and Liang, Shuang and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={529--545},
year={2018}
}
```
@@ -51,6 +52,6 @@
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
-| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
-| :------------------------------------------------------------------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :--------: | :-------: |
-| [ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py) | 256x256 | 0.633 | 0.860 | 0.703 | 0.730 | 0.919 | [ckpt](<>) | [log](<>) |
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py) | 256x256 | 0.633 | 0.860 | 0.703 | 0.730 | 0.919 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.log.json) |
diff --git a/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md b/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md
new file mode 100644
index 0000000000..fa4d5e0d39
--- /dev/null
+++ b/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md
@@ -0,0 +1,55 @@
+
+
+
+SimCC (ECCV'2022)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2107.03332,
+ title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation},
+ author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
+ year={2021}
+}
+```
+
+
+
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [simcc_mobilenetv2_wo_deconv](/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py) | 256x192 | 0.620 | 0.855 | 0.697 | 0.678 | 0.902 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-e0cc028d_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-e0cc028d_20220922.log.json) |
diff --git a/configs/body_2d_keypoint/simcc/coco/resnet_coco.md b/configs/body_2d_keypoint/simcc/coco/resnet_coco.md
index 35f594a054..d6a60da064 100644
--- a/configs/body_2d_keypoint/simcc/coco/resnet_coco.md
+++ b/configs/body_2d_keypoint/simcc/coco/resnet_coco.md
@@ -50,6 +50,7 @@
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
-| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
-| :--------------------------------------------------------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :--------: | :-------: |
-| [simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-140e_coco-384x288.py) | 384x288 | 0.735 | 0.899 | 0.800 | 0.790 | 0.939 | [ckpt](<>) | [log](<>) |
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.721 | 0.900 | 0.798 | 0.781 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.log.json) |
+| [simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py) | 384x288 | 0.735 | 0.899 | 0.800 | 0.790 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.log.json) |
diff --git a/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000..0999c99516
--- /dev/null
+++ b/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py
@@ -0,0 +1,127 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='mmcls://mobilenet_v2',
+ )),
+ head=dict(
+ type='SimCCHead',
+ in_channels=1280,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ deconv_out_channels=None,
+ loss=dict(type='KLDiscretLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+file_client_args = dict(backend='disk')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(
+ type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-140e_coco-384x288.py b/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py
similarity index 93%
rename from configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-140e_coco-384x288.py
rename to configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py
index 9ef61b2660..a3446fef1b 100644
--- a/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-140e_coco-384x288.py
+++ b/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py
@@ -17,17 +17,14 @@
dict(
type='MultiStepLR',
begin=0,
- end=140,
+ end=train_cfg['max_epochs'],
milestones=[90, 120],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
-auto_scale_lr = dict(base_batch_size=128)
-
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
@@ -70,12 +67,10 @@
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
- dict(
- type='RandomBBoxTransform', scale_factor=(0.7, 1.3), rotate_factor=80),
+ dict(type='RandomBBoxTransform'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(
type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec),
- # simcc needs transformed keypoints to calculate the training accuracy
dict(type='PackPoseInputs')
]
test_pipeline = [
@@ -87,7 +82,7 @@
# data loaders
train_dataloader = dict(
- batch_size=64,
+ batch_size=32,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
@@ -118,6 +113,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000..51555d601f
--- /dev/null
+++ b/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(type='MultiStepLR', milestones=[170, 200], gamma=0.1, by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='SimCCHead',
+ in_channels=2048,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ loss=dict(type='KLDiscretLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+file_client_args = dict(backend='disk')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(
+ type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+test_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=test_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000..1b24ac23b2
--- /dev/null
+++ b/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ViPNAS_MobileNetV3'),
+ head=dict(
+ type='SimCCHead',
+ in_channels=160,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ deconv_type='vipnas',
+ deconv_out_channels=(160, 160, 160),
+ deconv_num_groups=(160, 160, 160),
+ loss=dict(type='KLDiscretLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+file_client_args = dict(backend='disk')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(
+ type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=data_root + 'person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md b/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md
new file mode 100644
index 0000000000..a9d8b98fc3
--- /dev/null
+++ b/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md
@@ -0,0 +1,54 @@
+
+
+
+SimCC (ECCV'2022)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2107.03332,
+ title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation},
+ author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
+ year={2021}
+}
+```
+
+
+
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@inproceedings{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [simcc_S-ViPNAS-MobileNetV3](/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py) | 256x192 | 0.695 | 0.883 | 0.772 | 0.755 | 0.927 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.log.json) |
diff --git a/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000..d09d160764
--- /dev/null
+++ b/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel', input_size=(256, 256), sigma=6.0, simcc_split_ratio=2.0)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='SimCCHead',
+ in_channels=2048,
+ out_channels=16,
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ deconv_out_channels=None,
+ loss=dict(type='KLDiscretLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+file_client_args = dict(backend='disk')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(
+ type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file=f'{data_root}/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater'))
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy', norm_item='head')
+test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md b/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md
new file mode 100644
index 0000000000..eddf5a79d3
--- /dev/null
+++ b/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md
@@ -0,0 +1,74 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+RLE (ICCV'2021)
+
+```bibtex
+@inproceedings{li2021human,
+ title={Human pose regression with residual log-likelihood estimation},
+ author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
+ booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
+ pages={11025--11034},
+ year={2021}
+}
+```
+
+
+
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [deeppose_mobilenetv2_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.593 | 0.836 | 0.660 | 0.644 | 0.877 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.log.json) |
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md b/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md
index f64971b21d..7ae0023cdb 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md
+++ b/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md
@@ -54,6 +54,6 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 da
| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
-| [deeppose_resnet_50](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.528 | 0.817 | 0.589 | 0.639 | 0.888 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_coco_256x192-f6de6c0e_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_coco_256x192_20210205.log.json) |
+| [deeppose_resnet_50](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.528 | 0.817 | 0.589 | 0.639 | 0.888 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.log.json) |
| [deeppose_resnet_101](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py) | 256x192 | 0.562 | 0.831 | 0.629 | 0.67 | 0.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192-2f247111_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_20210205.log.json) |
| [deeppose_resnet_152](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py) | 256x192 | 0.584 | 0.842 | 0.659 | 0.688 | 0.907 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192-7df89a88_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_20210205.log.json) |
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md b/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md
index ed531b259f..3f6f2796a9 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md
+++ b/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md
@@ -71,8 +71,8 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 da
| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
-| [deeppose_resnet_50_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.706 | 0.888 | 0.776 | 0.753 | 0.924 | [ckpt](<>) | [log](<>) |
-| [deeppose_resnet_50_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.719 | 0.891 | 0.788 | 0.764 | 0.925 | [ckpt](<>) | [log](<>) |
+| [deeppose_resnet_50_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.706 | 0.888 | 0.776 | 0.753 | 0.924 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.log.json) |
+| [deeppose_resnet_50_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.719 | 0.891 | 0.788 | 0.764 | 0.925 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.log.json) |
| [deeppose_resnet_101_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.722 | 0.894 | 0.794 | 0.768 | 0.93 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle-16c3d461_20220615.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle_20220615.log.json) |
| [deeppose_resnet_152_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.731 | 0.897 | 0.805 | 0.777 | 0.933 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle-c05bdccf_20220615.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle_20220615.log.json) |
| [deeppose_resnet_152_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py) | 384x288 | 0.749 | 0.901 | 0.815 | 0.793 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle-b77c4c37_20220624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle_20220624.log.json) |
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000..6d776875c2
--- /dev/null
+++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py
@@ -0,0 +1,128 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(192, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/top_down/'
+ 'mobilenetv2/mobilenetv2_coco_256x192-d1e58e7b_20200727.pth')),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RLEHead',
+ in_channels=1280,
+ num_joints=17,
+ loss=dict(type='RLELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ),
+)
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+file_client_args = dict(backend='disk')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', target_type='keypoint_label', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json',
+ score_mode='bbox_rle')
+test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py
index dfc549d5e6..03ddc5cdb4 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py
+++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(192, 256))
@@ -113,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py
index 7120de50bc..c3e065ec5f 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py
+++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(192, 256))
@@ -113,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py
index a1e2957f07..7d0cbe906b 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py
+++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(192, 256))
@@ -113,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py
index 9865964366..7d7a816583 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py
+++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(192, 256))
@@ -113,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py
index 33072cd044..d0b183dc33 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py
+++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(288, 384))
@@ -113,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py
index 96e1f1c000..a365acff1d 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py
+++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(192, 256))
@@ -113,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py
index 9aee092d54..851afb38cd 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py
+++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(192, 256))
@@ -113,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py
index 7bd35b9ccb..4a24f2aaaf 100644
--- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py
+++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py
@@ -17,7 +17,7 @@
dict(
type='MultiStepLR',
begin=0,
- end=210,
+ end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(192, 256))
@@ -119,6 +116,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
# evaluators
val_evaluator = dict(
type='CocoMetric',
diff --git a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py
index 763690be31..00dcc6f4d2 100644
--- a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py
+++ b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(256, 256))
@@ -62,9 +59,11 @@
data_mode = 'topdown'
data_root = 'data/mpii/'
+file_client_args = dict(backend='disk')
+
# pipelines
train_pipeline = [
- dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImage', file_client_args=file_client_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomBBoxTransform', shift_prob=0),
@@ -73,7 +72,7 @@
dict(type='PackPoseInputs')
]
val_pipeline = [
- dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImage', file_client_args=file_client_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
@@ -111,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater'))
+
# evaluators
val_evaluator = dict(type='MpiiPCKAccuracy', norm_item='head')
test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py
index d28444fb64..779ae37a84 100644
--- a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py
+++ b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(256, 256))
@@ -62,9 +59,11 @@
data_mode = 'topdown'
data_root = 'data/mpii/'
+file_client_args = dict(backend='disk')
+
# pipelines
train_pipeline = [
- dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImage', file_client_args=file_client_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomBBoxTransform', shift_prob=0),
@@ -73,7 +72,7 @@
dict(type='PackPoseInputs')
]
val_pipeline = [
- dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImage', file_client_args=file_client_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
@@ -111,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater'))
+
# evaluators
val_evaluator = dict(type='MpiiPCKAccuracy', norm_item='head')
test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py
index 33b062f509..87b03ac79e 100644
--- a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py
+++ b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(256, 256))
@@ -62,9 +59,11 @@
data_mode = 'topdown'
data_root = 'data/mpii/'
+file_client_args = dict(backend='disk')
+
# pipelines
train_pipeline = [
- dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImage', file_client_args=file_client_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomBBoxTransform', shift_prob=0),
@@ -73,7 +72,7 @@
dict(type='PackPoseInputs')
]
val_pipeline = [
- dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImage', file_client_args=file_client_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
@@ -111,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater'))
+
# evaluators
val_evaluator = dict(type='MpiiPCKAccuracy', norm_item='head')
test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py
index b8e3a87d39..1a62e710b1 100644
--- a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py
+++ b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py
@@ -26,9 +26,6 @@
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
-# hooks
-default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater'))
-
# codec settings
codec = dict(type='RegressionLabel', input_size=(256, 256))
@@ -62,9 +59,11 @@
data_mode = 'topdown'
data_root = 'data/mpii/'
+file_client_args = dict(backend='disk')
+
# pipelines
train_pipeline = [
- dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImage', file_client_args=file_client_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomBBoxTransform', shift_prob=0),
@@ -73,7 +72,7 @@
dict(type='PackPoseInputs')
]
val_pipeline = [
- dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImage', file_client_args=file_client_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
@@ -111,6 +110,9 @@
))
test_dataloader = val_dataloader
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater'))
+
# evaluators
val_evaluator = dict(type='MpiiPCKAccuracy', norm_item='head')
test_evaluator = val_evaluator
diff --git a/docs/en/migration.md b/docs/en/migration.md
index 3b94754101..e639f031cd 100644
--- a/docs/en/migration.md
+++ b/docs/en/migration.md
@@ -2,7 +2,7 @@
MMPose 1.0 has made significant BC-breaking changes, with modules redesigned and reorganized to reduce code redundancy and improve efficiency. For developers who have some deep-learning knowledge, this tutorial provides a migration guide.
-Whether you are **a user of the previous version of MMPose**, or **a new user wishing to migrate your Pytorch project to MMPose**, you can learn how to build a project based on MMpose 1.0 with this tutorial.
+Whether you are **a user of the previous version of MMPose**, or **a new user wishing to migrate your Pytorch project to MMPose**, you can learn how to build a project based on MMPose 1.0 with this tutorial.
```{note}
This tutorial covers what developers will concern when using MMPose 1.0:
@@ -123,9 +123,9 @@ dataset_info = dict(
### Dataset
-To use costom dataset in MMPose, we recommend converting the annotations into a supported format (e.g. COCO or MPII) and directly using our implementation of the corresponding dataset. If this is not applicable, you may need to implement your own dataset class.
+To use a custom dataset in MMPose, we recommend converting the annotations into a supported format (e.g. COCO or MPII) and directly using our implementation of the corresponding dataset. If this is not applicable, you may need to implement your own dataset class.
-Most 2D keypoint datasets in MMPose **organize the annotations in a COCO-like style**. Thus we provide a base class [BaseCocoStyleDataset](mmpose/datasets/datasets/base/base_coco_style_dataset.py) for these datasets. We recommend that users subclass `BaseCocoStyleDataset` and override the methods as needed (usually `__init__()` and `_load_annotations()`) to extend to a new costom 2D keypoint dataset.
+Most 2D keypoint datasets in MMPose **organize the annotations in a COCO-like style**. Thus we provide a base class [BaseCocoStyleDataset](mmpose/datasets/datasets/base/base_coco_style_dataset.py) for these datasets. We recommend that users subclass `BaseCocoStyleDataset` and override the methods as needed (usually `__init__()` and `_load_annotations()`) to extend to a new custom 2D keypoint dataset.
```{note}
Please refer to [COCO](./dataset_zoo/2d_body_keypoint.md) for more details about the COCO data format.
@@ -305,6 +305,14 @@ In MMPose, we collect Encoding and Decoding processes into a **Codec**, in which
Currently we support the following types of Targets.
+- `heatmap`: Gaussian heatmaps
+- `keypoint_label`: keypoint representation (e.g. normalized coordinates)
+- `keypoint_xy_label`: axis-wise keypoint representation
+- `heatmap+keypoint_label`: Gaussian heatmaps and keypoint representation
+- `multiscale_heatmap`: multi-scale Gaussian heatmaps
+
+and the generated targets will be packed as follows.
+
- `heatmaps`: Gaussian heatmaps
- `keypoint_labels`: keypoint representation (e.g. normalized coordinates)
- `keypoint_x_labels`: keypoint x-axis representation
@@ -345,8 +353,8 @@ The following is an example of the implementation of `PoseDataSample` under the
def get_pose_data_sample(self):
# meta
pose_meta = dict(
- img_shape=(600, 900), # [h, w, c]
- crop_size=(256, 192), # [h, w]
+ img_shape=(600, 900), # [h, w, c]
+ crop_size=(256, 192), # [h, w]
heatmap_size=(64, 48), # [h, w]
)
diff --git a/docs/en/overview.md b/docs/en/overview.md
index 3733d1637c..fbeaac13f5 100644
--- a/docs/en/overview.md
+++ b/docs/en/overview.md
@@ -4,7 +4,7 @@ This chapter will introduce you to the overall framework of MMPose and provide l
## What is MMPose
-![image](https://user-images.githubusercontent.com/26127467/190981395-5ecf0146-f8a7-482f-a87f-b0c64dabf7cb.jpg)
+![overview](https://user-images.githubusercontent.com/13503330/191004511-508d3ec6-9ead-4c52-a522-4d9aa1f26027.png)
MMPose is a Pytorch-based pose estimation open-source toolkit, a member of the [OpenMMLab Project](https://github.com/open-mmlab). It contains a rich set of algorithms for 2d multi-person human pose estimation, 2d hand pose estimation, 2d face landmark detection, 133 keypoint whole-body human pose estimation, fashion landmark detection and animal pose estimation as well as related components and modules, below is its overall framework.
diff --git a/docs/en/user_guides/configs.md b/docs/en/user_guides/configs.md
index 8504d4a182..be73312e20 100644
--- a/docs/en/user_guides/configs.md
+++ b/docs/en/user_guides/configs.md
@@ -128,6 +128,19 @@ General configuration is stored alone in the `$MMPOSE/configs/_base_`, and inher
_base_ = ['../../../_base_/default_runtime.py'] # take the config file as the starting point of the relative path
```
+```{note}
+**Tips**
+
+CheckpointHook:
+
+- save_best: `'coco/AP'` for `CocoMetric`, `'pck/PCK@0.05'` for `PCKAccuracy`
+- max_keep_ckpts: the maximum number of checkpoints to keep. Defaults to -1, which means unlimited.
+
+Example:
+
+`default_hooks = dict(checkpoint=dict(save_best='pck/PCK@0.05', rule='greater', max_keep_ckpts=1))`
+```
+
### Data
Data configuration refers to the data processing related settings, mainly including:
@@ -204,6 +217,13 @@ val_dataloader = dict(
test_dataloader = val_dataloader # use val as test by default
```
+```{note}
+**Tips**
+
+You can set the random seed by doing: `randomness=dict(seed=0)`
+
+```
+
### Training
Training configuration refers to the training related settings including:
diff --git a/docs/src/papers/algorithms/dsnt.md b/docs/src/papers/algorithms/dsnt.md
index 979fa7df80..6a526429d6 100644
--- a/docs/src/papers/algorithms/dsnt.md
+++ b/docs/src/papers/algorithms/dsnt.md
@@ -3,7 +3,7 @@
-DSNT (ECCV'2018)
+DSNT (2018)
```bibtex
@article{nibali2018numerical,
diff --git a/docs/src/papers/algorithms/ipr.md b/docs/src/papers/algorithms/ipr.md
index 52a933bdb3..fca06b986a 100644
--- a/docs/src/papers/algorithms/ipr.md
+++ b/docs/src/papers/algorithms/ipr.md
@@ -1,4 +1,4 @@
-# DeepPose: Human pose estimation via deep neural networks
+# Integral Human Pose Regression
@@ -6,10 +6,11 @@
IPR (ECCV'2018)
```bibtex
-@article{sun2018integral,
- title={An Integral Pose Regression System for the ECCV2018 PoseTrack Challenge},
- author={Sun, Xiao and Li, Chuankang and Lin, Stephen},
- journal={arXiv preprint arXiv:1809.06079},
+@inproceedings{sun2018integral,
+ title={Integral human pose regression},
+ author={Sun, Xiao and Xiao, Bin and Wei, Fangyin and Liang, Shuang and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={529--545},
year={2018}
}
```
diff --git a/docs/zh_cn/migration.md b/docs/zh_cn/migration.md
index 004e5971f3..188c5a4af5 100644
--- a/docs/zh_cn/migration.md
+++ b/docs/zh_cn/migration.md
@@ -302,7 +302,19 @@ test_pipeline = [
在 MMPose 中,我们将编码和解码过程集合成一个编解码器(Codec),在其中实现 `encode()` 和 `decode()`。
-目前 MMPose 支持以下类型的监督目标:
+目前 MMPose 支持生成以下类型的监督目标:
+
+- `heatmap`: 高斯热图
+
+- `keypoint_label`: 关键点标签(如归一化的坐标值)
+
+- `keypoint_xy_label`: 单个坐标轴关键点标签
+
+- `heatmap+keypoint_label`: 同时生成高斯热图和关键点标签
+
+- `multiscale_heatmap`: 多尺度高斯热图
+
+生成的监督目标会按以下关键字进行封装:
- `heatmaps`:高斯热图
diff --git a/docs/zh_cn/overview.md b/docs/zh_cn/overview.md
index 4c8bf1d6d2..edfc9e7247 100644
--- a/docs/zh_cn/overview.md
+++ b/docs/zh_cn/overview.md
@@ -4,11 +4,11 @@
## 什么是 MMPose
-![image](https://user-images.githubusercontent.com/15977946/188659200-e5694ca7-28ff-43e5-ae33-acc1fdff7420.jpg)
+![overview](https://user-images.githubusercontent.com/13503330/191004511-508d3ec6-9ead-4c52-a522-4d9aa1f26027.png)
MMPose 是一款基于 Pytorch 的姿态估计开源工具箱,是 OpenMMLab 项目的成员之一,包含了丰富的 2D 多人姿态估计、2D 手部姿态估计、2D 人脸关键点检测、133关键点全身人体姿态估计、动物关键点检测、服饰关键点检测等算法以及相关的组件和模块,下面是它的整体框架:
-MMPose 由 7 个主要部分组成,apis、structures、datasets、codecs、models、engine、评估和可视化。
+MMPose 由 **8** 个主要部分组成,apis、structures、datasets、codecs、models、engine、evaluation 和 visualization。
- **apis** 提供用于模型推理的高级 API
@@ -18,8 +18,6 @@ MMPose 由 7 个主要部分组成,apis、structures、datasets、codecs、mod
- **transforms** 包含各种数据增强变换
-- **codecs** 提供训练目标生成与模型输出解码所需的编码器和解码器
-
- **codecs** 提供姿态编解码器:编码器用于将姿态信息(通常为关键点坐标)编码为模型学习目标(如热力图),解码器则用于将模型输出解码为姿态估计结果
- **models** 以模块化结构提供了姿态估计模型的各类组件
diff --git a/docs/zh_cn/user_guides/configs.md b/docs/zh_cn/user_guides/configs.md
index da7b448d3c..c57a486321 100644
--- a/docs/zh_cn/user_guides/configs.md
+++ b/docs/zh_cn/user_guides/configs.md
@@ -133,6 +133,19 @@ log_level = 'INFO' # 日志记录等级
_base_ = ['../../../_base_/default_runtime.py'] # 以运行时的config文件位置为相对路径起点
```
+```{note}
+**Tips**
+
+CheckpointHook:
+
+- save_best: `'coco/AP'` 用于 `CocoMetric`, `'pck/PCK@0.05'` 用于 `PCKAccuracy`
+- max_keep_ckpts: 最大保留ckpt数量,默认为-1,代表不限制
+
+样例:
+
+`default_hooks = dict(checkpoint=dict(save_best='pck/PCK@0.05', rule='greater', max_keep_ckpts=1))`
+```
+
### 数据配置
数据配置指数据处理相关的配置,主要包括:
@@ -209,6 +222,13 @@ val_dataloader = dict(
test_dataloader = val_dataloader # 默认情况下不区分验证集和测试集,用户根据需要来自行定义
```
+```{note}
+**Tips**
+
+设置随机种子: `randomness=dict(seed=0)`
+
+```
+
### 训练配置
训练配置指训练策略相关的配置,主要包括:
diff --git a/mmpose/codecs/simcc_label.py b/mmpose/codecs/simcc_label.py
index fe7f76d641..a02ce99ac3 100644
--- a/mmpose/codecs/simcc_label.py
+++ b/mmpose/codecs/simcc_label.py
@@ -29,6 +29,8 @@ class SimCCLabel(BaseKeypointCodec):
simcc_split_ratio (float): The ratio of the label size to the input
size. For example, if the input width is ``w``, the x label size
will be :math:`w*simcc_split_ratio`. Defaults to 2.0
+ label_smooth_weight (float): Label Smoothing weight. Defaults to 0.0
+ normalize (bool): Whether to normalize the heatmaps. Defaults to True.
.. _`SimCC: a Simple Coordinate Classification Perspective for Human Pose
Estimation`: https://arxiv.org/abs/2107.03332
@@ -39,14 +41,16 @@ def __init__(self,
smoothing_type: str = 'gaussian',
sigma: float = 6.0,
simcc_split_ratio: float = 2.0,
- label_smoothing: float = 0.0) -> None:
+ label_smooth_weight: float = 0.0,
+ normalize: bool = True) -> None:
super().__init__()
self.input_size = input_size
self.smoothing_type = smoothing_type
self.sigma = sigma
self.simcc_split_ratio = simcc_split_ratio
- self.label_smoothing = label_smoothing
+ self.label_smooth_weight = label_smooth_weight
+ self.normalize = normalize
if self.smoothing_type not in {'gaussian', 'standard'}:
raise ValueError(
@@ -54,13 +58,12 @@ def __init__(self,
f'{self.smoothing_type}. Should be one of '
'{"gaussian", "standard"}')
- if self.smoothing_type == 'gaussian' and self.label_smoothing > 0.0:
- raise ValueError(
- 'Attribute `label_smoothing` is only used for `standard` mode.'
- )
+ if self.smoothing_type == 'gaussian' and self.label_smooth_weight > 0:
+ raise ValueError('Attribute `label_smooth_weight` is only '
+ 'used for `standard` mode.')
- if self.label_smoothing < 0.0 or self.label_smoothing > 1.0:
- raise ValueError('`label_smoothing` should be in range [0, 1]')
+ if self.label_smooth_weight < 0.0 or self.label_smooth_weight > 1.0:
+ raise ValueError('`label_smooth_weight` should be in range [0, 1]')
def encode(
self,
@@ -153,7 +156,7 @@ def _generate_standard(
"""Encoding keypoints into SimCC labels with Standard Label Smoothing
strategy.
- Labels will be one-hot vectors if self.label_smoothing==0.0
+ Labels will be one-hot vectors if self.label_smooth_weight==0.0
"""
N, K, _ = keypoints.shape
@@ -180,12 +183,12 @@ def _generate_standard(
keypoint_weights[n, k] = 0
continue
- if self.label_smoothing > 0:
- target_x[n, k] = self.label_smoothing / (W - 1)
- target_y[n, k] = self.label_smoothing / (H - 1)
+ if self.label_smooth_weight > 0:
+ target_x[n, k] = self.label_smooth_weight / (W - 1)
+ target_y[n, k] = self.label_smooth_weight / (H - 1)
- target_x[n, k, mu_x] = 1.0 - self.label_smoothing
- target_y[n, k, mu_y] = 1.0 - self.label_smoothing
+ target_x[n, k, mu_x] = 1.0 - self.label_smooth_weight
+ target_y[n, k, mu_y] = 1.0 - self.label_smooth_weight
return target_x, target_y, keypoint_weights
@@ -232,11 +235,12 @@ def _generate_gaussian(
mu_x, mu_y = mu
- target_x[n,
- k] = (np.exp(-((x - mu_x)**2) / (2 * self.sigma**2))) / (
- self.sigma * np.sqrt(np.pi * 2))
- target_y[n,
- k] = (np.exp(-((y - mu_y)**2) / (2 * self.sigma**2))) / (
- self.sigma * np.sqrt(np.pi * 2))
+ target_x[n, k] = np.exp(-((x - mu_x)**2) / (2 * self.sigma**2))
+ target_y[n, k] = np.exp(-((y - mu_y)**2) / (2 * self.sigma**2))
+
+ if self.normalize:
+ norm_value = self.sigma * np.sqrt(np.pi * 2)
+ target_x /= norm_value
+ target_y /= norm_value
return target_x, target_y, keypoint_weights
diff --git a/mmpose/models/heads/heatmap_heads/mix_head.py b/mmpose/models/heads/heatmap_heads/mix_head.py
new file mode 100644
index 0000000000..911f138551
--- /dev/null
+++ b/mmpose/models/heads/heatmap_heads/mix_head.py
@@ -0,0 +1,408 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Optional, Sequence, Tuple, Union
+
+import torch
+import torch.nn.functional as F
+from mmcv.cnn import build_conv_layer
+from torch import Tensor, nn
+
+from mmpose.evaluation.functional import simcc_pck_accuracy
+from mmpose.models.utils.tta import flip_vectors
+from mmpose.registry import KEYPOINT_CODECS, MODELS
+from mmpose.utils.tensor_utils import to_numpy
+from mmpose.utils.typing import (ConfigType, InstanceList, OptConfigType,
+ OptSampleList)
+from ..base_head import BaseHead
+
+OptIntSeq = Optional[Sequence[int]]
+
+
+@MODELS.register_module()
+class MixHead(BaseHead):
+ """Top-down heatmap head introduced in `SimCC`_ by Li et al (2022). The
+ head is composed of a few deconvolutional layers followed by a fully-
+ connected layer to generate 1d representation from low-resolution feature
+ maps.
+
+ Args:
+ in_channels (int | sequence[int]): Number of channels in the input
+ feature map
+ out_channels (int): Number of channels in the output heatmap
+ input_size (tuple): Input image size in shape [w, h]
+ in_featuremap_size (int | sequence[int]): Size of input feature map
+ simcc_split_ratio (float): Split ratio of pixels
+ deconv_type (str, optional): The type of deconv head which should
+ be one of the following options:
+
+ - ``'Heatmap'``: make deconv layers in `HeatmapHead`
+ - ``'ViPNAS'``: make deconv layers in `ViPNASHead`
+
+ Defaults to ``'Heatmap'``
+ deconv_out_channels (sequence[int]): The output channel number of each
+ deconv layer. Defaults to ``(256, 256, 256)``
+ deconv_kernel_sizes (sequence[int | tuple], optional): The kernel size
+ of each deconv layer. Each element should be either an integer for
+ both height and width dimensions, or a tuple of two integers for
+ the height and the width dimension respectively.Defaults to
+ ``(4, 4, 4)``
+ deconv_num_groups (Sequence[int], optional): The group number of each
+ deconv layer. Defaults to ``(16, 16, 16)``
+ conv_out_channels (sequence[int], optional): The output channel number
+ of each intermediate conv layer. ``None`` means no intermediate
+ conv layer between deconv layers and the final conv layer.
+ Defaults to ``None``
+ conv_kernel_sizes (sequence[int | tuple], optional): The kernel size
+ of each intermediate conv layer. Defaults to ``None``
+ input_transform (str): Transformation of input features which should
+ be one of the following options:
+
+ - ``'resize_concat'``: Resize multiple feature maps specified
+ by ``input_index`` to the same size as the first one and
+ concat these feature maps
+ - ``'select'``: Select feature map(s) specified by
+ ``input_index``. Multiple selected features will be
+ bundled into a tuple
+
+ Defaults to ``'select'``
+ input_index (int | sequence[int]): The feature map index used in the
+ input transformation. See also ``input_transform``. Defaults to -1
+ align_corners (bool): `align_corners` argument of
+ :func:`torch.nn.functional.interpolate` used in the input
+ transformation. Defaults to ``False``
+ loss (Config): Config of the keypoint loss. Defaults to use
+ :class:`KLDiscretLoss`
+ decoder (Config, optional): The decoder config that controls decoding
+ keypoint coordinates from the network output. Defaults to ``None``
+ init_cfg (Config, optional): Config to control the initialization. See
+ :attr:`default_init_cfg` for default settings
+
+ .. _`SimCC`: https://arxiv.org/abs/2107.03332
+ """
+
+ _version = 2
+
+    def __init__(
+        self,
+        in_channels: Union[int, Sequence[int]],
+        out_channels: int,
+        input_size: Tuple[int, int],
+        in_featuremap_size: Tuple[int, int],
+        simcc_split_ratio: float = 2.0,
+        debias: bool = False,
+        beta: float = 1.,
+        deconv_type: str = 'Heatmap',
+        deconv_out_channels: OptIntSeq = (256, 256, 256),
+        deconv_kernel_sizes: OptIntSeq = (4, 4, 4),
+        deconv_num_groups: OptIntSeq = (16, 16, 16),
+        conv_out_channels: OptIntSeq = None,
+        conv_kernel_sizes: OptIntSeq = None,
+        has_final_layer: bool = True,
+        input_transform: str = 'select',
+        input_index: Union[int, Sequence[int]] = -1,
+        align_corners: bool = False,
+        loss: ConfigType = dict(type='KLDiscretLoss', use_target_weight=True),
+        decoder: OptConfigType = None,
+        init_cfg: OptConfigType = None,
+    ):
+        """Build the deconv sub-head / final conv and the SimCC layers."""
+        if init_cfg is None:
+            init_cfg = self.default_init_cfg
+
+        super().__init__(init_cfg)
+
+        if deconv_type not in {'Heatmap', 'ViPNAS'}:
+            raise ValueError(
+                f'{self.__class__.__name__} got invalid `deconv_type` value'
+                f'{deconv_type}. Should be one of '
+                '{"Heatmap", "ViPNAS"}')
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.input_size = input_size
+        self.in_featuremap_size = in_featuremap_size
+        self.simcc_split_ratio = simcc_split_ratio
+        self.align_corners = align_corners
+        self.input_transform = input_transform
+        self.input_index = input_index
+        self.debias = debias
+        self.beta = beta
+        self.loss_module = MODELS.build(loss)
+        if decoder is not None:
+            self.decoder = KEYPOINT_CODECS.build(decoder)
+        else:
+            self.decoder = None
+        # heatmap size below assumes each deconv layer doubles the resolution
+        num_deconv = len(deconv_out_channels) if deconv_out_channels else 0
+        if num_deconv != 0:
+            self.heatmap_size = tuple(
+                [s * (2**num_deconv) for s in in_featuremap_size])
+
+            # sub-head holding the deconv layers (+ 1x1 conv if requested)
+            self.deconv_head = self._make_deconv_head(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                deconv_type=deconv_type,
+                deconv_out_channels=deconv_out_channels,
+                deconv_kernel_sizes=deconv_kernel_sizes,
+                deconv_num_groups=deconv_num_groups,
+                conv_out_channels=conv_out_channels,
+                conv_kernel_sizes=conv_kernel_sizes,
+                has_final_layer=has_final_layer,
+                input_transform=input_transform,
+                input_index=input_index,
+                align_corners=align_corners)
+
+            if has_final_layer:
+                in_channels = out_channels
+            else:
+                in_channels = deconv_out_channels[-1]
+
+        else:
+            in_channels = self._get_in_channels()
+            self.deconv_head = None
+
+            if has_final_layer:
+                cfg = dict(
+                    type='Conv2d',
+                    in_channels=in_channels,
+                    out_channels=out_channels,
+                    kernel_size=1)
+                self.final_layer = build_conv_layer(cfg)
+            else:
+                self.final_layer = None
+            # without deconv layers the heatmap keeps the input feature size
+            if self.input_transform == 'resize_concat':
+                if isinstance(in_featuremap_size, tuple):
+                    self.heatmap_size = in_featuremap_size
+                elif isinstance(in_featuremap_size, list):
+                    self.heatmap_size = in_featuremap_size[0]
+            elif self.input_transform == 'select':
+                if isinstance(in_featuremap_size, tuple):
+                    self.heatmap_size = in_featuremap_size
+                elif isinstance(in_featuremap_size, list):
+                    self.heatmap_size = in_featuremap_size[input_index]
+
+        if isinstance(in_channels, list):
+            raise ValueError(
+                f'{self.__class__.__name__} does not support selecting '
+                'multiple input features.')
+
+        # SimCC layers: one linear head per axis over the flattened heatmap
+        flatten_dims = self.heatmap_size[0] * self.heatmap_size[1]
+        # number of 1-D bins per axis (input size scaled by the split ratio)
+        W = int(self.input_size[0] * self.simcc_split_ratio)
+        H = int(self.input_size[1] * self.simcc_split_ratio)
+
+        self.mlp_head_x = nn.Linear(flatten_dims, W)
+        self.mlp_head_y = nn.Linear(flatten_dims, H)
+        # bin positions scaled to [0, 1), consumed by forward()
+        self.linspace_x = torch.arange(0.0, 1.0 * W, 1).reshape(1, 1, W) / W
+        self.linspace_y = torch.arange(0.0, 1.0 * H, 1).reshape(1, 1, H) / H
+        # stored as frozen Parameters (requires_grad=False)
+        self.linspace_x = nn.Parameter(self.linspace_x, requires_grad=False)
+        self.linspace_y = nn.Parameter(self.linspace_y, requires_grad=False)
+
+    def _make_deconv_head(self,
+                          in_channels: Union[int, Sequence[int]],
+                          out_channels: int,
+                          deconv_type: str = 'Heatmap',
+                          deconv_out_channels: OptIntSeq = (256, 256, 256),
+                          deconv_kernel_sizes: OptIntSeq = (4, 4, 4),
+                          deconv_num_groups: OptIntSeq = (16, 16, 16),
+                          conv_out_channels: OptIntSeq = None,
+                          conv_kernel_sizes: OptIntSeq = None,
+                          has_final_layer: bool = True,
+                          input_transform: str = 'select',
+                          input_index: Union[int, Sequence[int]] = -1,
+                          align_corners: bool = False) -> nn.Module:
+        """Build the deconv sub-head selected by ``deconv_type``.
+
+        Args:
+            in_channels (int | sequence[int]): Input channels of the sub-head
+            deconv_type (str): ``'Heatmap'`` builds a ``HeatmapHead``; any
+                other value builds a ``ViPNASHead``
+            deconv_kernel_sizes: Only forwarded to ``HeatmapHead``
+            deconv_num_groups: Only forwarded to ``ViPNASHead``
+            (others): Forwarded unchanged to the selected head
+
+        Returns:
+            nn.Module: The head built through the ``MODELS`` registry
+        """
+        # options common to both sub-head types; the ``in_channels`` argument
+        # is used for either type
+        cfg = dict(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            deconv_out_channels=deconv_out_channels,
+            conv_out_channels=conv_out_channels,
+            conv_kernel_sizes=conv_kernel_sizes,
+            has_final_layer=has_final_layer,
+            input_transform=input_transform,
+            input_index=input_index,
+            align_corners=align_corners)
+        if deconv_type == 'Heatmap':
+            cfg.update(
+                type='HeatmapHead', deconv_kernel_sizes=deconv_kernel_sizes)
+        else:
+            cfg.update(type='ViPNASHead', deconv_num_groups=deconv_num_groups)
+        return MODELS.build(cfg)
+
+    def forward(self, feats: Tuple[Tensor]) -> Tuple[Tensor, Tensor, Tensor]:
+        """Forward the network. The input is multi scale feature maps and the
+        outputs are soft-argmax coordinates plus the two 1d representations.
+
+        Args:
+            feats (Tuple[Tensor]): Multi scale feature maps.
+
+        Returns:
+            tuple: ``(pred, simcc_x, simcc_y)`` where ``pred`` concatenates
+            the x and y expectations on the last dim.
+        """
+        if self.deconv_head is None:
+            feats = self._transform_inputs(feats)
+            if self.final_layer is not None:
+                feats = self.final_layer(feats)
+        else:
+            feats = self.deconv_head(feats)
+
+        # flatten the output heatmap
+        x = torch.flatten(feats, 2)
+
+        simcc_x = self.mlp_head_x(x)
+        simcc_y = self.mlp_head_y(x)
+        # soft-argmax: expected bin position under softmax(beta * logits)
+        pred_x = F.softmax(simcc_x * self.beta, dim=-1)
+        pred_x = (pred_x * self.linspace_x).sum(dim=-1, keepdim=True)
+
+        pred_y = F.softmax(simcc_y * self.beta, dim=-1)
+        pred_y = (pred_y * self.linspace_y).sum(dim=-1, keepdim=True)
+
+        if self.debias:
+            C_x = simcc_x.exp().sum(dim=-1, keepdim=True)
+            pred_x = C_x / (C_x - 1) * (pred_x - 1 / (2 * C_x))
+            # each axis is corrected with its own normaliser (C_y for y)
+            C_y = simcc_y.exp().sum(dim=-1, keepdim=True)
+            pred_y = C_y / (C_y - 1) * (pred_y - 1 / (2 * C_y))
+
+        pred = torch.cat([pred_x, pred_y], dim=-1)
+        return pred, simcc_x, simcc_y
+
+    def predict(
+        self,
+        feats: Tuple[Tensor],
+        batch_data_samples: OptSampleList,
+        test_cfg: OptConfigType = {},
+    ) -> InstanceList:
+        """Predict results from features.
+
+        Args:
+            feats (Tuple[Tensor] | List[Tuple[Tensor]]): The multi-stage
+                features (or multiple multi-stage features in TTA)
+            batch_data_samples (List[:obj:`PoseDataSample`]): The batch
+                data samples
+            test_cfg (dict): The runtime config for testing process. Defaults
+                to {}
+
+        Returns:
+            List[InstanceData]: The pose predictions, each contains
+            the following fields:
+
+                - keypoints (np.ndarray): predicted keypoint coordinates in
+                    shape (num_instances, K, D) where K is the keypoint number
+                    and D is the keypoint dimension
+                - keypoint_scores (np.ndarray): predicted keypoint scores in
+                    shape (num_instances, K)
+                - keypoint_x_labels (np.ndarray, optional): The predicted 1-D
+                    intensity distribution in the x direction
+                - keypoint_y_labels (np.ndarray, optional): The predicted 1-D
+                    intensity distribution in the y direction
+        """
+
+        if test_cfg.get('flip_test', False):
+            # TTA: flip test -> feats = [orig, flipped]
+            assert isinstance(feats, list) and len(feats) == 2
+            flip_indices = batch_data_samples[0].metainfo['flip_indices']
+            _feats, _feats_flip = feats
+
+            # forward() returns (pred, simcc_x, simcc_y); only the two 1-D
+            # representations are averaged and decoded here
+            _, _batch_pred_x, _batch_pred_y = self.forward(_feats)
+
+            _, _flip_x, _flip_y = self.forward(_feats_flip)
+            _batch_pred_x_flip, _batch_pred_y_flip = flip_vectors(
+                _flip_x, _flip_y, flip_indices=flip_indices)
+
+            batch_pred_x = (_batch_pred_x + _batch_pred_x_flip) * 0.5
+            batch_pred_y = (_batch_pred_y + _batch_pred_y_flip) * 0.5
+        else:
+            _, batch_pred_x, batch_pred_y = self.forward(feats)
+
+        preds = self.decode((batch_pred_x, batch_pred_y))
+
+        if test_cfg.get('output_heatmaps', False):
+            for pred_instances, pred_x, pred_y in zip(preds,
+                                                      to_numpy(batch_pred_x),
+                                                      to_numpy(batch_pred_y)):
+
+                pred_instances.keypoint_x_labels = pred_x[None]
+                pred_instances.keypoint_y_labels = pred_y[None]
+
+        return preds
+
+    def loss(
+        self,
+        feats: Tuple[Tensor],
+        batch_data_samples: OptSampleList,
+        train_cfg: OptConfigType = {},
+    ) -> dict:
+        """Calculate losses from a batch of inputs and data samples."""
+        # forward() -> (pred, simcc_x, simcc_y); the SimCC part is supervised
+        _, pred_x, pred_y = self.forward(feats)
+
+        gt_x = torch.cat([
+            d.gt_instance_labels.keypoint_x_labels for d in batch_data_samples
+        ],
+                         dim=0)
+        gt_y = torch.cat([
+            d.gt_instance_labels.keypoint_y_labels for d in batch_data_samples
+        ],
+                         dim=0)
+        keypoint_weights = torch.cat(
+            [
+                d.gt_instance_labels.keypoint_weights
+                for d in batch_data_samples
+            ],
+            dim=0,
+        )
+
+        pred_simcc = (pred_x, pred_y)
+        gt_simcc = (gt_x, gt_y)
+
+        # calculate losses
+        losses = dict()
+        loss = self.loss_module(pred_simcc, gt_simcc, keypoint_weights)
+
+        losses.update(loss_kpt=loss)
+
+        # calculate accuracy
+        _, avg_acc, _ = simcc_pck_accuracy(
+            output=to_numpy(pred_simcc),
+            target=to_numpy(gt_simcc),
+            simcc_split_ratio=self.simcc_split_ratio,
+            mask=to_numpy(keypoint_weights) > 0,
+        )
+
+        acc_pose = torch.tensor(avg_acc, device=gt_x.device)
+        losses.update(acc_pose=acc_pose)
+
+        return losses
+
+    @property
+    def default_init_cfg(self):
+        """Default init configs for conv/deconv, batch-norm and linear."""
+        return [
+            dict(
+                type='Normal', layer=['Conv2d', 'ConvTranspose2d'], std=0.001),
+            dict(type='Constant', layer='BatchNorm2d', val=1),
+            dict(type='Normal', layer=['Linear'], std=0.01, bias=0),
+        ]
diff --git a/mmpose/models/heads/heatmap_heads/simcc_head.py b/mmpose/models/heads/heatmap_heads/simcc_head.py
index 697d285ea7..1a3e19f625 100644
--- a/mmpose/models/heads/heatmap_heads/simcc_head.py
+++ b/mmpose/models/heads/heatmap_heads/simcc_head.py
@@ -12,7 +12,6 @@
from mmpose.utils.typing import (ConfigType, InstanceList, OptConfigType,
OptSampleList)
from ..base_head import BaseHead
-from .heatmap_head import HeatmapHead
OptIntSeq = Optional[Sequence[int]]
@@ -31,6 +30,13 @@ class SimCCHead(BaseHead):
input_size (tuple): Input image size in shape [w, h]
in_featuremap_size (int | sequence[int]): Size of input feature map
simcc_split_ratio (float): Split ratio of pixels
+ deconv_type (str, optional): The type of deconv head which should
+ be one of the following options:
+
+ - ``'heatmap'``: make deconv layers in `HeatmapHead`
+ - ``'vipnas'``: make deconv layers in `ViPNASHead`
+
+            Defaults to ``'heatmap'``
deconv_out_channels (sequence[int]): The output channel number of each
deconv layer. Defaults to ``(256, 256, 256)``
deconv_kernel_sizes (sequence[int | tuple], optional): The kernel size
@@ -38,6 +44,8 @@ class SimCCHead(BaseHead):
both height and width dimensions, or a tuple of two integers for
the height and the width dimension respectively.Defaults to
``(4, 4, 4)``
+ deconv_num_groups (Sequence[int], optional): The group number of each
+ deconv layer. Defaults to ``(16, 16, 16)``
conv_out_channels (sequence[int], optional): The output channel number
of each intermediate conv layer. ``None`` means no intermediate
conv layer between deconv layers and the final conv layer.
@@ -79,8 +87,10 @@ def __init__(
input_size: Tuple[int, int],
in_featuremap_size: Tuple[int, int],
simcc_split_ratio: float = 2.0,
+ deconv_type: str = 'heatmap',
deconv_out_channels: OptIntSeq = (256, 256, 256),
deconv_kernel_sizes: OptIntSeq = (4, 4, 4),
+ deconv_num_groups: OptIntSeq = (16, 16, 16),
conv_out_channels: OptIntSeq = None,
conv_kernel_sizes: OptIntSeq = None,
has_final_layer: bool = True,
@@ -97,6 +107,12 @@ def __init__(
super().__init__(init_cfg)
+ if deconv_type not in {'heatmap', 'vipnas'}:
+ raise ValueError(
+ f'{self.__class__.__name__} got invalid `deconv_type` value'
+ f'{deconv_type}. Should be one of '
+ '{"heatmap", "vipnas"}')
+
self.in_channels = in_channels
self.out_channels = out_channels
self.input_size = input_size
@@ -117,11 +133,13 @@ def __init__(
[s * (2**num_deconv) for s in in_featuremap_size])
# deconv layers + 1x1 conv
- self.simplebaseline_head = HeatmapHead(
+ self.deconv_head = self._make_deconv_head(
in_channels=in_channels,
out_channels=out_channels,
+ deconv_type=deconv_type,
deconv_out_channels=deconv_out_channels,
deconv_kernel_sizes=deconv_kernel_sizes,
+ deconv_num_groups=deconv_num_groups,
conv_out_channels=conv_out_channels,
conv_kernel_sizes=conv_kernel_sizes,
has_final_layer=has_final_layer,
@@ -136,7 +154,7 @@ def __init__(
else:
in_channels = self._get_in_channels()
- self.simplebaseline_head = None
+ self.deconv_head = None
if has_final_layer:
cfg = dict(
@@ -173,6 +191,51 @@ def __init__(
self.mlp_head_x = nn.Linear(flatten_dims, W)
self.mlp_head_y = nn.Linear(flatten_dims, H)
+    def _make_deconv_head(self,
+                          in_channels: Union[int, Sequence[int]],
+                          out_channels: int,
+                          deconv_type: str = 'heatmap',
+                          deconv_out_channels: OptIntSeq = (256, 256, 256),
+                          deconv_kernel_sizes: OptIntSeq = (4, 4, 4),
+                          deconv_num_groups: OptIntSeq = (16, 16, 16),
+                          conv_out_channels: OptIntSeq = None,
+                          conv_kernel_sizes: OptIntSeq = None,
+                          has_final_layer: bool = True,
+                          input_transform: str = 'select',
+                          input_index: Union[int, Sequence[int]] = -1,
+                          align_corners: bool = False) -> nn.Module:
+        """Build the deconv sub-head selected by ``deconv_type``.
+
+        Args:
+            in_channels (int | sequence[int]): Input channels of the sub-head
+            deconv_type (str): ``'heatmap'`` builds a ``HeatmapHead``; any
+                other value builds a ``ViPNASHead``
+            deconv_kernel_sizes: Only forwarded to ``HeatmapHead``
+            deconv_num_groups: Only forwarded to ``ViPNASHead``
+            (others): Forwarded unchanged to the selected head
+
+        Returns:
+            nn.Module: The head built through the ``MODELS`` registry
+        """
+        # options common to both sub-head types; the ``in_channels`` argument
+        # is used for either type
+        cfg = dict(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            deconv_out_channels=deconv_out_channels,
+            conv_out_channels=conv_out_channels,
+            conv_kernel_sizes=conv_kernel_sizes,
+            has_final_layer=has_final_layer,
+            input_transform=input_transform,
+            input_index=input_index,
+            align_corners=align_corners)
+        if deconv_type == 'heatmap':
+            cfg.update(
+                type='HeatmapHead', deconv_kernel_sizes=deconv_kernel_sizes)
+        else:
+            cfg.update(type='ViPNASHead', deconv_num_groups=deconv_num_groups)
+        return MODELS.build(cfg)
+
def forward(self, feats: Tuple[Tensor]) -> Tuple[Tensor, Tensor]:
"""Forward the network. The input is multi scale feature maps and the
output is the heatmap.
@@ -184,12 +247,12 @@ def forward(self, feats: Tuple[Tensor]) -> Tuple[Tensor, Tensor]:
pred_x (Tensor): 1d representation of x.
pred_y (Tensor): 1d representation of y.
"""
- if self.simplebaseline_head is None:
+ if self.deconv_head is None:
feats = self._transform_inputs(feats)
if self.final_layer is not None:
feats = self.final_layer(feats)
else:
- feats = self.simplebaseline_head(feats)
+ feats = self.deconv_head(feats)
# flatten the output heatmap
x = torch.flatten(feats, 2)
diff --git a/mmpose/models/heads/regression_heads/dsnt_head.py b/mmpose/models/heads/regression_heads/dsnt_head.py
index e938da17fc..3db11986bc 100644
--- a/mmpose/models/heads/regression_heads/dsnt_head.py
+++ b/mmpose/models/heads/regression_heads/dsnt_head.py
@@ -30,7 +30,7 @@ class DSNTHead(IntegralRegressionHead):
in_featuremap_size (int | sequence[int]): Size of input feature map
num_joints (int): Number of joints
lambda_t (int): Discard heatmap-based loss when current
- epoch > lambda_t
+ epoch > lambda_t. Defaults to -1.
debias (bool): Whether to remove the bias of Integral Pose Regression.
see `Removing the Bias of Integral Pose Regression`_ by Gu et al
(2021). Defaults to ``False``.
diff --git a/mmpose/models/losses/classification_loss.py b/mmpose/models/losses/classification_loss.py
index 3fe3a2f26e..6e4a07f014 100644
--- a/mmpose/models/losses/classification_loss.py
+++ b/mmpose/models/losses/classification_loss.py
@@ -8,7 +8,13 @@
@MODELS.register_module()
class BCELoss(nn.Module):
- """Binary Cross Entropy loss."""
+ """Binary Cross Entropy loss.
+
+ Args:
+ use_target_weight (bool): Option to use weighted loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
def __init__(self, use_target_weight=False, loss_weight=1.):
super().__init__()
@@ -52,6 +58,7 @@ class JSDiscretLoss(nn.Module):
Args:
use_target_weight (bool): Option to use weighted loss.
Different joint types may have different target weights.
+ size_average (bool): Option to average the loss by the batch_size.
"""
def __init__(
diff --git a/mmpose/models/losses/loss_wrappers.py b/mmpose/models/losses/loss_wrappers.py
index fb33b9bcab..f374c0da71 100644
--- a/mmpose/models/losses/loss_wrappers.py
+++ b/mmpose/models/losses/loss_wrappers.py
@@ -24,8 +24,6 @@ def __init__(self, losses: list):
self.loss_modules = nn.ModuleList(loss_modules)
def forward(self, input_list, target_list, keypoint_weights=None):
- assert isinstance(input_list, list), ''
- assert isinstance(target_list, list), ''
assert len(input_list) == len(target_list), ''
losses = []
diff --git a/tests/test_codecs/test_simcc_label.py b/tests/test_codecs/test_simcc_label.py
index 77742fd776..98f02cc102 100644
--- a/tests/test_codecs/test_simcc_label.py
+++ b/tests/test_codecs/test_simcc_label.py
@@ -29,7 +29,7 @@ def setUp(self) -> None:
smoothing_type='standard',
sigma=5.0,
simcc_split_ratio=3.0,
- label_smoothing=0.1),
+ label_smooth_weight=0.1),
),
(
'simcc one-hot',
@@ -112,26 +112,27 @@ def test_errors(self):
'got invalid `smoothing_type`'):
_ = KEYPOINT_CODECS.build(cfg)
- # invalid label_smoothing in smoothing
+ # invalid label_smooth_weight in smoothing
cfg = dict(
type='SimCCLabel',
input_size=(192, 256),
smoothing_type='standard',
sigma=1.0,
simcc_split_ratio=2.0,
- label_smoothing=1.1)
+ label_smooth_weight=1.1)
- with self.assertRaisesRegex(ValueError, '`label_smoothing` should be'):
+ with self.assertRaisesRegex(ValueError,
+ '`label_smooth_weight` should be'):
_ = KEYPOINT_CODECS.build(cfg)
- # invalid label_smoothing for gaussian
+ # invalid label_smooth_weight for gaussian
cfg = dict(
type='SimCCLabel',
input_size=(192, 256),
smoothing_type='gaussian',
sigma=1.0,
simcc_split_ratio=2.0,
- label_smoothing=0.1)
+ label_smooth_weight=0.1)
with self.assertRaisesRegex(ValueError,
'is only used for `standard` mode.'):
diff --git a/tests/test_models/test_heads/test_heatmap_heads/test_simcc_head.py b/tests/test_models/test_heads/test_heatmap_heads/test_simcc_head.py
index bf67e70795..20af073e3f 100644
--- a/tests/test_models/test_heads/test_heatmap_heads/test_simcc_head.py
+++ b/tests/test_models/test_heads/test_heatmap_heads/test_simcc_head.py
@@ -64,7 +64,7 @@ def test_init(self):
smoothing_type='standard',
sigma=6.,
simcc_split_ratio=3.0,
- label_smoothing=0.1))
+ label_smooth_weight=0.1))
self.assertIsNotNone(head.decoder)
# w/ one-hot decoder
@@ -102,7 +102,7 @@ def test_predict(self):
smoothing_type='standard',
sigma=2.,
simcc_split_ratio=2.0,
- label_smoothing=0.1)
+ label_smooth_weight=0.1)
for decoder_cfg in [decoder_cfg1, decoder_cfg2, decoder_cfg3]:
# input transform: select
@@ -223,7 +223,7 @@ def test_loss(self):
smoothing_type='standard',
sigma=2.,
simcc_split_ratio=2.0,
- label_smoothing=0.1)
+ label_smooth_weight=0.1)
# decoder
for decoder_cfg in [decoder_cfg1, decoder_cfg2, decoder_cfg3]: