From 05c58e27c61fb7dd7a8a9b2ebdc2ab7c5bf03c17 Mon Sep 17 00:00:00 2001 From: Tau <674106399@qq.com> Date: Mon, 26 Sep 2022 17:00:55 +0800 Subject: [PATCH] [Docs] Refine docs (#1656) --- .../coco/ipr_res50_8xb64-210e_coco-256x256.py | 8 +- ...pr_res50_debias-8xb64-210e_coco-256x256.py | 8 +- .../ipr_res50_dsnt-8xb64-210e_coco-256x256.py | 8 +- .../coco/resnet_debias_coco.md | 6 +- .../coco/resnet_dsnt_coco.md | 8 +- .../coco/resnet_ipr_coco.md | 15 +- .../simcc/coco/mobilenetv2_coco.md | 55 +++ .../simcc/coco/resnet_coco.md | 7 +- ...netv2_wo-deconv-8xb64-210e_coco-256x192.py | 127 ++++++ ...=> simcc_res50_8xb32-140e_coco-384x288.py} | 16 +- .../simcc_res50_8xb64-210e_coco-256x192.py | 117 +++++ ...mcc_vipnas-mbv3_8xb64-210e_coco-256x192.py | 122 ++++++ .../simcc/coco/vipnas_coco.md | 54 +++ ...res50_wo-deconv-8xb64-210e_mpii-256x256.py | 123 ++++++ .../coco/mobilenetv2_rle_coco.md | 74 ++++ .../topdown_regression/coco/resnet_coco.md | 2 +- .../coco/resnet_rle_coco.md | 4 +- ..._rle-pretrained-8xb64-210e_coco-256x192.py | 128 ++++++ .../td-reg_res101_8xb64-210e_coco-256x192.py | 8 +- ...-reg_res101_rle-8xb64-210e_coco-256x192.py | 8 +- .../td-reg_res152_8xb64-210e_coco-256x192.py | 8 +- ...-reg_res152_rle-8xb64-210e_coco-256x192.py | 8 +- ...-reg_res152_rle-8xb64-210e_coco-384x288.py | 8 +- .../td-reg_res50_8xb64-210e_coco-256x192.py | 8 +- ...d-reg_res50_rle-8xb64-210e_coco-256x192.py | 8 +- ..._rle-pretrained-8xb64-210e_coco-256x192.py | 8 +- .../td-reg_res101_8xb64-210e_mpii-256x256.py | 12 +- .../td-reg_res152_8xb64-210e_mpii-256x256.py | 12 +- .../td-reg_res50_8xb64-210e_mpii-256x256.py | 12 +- ...d-reg_res50_rle-8xb64-210e_mpii-256x256.py | 12 +- docs/en/migration.md | 18 +- docs/en/overview.md | 2 +- docs/en/user_guides/configs.md | 20 + docs/src/papers/algorithms/dsnt.md | 2 +- docs/src/papers/algorithms/ipr.md | 11 +- docs/zh_cn/migration.md | 14 +- docs/zh_cn/overview.md | 6 +- docs/zh_cn/user_guides/configs.md | 20 + mmpose/codecs/simcc_label.py | 44 +- 
mmpose/models/heads/heatmap_heads/mix_head.py | 408 ++++++++++++++++++ .../models/heads/heatmap_heads/simcc_head.py | 73 +++- .../heads/regression_heads/dsnt_head.py | 2 +- mmpose/models/losses/classification_loss.py | 9 +- mmpose/models/losses/loss_wrappers.py | 2 - tests/test_codecs/test_simcc_label.py | 13 +- .../test_heatmap_heads/test_simcc_head.py | 6 +- 46 files changed, 1496 insertions(+), 148 deletions(-) create mode 100644 configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md create mode 100644 configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py rename configs/body_2d_keypoint/simcc/coco/{simcc_res50_8xb64-140e_coco-384x288.py => simcc_res50_8xb32-140e_coco-384x288.py} (93%) create mode 100644 configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py create mode 100644 configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py create mode 100644 configs/body_2d_keypoint/simcc/coco/vipnas_coco.md create mode 100644 configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py create mode 100644 configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md create mode 100644 configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py create mode 100644 mmpose/models/heads/heatmap_heads/mix_head.py diff --git a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py index 995dcc1243..3fd5b06d88 100644 --- a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py +++ b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on 
the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict( type='IntegralRegressionLabel', @@ -132,6 +129,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py index c18dab391a..78f7c34f60 100644 --- a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py +++ b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict( type='IntegralRegressionLabel', @@ -131,6 +128,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py index 247fae2388..e7b300f206 100644 --- a/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py +++ b/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + 
end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict( type='IntegralRegressionLabel', @@ -128,6 +125,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md b/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md index 847227c6f3..0820fdd296 100644 --- a/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md +++ b/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md @@ -52,6 +52,6 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset -| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | -| :------------------------------------------------------------------------------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :--------: | :-------: | -| [debias-ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias--8xb64-210e_coco-256x256.py) | 256x256 | 0.633 | 0.860 | 0.703 | 0.730 | 0.919 | [ckpt](<>) | [log](<>) | +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [debias-ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py) | 256x256 | 0.675 | 0.872 | 0.740 | 0.765 | 0.928 | 
[ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.log.json) | diff --git a/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md b/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md index a9791240cb..2e79338b99 100644 --- a/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md +++ b/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md @@ -1,7 +1,7 @@
-DSNT (ECCV'2018) +DSNT (2018) ```bibtex @article{nibali2018numerical, @@ -51,6 +51,6 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset -| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | -| :----------------------------------------------------------------------------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :--------: | :-------: | -| [ipr_resnet_50_dsnt](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py) | 256x256 | 0.674 | 0.870 | 0.744 | 0.764 | 0.928 | [ckpt](<>) | [log](<>) | +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [ipr_resnet_50_dsnt](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py) | 256x256 | 0.674 | 0.870 | 0.744 | 0.764 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.log.json) | diff --git a/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md b/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md index c6cc2dcbfc..ce4fbae501 100644 --- a/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md +++ b/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md @@ -4,10 +4,11 @@ IPR (ECCV'2018) ```bibtex -@article{sun2018integral, - title={An Integral Pose Regression System for the ECCV2018 PoseTrack Challenge}, - author={Sun, Xiao and Li, Chuankang and Lin, Stephen}, - 
journal={arXiv preprint arXiv:1809.06079}, +@inproceedings{sun2018integral, + title={Integral human pose regression}, + author={Sun, Xiao and Xiao, Bin and Wei, Fangyin and Liang, Shuang and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={529--545}, year={2018} } ``` @@ -51,6 +52,6 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset -| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | -| :------------------------------------------------------------------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :--------: | :-------: | -| [ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py) | 256x256 | 0.633 | 0.860 | 0.703 | 0.730 | 0.919 | [ckpt](<>) | [log](<>) | +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py) | 256x256 | 0.633 | 0.860 | 0.703 | 0.730 | 0.919 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.log.json) | diff --git a/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md b/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md new file mode 100644 index 0000000000..fa4d5e0d39 --- /dev/null +++ b/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md @@ -0,0 +1,55 @@ + + +
+SimCC (ECCV'2022) + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2107.03332, + title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation}, + author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao}, + year={2021} +} +``` + +
+ + + +
+MobilenetV2 (CVPR'2018) + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +
+ + + +
+COCO (ECCV'2014) + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +
+ +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [simcc_mobilenetv2_wo_deconv](/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py) | 256x192 | 0.620 | 0.855 | 0.697 | 0.678 | 0.902 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-e0cc028d_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-e0cc028d_20220922.log.json) | diff --git a/configs/body_2d_keypoint/simcc/coco/resnet_coco.md b/configs/body_2d_keypoint/simcc/coco/resnet_coco.md index 35f594a054..d6a60da064 100644 --- a/configs/body_2d_keypoint/simcc/coco/resnet_coco.md +++ b/configs/body_2d_keypoint/simcc/coco/resnet_coco.md @@ -50,6 +50,7 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset -| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | -| :--------------------------------------------------------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :--------: | :-------: | -| [simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-140e_coco-384x288.py) | 384x288 | 0.735 | 0.899 | 0.800 | 0.790 | 0.939 | [ckpt](<>) | [log](<>) | +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| 
[simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.721 | 0.900 | 0.798 | 0.781 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.log.json) | +| [simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py) | 384x288 | 0.735 | 0.899 | 0.800 | 0.790 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.log.json) | diff --git a/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000..0999c99516 --- /dev/null +++ b/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py @@ -0,0 +1,127 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0) + +# model settings +model = dict( + type='TopdownPoseEstimator', + 
data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict( + type='Pretrained', + checkpoint='mmcls://mobilenet_v2', + )), + head=dict( + type='SimCCHead', + in_channels=1280, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + deconv_out_channels=None, + loss=dict(type='KLDiscretLoss', use_target_weight=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +file_client_args = dict(backend='disk') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict( + type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + 
data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-140e_coco-384x288.py b/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py similarity index 93% rename from configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-140e_coco-384x288.py rename to configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py index 9ef61b2660..a3446fef1b 100644 --- a/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-140e_coco-384x288.py +++ b/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py @@ -17,17 +17,14 @@ dict( type='MultiStepLR', begin=0, - end=140, + end=train_cfg['max_epochs'], milestones=[90, 120], gamma=0.1, by_epoch=True) ] # automatically scaling LR based on the actual training batch size -auto_scale_lr = dict(base_batch_size=128) - -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) +auto_scale_lr = dict(base_batch_size=512) # codec settings codec = dict( @@ -70,12 +67,10 @@ dict(type='GetBBoxCenterScale'), dict(type='RandomFlip', direction='horizontal'), dict(type='RandomHalfBody'), - dict( - type='RandomBBoxTransform', scale_factor=(0.7, 1.3), rotate_factor=80), + dict(type='RandomBBoxTransform'), dict(type='TopdownAffine', input_size=codec['input_size']), dict( type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec), - # simcc needs transformed keypoints to calculate the training accuracy 
dict(type='PackPoseInputs') ] test_pipeline = [ @@ -87,7 +82,7 @@ # data loaders train_dataloader = dict( - batch_size=64, + batch_size=32, num_workers=2, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), @@ -118,6 +113,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000..51555d601f --- /dev/null +++ b/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict(type='MultiStepLR', milestones=[170, 200], gamma=0.1, by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='SimCCHead', + in_channels=2048, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + loss=dict(type='KLDiscretLoss', use_target_weight=True), + decoder=codec), + 
test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +file_client_args = dict(backend='disk') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict( + type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec), + dict(type='PackPoseInputs') +] +test_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000..1b24ac23b2 --- /dev/null +++ b/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ViPNAS_MobileNetV3'), + head=dict( + type='SimCCHead', + in_channels=160, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + deconv_type='vipnas', + deconv_out_channels=(160, 160, 160), + deconv_num_groups=(160, 160, 160), + loss=dict(type='KLDiscretLoss', use_target_weight=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +file_client_args = dict(backend='disk') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), 
+ dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict( + type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=data_root + 'person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md b/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md new file mode 100644 index 0000000000..a9d8b98fc3 --- /dev/null +++ b/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md @@ -0,0 +1,54 @@ + + +
+SimCC (ECCV'2022) + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2107.03332, + title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation}, + author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao}, + year={2021} +} +``` + +
+ + + +
+ViPNAS (CVPR'2021) + +```bibtex +@inproceedings{xu2021vipnas, + title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search}, + author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + year={2021} +} +``` + +
+ + + +
+COCO (ECCV'2014) + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +
+ +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [simcc_S-ViPNAS-MobileNetV3](/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py) | 256x192 | 0.695 | 0.883 | 0.772 | 0.755 | 0.927 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.log.json) | diff --git a/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000..d09d160764 --- /dev/null +++ b/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py @@ -0,0 +1,123 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', input_size=(256, 256), sigma=6.0, simcc_split_ratio=2.0) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + 
type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='SimCCHead', + in_channels=2048, + out_channels=16, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + deconv_out_channels=None, + loss=dict(type='KLDiscretLoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +file_client_args = dict(backend='disk') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict( + type='GenerateTarget', target_type='keypoint_xy_label', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + 
ann_file='annotations/mpii_val.json', + headbox_file=f'{data_root}/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater')) + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy', norm_item='head') +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md b/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md new file mode 100644 index 0000000000..eddf5a79d3 --- /dev/null +++ b/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md @@ -0,0 +1,74 @@ + + +
+DeepPose (CVPR'2014) + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +
+ + + +
+RLE (ICCV'2021) + +```bibtex +@inproceedings{li2021human, + title={Human pose regression with residual log-likelihood estimation}, + author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={11025--11034}, + year={2021} +} +``` + +
+ + + +
+MobilenetV2 (CVPR'2018) + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +
+ + + +
+COCO (ECCV'2014) + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +
+ +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [deeppose_mobilenetv2_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.593 | 0.836 | 0.660 | 0.644 | 0.877 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.log.json) | diff --git a/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md b/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md index f64971b21d..7ae0023cdb 100644 --- a/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md +++ b/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md @@ -54,6 +54,6 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 da | Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | | :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | -| [deeppose_resnet_50](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.528 | 0.817 | 0.589 | 0.639 | 0.888 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_coco_256x192-f6de6c0e_20210205.pth) | 
[log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_coco_256x192_20210205.log.json) | +| [deeppose_resnet_50](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.528 | 0.817 | 0.589 | 0.639 | 0.888 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.log.json) | | [deeppose_resnet_101](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py) | 256x192 | 0.562 | 0.831 | 0.629 | 0.67 | 0.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192-2f247111_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_20210205.log.json) | | [deeppose_resnet_152](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py) | 256x192 | 0.584 | 0.842 | 0.659 | 0.688 | 0.907 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192-7df89a88_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_20210205.log.json) | diff --git a/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md b/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md index ed531b259f..3f6f2796a9 100644 --- a/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md +++ b/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md @@ -71,8 +71,8 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 da | Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | | :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | 
:-------------: | :-------------------------------------------: | :-------------------------------------------: | -| [deeppose_resnet_50_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.706 | 0.888 | 0.776 | 0.753 | 0.924 | [ckpt](<>) | [log](<>) | -| [deeppose_resnet_50_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.719 | 0.891 | 0.788 | 0.764 | 0.925 | [ckpt](<>) | [log](<>) | +| [deeppose_resnet_50_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.706 | 0.888 | 0.776 | 0.753 | 0.924 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.log.json) | +| [deeppose_resnet_50_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.719 | 0.891 | 0.788 | 0.764 | 0.925 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.log.json) | | [deeppose_resnet_101_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.722 | 0.894 | 0.794 | 0.768 | 0.93 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle-16c3d461_20220615.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle_20220615.log.json) | | 
[deeppose_resnet_152_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.731 | 0.897 | 0.805 | 0.777 | 0.933 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle-c05bdccf_20220615.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle_20220615.log.json) | | [deeppose_resnet_152_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py) | 384x288 | 0.749 | 0.901 | 0.815 | 0.793 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle-b77c4c37_20220624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle_20220624.log.json) | diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000..6d776875c2 --- /dev/null +++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py @@ -0,0 +1,128 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(192, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + 
data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/top_down/' + 'mobilenetv2/mobilenetv2_coco_256x192-d1e58e7b_20200727.pth')), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RLEHead', + in_channels=1280, + num_joints=17, + loss=dict(type='RLELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + ), +) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +file_client_args = dict(backend='disk') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', target_type='keypoint_label', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + 
type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json', + score_mode='bbox_rle') +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py index dfc549d5e6..03ddc5cdb4 100644 --- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py +++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(192, 256)) @@ -113,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py index 7120de50bc..c3e065ec5f 100644 --- 
a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py +++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(192, 256)) @@ -113,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py index a1e2957f07..7d0cbe906b 100644 --- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py +++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(192, 256)) @@ -113,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py 
b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py index 9865964366..7d7a816583 100644 --- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py +++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(192, 256)) @@ -113,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py index 33072cd044..d0b183dc33 100644 --- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py +++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(288, 384)) @@ -113,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators 
val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py index 96e1f1c000..a365acff1d 100644 --- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py +++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(192, 256)) @@ -113,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py index 9aee092d54..851afb38cd 100644 --- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py +++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(192, 256)) @@ -113,6 +110,9 @@ )) 
test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py index 7bd35b9ccb..4a24f2aaaf 100644 --- a/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py +++ b/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py @@ -17,7 +17,7 @@ dict( type='MultiStepLR', begin=0, - end=210, + end=train_cfg['max_epochs'], milestones=[170, 200], gamma=0.1, by_epoch=True) @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(192, 256)) @@ -119,6 +116,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + # evaluators val_evaluator = dict( type='CocoMetric', diff --git a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py index 763690be31..00dcc6f4d2 100644 --- a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py +++ b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(256, 256)) 
@@ -62,9 +59,11 @@ data_mode = 'topdown' data_root = 'data/mpii/' +file_client_args = dict(backend='disk') + # pipelines train_pipeline = [ - dict(type='LoadImage', file_client_args={{_base_.file_client_args}}), + dict(type='LoadImage', file_client_args=file_client_args), dict(type='GetBBoxCenterScale'), dict(type='RandomFlip', direction='horizontal'), dict(type='RandomBBoxTransform', shift_prob=0), @@ -73,7 +72,7 @@ dict(type='PackPoseInputs') ] val_pipeline = [ - dict(type='LoadImage', file_client_args={{_base_.file_client_args}}), + dict(type='LoadImage', file_client_args=file_client_args), dict(type='GetBBoxCenterScale'), dict(type='TopdownAffine', input_size=codec['input_size']), dict(type='PackPoseInputs') @@ -111,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater')) + # evaluators val_evaluator = dict(type='MpiiPCKAccuracy', norm_item='head') test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py index d28444fb64..779ae37a84 100644 --- a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py +++ b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(256, 256)) @@ -62,9 +59,11 @@ data_mode = 'topdown' data_root = 'data/mpii/' +file_client_args = dict(backend='disk') + # pipelines train_pipeline = [ - dict(type='LoadImage', file_client_args={{_base_.file_client_args}}), + dict(type='LoadImage', file_client_args=file_client_args), dict(type='GetBBoxCenterScale'), 
dict(type='RandomFlip', direction='horizontal'), dict(type='RandomBBoxTransform', shift_prob=0), @@ -73,7 +72,7 @@ dict(type='PackPoseInputs') ] val_pipeline = [ - dict(type='LoadImage', file_client_args={{_base_.file_client_args}}), + dict(type='LoadImage', file_client_args=file_client_args), dict(type='GetBBoxCenterScale'), dict(type='TopdownAffine', input_size=codec['input_size']), dict(type='PackPoseInputs') @@ -111,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater')) + # evaluators val_evaluator = dict(type='MpiiPCKAccuracy', norm_item='head') test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py index 33b062f509..87b03ac79e 100644 --- a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py +++ b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(256, 256)) @@ -62,9 +59,11 @@ data_mode = 'topdown' data_root = 'data/mpii/' +file_client_args = dict(backend='disk') + # pipelines train_pipeline = [ - dict(type='LoadImage', file_client_args={{_base_.file_client_args}}), + dict(type='LoadImage', file_client_args=file_client_args), dict(type='GetBBoxCenterScale'), dict(type='RandomFlip', direction='horizontal'), dict(type='RandomBBoxTransform', shift_prob=0), @@ -73,7 +72,7 @@ dict(type='PackPoseInputs') ] val_pipeline = [ - dict(type='LoadImage', file_client_args={{_base_.file_client_args}}), + dict(type='LoadImage', file_client_args=file_client_args), 
dict(type='GetBBoxCenterScale'), dict(type='TopdownAffine', input_size=codec['input_size']), dict(type='PackPoseInputs') @@ -111,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater')) + # evaluators val_evaluator = dict(type='MpiiPCKAccuracy', norm_item='head') test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py index b8e3a87d39..1a62e710b1 100644 --- a/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py +++ b/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py @@ -26,9 +26,6 @@ # automatically scaling LR based on the actual training batch size auto_scale_lr = dict(base_batch_size=512) -# hooks -default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater')) - # codec settings codec = dict(type='RegressionLabel', input_size=(256, 256)) @@ -62,9 +59,11 @@ data_mode = 'topdown' data_root = 'data/mpii/' +file_client_args = dict(backend='disk') + # pipelines train_pipeline = [ - dict(type='LoadImage', file_client_args={{_base_.file_client_args}}), + dict(type='LoadImage', file_client_args=file_client_args), dict(type='GetBBoxCenterScale'), dict(type='RandomFlip', direction='horizontal'), dict(type='RandomBBoxTransform', shift_prob=0), @@ -73,7 +72,7 @@ dict(type='PackPoseInputs') ] val_pipeline = [ - dict(type='LoadImage', file_client_args={{_base_.file_client_args}}), + dict(type='LoadImage', file_client_args=file_client_args), dict(type='GetBBoxCenterScale'), dict(type='TopdownAffine', input_size=codec['input_size']), dict(type='PackPoseInputs') @@ -111,6 +110,9 @@ )) test_dataloader = val_dataloader +# hooks +default_hooks = dict(checkpoint=dict(save_best='pck/PCKh', rule='greater')) + # evaluators val_evaluator = 
dict(type='MpiiPCKAccuracy', norm_item='head') test_evaluator = val_evaluator diff --git a/docs/en/migration.md b/docs/en/migration.md index 3b94754101..e639f031cd 100644 --- a/docs/en/migration.md +++ b/docs/en/migration.md @@ -2,7 +2,7 @@ MMPose 1.0 has made significant BC-breaking changes, with modules redesigned and reorganized to reduce code redundancy and improve efficiency. For developers who have some deep-learning knowledge, this tutorial provides a migration guide. -Whether you are **a user of the previous version of MMPose**, or **a new user wishing to migrate your Pytorch project to MMPose**, you can learn how to build a project based on MMpose 1.0 with this tutorial. +Whether you are **a user of the previous version of MMPose**, or **a new user wishing to migrate your Pytorch project to MMPose**, you can learn how to build a project based on MMPose 1.0 with this tutorial. ```{note} This tutorial covers what developers will concern when using MMPose 1.0: @@ -123,9 +123,9 @@ dataset_info = dict( ### Dataset -To use costom dataset in MMPose, we recommend converting the annotations into a supported format (e.g. COCO or MPII) and directly using our implementation of the corresponding dataset. If this is not applicable, you may need to implement your own dataset class. +To use custom dataset in MMPose, we recommend converting the annotations into a supported format (e.g. COCO or MPII) and directly using our implementation of the corresponding dataset. If this is not applicable, you may need to implement your own dataset class. -Most 2D keypoint datasets in MMPose **organize the annotations in a COCO-like style**. Thus we provide a base class [BaseCocoStyleDataset](mmpose/datasets/datasets/base/base_coco_style_dataset.py) for these datasets. We recommend that users subclass `BaseCocoStyleDataset` and override the methods as needed (usually `__init__()` and `_load_annotations()`) to extend to a new costom 2D keypoint dataset. 
+Most 2D keypoint datasets in MMPose **organize the annotations in a COCO-like style**. Thus we provide a base class [BaseCocoStyleDataset](mmpose/datasets/datasets/base/base_coco_style_dataset.py) for these datasets. We recommend that users subclass `BaseCocoStyleDataset` and override the methods as needed (usually `__init__()` and `_load_annotations()`) to extend to a new custom 2D keypoint dataset. ```{note} Please refer to [COCO](./dataset_zoo/2d_body_keypoint.md) for more details about the COCO data format. @@ -305,6 +305,14 @@ In MMPose, we collect Encoding and Decoding processes into a **Codec**, in which Currently we support the following types of Targets. +- `heatmap`: Gaussian heatmaps +- `keypoint_label`: keypoint representation (e.g. normalized coordinates) +- `keypoint_xy_label`: axis-wise keypoint representation +- `heatmap+keypoint_label`: Gaussian heatmaps and keypoint representation +- `multiscale_heatmap`: multi-scale Gaussian heatmaps + +The generated targets will be packed as follows: + - `heatmaps`: Gaussian heatmaps - `keypoint_labels`: keypoint representation (e.g.
normalized coordinates) - `keypoint_x_labels`: keypoint x-axis representation @@ -345,8 +353,8 @@ The following is an example of the implementation of `PoseDataSample` under the def get_pose_data_sample(self): # meta pose_meta = dict( - img_shape=(600, 900), # [h, w, c] - crop_size=(256, 192), # [h, w] + img_shape=(600, 900), # [h, w, c] + crop_size=(256, 192), # [h, w] heatmap_size=(64, 48), # [h, w] ) diff --git a/docs/en/overview.md b/docs/en/overview.md index 3733d1637c..fbeaac13f5 100644 --- a/docs/en/overview.md +++ b/docs/en/overview.md @@ -4,7 +4,7 @@ This chapter will introduce you to the overall framework of MMPose and provide l ## What is MMPose -![image](https://user-images.githubusercontent.com/26127467/190981395-5ecf0146-f8a7-482f-a87f-b0c64dabf7cb.jpg) +![overview](https://user-images.githubusercontent.com/13503330/191004511-508d3ec6-9ead-4c52-a522-4d9aa1f26027.png) MMPose is a Pytorch-based pose estimation open-source toolkit, a member of the [OpenMMLab Project](https://github.com/open-mmlab). It contains a rich set of algorithms for 2d multi-person human pose estimation, 2d hand pose estimation, 2d face landmark detection, 133 keypoint whole-body human pose estimation, fashion landmark detection and animal pose estimation as well as related components and modules, below is its overall framework. diff --git a/docs/en/user_guides/configs.md b/docs/en/user_guides/configs.md index 8504d4a182..be73312e20 100644 --- a/docs/en/user_guides/configs.md +++ b/docs/en/user_guides/configs.md @@ -128,6 +128,19 @@ General configuration is stored alone in the `$MMPOSE/configs/_base_`, and inher _base_ = ['../../../_base_/default_runtime.py'] # take the config file as the starting point of the relative path ``` +```{note} +**Tips** + +CheckpointHook: + +- save_best: `'coco/AP'` for `CocoMetric`, `'pck/PCK@0.05'` for `PCKAccuracy` +- max_keep_ckpts: the maximum number of checkpoints to keep. Defaults to -1, which means unlimited.
+ +Example: + +`default_hooks = dict(checkpoint=dict(save_best='pck/PCK@0.05', rule='greater', max_keep_ckpts=1))` +``` + ### Data Data configuration refers to the data processing related settings, mainly including: @@ -204,6 +217,13 @@ val_dataloader = dict( test_dataloader = val_dataloader # use val as test by default ``` +```{note} +**Tips** + +You can set the random seed by doing: `randomness=dict(seed=0)` + +``` + ### Training Training configuration refers to the training related settings including: diff --git a/docs/src/papers/algorithms/dsnt.md b/docs/src/papers/algorithms/dsnt.md index 979fa7df80..6a526429d6 100644 --- a/docs/src/papers/algorithms/dsnt.md +++ b/docs/src/papers/algorithms/dsnt.md @@ -3,7 +3,7 @@
-DSNT (ECCV'2018) +DSNT (2018) ```bibtex @article{nibali2018numerical, diff --git a/docs/src/papers/algorithms/ipr.md b/docs/src/papers/algorithms/ipr.md index 52a933bdb3..fca06b986a 100644 --- a/docs/src/papers/algorithms/ipr.md +++ b/docs/src/papers/algorithms/ipr.md @@ -1,4 +1,4 @@ -# DeepPose: Human pose estimation via deep neural networks +# Integral Human Pose Regression @@ -6,10 +6,11 @@ IPR (ECCV'2018) ```bibtex -@article{sun2018integral, - title={An Integral Pose Regression System for the ECCV2018 PoseTrack Challenge}, - author={Sun, Xiao and Li, Chuankang and Lin, Stephen}, - journal={arXiv preprint arXiv:1809.06079}, +@inproceedings{sun2018integral, + title={Integral human pose regression}, + author={Sun, Xiao and Xiao, Bin and Wei, Fangyin and Liang, Shuang and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={529--545}, year={2018} } ``` diff --git a/docs/zh_cn/migration.md b/docs/zh_cn/migration.md index 004e5971f3..188c5a4af5 100644 --- a/docs/zh_cn/migration.md +++ b/docs/zh_cn/migration.md @@ -302,7 +302,19 @@ test_pipeline = [ 在 MMPose 中,我们将编码和解码过程集合成一个编解码器(Codec),在其中实现 `encode()` 和 `decode()`。 -目前 MMPose 支持以下类型的监督目标: +目前 MMPose 支持生成以下类型的监督目标: + +- `heatmap`: 高斯热图 + +- `keypoint_label`: 关键点标签(如归一化的坐标值) + +- `keypoint_xy_label`: 单个坐标轴关键点标签 + +- `heatmap+keypoint_label`: 同时生成高斯热图和关键点标签 + +- `multiscale_heatmap`: 多尺度高斯热图 + +生成的监督目标会按以下关键字进行封装: - `heatmaps`:高斯热图 diff --git a/docs/zh_cn/overview.md b/docs/zh_cn/overview.md index 4c8bf1d6d2..edfc9e7247 100644 --- a/docs/zh_cn/overview.md +++ b/docs/zh_cn/overview.md @@ -4,11 +4,11 @@ ## 什么是 MMPose -![image](https://user-images.githubusercontent.com/15977946/188659200-e5694ca7-28ff-43e5-ae33-acc1fdff7420.jpg) +![overview](https://user-images.githubusercontent.com/13503330/191004511-508d3ec6-9ead-4c52-a522-4d9aa1f26027.png) MMPose 是一款基于 Pytorch 的姿态估计开源工具箱,是 OpenMMLab 项目的成员之一,包含了丰富的 2D 多人姿态估计、2D 手部姿态估计、2D 
人脸关键点检测、133关键点全身人体姿态估计、动物关键点检测、服饰关键点检测等算法以及相关的组件和模块,下面是它的整体框架: -MMPose 由 7 个主要部分组成,apis、structures、datasets、codecs、models、engine、评估和可视化。 +MMPose 由 **8** 个主要部分组成,apis、structures、datasets、codecs、models、engine、evaluation 和 visualization。 - **apis** 提供用于模型推理的高级 API @@ -18,8 +18,6 @@ MMPose 由 7 个主要部分组成,apis、structures、datasets、codecs、mod - **transforms** 包含各种数据增强变换 -- **codecs** 提供训练目标生成与模型输出解码所需的编码器和解码器 - - **codecs** 提供姿态编解码器:编码器用于将姿态信息(通常为关键点坐标)编码为模型学习目标(如热力图),解码器则用于将模型输出解码为姿态估计结果 - **models** 以模块化结构提供了姿态估计模型的各类组件 diff --git a/docs/zh_cn/user_guides/configs.md b/docs/zh_cn/user_guides/configs.md index da7b448d3c..c57a486321 100644 --- a/docs/zh_cn/user_guides/configs.md +++ b/docs/zh_cn/user_guides/configs.md @@ -133,6 +133,19 @@ log_level = 'INFO' # 日志记录等级 _base_ = ['../../../_base_/default_runtime.py'] # 以运行时的config文件位置为相对路径起点 ``` +```{note} +**Tips** + +CheckpointHook: + +- save_best: `'coco/AP'` 用于 `CocoMetric`, `'pck/PCK@0.05'` 用于 `PCKAccuracy` +- max_keep_ckpts: 最大保留ckpt数量,默认为-1,代表不限制 + +样例: + +`default_hooks = dict(checkpoint=dict(save_best='pck/PCK@0.05', rule='greater', max_keep_ckpts=1))` +``` + ### 数据配置 数据配置指数据处理相关的配置,主要包括: @@ -209,6 +222,13 @@ val_dataloader = dict( test_dataloader = val_dataloader # 默认情况下不区分验证集和测试集,用户根据需要来自行定义 ``` +```{note} +**Tips** + +设置随机种子: `randomness=dict(seed=0)` + +``` + ### 训练配置 训练配置指训练策略相关的配置,主要包括: diff --git a/mmpose/codecs/simcc_label.py b/mmpose/codecs/simcc_label.py index fe7f76d641..a02ce99ac3 100644 --- a/mmpose/codecs/simcc_label.py +++ b/mmpose/codecs/simcc_label.py @@ -29,6 +29,8 @@ class SimCCLabel(BaseKeypointCodec): simcc_split_ratio (float): The ratio of the label size to the input size. For example, if the input width is ``w``, the x label size will be :math:`w*simcc_split_ratio`. Defaults to 2.0 + label_smooth_weight (float): Label Smoothing weight. Defaults to 0.0 + normalize (bool): Whether to normalize the heatmaps. Defaults to True. .. 
_`SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation`: https://arxiv.org/abs/2107.03332 @@ -39,14 +41,16 @@ def __init__(self, smoothing_type: str = 'gaussian', sigma: float = 6.0, simcc_split_ratio: float = 2.0, - label_smoothing: float = 0.0) -> None: + label_smooth_weight: float = 0.0, + normalize: bool = True) -> None: super().__init__() self.input_size = input_size self.smoothing_type = smoothing_type self.sigma = sigma self.simcc_split_ratio = simcc_split_ratio - self.label_smoothing = label_smoothing + self.label_smooth_weight = label_smooth_weight + self.normalize = normalize if self.smoothing_type not in {'gaussian', 'standard'}: raise ValueError( @@ -54,13 +58,12 @@ def __init__(self, f'{self.smoothing_type}. Should be one of ' '{"gaussian", "standard"}') - if self.smoothing_type == 'gaussian' and self.label_smoothing > 0.0: - raise ValueError( - 'Attribute `label_smoothing` is only used for `standard` mode.' - ) + if self.smoothing_type == 'gaussian' and self.label_smooth_weight > 0: + raise ValueError('Attribute `label_smooth_weight` is only ' + 'used for `standard` mode.') - if self.label_smoothing < 0.0 or self.label_smoothing > 1.0: - raise ValueError('`label_smoothing` should be in range [0, 1]') + if self.label_smooth_weight < 0.0 or self.label_smooth_weight > 1.0: + raise ValueError('`label_smooth_weight` should be in range [0, 1]') def encode( self, @@ -153,7 +156,7 @@ def _generate_standard( """Encoding keypoints into SimCC labels with Standard Label Smoothing strategy. 
- Labels will be one-hot vectors if self.label_smoothing==0.0 + Labels will be one-hot vectors if self.label_smooth_weight==0.0 """ N, K, _ = keypoints.shape @@ -180,12 +183,12 @@ def _generate_standard( keypoint_weights[n, k] = 0 continue - if self.label_smoothing > 0: - target_x[n, k] = self.label_smoothing / (W - 1) - target_y[n, k] = self.label_smoothing / (H - 1) + if self.label_smooth_weight > 0: + target_x[n, k] = self.label_smooth_weight / (W - 1) + target_y[n, k] = self.label_smooth_weight / (H - 1) - target_x[n, k, mu_x] = 1.0 - self.label_smoothing - target_y[n, k, mu_y] = 1.0 - self.label_smoothing + target_x[n, k, mu_x] = 1.0 - self.label_smooth_weight + target_y[n, k, mu_y] = 1.0 - self.label_smooth_weight return target_x, target_y, keypoint_weights @@ -232,11 +235,12 @@ def _generate_gaussian( mu_x, mu_y = mu - target_x[n, - k] = (np.exp(-((x - mu_x)**2) / (2 * self.sigma**2))) / ( - self.sigma * np.sqrt(np.pi * 2)) - target_y[n, - k] = (np.exp(-((y - mu_y)**2) / (2 * self.sigma**2))) / ( - self.sigma * np.sqrt(np.pi * 2)) + target_x[n, k] = np.exp(-((x - mu_x)**2) / (2 * self.sigma**2)) + target_y[n, k] = np.exp(-((y - mu_y)**2) / (2 * self.sigma**2)) + + if self.normalize: + norm_value = self.sigma * np.sqrt(np.pi * 2) + target_x /= norm_value + target_y /= norm_value return target_x, target_y, keypoint_weights diff --git a/mmpose/models/heads/heatmap_heads/mix_head.py b/mmpose/models/heads/heatmap_heads/mix_head.py new file mode 100644 index 0000000000..911f138551 --- /dev/null +++ b/mmpose/models/heads/heatmap_heads/mix_head.py @@ -0,0 +1,408 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Optional, Sequence, Tuple, Union + +import torch +import torch.nn.functional as F +from mmcv.cnn import build_conv_layer +from torch import Tensor, nn + +from mmpose.evaluation.functional import simcc_pck_accuracy +from mmpose.models.utils.tta import flip_vectors +from mmpose.registry import KEYPOINT_CODECS, MODELS +from mmpose.utils.tensor_utils import to_numpy +from mmpose.utils.typing import (ConfigType, InstanceList, OptConfigType, + OptSampleList) +from ..base_head import BaseHead + +OptIntSeq = Optional[Sequence[int]] + + +@MODELS.register_module() +class MixHead(BaseHead): + """Top-down heatmap head introduced in `SimCC`_ by Li et al (2022). The + head is composed of a few deconvolutional layers followed by a fully- + connected layer to generate 1d representation from low-resolution feature + maps. + + Args: + in_channels (int | sequence[int]): Number of channels in the input + feature map + out_channels (int): Number of channels in the output heatmap + input_size (tuple): Input image size in shape [w, h] + in_featuremap_size (int | sequence[int]): Size of input feature map + simcc_split_ratio (float): Split ratio of pixels + deconv_type (str, optional): The type of deconv head which should + be one of the following options: + + - ``'Heatmap'``: make deconv layers in `HeatmapHead` + - ``'ViPNAS'``: make deconv layers in `ViPNASHead` + + Defaults to ``'Heatmap'`` + deconv_out_channels (sequence[int]): The output channel number of each + deconv layer. Defaults to ``(256, 256, 256)`` + deconv_kernel_sizes (sequence[int | tuple], optional): The kernel size + of each deconv layer. Each element should be either an integer for + both height and width dimensions, or a tuple of two integers for + the height and the width dimension respectively. Defaults to + ``(4, 4, 4)`` + deconv_num_groups (Sequence[int], optional): The group number of each + deconv layer.
Defaults to ``(16, 16, 16)`` + conv_out_channels (sequence[int], optional): The output channel number + of each intermediate conv layer. ``None`` means no intermediate + conv layer between deconv layers and the final conv layer. + Defaults to ``None`` + conv_kernel_sizes (sequence[int | tuple], optional): The kernel size + of each intermediate conv layer. Defaults to ``None`` + input_transform (str): Transformation of input features which should + be one of the following options: + + - ``'resize_concat'``: Resize multiple feature maps specified + by ``input_index`` to the same size as the first one and + concat these feature maps + - ``'select'``: Select feature map(s) specified by + ``input_index``. Multiple selected features will be + bundled into a tuple + + Defaults to ``'select'`` + input_index (int | sequence[int]): The feature map index used in the + input transformation. See also ``input_transform``. Defaults to -1 + align_corners (bool): `align_corners` argument of + :func:`torch.nn.functional.interpolate` used in the input + transformation. Defaults to ``False`` + loss (Config): Config of the keypoint loss. Defaults to use + :class:`KLDiscretLoss` + decoder (Config, optional): The decoder config that controls decoding + keypoint coordinates from the network output. Defaults to ``None`` + init_cfg (Config, optional): Config to control the initialization. See + :attr:`default_init_cfg` for default settings + + .. 
_`SimCC`: https://arxiv.org/abs/2107.03332 + """ + + _version = 2 + + def __init__( + self, + in_channels: Union[int, Sequence[int]], + out_channels: int, + input_size: Tuple[int, int], + in_featuremap_size: Tuple[int, int], + simcc_split_ratio: float = 2.0, + debias: bool = False, + beta: float = 1., + deconv_type: str = 'Heatmap', + deconv_out_channels: OptIntSeq = (256, 256, 256), + deconv_kernel_sizes: OptIntSeq = (4, 4, 4), + deconv_num_groups: OptIntSeq = (16, 16, 16), + conv_out_channels: OptIntSeq = None, + conv_kernel_sizes: OptIntSeq = None, + has_final_layer: bool = True, + input_transform: str = 'select', + input_index: Union[int, Sequence[int]] = -1, + align_corners: bool = False, + loss: ConfigType = dict(type='KLDiscretLoss', use_target_weight=True), + decoder: OptConfigType = None, + init_cfg: OptConfigType = None, + ): + + if init_cfg is None: + init_cfg = self.default_init_cfg + + super().__init__(init_cfg) + + if deconv_type not in {'Heatmap', 'ViPNAS'}: + raise ValueError( + f'{self.__class__.__name__} got invalid `deconv_type` value' + f'{deconv_type}. 
Should be one of ' + '{"Heatmap", "ViPNAS"}') + + self.in_channels = in_channels + self.out_channels = out_channels + self.input_size = input_size + self.in_featuremap_size = in_featuremap_size + self.simcc_split_ratio = simcc_split_ratio + self.align_corners = align_corners + self.input_transform = input_transform + self.input_index = input_index + self.debias = debias + self.beta = beta + self.loss_module = MODELS.build(loss) + if decoder is not None: + self.decoder = KEYPOINT_CODECS.build(decoder) + else: + self.decoder = None + + num_deconv = len(deconv_out_channels) if deconv_out_channels else 0 + if num_deconv != 0: + self.heatmap_size = tuple( + [s * (2**num_deconv) for s in in_featuremap_size]) + + # deconv layers + 1x1 conv + self.deconv_head = self._make_deconv_head( + in_channels=in_channels, + out_channels=out_channels, + deconv_type=deconv_type, + deconv_out_channels=deconv_out_channels, + deconv_kernel_sizes=deconv_kernel_sizes, + deconv_num_groups=deconv_num_groups, + conv_out_channels=conv_out_channels, + conv_kernel_sizes=conv_kernel_sizes, + has_final_layer=has_final_layer, + input_transform=input_transform, + input_index=input_index, + align_corners=align_corners) + + if has_final_layer: + in_channels = out_channels + else: + in_channels = deconv_out_channels[-1] + + else: + in_channels = self._get_in_channels() + self.deconv_head = None + + if has_final_layer: + cfg = dict( + type='Conv2d', + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1) + self.final_layer = build_conv_layer(cfg) + else: + self.final_layer = None + + if self.input_transform == 'resize_concat': + if isinstance(in_featuremap_size, tuple): + self.heatmap_size = in_featuremap_size + elif isinstance(in_featuremap_size, list): + self.heatmap_size = in_featuremap_size[0] + elif self.input_transform == 'select': + if isinstance(in_featuremap_size, tuple): + self.heatmap_size = in_featuremap_size + elif isinstance(in_featuremap_size, list): + self.heatmap_size = 
in_featuremap_size[input_index] + + if isinstance(in_channels, list): + raise ValueError( + f'{self.__class__.__name__} does not support selecting ' + 'multiple input features.') + + # Define SimCC layers + flatten_dims = self.heatmap_size[0] * self.heatmap_size[1] + + W = int(self.input_size[0] * self.simcc_split_ratio) + H = int(self.input_size[1] * self.simcc_split_ratio) + + self.mlp_head_x = nn.Linear(flatten_dims, W) + self.mlp_head_y = nn.Linear(flatten_dims, H) + + self.linspace_x = torch.arange(0.0, 1.0 * W, 1).reshape(1, 1, W) / W + self.linspace_y = torch.arange(0.0, 1.0 * H, 1).reshape(1, 1, H) / H + + self.linspace_x = nn.Parameter(self.linspace_x, requires_grad=False) + self.linspace_y = nn.Parameter(self.linspace_y, requires_grad=False) + + def _make_deconv_head(self, + in_channels: Union[int, Sequence[int]], + out_channels: int, + deconv_type: str = 'Heatmap', + deconv_out_channels: OptIntSeq = (256, 256, 256), + deconv_kernel_sizes: OptIntSeq = (4, 4, 4), + deconv_num_groups: OptIntSeq = (16, 16, 16), + conv_out_channels: OptIntSeq = None, + conv_kernel_sizes: OptIntSeq = None, + has_final_layer: bool = True, + input_transform: str = 'select', + input_index: Union[int, Sequence[int]] = -1, + align_corners: bool = False) -> nn.Module: + + if deconv_type == 'Heatmap': + deconv_head = MODELS.build( + dict( + type='HeatmapHead', + in_channels=self.in_channels, + out_channels=out_channels, + deconv_out_channels=deconv_out_channels, + deconv_kernel_sizes=deconv_kernel_sizes, + conv_out_channels=conv_out_channels, + conv_kernel_sizes=conv_kernel_sizes, + has_final_layer=has_final_layer, + input_transform=input_transform, + input_index=input_index, + align_corners=align_corners)) + else: + deconv_head = MODELS.build( + dict( + type='ViPNASHead', + in_channels=in_channels, + out_channels=out_channels, + deconv_out_channels=deconv_out_channels, + deconv_num_groups=deconv_num_groups, + conv_out_channels=conv_out_channels, + 
+ conv_kernel_sizes=conv_kernel_sizes, + has_final_layer=has_final_layer, + input_transform=input_transform, + input_index=input_index, + align_corners=align_corners)) + + return deconv_head + + def forward(self, feats: Tuple[Tensor]) -> Tuple[Tensor, Tensor, Tensor]: + """Forward the network. The input is multi scale feature maps and the + output is the heatmap. + + Args: + feats (Tuple[Tensor]): Multi scale feature maps. + + Returns: + tuple: ``(pred, simcc_x, simcc_y)`` -- regressed coordinates and + the 1d x/y representations they are integrated from. + """ + if self.deconv_head is None: + feats = self._transform_inputs(feats) + if self.final_layer is not None: + feats = self.final_layer(feats) + else: + feats = self.deconv_head(feats) + + # flatten the output heatmap + x = torch.flatten(feats, 2) + + simcc_x = self.mlp_head_x(x) + simcc_y = self.mlp_head_y(x) + + pred_x = F.softmax(simcc_x * self.beta, dim=-1) + pred_x = (pred_x * self.linspace_x).sum(dim=-1, keepdim=True) + + pred_y = F.softmax(simcc_y * self.beta, dim=-1) + pred_y = (pred_y * self.linspace_y).sum(dim=-1, keepdim=True) + + if self.debias: + C_x = simcc_x.exp().sum(dim=-1, keepdim=True) + pred_x = C_x / (C_x - 1) * (pred_x - 1 / (2 * C_x)) + + C_y = simcc_y.exp().sum(dim=-1, keepdim=True) + pred_y = C_y / (C_y - 1) * (pred_y - 1 / (2 * C_y)) + + pred = torch.cat([pred_x, pred_y], dim=-1) + return pred, simcc_x, simcc_y + + def predict( + self, + feats: Tuple[Tensor], + batch_data_samples: OptSampleList, + test_cfg: OptConfigType = {}, + ) -> InstanceList: + """Predict results from features. + + Args: + feats (Tuple[Tensor] | List[Tuple[Tensor]]): The multi-stage + features (or multiple multi-stage features in TTA) + batch_data_samples (List[:obj:`PoseDataSample`]): The batch + data samples + test_cfg (dict): The runtime config for testing process.
Defaults + to {} + + Returns: + List[InstanceData]: The pose predictions, each contains + the following fields: + + - keypoints (np.ndarray): predicted keypoint coordinates in + shape (num_instances, K, D) where K is the keypoint number + and D is the keypoint dimension + - keypoint_scores (np.ndarray): predicted keypoint scores in + shape (num_instances, K) + - keypoint_x_labels (np.ndarray, optional): The predicted 1-D + intensity distribution in the x direction + - keypoint_y_labels (np.ndarray, optional): The predicted 1-D + intensity distribution in the y direction + """ + + if test_cfg.get('flip_test', False): + # TTA: flip test -> feats = [orig, flipped] + assert isinstance(feats, list) and len(feats) == 2 + flip_indices = batch_data_samples[0].metainfo['flip_indices'] + _feats, _feats_flip = feats + + _batch_pred_x, _batch_pred_y = self.forward(_feats) + + _batch_pred_x_flip, _batch_pred_y_flip = self.forward(_feats_flip) + _batch_pred_x_flip, _batch_pred_y_flip = flip_vectors( + _batch_pred_x_flip, + _batch_pred_y_flip, + flip_indices=flip_indices) + + batch_pred_x = (_batch_pred_x + _batch_pred_x_flip) * 0.5 + batch_pred_y = (_batch_pred_y + _batch_pred_y_flip) * 0.5 + else: + batch_pred_x, batch_pred_y = self.forward(feats) + + preds = self.decode((batch_pred_x, batch_pred_y)) + + if test_cfg.get('output_heatmaps', False): + for pred_instances, pred_x, pred_y in zip(preds, + to_numpy(batch_pred_x), + to_numpy(batch_pred_y)): + + pred_instances.keypoint_x_labels = pred_x[None] + pred_instances.keypoint_y_labels = pred_y[None] + + return preds + + def loss( + self, + feats: Tuple[Tensor], + batch_data_samples: OptSampleList, + train_cfg: OptConfigType = {}, + ) -> dict: + """Calculate losses from a batch of inputs and data samples.""" + + pred_x, pred_y = self.forward(feats) + + gt_x = torch.cat([ + d.gt_instance_labels.keypoint_x_labels for d in batch_data_samples + ], + dim=0) + gt_y = torch.cat([ + d.gt_instance_labels.keypoint_y_labels for d in 
batch_data_samples + ], + dim=0) + keypoint_weights = torch.cat( + [ + d.gt_instance_labels.keypoint_weights + for d in batch_data_samples + ], + dim=0, + ) + + pred_simcc = (pred_x, pred_y) + gt_simcc = (gt_x, gt_y) + + # calculate losses + losses = dict() + loss = self.loss_module(pred_simcc, gt_simcc, keypoint_weights) + + losses.update(loss_kpt=loss) + + # calculate accuracy + _, avg_acc, _ = simcc_pck_accuracy( + output=to_numpy(pred_simcc), + target=to_numpy(gt_simcc), + simcc_split_ratio=self.simcc_split_ratio, + mask=to_numpy(keypoint_weights) > 0, + ) + + acc_pose = torch.tensor(avg_acc, device=gt_x.device) + losses.update(acc_pose=acc_pose) + + return losses + + @property + def default_init_cfg(self): + init_cfg = [ + dict( + type='Normal', layer=['Conv2d', 'ConvTranspose2d'], std=0.001), + dict(type='Constant', layer='BatchNorm2d', val=1), + dict(type='Normal', layer=['Linear'], std=0.01, bias=0), + ] + return init_cfg diff --git a/mmpose/models/heads/heatmap_heads/simcc_head.py b/mmpose/models/heads/heatmap_heads/simcc_head.py index 697d285ea7..1a3e19f625 100644 --- a/mmpose/models/heads/heatmap_heads/simcc_head.py +++ b/mmpose/models/heads/heatmap_heads/simcc_head.py @@ -12,7 +12,6 @@ from mmpose.utils.typing import (ConfigType, InstanceList, OptConfigType, OptSampleList) from ..base_head import BaseHead -from .heatmap_head import HeatmapHead OptIntSeq = Optional[Sequence[int]] @@ -31,6 +30,13 @@ class SimCCHead(BaseHead): input_size (tuple): Input image size in shape [w, h] in_featuremap_size (int | sequence[int]): Size of input feature map simcc_split_ratio (float): Split ratio of pixels + deconv_type (str, optional): The type of deconv head which should + be one of the following options: + + - ``'heatmap'``: make deconv layers in `HeatmapHead` + - ``'vipnas'``: make deconv layers in `ViPNASHead` + + Defaults to ``'heatmap'`` deconv_out_channels (sequence[int]): The output channel number of each deconv layer.
Defaults to ``(256, 256, 256)`` deconv_kernel_sizes (sequence[int | tuple], optional): The kernel size @@ -38,6 +44,8 @@ class SimCCHead(BaseHead): both height and width dimensions, or a tuple of two integers for the height and the width dimension respectively.Defaults to ``(4, 4, 4)`` + deconv_num_groups (Sequence[int], optional): The group number of each + deconv layer. Defaults to ``(16, 16, 16)`` conv_out_channels (sequence[int], optional): The output channel number of each intermediate conv layer. ``None`` means no intermediate conv layer between deconv layers and the final conv layer. @@ -79,8 +87,10 @@ def __init__( input_size: Tuple[int, int], in_featuremap_size: Tuple[int, int], simcc_split_ratio: float = 2.0, + deconv_type: str = 'heatmap', deconv_out_channels: OptIntSeq = (256, 256, 256), deconv_kernel_sizes: OptIntSeq = (4, 4, 4), + deconv_num_groups: OptIntSeq = (16, 16, 16), conv_out_channels: OptIntSeq = None, conv_kernel_sizes: OptIntSeq = None, has_final_layer: bool = True, @@ -97,6 +107,12 @@ def __init__( super().__init__(init_cfg) + if deconv_type not in {'heatmap', 'vipnas'}: + raise ValueError( + f'{self.__class__.__name__} got invalid `deconv_type` value' + f'{deconv_type}. 
Should be one of ' + '{"heatmap", "vipnas"}') + self.in_channels = in_channels self.out_channels = out_channels self.input_size = input_size @@ -117,11 +133,13 @@ def __init__( [s * (2**num_deconv) for s in in_featuremap_size]) # deconv layers + 1x1 conv - self.simplebaseline_head = HeatmapHead( + self.deconv_head = self._make_deconv_head( in_channels=in_channels, out_channels=out_channels, + deconv_type=deconv_type, deconv_out_channels=deconv_out_channels, deconv_kernel_sizes=deconv_kernel_sizes, + deconv_num_groups=deconv_num_groups, conv_out_channels=conv_out_channels, conv_kernel_sizes=conv_kernel_sizes, has_final_layer=has_final_layer, @@ -136,7 +154,7 @@ def __init__( else: in_channels = self._get_in_channels() - self.simplebaseline_head = None + self.deconv_head = None if has_final_layer: cfg = dict( @@ -173,6 +191,51 @@ def __init__( self.mlp_head_x = nn.Linear(flatten_dims, W) self.mlp_head_y = nn.Linear(flatten_dims, H) + def _make_deconv_head(self, + in_channels: Union[int, Sequence[int]], + out_channels: int, + deconv_type: str = 'heatmap', + deconv_out_channels: OptIntSeq = (256, 256, 256), + deconv_kernel_sizes: OptIntSeq = (4, 4, 4), + deconv_num_groups: OptIntSeq = (16, 16, 16), + conv_out_channels: OptIntSeq = None, + conv_kernel_sizes: OptIntSeq = None, + has_final_layer: bool = True, + input_transform: str = 'select', + input_index: Union[int, Sequence[int]] = -1, + align_corners: bool = False) -> nn.Module: + + if deconv_type == 'heatmap': + deconv_head = MODELS.build( + dict( + type='HeatmapHead', + in_channels=self.in_channels, + out_channels=out_channels, + deconv_out_channels=deconv_out_channels, + deconv_kernel_sizes=deconv_kernel_sizes, + conv_out_channels=conv_out_channels, + conv_kernel_sizes=conv_kernel_sizes, + has_final_layer=has_final_layer, + input_transform=input_transform, + input_index=input_index, + align_corners=align_corners)) + else: + deconv_head = MODELS.build( + dict( + type='ViPNASHead', + in_channels=in_channels, + 
out_channels=out_channels, + deconv_out_channels=deconv_out_channels, + deconv_num_groups=deconv_num_groups, + conv_out_channels=conv_out_channels, + conv_kernel_sizes=conv_kernel_sizes, + has_final_layer=has_final_layer, + input_transform=input_transform, + input_index=input_index, + align_corners=align_corners)) + + return deconv_head + def forward(self, feats: Tuple[Tensor]) -> Tuple[Tensor, Tensor]: """Forward the network. The input is multi scale feature maps and the output is the heatmap. @@ -184,12 +247,12 @@ def forward(self, feats: Tuple[Tensor]) -> Tuple[Tensor, Tensor]: pred_x (Tensor): 1d representation of x. pred_y (Tensor): 1d representation of y. """ - if self.simplebaseline_head is None: + if self.deconv_head is None: feats = self._transform_inputs(feats) if self.final_layer is not None: feats = self.final_layer(feats) else: - feats = self.simplebaseline_head(feats) + feats = self.deconv_head(feats) # flatten the output heatmap x = torch.flatten(feats, 2) diff --git a/mmpose/models/heads/regression_heads/dsnt_head.py b/mmpose/models/heads/regression_heads/dsnt_head.py index e938da17fc..3db11986bc 100644 --- a/mmpose/models/heads/regression_heads/dsnt_head.py +++ b/mmpose/models/heads/regression_heads/dsnt_head.py @@ -30,7 +30,7 @@ class DSNTHead(IntegralRegressionHead): in_featuremap_size (int | sequence[int]): Size of input feature map num_joints (int): Number of joints lambda_t (int): Discard heatmap-based loss when current - epoch > lambda_t + epoch > lambda_t. Defaults to -1. debias (bool): Whether to remove the bias of Integral Pose Regression. see `Removing the Bias of Integral Pose Regression`_ by Gu et al (2021). Defaults to ``False``. 
diff --git a/mmpose/models/losses/classification_loss.py b/mmpose/models/losses/classification_loss.py index 3fe3a2f26e..6e4a07f014 100644 --- a/mmpose/models/losses/classification_loss.py +++ b/mmpose/models/losses/classification_loss.py @@ -8,7 +8,13 @@ @MODELS.register_module() class BCELoss(nn.Module): - """Binary Cross Entropy loss.""" + """Binary Cross Entropy loss. + + Args: + use_target_weight (bool): Option to use weighted loss. + Different joint types may have different target weights. + loss_weight (float): Weight of the loss. Default: 1.0. + """ def __init__(self, use_target_weight=False, loss_weight=1.): super().__init__() @@ -52,6 +58,7 @@ class JSDiscretLoss(nn.Module): Args: use_target_weight (bool): Option to use weighted loss. Different joint types may have different target weights. + size_average (bool): Option to average the loss by the batch_size. """ def __init__( diff --git a/mmpose/models/losses/loss_wrappers.py b/mmpose/models/losses/loss_wrappers.py index fb33b9bcab..f374c0da71 100644 --- a/mmpose/models/losses/loss_wrappers.py +++ b/mmpose/models/losses/loss_wrappers.py @@ -24,8 +24,6 @@ def __init__(self, losses: list): self.loss_modules = nn.ModuleList(loss_modules) def forward(self, input_list, target_list, keypoint_weights=None): - assert isinstance(input_list, list), '' - assert isinstance(target_list, list), '' assert len(input_list) == len(target_list), '' losses = [] diff --git a/tests/test_codecs/test_simcc_label.py b/tests/test_codecs/test_simcc_label.py index 77742fd776..98f02cc102 100644 --- a/tests/test_codecs/test_simcc_label.py +++ b/tests/test_codecs/test_simcc_label.py @@ -29,7 +29,7 @@ def setUp(self) -> None: smoothing_type='standard', sigma=5.0, simcc_split_ratio=3.0, - label_smoothing=0.1), + label_smooth_weight=0.1), ), ( 'simcc one-hot', @@ -112,26 +112,27 @@ def test_errors(self): 'got invalid `smoothing_type`'): _ = KEYPOINT_CODECS.build(cfg) - # invalid label_smoothing in smoothing + # invalid label_smooth_weight 
in smoothing cfg = dict( type='SimCCLabel', input_size=(192, 256), smoothing_type='standard', sigma=1.0, simcc_split_ratio=2.0, - label_smoothing=1.1) + label_smooth_weight=1.1) - with self.assertRaisesRegex(ValueError, '`label_smoothing` should be'): + with self.assertRaisesRegex(ValueError, + '`label_smooth_weight` should be'): _ = KEYPOINT_CODECS.build(cfg) - # invalid label_smoothing for gaussian + # invalid label_smooth_weight for gaussian cfg = dict( type='SimCCLabel', input_size=(192, 256), smoothing_type='gaussian', sigma=1.0, simcc_split_ratio=2.0, - label_smoothing=0.1) + label_smooth_weight=0.1) with self.assertRaisesRegex(ValueError, 'is only used for `standard` mode.'): diff --git a/tests/test_models/test_heads/test_heatmap_heads/test_simcc_head.py b/tests/test_models/test_heads/test_heatmap_heads/test_simcc_head.py index bf67e70795..20af073e3f 100644 --- a/tests/test_models/test_heads/test_heatmap_heads/test_simcc_head.py +++ b/tests/test_models/test_heads/test_heatmap_heads/test_simcc_head.py @@ -64,7 +64,7 @@ def test_init(self): smoothing_type='standard', sigma=6., simcc_split_ratio=3.0, - label_smoothing=0.1)) + label_smooth_weight=0.1)) self.assertIsNotNone(head.decoder) # w/ one-hot decoder @@ -102,7 +102,7 @@ def test_predict(self): smoothing_type='standard', sigma=2., simcc_split_ratio=2.0, - label_smoothing=0.1) + label_smooth_weight=0.1) for decoder_cfg in [decoder_cfg1, decoder_cfg2, decoder_cfg3]: # input transform: select @@ -223,7 +223,7 @@ def test_loss(self): smoothing_type='standard', sigma=2., simcc_split_ratio=2.0, - label_smoothing=0.1) + label_smooth_weight=0.1) # decoder for decoder_cfg in [decoder_cfg1, decoder_cfg2, decoder_cfg3]: