diff --git a/configs/top_down/resnet/README.md b/configs/top_down/resnet/README.md
index 003ba6c2ef..9e4b3f27a3 100644
--- a/configs/top_down/resnet/README.md
+++ b/configs/top_down/resnet/README.md
@@ -25,6 +25,22 @@
| [pose_resnet_152](/configs/top_down/resnet/coco/res152_coco_384x288.py) | 384x288 | 0.750 | 0.908 | 0.821 | 0.800 | 0.942 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res152_coco_384x288-3860d4c9_20200709.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res152_coco_384x288_20200709.log.json) |
+
+### Results on OCHuman test dataset with ground-truth bounding boxes
+
+Following the common setting, the models are trained on the COCO train set and evaluated on the OCHuman dataset (a short loading sketch is given below the table).
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/top_down/resnet/coco/res50_coco_256x192.py) | 256x192 | 0.546 | 0.726 | 0.593 | 0.592 | 0.755 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res50_coco_256x192_20200709.log.json) |
+| [pose_resnet_50](/configs/top_down/resnet/coco/res50_coco_384x288.py) | 384x288 | 0.539 | 0.723 | 0.574 | 0.588 | 0.756 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res50_coco_384x288-e6f795e9_20200709.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res50_coco_384x288_20200709.log.json) |
+| [pose_resnet_101](/configs/top_down/resnet/coco/res101_coco_256x192.py) | 256x192 | 0.559 | 0.724 | 0.606 | 0.605 | 0.751 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res101_coco_256x192-6e6babf0_20200708.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res101_coco_256x192_20200708.log.json) |
+| [pose_resnet_101](/configs/top_down/resnet/coco/res101_coco_384x288.py) | 384x288 | 0.571 | 0.715 | 0.615 | 0.615 | 0.748 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res101_coco_384x288-8c71bdc9_20200709.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res101_coco_384x288_20200709.log.json) |
+| [pose_resnet_152](/configs/top_down/resnet/coco/res152_coco_256x192.py) | 256x192 | 0.570 | 0.725 | 0.617 | 0.616 | 0.754 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res152_coco_256x192-f6e307c2_20200709.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res152_coco_256x192_20200709.log.json) |
+| [pose_resnet_152](/configs/top_down/resnet/coco/res152_coco_384x288.py) | 384x288 | 0.582 | 0.723 | 0.627 | 0.627 | 0.752 | [ckpt](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res152_coco_384x288-3860d4c9_20200709.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/top_down/resnet/res152_coco_384x288_20200709.log.json) |
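+
+The configs under `configs/top_down/resnet/ochuman/` reuse the COCO-trained models above and only switch the val/test sets to OCHuman. As a minimal sketch (assuming `mmcv` is installed and the OCHuman files are placed as described in [getting_started](/docs/getting_started.md)), a config can be loaded and the OCHuman test set built directly:
+
+```python
+from mmcv import Config
+
+from mmpose.datasets import build_dataset
+
+# Any of the OCHuman configs works here; they differ only in backbone depth
+# and input size.
+cfg = Config.fromfile('configs/top_down/resnet/ochuman/res50_ochuman_256x192.py')
+
+print(cfg.data.train.type)  # TopDownCocoDataset: training stays on COCO
+print(cfg.data.test.type)   # TopDownOCHumanDataset: evaluation on OCHuman
+
+# Builds the OCHuman test split with ground-truth person boxes.
+ochuman_test = build_dataset(cfg.data.test)
+print(f'OCHuman test samples: {len(ochuman_test)}')
+```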
+
### Results on MPII val set.
| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
diff --git a/configs/top_down/resnet/ochuman/res101_ochuman_256x192.py b/configs/top_down/resnet/ochuman/res101_ochuman_256x192.py
new file mode 100755
index 0000000000..8862b809b8
--- /dev/null
+++ b/configs/top_down/resnet/ochuman/res101_ochuman_256x192.py
@@ -0,0 +1,146 @@
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=10)
+evaluation = dict(interval=5, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='models/pytorch/imagenet/resnet101-5d3b4d8f.pth',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopDownSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ ),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process=True,
+ shift_heatmap=True,
+ unbiased_decoding=False,
+ modulate_kernel=11),
+ loss_pose=dict(type='JointsMSELoss', use_target_weight=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ bbox_thr=1.0,
+ use_gt_bbox=True,
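+    # Ground-truth person boxes are used for evaluation, so the detection
+    # result file below is not actually loaded.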
+ image_thr=0.0,
+    bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
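+    # Training stays on the COCO train set; val/test switch to OCHuman.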
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+)
diff --git a/configs/top_down/resnet/ochuman/res101_ochuman_384x288.py b/configs/top_down/resnet/ochuman/res101_ochuman_384x288.py
new file mode 100755
index 0000000000..6addb9d1ab
--- /dev/null
+++ b/configs/top_down/resnet/ochuman/res101_ochuman_384x288.py
@@ -0,0 +1,146 @@
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=10)
+evaluation = dict(interval=5, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='models/pytorch/imagenet/resnet101-5d3b4d8f.pth',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopDownSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ ),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process=True,
+ shift_heatmap=True,
+ unbiased_decoding=False,
+ modulate_kernel=11),
+ loss_pose=dict(type='JointsMSELoss', use_target_weight=True))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ bbox_thr=1.0,
+ use_gt_bbox=True,
+ image_thr=0.0,
+    bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+)
diff --git a/configs/top_down/resnet/ochuman/res152_ochuman_256x192.py b/configs/top_down/resnet/ochuman/res152_ochuman_256x192.py
new file mode 100755
index 0000000000..094651f47a
--- /dev/null
+++ b/configs/top_down/resnet/ochuman/res152_ochuman_256x192.py
@@ -0,0 +1,146 @@
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=10)
+evaluation = dict(interval=5, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='models/pytorch/imagenet/resnet152-b121ed2d.pth',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopDownSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ ),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process=True,
+ shift_heatmap=True,
+ unbiased_decoding=False,
+ modulate_kernel=11),
+ loss_pose=dict(type='JointsMSELoss', use_target_weight=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ bbox_thr=1.0,
+ use_gt_bbox=True,
+ image_thr=0.0,
+    bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+)
diff --git a/configs/top_down/resnet/ochuman/res152_ochuman_384x288.py b/configs/top_down/resnet/ochuman/res152_ochuman_384x288.py
new file mode 100755
index 0000000000..a63c33d182
--- /dev/null
+++ b/configs/top_down/resnet/ochuman/res152_ochuman_384x288.py
@@ -0,0 +1,146 @@
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=10)
+evaluation = dict(interval=5, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='models/pytorch/imagenet/resnet152-b121ed2d.pth',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopDownSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ ),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process=True,
+ shift_heatmap=True,
+ unbiased_decoding=False,
+ modulate_kernel=11),
+ loss_pose=dict(type='JointsMSELoss', use_target_weight=True))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ bbox_thr=1.0,
+ use_gt_bbox=True,
+ image_thr=0.0,
+    bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=48,
+ workers_per_gpu=2,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+)
diff --git a/configs/top_down/resnet/ochuman/res50_ochuman_256x192.py b/configs/top_down/resnet/ochuman/res50_ochuman_256x192.py
new file mode 100644
index 0000000000..5d14360ef1
--- /dev/null
+++ b/configs/top_down/resnet/ochuman/res50_ochuman_256x192.py
@@ -0,0 +1,146 @@
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=10)
+evaluation = dict(interval=1, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='models/pytorch/imagenet/resnet50-19c8e357.pth',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopDownSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ ),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process=True,
+ shift_heatmap=True,
+ unbiased_decoding=False,
+ modulate_kernel=11),
+ loss_pose=dict(type='JointsMSELoss', use_target_weight=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ bbox_thr=1.0,
+ use_gt_bbox=True,
+ image_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+)
diff --git a/configs/top_down/resnet/ochuman/res50_ochuman_384x288.py b/configs/top_down/resnet/ochuman/res50_ochuman_384x288.py
new file mode 100755
index 0000000000..ddd6cf69dc
--- /dev/null
+++ b/configs/top_down/resnet/ochuman/res50_ochuman_384x288.py
@@ -0,0 +1,146 @@
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=10)
+evaluation = dict(interval=5, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='models/pytorch/imagenet/resnet50-19c8e357.pth',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopDownSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ ),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process=True,
+ shift_heatmap=True,
+ unbiased_decoding=False,
+ modulate_kernel=11),
+ loss_pose=dict(type='JointsMSELoss', use_target_weight=True))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ bbox_thr=1.0,
+ use_gt_bbox=True,
+ image_thr=0.0,
+    bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+)
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 871d4e8bd0..7485b0e6f5 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -64,6 +64,29 @@ mmpose
```
+**For OCHuman data**, please download the images and annotations from [OCHuman](https://github.com/liruilong940607/OCHumanApi),
+move them under $MMPOSE/data, and make them look like this (a quick sanity check is given after the tree):
+
+```
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+    │── ochuman
+        │-- annotations
+        │   │-- ochuman_coco_format_val_range_0.00_1.00.json
+        │   │-- ochuman_coco_format_test_range_0.00_1.00.json
+        `-- images
+            │-- 000001.jpg
+            │-- 000002.jpg
+            │-- 000003.jpg
+            │-- ...
+```
+
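+A quick, optional sanity check of this layout (it only needs `pycocotools`, which the codebase already uses) is to load the validation annotations and count images and person instances:
+
+```python
+from pycocotools.coco import COCO
+
+# Path follows the directory tree above.
+ann_file = ('data/ochuman/annotations/'
+            'ochuman_coco_format_val_range_0.00_1.00.json')
+coco = COCO(ann_file)
+print(f'images: {len(coco.getImgIds())}')
+print(f'annotated person instances: {len(coco.getAnnIds())}')
+```
+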
**For MPII-TRB data**, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/).
Please download the annotation files from [mpii_trb_annotations](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/datasets/mpii_trb_annotations.tar).
Extract them under {MMPose}/data, and make them look like this:
diff --git a/mmpose/datasets/__init__.py b/mmpose/datasets/__init__.py
index bb4f926cb9..aba648c7d1 100644
--- a/mmpose/datasets/__init__.py
+++ b/mmpose/datasets/__init__.py
@@ -1,13 +1,14 @@
from .builder import build_dataloader, build_dataset
from .datasets import (BottomUpCocoDataset, TopDownCocoDataset,
TopDownMpiiDataset, TopDownMpiiTrbDataset,
- TopDownOneHand10KDataset)
+ TopDownOCHumanDataset, TopDownOneHand10KDataset)
from .pipelines import Compose
from .registry import DATASETS, PIPELINES
from .samplers import DistributedSampler
__all__ = [
'TopDownCocoDataset', 'BottomUpCocoDataset', 'TopDownMpiiTrbDataset',
- 'TopDownOneHand10KDataset', 'TopDownMpiiDataset', 'build_dataloader',
- 'build_dataset', 'Compose', 'DistributedSampler', 'DATASETS', 'PIPELINES'
+ 'TopDownOneHand10KDataset', 'TopDownMpiiDataset', 'TopDownOCHumanDataset',
+ 'build_dataloader', 'build_dataset', 'Compose', 'DistributedSampler',
+ 'DATASETS', 'PIPELINES'
]
diff --git a/mmpose/datasets/datasets/__init__.py b/mmpose/datasets/datasets/__init__.py
index 645f8005a5..50dc58578d 100644
--- a/mmpose/datasets/datasets/__init__.py
+++ b/mmpose/datasets/datasets/__init__.py
@@ -1,8 +1,10 @@
from .bottom_up import BottomUpCocoDataset
from .top_down import (TopDownCocoDataset, TopDownMpiiDataset,
- TopDownMpiiTrbDataset, TopDownOneHand10KDataset)
+ TopDownMpiiTrbDataset, TopDownOCHumanDataset,
+ TopDownOneHand10KDataset)
__all__ = [
'TopDownCocoDataset', 'BottomUpCocoDataset', 'TopDownMpiiDataset',
- 'TopDownMpiiTrbDataset', 'TopDownOneHand10KDataset'
+ 'TopDownMpiiTrbDataset', 'TopDownOneHand10KDataset',
+ 'TopDownOCHumanDataset'
]
diff --git a/mmpose/datasets/datasets/top_down/__init__.py b/mmpose/datasets/datasets/top_down/__init__.py
index fd9fc77ee8..72d6c6b6d1 100644
--- a/mmpose/datasets/datasets/top_down/__init__.py
+++ b/mmpose/datasets/datasets/top_down/__init__.py
@@ -1,9 +1,10 @@
from .topdown_coco_dataset import TopDownCocoDataset
from .topdown_mpii_dataset import TopDownMpiiDataset
from .topdown_mpii_trb_dataset import TopDownMpiiTrbDataset
+from .topdown_ochuman_dataset import TopDownOCHumanDataset
from .topdown_onehand10k_dataset import TopDownOneHand10KDataset
__all__ = [
'TopDownCocoDataset', 'TopDownMpiiTrbDataset', 'TopDownMpiiDataset',
- 'TopDownOneHand10KDataset'
+ 'TopDownOneHand10KDataset', 'TopDownOCHumanDataset'
]
diff --git a/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py b/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
index 8d5873f9d3..eddbdedec5 100644
--- a/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
+++ b/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
@@ -16,6 +16,10 @@
class TopDownCocoDataset(TopDownBaseDataset):
"""CocoDataset dataset for top-down pose estimation.
+    `Microsoft COCO: Common Objects in Context' ECCV'2014
+    More details can be found in the `paper
+    <https://arxiv.org/abs/1405.0312>`_ .
+
The dataset loads raw features and apply specified transforms
to return a dict containing the image tensors and other information.
diff --git a/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py b/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py
index ec7381e052..dc6ea1fa85 100644
--- a/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py
+++ b/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py
@@ -13,6 +13,10 @@
class TopDownMpiiDataset(TopDownBaseDataset):
"""MPII Dataset for top-down pose estimation.
+    `2D Human Pose Estimation: New Benchmark and State of the Art Analysis'
+    CVPR'2014. More details can be found in the `paper
+    <http://human-pose.mpi-inf.mpg.de/contents/andriluka14cvpr.pdf>`_ .
+
The dataset loads raw features and apply specified transforms
to return a dict containing the image tensors and other information.
diff --git a/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py b/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py
index 53286ef19d..f0c4260873 100644
--- a/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py
+++ b/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py
@@ -14,6 +14,10 @@
class TopDownMpiiTrbDataset(TopDownBaseDataset):
"""MPII-TRB Dataset dataset for top-down pose estimation.
+    `TRB: A Novel Triplet Representation for Understanding 2D Human Body'
+    ICCV'2019. More details can be found in the `paper
+    <https://arxiv.org/abs/1910.11535>`_ .
+
The dataset loads raw features and apply specified transforms
to return a dict containing the image tensors and other information.
diff --git a/mmpose/datasets/datasets/top_down/topdown_ochuman_dataset.py b/mmpose/datasets/datasets/top_down/topdown_ochuman_dataset.py
new file mode 100644
index 0000000000..5f3b55eeb3
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_ochuman_dataset.py
@@ -0,0 +1,270 @@
+import os
+from collections import OrderedDict, defaultdict
+
+import numpy as np
+from pycocotools.coco import COCO
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...registry import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+ Returns:
+ id2name (dict): mapping image id to name.
+ name2id (dict): mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+
+@DATASETS.register_module()
+class TopDownOCHumanDataset(TopDownCocoDataset):
+    """OCHuman dataset for top-down pose estimation.
+
+    `Pose2Seg: Detection Free Human Instance Segmentation' CVPR'2019
+    More details can be found in the `paper
+    <https://arxiv.org/abs/1803.10683>`_ .
+
+    The "Occluded Human (OCHuman)" dataset contains 8110 heavily occluded
+    human instances within 4731 images and is designed for validation and
+    testing only. To evaluate on OCHuman, a model should be trained on the
+    COCO train set and then tested on OCHuman to measure its robustness to
+    occlusion.
+
+ OCHuman keypoint indexes (same as COCO)::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ test_mode=False):
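+        # Note: super(TopDownCocoDataset, self) deliberately skips
+        # TopDownCocoDataset.__init__ and runs TopDownBaseDataset.__init__,
+        # since the COCO-style loading is re-implemented below for OCHuman.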
+ super(TopDownCocoDataset, self).__init__(
+ ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.image_thr = data_cfg['image_thr']
+
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+ self.bbox_thr = data_cfg['bbox_thr']
+
+ self.ann_info['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
+ [11, 12], [13, 14], [15, 16]]
+
+ self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ self.ann_info['lower_body_ids'] = (11, 12, 13, 14, 15, 16)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.ann_info['joint_weights'] = np.array(
+ [
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2,
+ 1.2, 1.5, 1.5
+ ],
+ dtype=np.float32).reshape((self.ann_info['num_joints'], 1))
+
+ self.coco = COCO(ann_file)
+
+ cats = [
+ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
+ ]
+ self.classes = ['__background__'] + cats
+ self.num_classes = len(self.classes)
+ self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
+ self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
+ self._coco_ind_to_class_ind = dict([(self._class_to_coco_ind[cls],
+ self._class_to_ind[cls])
+ for cls in self.classes[1:]])
+ self.image_set_index = self.coco.getImgIds()
+ self.num_images = len(self.image_set_index)
+ self.id2name, self.name2id = _get_mapping_id_name(self.coco.imgs)
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, index):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ index: coco image id
+ Returns:
+ db entry
+ """
+ im_ann = self.coco.loadImgs(index)[0]
+ width = im_ann['width']
+ height = im_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=index, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ rec = []
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = os.path.join(self.img_prefix, self.id2name[index])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': 'ochuman',
+ 'bbox_score': 1
+ })
+
+ return rec
+
+ def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in `${res_folder}/result_keypoints.json`.
+
+ Note:
+ num_keypoints: K
+
+ Args:
+ outputs (list(preds, boxes, image_path))
+ :preds (np.ndarray[1,K,3]): The first two dimensions are
+ coordinates, score is the third dimension of the array.
+                :boxes (np.ndarray[1,6]): [center[0], center[1], scale[0],
+                    scale[1], area, score]
+ :image_path (list[str]): For example, [ '/', 'v','a', 'l',
+ '2', '0', '1', '7', '/', '0', '0', '0', '0', '0',
+ '0', '3', '9', '7', '1', '3', '3', '.', 'j', 'p', 'g']
+ res_folder (str): Path of directory to save the results.
+ metric (str): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ name_value (dict): Evaluation results for evaluation metric.
+ """
+ assert metric == 'mAP'
+
+ res_file = os.path.join(res_folder, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+ for preds, boxes, image_path in outputs:
+ str_image_path = ''.join(image_path)
+ image_id = self.name2id[os.path.basename(str_image_path)]
+
+ kpts[image_id].append({
+ 'keypoints': preds[0],
+ 'center': boxes[0][0:2],
+ 'scale': boxes[0][2:4],
+ 'area': boxes[0][4],
+ 'score': boxes[0][5],
+ 'image_id': image_id,
+ })
+
+ # rescoring and oks nms
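+        # Each prediction is rescored as box_score * (mean confidence of the
+        # keypoints above vis_thr); OKS-NMS then removes duplicate poses of
+        # the same person within an image.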
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ oks_nmsed_kpts = []
+ for img in kpts.keys():
+ img_kpts = kpts[img]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.soft_nms:
+ keep = soft_oks_nms(
+ [img_kpts[i] for i in range(len(img_kpts))], oks_thr)
+ else:
+ keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
+ oks_thr)
+
+ if len(keep) == 0:
+ oks_nmsed_kpts.append(img_kpts)
+ else:
+ oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])
+
+ self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ return name_value
diff --git a/mmpose/datasets/datasets/top_down/topdown_onehand10k_dataset.py b/mmpose/datasets/datasets/top_down/topdown_onehand10k_dataset.py
index f7549283d2..7e83bc8113 100644
--- a/mmpose/datasets/datasets/top_down/topdown_onehand10k_dataset.py
+++ b/mmpose/datasets/datasets/top_down/topdown_onehand10k_dataset.py
@@ -14,6 +14,11 @@
class TopDownOneHand10KDataset(TopDownBaseDataset):
"""OneHand10K dataset for top-down hand pose estimation.
+ `Mask-pose Cascaded CNN for 2D Hand Pose Estimation from
+ Single Color Images' TCSVT'2019
+ More details can be found in the `paper
+ `_ .
+
The dataset loads raw features and apply specified transforms
to return a dict containing the image tensors and other information.