Skip to content

Commit

Permalink
[Feature] Support ViTPose (#1876)
Browse files Browse the repository at this point in the history
  • Loading branch information
LareinaM authored Mar 14, 2023
1 parent a73a995 commit 936fed3
Show file tree
Hide file tree
Showing 14 changed files with 1,403 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
_base_ = ['../../../_base_/default_runtime.py']

# runtime
train_cfg = dict(max_epochs=210, val_interval=10)

# optimizer
custom_imports = dict(
imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
allow_failed_imports=False)

# Optimizer wrapper: AdamW with gradient clipping (max_norm=1, norm_type=2).
# The constructor named below is presumably provided by the module listed in
# `custom_imports` -- confirm against that module.
optim_wrapper = dict(
    optimizer=dict(
        type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
    paramwise_cfg=dict(
        num_layers=12,
        layer_decay_rate=0.75,
        # Per-parameter overrides, keyed by a substring of the parameter
        # name. Every entry sets the weight-decay multiplier to zero.
        custom_keys={
            # Fixed: this key was misspelled `decay_multi`, unlike its three
            # siblings, so the override for bias parameters could not match
            # the documented `decay_mult` option.
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.0),
            'relative_position_bias_table': dict(decay_mult=0.0),
            'norm': dict(decay_mult=0.0),
        },
    ),
    constructor='LayerDecayOptimWrapperConstructor',
    clip_grad=dict(max_norm=1., norm_type=2),
)

# learning policy: two schedulers, listed in the order given here
param_scheduler = [
    # warm-up; by_epoch=False, so begin/end are presumably iteration counts
    {
        'type': 'LinearLR',
        'begin': 0,
        'end': 500,
        'start_factor': 0.001,
        'by_epoch': False,
    },
    # epoch-based step schedule with milestones at 170 and 200, gamma 0.1
    {
        'type': 'MultiStepLR',
        'begin': 0,
        'end': 210,
        'milestones': [170, 200],
        'gamma': 0.1,
        'by_epoch': True,
    },
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = {'base_batch_size': 512}

# hooks: checkpoint selection keyed on 'coco/AP' with rule 'greater',
# keeping at most one checkpoint (max_keep_ckpts=1)
default_hooks = {
    'checkpoint': {
        'save_best': 'coco/AP',
        'rule': 'greater',
        'max_keep_ckpts': 1,
    },
}

# codec settings; the same dict object is reused as the head's decoder below
codec = {
    'type': 'UDPHeatmap',
    'input_size': (192, 256),
    'heatmap_size': (48, 64),
    'sigma': 2,
}

# model settings
model = {
    'type': 'TopdownPoseEstimator',
    'data_preprocessor': {
        'type': 'PoseDataPreprocessor',
        'mean': [123.675, 116.28, 103.53],
        'std': [58.395, 57.12, 57.375],
        'bgr_to_rgb': True,
    },
    'backbone': {
        'type': 'mmcls.VisionTransformer',
        'arch': 'base',
        # NOTE(review): (256, 192) here vs. codec input_size (192, 256) --
        # presumably (h, w) vs. (w, h); confirm against both APIs.
        'img_size': (256, 192),
        'patch_size': 16,
        'qkv_bias': True,
        'drop_path_rate': 0.3,
        'with_cls_token': False,
        'output_cls_token': False,
        'patch_cfg': {'padding': 2},
        'init_cfg': {
            'type': 'Pretrained',
            'checkpoint': 'pretrained/mae_pretrain_vit_base.pth',
        },
    },
    'head': {
        'type': 'HeatmapHead',
        'in_channels': 768,
        'out_channels': 17,
        # no deconv layers in this variant; see `extra` instead
        'deconv_out_channels': [],
        'deconv_kernel_sizes': [],
        'loss': {'type': 'KeypointMSELoss', 'use_target_weight': True},
        'decoder': codec,
        'extra': {'upsample': 4, 'final_conv_kernel': 3},
    },
    'test_cfg': {
        'flip_test': True,
        'flip_mode': 'heatmap',
        'shift_heatmap': False,
    },
}

# base dataset settings
# These three names are passed into the dataloader definitions below.
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
data_mode = 'topdown'

# pipelines
# NOTE: `{{_base_.file_client_args}}` is a placeholder for a variable from
# the base config, not plain Python; it is presumably substituted by the
# config loader before this file is evaluated -- keep it verbatim.
# Training: load, take the bbox centre/scale, apply the three random
# augmentations, warp to the codec input size, encode targets, then pack.
train_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
# Validation: same load/crop steps as training, but without the random
# augmentations and without the GenerateTarget step.
val_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
    dict(type='PackPoseInputs')
]

# data loaders
# Training loader: shuffled sampling over the train2017 keypoint annotations.
train_dataloader = {
    'batch_size': 64,
    'num_workers': 4,
    'persistent_workers': True,
    'sampler': {'type': 'DefaultSampler', 'shuffle': True},
    'dataset': {
        'type': dataset_type,
        'data_root': data_root,
        'data_mode': data_mode,
        'ann_file': 'annotations/person_keypoints_train2017.json',
        'data_prefix': {'img': 'train2017/'},
        'pipeline': train_pipeline,
    },
}
# Validation loader: unshuffled, keeps the last partial batch
# (drop_last=False) and does not round up the sample count (round_up=False).
val_dataloader = dict(
    batch_size=32,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        # Built from `data_root`, like the evaluator's ann_file, instead of
        # repeating the literal 'data/coco/'. The resulting path is the same
        # for the current data_root, and it no longer goes stale if
        # data_root is edited.
        bbox_file=data_root + 'person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
# Testing reuses the validation loader object unchanged.
test_dataloader = val_dataloader

# evaluators
# CocoMetric against the val2017 keypoint annotations under data_root.
val_evaluator = {
    'type': 'CocoMetric',
    'ann_file': data_root + 'annotations/person_keypoints_val2017.json',
}
# Testing reuses the validation evaluator object unchanged.
test_evaluator = val_evaluator
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
_base_ = ['../../../_base_/default_runtime.py']

# runtime
train_cfg = dict(max_epochs=210, val_interval=10)

# optimizer
custom_imports = dict(
imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
allow_failed_imports=False)

# Optimizer wrapper: AdamW with gradient clipping (max_norm=1, norm_type=2).
# The constructor named below is presumably provided by the module listed in
# `custom_imports` -- confirm against that module.
optim_wrapper = dict(
    optimizer=dict(
        type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
    paramwise_cfg=dict(
        num_layers=12,
        layer_decay_rate=0.75,
        # Per-parameter overrides, keyed by a substring of the parameter
        # name. Every entry sets the weight-decay multiplier to zero.
        custom_keys={
            # Fixed: this key was misspelled `decay_multi`, unlike its three
            # siblings, so the override for bias parameters could not match
            # the documented `decay_mult` option.
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.0),
            'relative_position_bias_table': dict(decay_mult=0.0),
            'norm': dict(decay_mult=0.0),
        },
    ),
    constructor='LayerDecayOptimWrapperConstructor',
    clip_grad=dict(max_norm=1., norm_type=2),
)

# learning policy: two schedulers, listed in the order given here
param_scheduler = [
    # warm-up; by_epoch=False, so begin/end are presumably iteration counts
    {
        'type': 'LinearLR',
        'begin': 0,
        'end': 500,
        'start_factor': 0.001,
        'by_epoch': False,
    },
    # epoch-based step schedule with milestones at 170 and 200, gamma 0.1
    {
        'type': 'MultiStepLR',
        'begin': 0,
        'end': 210,
        'milestones': [170, 200],
        'gamma': 0.1,
        'by_epoch': True,
    },
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = {'base_batch_size': 512}

# hooks: checkpoint selection keyed on 'coco/AP' with rule 'greater',
# keeping at most one checkpoint (max_keep_ckpts=1)
default_hooks = {
    'checkpoint': {
        'save_best': 'coco/AP',
        'rule': 'greater',
        'max_keep_ckpts': 1,
    },
}

# codec settings; the same dict object is reused as the head's decoder below
codec = {
    'type': 'UDPHeatmap',
    'input_size': (192, 256),
    'heatmap_size': (48, 64),
    'sigma': 2,
}

# model settings
model = {
    'type': 'TopdownPoseEstimator',
    'data_preprocessor': {
        'type': 'PoseDataPreprocessor',
        'mean': [123.675, 116.28, 103.53],
        'std': [58.395, 57.12, 57.375],
        'bgr_to_rgb': True,
    },
    'backbone': {
        'type': 'mmcls.VisionTransformer',
        'arch': 'base',
        # NOTE(review): (256, 192) here vs. codec input_size (192, 256) --
        # presumably (h, w) vs. (w, h); confirm against both APIs.
        'img_size': (256, 192),
        'patch_size': 16,
        'qkv_bias': True,
        'drop_path_rate': 0.3,
        'with_cls_token': False,
        'output_cls_token': False,
        'patch_cfg': {'padding': 2},
        'init_cfg': {
            'type': 'Pretrained',
            'checkpoint': 'pretrained/mae_pretrain_vit_base.pth',
        },
    },
    'head': {
        'type': 'HeatmapHead',
        'in_channels': 768,
        'out_channels': 17,
        # two deconv stages, each with 256 output channels and kernel size 4
        'deconv_out_channels': (256, 256),
        'deconv_kernel_sizes': (4, 4),
        'loss': {'type': 'KeypointMSELoss', 'use_target_weight': True},
        'decoder': codec,
    },
    'test_cfg': {
        'flip_test': True,
        'flip_mode': 'heatmap',
        'shift_heatmap': False,
    },
}

# base dataset settings
# These three names are passed into the dataloader definitions below.
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
data_mode = 'topdown'

# pipelines
# NOTE: `{{_base_.file_client_args}}` is a placeholder for a variable from
# the base config, not plain Python; it is presumably substituted by the
# config loader before this file is evaluated -- keep it verbatim.
# Training: load, take the bbox centre/scale, apply the three random
# augmentations, warp to the codec input size, encode targets, then pack.
train_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
# Validation: same load/crop steps as training, but without the random
# augmentations and without the GenerateTarget step.
val_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
    dict(type='PackPoseInputs')
]

# data loaders
# Training loader: shuffled sampling over the train2017 keypoint annotations.
train_dataloader = {
    'batch_size': 64,
    'num_workers': 4,
    'persistent_workers': True,
    'sampler': {'type': 'DefaultSampler', 'shuffle': True},
    'dataset': {
        'type': dataset_type,
        'data_root': data_root,
        'data_mode': data_mode,
        'ann_file': 'annotations/person_keypoints_train2017.json',
        'data_prefix': {'img': 'train2017/'},
        'pipeline': train_pipeline,
    },
}
# Validation loader: unshuffled, keeps the last partial batch
# (drop_last=False) and does not round up the sample count (round_up=False).
val_dataloader = dict(
    batch_size=32,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        # Built from `data_root`, like the evaluator's ann_file, instead of
        # repeating the literal 'data/coco/'. The resulting path is the same
        # for the current data_root, and it no longer goes stale if
        # data_root is edited.
        bbox_file=data_root + 'person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
# Testing reuses the validation loader object unchanged.
test_dataloader = val_dataloader

# evaluators
# CocoMetric against the val2017 keypoint annotations under data_root.
val_evaluator = {
    'type': 'CocoMetric',
    'ann_file': data_root + 'annotations/person_keypoints_val2017.json',
}
# Testing reuses the validation evaluator object unchanged.
test_evaluator = val_evaluator
Loading

0 comments on commit 936fed3

Please sign in to comment.