add x3d-m for uavhuman

lianghao02 committed Apr 8, 2024
1 parent d800d97 commit b68b6d0
Showing 3 changed files with 184 additions and 0 deletions.
1 change: 1 addition & 0 deletions mmaction/models/backbones/x3d.py
@@ -241,6 +241,7 @@ def __init__(self,
        self.gamma_d = gamma_d

        self.pretrained = pretrained

        self.in_channels = in_channels
        # Hard coded, can be changed by gamma_w
        self.base_channels = 24
24 changes: 24 additions & 0 deletions work_dir/uavhuman/model/x3d.py
@@ -0,0 +1,24 @@
# model settings
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='X3D',
        gamma_w=1,
        gamma_b=2.25,
        gamma_d=2.2,
    ),
    cls_head=dict(
        type='X3DHead',
        in_channels=432,
        num_classes=155,
        spatial_type='avg',
        dropout_ratio=0.5,
        fc1_bias=False,
        average_clips='prob'),
    data_preprocessor=dict(
        type='ActionDataPreprocessor',
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5],
        format_shape='NCTHW'),
    # model training and testing settings
    train_cfg=None,
    test_cfg=None)
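
For reference, a minimal sketch (not part of this commit) of how this model config could be instantiated, assuming mmaction2 1.x with the mmengine registry; the config path is the file added here:

from mmengine.config import Config
from mmaction.registry import MODELS
from mmaction.utils import register_all_modules

register_all_modules()  # register mmaction components into the default scope
cfg = Config.fromfile('work_dir/uavhuman/model/x3d.py')
model = MODELS.build(cfg.model)  # Recognizer3D with the X3D backbone and X3DHead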
159 changes: 159 additions & 0 deletions work_dir/uavhuman/x3d_m_16x5x1_facebook-kinetics500-rgb.py
@@ -0,0 +1,159 @@
# -*- coding: utf-8 -*-
# @Time : 2024/04/07 20:54
# @Author : Liang Hao
# @FileName : x3d_m_16x5x1_facebook-kinetics500-rgb.py
# @Email : lianghao@whu.edu.cn

_base_ = [
    './model/x3d.py',
    '../default_runtime.py'
]

model = dict(
    backbone=dict(
        pretrained="model_zoo/x3d_m_16x5x1_facebook-kinetics400-rgb_20201027-3f42382a.pth"
    ))


dataset_type = 'RawframeDataset'
data_root = '/data/dataset/uavhuman/rawframes'
data_root_val = '/data/dataset/uavhuman/rawframes'
split = 2 # official train/test splits. valid numbers: 1, 2, 3
ann_file_train = f'data/uavhuman/uavhuman_train_split_{split}_rawframes.txt'
ann_file_val = f'data/uavhuman/uavhuman_val_split_{split}_rawframes.txt'
ann_file_test = f'data/uavhuman/uavhuman_val_split_{split}_rawframes.txt'

file_client_args_train = dict(
    io_backend='disk',
    nori_file='data/uavhuman/uavhuman_train_split_1_nid.json',
    dtype='uint8',
    retry=60)

file_client_args_eval = dict(
    io_backend='disk',
    nori_file='data/uavhuman/uavhuman_val_split_1_nid.json',
    dtype='uint8',
    retry=60)

train_pipeline = [
    # dict(type='DecordInit', **file_client_args),
    dict(type='SampleFrames', clip_len=8, frame_interval=1, num_clips=1),
    # dict(type='DecordDecode'),
    dict(type='RawFrameDecodeNoir2', **file_client_args_train),
    dict(type='Resize', scale=(620, 620)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(540, 540), keep_ratio=False),
    # dict(type='ThreeCrop', crop_size=540),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='PackActionInputs')
]

val_pipeline = [
    # dict(type='DecordInit', **file_client_args),
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=1,
        num_clips=1,
        test_mode=True),
    # dict(type='DecordDecode'),
    dict(type='RawFrameDecodeNoir2', **file_client_args_eval),
    dict(type='Resize', scale=(540, 540), keep_ratio=False),
    # dict(type='ThreeCrop', crop_size=540),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='PackActionInputs')
]

test_pipeline = [
    # dict(type='DecordInit', **file_client_args),
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=1,
        num_clips=1,
        test_mode=True),
    # dict(type='DecordDecode'),
    dict(type='RawFrameDecodeNoir2', **file_client_args_eval),
    dict(type='Resize', scale=(540, 540), keep_ratio=False),
    # dict(type='ThreeCrop', crop_size=540),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='PackActionInputs')
]


train_dataloader = dict(
    batch_size=3,
    num_workers=8,
    # Keep worker processes alive after an epoch finishes instead of shutting
    # them down, so data loading for the next epoch starts faster; requires
    # num_workers >= 1.
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=dict(img=data_root),
        pipeline=train_pipeline
    )
)

val_dataloader = dict(
    batch_size=1,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=dict(img=data_root_val),
        pipeline=val_pipeline,
        test_mode=True
    )
)

test_dataloader = dict(
    batch_size=1,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=dict(img=data_root_val),
        pipeline=test_pipeline,
        test_mode=True
    )
)


val_evaluator = dict(type='AccMetric')
test_evaluator = val_evaluator

train_cfg = dict(
    type='EpochBasedTrainLoop', max_epochs=400, val_begin=1, val_interval=5)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

optim_wrapper = dict(
    optimizer=dict(
        type='SGD', lr=2e-5, momentum=0.9, weight_decay=5e-5),
)

param_scheduler = [
    dict(
        type='CosineAnnealingLR',
        T_max=400,
        eta_min=1e-7,
        by_epoch=True)
]
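
# Illustrative sketch (not part of this commit): the scheduler above follows the
# standard cosine annealing formula; values below are taken from the optimizer
# and scheduler settings in this config.
import math

def cosine_annealing_lr(epoch, base_lr=2e-5, eta_min=1e-7, T_max=400):
    # lr decays from base_lr at epoch 0 down to eta_min at epoch T_max
    return eta_min + 0.5 * (base_lr - eta_min) * (1 + math.cos(math.pi * epoch / T_max))

# lr at the start, midpoint and end of the 400-epoch schedule
print(cosine_annealing_lr(0), cosine_annealing_lr(200), cosine_annealing_lr(400))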

default_hooks = dict(
    checkpoint=dict(interval=5, max_keep_ckpts=5), logger=dict(interval=100))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (3 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=24)
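
# Illustrative sketch (not part of this commit) of the linear scaling rule that
# `auto_scale_lr` applies when `enable=True`; the 4-GPU batch size below is an
# assumed example, not a setting from this config.
base_batch_size = 24            # 8 GPUs x 3 samples per GPU, as configured above
actual_batch_size = 4 * 3       # e.g. training on 4 GPUs with batch_size=3 each
base_lr = 2e-5
scaled_lr = base_lr * actual_batch_size / base_batch_size  # -> 1e-5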
