
Merge pull request #32 from Grutschus/27-set-up-a-pretrained-backbone-vit-and-a-training-pipeline

27 set up a pretrained backbone vit and a training pipeline
Grutschus committed Nov 28, 2023
2 parents dbc4b74 + 2782e92 commit 1bb8ba3
Showing 17 changed files with 1,019 additions and 49 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -160,4 +160,5 @@ cython_debug/
 #.idea/

 # Container files
-**.sif
+**.sif
+/weights
16 changes: 16 additions & 0 deletions .vscode/launch.json
@@ -11,6 +11,22 @@
             ],
             "console": "integratedTerminal",
             "justMyCode": false
+        },
+        {
+            "name": "Debug Training",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/mmaction2/tools/train.py",
+            "args": [
+                "configs/models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py"
+            ],
+            "console": "integratedTerminal",
+            "justMyCode": false,
+            "cwd": "${workspaceFolder}",
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}",
+                "CUDA_VISIBLE_DEVICES": "2"
+            }
         }
     ]
 }
3 changes: 2 additions & 1 deletion .vscode/settings.json
@@ -11,5 +11,6 @@
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
     "jupyter.debugJustMyCode": false,
-    "jupyter.notebookFileRoot": "${workspaceFolder}"
+    "jupyter.notebookFileRoot": "${workspaceFolder}",
+    "remote.SSH.remoteServerListenOnSocket": true
 }
2 changes: 1 addition & 1 deletion configs/datasets/ds_uniformsample_existencelabel.py
@@ -13,6 +13,6 @@
 )
 ann_file = "tests/test_data/test_annotation.csv"
 pipeline = []  # type: ignore
-multiclass = True
+multi_class = True
 num_classes = 3
 test_mode = True
113 changes: 113 additions & 0 deletions configs/datasets/high-quality-fall_runner-base.py
@@ -0,0 +1,113 @@
"""Base `Runner` config for high-quality-fall dataset."""

dataset_type = "HighQualityFallDataset"

label_strategy = dict(
type="PriorityLabel",
label_description=dict(
names=["fall", "lying", "other"],
start_timestamp_names=["fall_start", "lying_start"],
end_timestamp_names=["fall_end", "lying_end"],
visible_names=["fall_visible", "lying_visible"],
other_class=2,
),
)

sampling_strategy = dict(type="UniformSampling", clip_len=10)


# TRAIN
ann_file_train = "data/Fall_Simulation_Data/annotations_train.csv"

# TODO: Add shape comments
# TODO: Think about augmentation steps
train_pipeline = [
dict(type="DecordInit"),
dict(type="ClipVideo"),
dict(type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1),
dict(type="DecordDecode"),
dict(type="Resize", scale=(-1, 224)),
dict(type="RandomResizedCrop"),
dict(type="Resize", scale=(224, 224), keep_ratio=False),
dict(type="Flip", flip_ratio=0.5),
dict(type="FormatShape", input_format="NCTHW"),
dict(type="PackActionInputs"),
]

train_dataloader = dict(
batch_size=3, # From VideoMAEv2 repo
num_workers=8,
persistent_workers=True,
sampler=dict(type="DefaultSampler", shuffle=True),
dataset=dict(
type=dataset_type,
sampling_strategy=sampling_strategy,
label_strategy=label_strategy,
ann_file=ann_file_train,
pipeline=train_pipeline,
num_classes=3,
indices=100,
),
)

# VALIDATION
ann_file_val = "data/Fall_Simulation_Data/annotations_val.csv"

val_pipeline = [
dict(type="DecordInit"),
dict(type="ClipVideo"),
dict(
type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1, test_mode=True
),
dict(type="DecordDecode"),
dict(type="Resize", scale=(-1, 224)),
dict(type="CenterCrop", crop_size=224), # From VideoMAEv2 repo
dict(type="FormatShape", input_format="NCTHW"),
dict(type="PackActionInputs"),
]

val_dataloader = train_dataloader
# val_dataloader = dict(
# batch_size=3, # From VideoMAEv2 repo
# num_workers=8,
# persistent_workers=True,
# sampler=dict(type="DefaultSampler", shuffle=False),
# dataset=dict(
# type=dataset_type,
# sampling_strategy=sampling_strategy,
# label_strategy=label_strategy,
# ann_file=ann_file_val,
# pipeline=val_pipeline,
# num_classes=3,
# ),
# )

# TEST
ann_file_test = "data/Fall_Simulation_Data/annotations_test.csv"

test_pipeline = [
dict(type="DecordInit"),
dict(
type="SampleFrames", clip_len=16, frame_interval=4, num_clips=5, test_mode=True
), # From VideoMAEv2 repo
dict(type="DecordDecode"),
dict(type="Resize", scale=(-1, 224)),
dict(type="ThreeCrop", crop_size=224), # From VideoMAEv2 repo
dict(type="FormatShape", input_format="NCTHW"),
dict(type="PackActionInputs"),
]

test_dataloader = dict(
batch_size=3, # From VideoMAEv2 repo
num_workers=8,
persistent_workers=True,
sampler=dict(type="DefaultSampler", shuffle=False),
dataset=dict(
type=dataset_type,
sampling_strategy=sampling_strategy,
label_strategy=label_strategy,
ann_file=ann_file_test,
pipeline=test_pipeline,
num_classes=3,
),
)
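A note on how the sampling numbers above fit together: UniformSampling(clip_len=10) selects a clip from each video, ClipVideo restricts decoding to that clip, and SampleFrames(clip_len=16, frame_interval=4) then draws 16 frames spanning 64 source frames. A minimal sketch of that arithmetic, assuming clip_len here is in seconds and a 30 fps source (both are assumptions, not stated in this diff):

# Illustrative arithmetic only; the fps value and the seconds interpretation are assumptions.
fps = 30                              # hypothetical source frame rate
clip_seconds = 10                     # UniformSampling(clip_len=10)
frames_in_clip = clip_seconds * fps   # 300 frames available after ClipVideo

sample_len, frame_interval = 16, 4    # SampleFrames settings above
span = sample_len * frame_interval    # one sample covers 64 source frames
print(frames_in_clip, span)           # 300 64 -> the sample fits well inside a clip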
48 changes: 48 additions & 0 deletions configs/default_runtime.py
@@ -0,0 +1,48 @@
"""Default runtime for our experiments."""

# Trying to skip this part, since we have custom registries not in this scope
default_scope = "mmaction"
work_dir = "experiments"
custom_imports = dict(imports=["datasets"], allow_failed_imports=False)
launcher = "none"

default_hooks = dict(
runtime_info=dict(type="RuntimeInfoHook"),
timer=dict(type="IterTimerHook"),
logger=dict(type="LoggerHook"),
param_scheduler=dict(type="ParamSchedulerHook"),
checkpoint=dict(
type="CheckpointHook",
interval=1,
by_epoch=True,
max_keep_ckpts=3,
save_best="auto", # For CE, this is top-1-acc
),
sampler_seed=dict(type="DistSamplerSeedHook"),
sync_buffers=dict(type="SyncBuffersHook"),
)

# Hook disabled since it cannot handle NCTHW tensors
# TODO fix this
# custom_hooks = [dict(type="VisualizationHook", enable=True)]

env_cfg = dict(
cudnn_benchmark=False,
mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0),
dist_cfg=dict(backend="nccl"),
)

log_processor = dict(
type="LogProcessor",
window_size=10,
by_epoch=True,
)

vis_backends = [dict(type="TensorboardVisBackend", save_dir="experiments/tensorboard")]
visualizer = dict(type="ActionVisualizer", vis_backends=vis_backends)

log_level = "INFO"

# Overwrite this to continue training
load_from = None
resume = False
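As the trailing comment notes, load_from and resume are the knobs for continuing a run. A hedged example of overriding them in a config that inherits this runtime (the checkpoint path is hypothetical):

# Hypothetical override in a derived config; the checkpoint name is made up.
_base_ = ["../default_runtime.py"]

load_from = "experiments/epoch_12.pth"  # a checkpoint written by CheckpointHook
resume = True                           # also restore optimizer/scheduler/epoch state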
5 changes: 2 additions & 3 deletions configs/models/videomaev2.py
@@ -27,11 +27,10 @@
         average_clips="prob",
         multi_class=True,
     ),
-    # TODO: update this to fit our dataset
     data_preprocessor=dict(
         type="ActionDataPreprocessor",
-        mean=[123.675, 116.28, 103.53],
-        std=[58.395, 57.12, 57.375],
+        mean=[102.17311096191406, 98.78225708007812, 92.68714141845703],
+        std=[58.04566192626953, 57.004024505615234, 57.3704948425293],
         format_shape="NCTHW",
     ),
 )
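The removed mean/std ([123.675, 116.28, 103.53] / [58.395, 57.12, 57.375]) are the standard ImageNet RGB statistics; the new values look dataset-specific. The script that produced them is not part of this diff, so the following is only a sketch of one way such per-channel statistics can be computed (all names are illustrative):

# Sketch only: per-channel mean/std in the 0-255 range over sampled RGB frames.
import numpy as np

def channel_stats(frames):
    """frames: iterable of HxWx3 uint8 RGB arrays sampled from the dataset."""
    pixels = np.concatenate([f.reshape(-1, 3).astype(np.float64) for f in frames])
    return pixels.mean(axis=0), pixels.std(axis=0)

# mean, std = channel_stats(sampled_frames)  # yields values like those configured above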
72 changes: 72 additions & 0 deletions configs/models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py
@@ -0,0 +1,72 @@
_base_ = ["../default_runtime.py", "../datasets/high-quality-fall_runner-base.py"]

# Finetuning parameters are from VideoMAEv2 repo
# https://github.com/OpenGVLab/VideoMAEv2/blob/master/docs/FINETUNE.md


# ViT-S-P16
model = dict(
    type="Recognizer3D",
    backbone=dict(
        type="VisionTransformer",
        img_size=224,
        patch_size=16,
        embed_dims=384,
        depth=12,
        num_heads=6,
        mlp_ratio=4,
        qkv_bias=True,
        num_frames=16,
        norm_cfg=dict(type="LN", eps=1e-6),
        drop_path_rate=0.3,  # From VideoMAEv2 repo
    ),
    cls_head=dict(
        type="TimeSformerHead",
        num_classes=3,
        in_channels=384,
        average_clips="prob",
    ),
    data_preprocessor=dict(
        type="ActionDataPreprocessor",
        mean=[102.17311096191406, 98.78225708007812, 92.68714141845703],
        std=[58.04566192626953, 57.004024505615234, 57.3704948425293],
        format_shape="NCTHW",
    ),
)

# Loading weights
load_from = "weights/vit-small-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-25c748fd.pth"

# TRAINING CONFIG
train_cfg = dict(type="EpochBasedTrainLoop", max_epochs=35, val_interval=1)

# TODO: Think about fine-tuning param scheduler
param_scheduler = [
    dict(
        type="LinearLR", start_factor=0.001, by_epoch=True, begin=0, end=5
    ),  # From VideoMAEv2 repo
]

optim_wrapper = dict(
    type="OptimWrapper",
    optimizer=dict(
        type="AdamW",  # From VideoMAEv2 repo
        lr=1e-3,  # From VideoMAEv2 repo
        weight_decay=0.1,  # From VideoMAEv2 repo
        betas=(0.9, 0.999),  # From VideoMAEv2 repo
    ),
    clip_grad=dict(max_norm=5, norm_type=2),  # From VideoMAEv2 repo
)

# VALIDATION CONFIG
val_evaluator = dict(
    type="AccMetric", metric_options=dict(top_k_accuracy=dict(topk=(1,)))
)
val_cfg = dict(type="ValLoop")


# TEST CONFIG
test_evaluator = dict(
    type="AccMetric", metric_options=dict(top_k_accuracy=dict(topk=(1,)))
)
test_cfg = dict(type="TestLoop")
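A quick way to check that the _base_ files resolve and the key fields come out as intended is to load the merged config with mmengine. This is an illustrative sketch, not part of this commit; the config path is the one referenced in the launch configuration above, and it should be run from the repository root so the custom_imports package resolves:

# Illustrative sanity check using mmengine's Config.
from mmengine.config import Config

cfg = Config.fromfile(
    "configs/models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py"
)
assert cfg.model["cls_head"]["num_classes"] == 3
print(cfg.train_cfg, cfg.optim_wrapper["optimizer"]["lr"], cfg.load_from)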
4 changes: 3 additions & 1 deletion containers/requirements.txt
@@ -2,4 +2,6 @@
 openpyxl>=3.0
 openmim>=0.3
 ffmpeg-python>=0.2
-dvc[s3]
+dvc[s3]
+dvclive>=3.3
+tensorboard>=2.15
17 changes: 11 additions & 6 deletions datasets/high_quality_fall_dataset.py
@@ -76,16 +76,20 @@ def __init__(
         test_mode: bool = False,
         **kwargs,
     ) -> None:
+        # Bug in MMENGINE: kwarg `custom_imports` is not removed from kwargs
+        # this causes an error when building the dataset
+        # TODO: Create an issue on MMENGINE, can be fixed here:
+        # https://github.com/open-mmlab/mmengine/blob/85c0976bc2434157f786d44cdd8f0fb2955414f0/mmengine/config/config.py#L462C34-L462C34
+        kwargs.pop("custom_imports", None)
+
         if isinstance(sampling_strategy, dict):
-            built_sampling_strategy = SAMPLING_STRATEGIES.build(sampling_strategy)  # type: SamplingStrategy
+            self.sampling_strategy = SAMPLING_STRATEGIES.build(sampling_strategy)  # type: SamplingStrategy
         else:
-            built_sampling_strategy = sampling_strategy
-        self.sampling_strategy = built_sampling_strategy
+            self.sampling_strategy = sampling_strategy
         if isinstance(label_strategy, dict):
-            built_label_strategy = LABEL_STRATEGIES.build(label_strategy)  # type: LabelStrategy
+            self.label_strategy = LABEL_STRATEGIES.build(label_strategy)  # type: LabelStrategy
         else:
-            built_label_strategy = label_strategy
-        self.label_strategy = built_label_strategy
+            self.label_strategy = label_strategy
         super().__init__(
             ann_file,
             pipeline=pipeline,
@@ -95,6 +99,7 @@ def __init__(
             start_index=start_index,
             modality=modality,
             test_mode=test_mode,
+            **kwargs,
         )

     def load_data_list(self) -> List[dict]:
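The refactor above drops the temporary built_* variables; the underlying pattern is simply "accept either a config dict or an already-built strategy object". A generic restatement of that pattern (not code from the repo):

# Generic helper illustrating the dict-or-instance pattern used in __init__ above.
def resolve(obj_or_cfg, registry):
    """Build from a config dict via the registry, or pass a built object through."""
    return registry.build(obj_or_cfg) if isinstance(obj_or_cfg, dict) else obj_or_cfg

# e.g. self.sampling_strategy = resolve(sampling_strategy, SAMPLING_STRATEGIES)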
3 changes: 2 additions & 1 deletion datasets/transforms/clip_video.py
@@ -33,10 +33,11 @@ def transform(self, results: Dict) -> Dict:
             dict: The result dict.
         """
         interval = results["interval"]
+        total_frames = results["total_frames"]
         fps = results["avg_fps"]
         offset = results["start_index"] if "start_index" in results else 0
         start_frame = int(interval[0] * fps) + offset
-        end_frame = int(interval[1] * fps) + offset
+        end_frame = min(int(interval[1] * fps) + offset, total_frames)
         results["start_index"] = start_frame
         results["total_frames"] = end_frame - start_frame
         return results
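The added min() clamp keeps end_frame from running past the end of the video when an annotated interval extends beyond the last frame. A small worked example with made-up numbers:

# Made-up numbers: the annotated interval ends at 20 s, but the video only has
# 450 frames (18 s at 25 fps), so end_frame is clamped to 450 instead of 500.
interval = (2.0, 20.0)  # hypothetical annotation, in seconds
fps = 25.0
total_frames = 450
offset = 0

start_frame = int(interval[0] * fps) + offset                   # 50
end_frame = min(int(interval[1] * fps) + offset, total_frames)  # 450
print(start_frame, end_frame, end_frame - start_frame)          # 50 450 400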
