Commit 6636bdb

Merge branch 'main' into 9-preprocessing-cutup-script

Grutschus committed Nov 22, 2023
2 parents 574e687 + 3bfe75c
Showing 26 changed files with 807 additions and 2 deletions.
1 change: 1 addition & 0 deletions .dvc/config
@@ -1,6 +1,7 @@
[core]
remote = storage
autostage = true
hardlink_lock = true
['remote "storage"']
url = s3://human-fall-detection/
endpointurl = https://s3.tebi.io
3 changes: 3 additions & 0 deletions .gitignore
@@ -158,3 +158,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Container files
**.sif
1 change: 1 addition & 0 deletions .gitmodules
@@ -1,3 +1,4 @@
[submodule "mmaction2"]
path = mmaction2
url = git@github.com:Grutschus/mmaction2.git
branch = custom_features
16 changes: 16 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,16 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Debug Tests",
"type": "python",
"request": "launch",
"program": "${file}",
"purpose": [
"debug-test"
],
"console": "integratedTerminal",
"justMyCode": false
}
]
}
11 changes: 10 additions & 1 deletion .vscode/settings.json
@@ -1,5 +1,14 @@
{
"ruff.format.args": [
"--config=pyproject.toml"
]
],
"mypy-type-checker.args": [
"--config-file=pyproject.toml"
],
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"jupyter.debugJustMyCode": false
}
22 changes: 22 additions & 0 deletions configs/datasets/ds_uniformsample_existencelabel.py
@@ -0,0 +1,22 @@
from datasets.transforms.label_strategy import HQFD_LABEL_DESCRIPTION

custom_imports = dict(imports="datasets", allow_failed_imports=False)
type = "HighQualityFallDataset"
sampling_strategy = dict(type="UniformSampling", clip_len=10)
label_strategy = dict(type="ExistenceLabel", label_description=HQFD_LABEL_DESCRIPTION)
ann_file = "tests/test_data/test_annotation.csv"
pipeline = [
dict(type="DecordInit"),
dict(type="ClipVideo"),
dict(type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1),
dict(type="DecordDecode"),
dict(type="Resize", scale=(-1, 224)),
dict(type="RandomResizedCrop"),
dict(type="Resize", scale=(224, 224), keep_ratio=False),
dict(type="Flip", flip_ratio=0.5),
dict(type="FormatShape", input_format="NCTHW"),
dict(type="PackActionInputs"),
] # type: ignore
multiclass = True
num_classes = 3
test_mode = True
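
A minimal consumption sketch for this config, assuming mmengine's `Config` loader and that the repository root is on `PYTHONPATH`; the keyword mapping (e.g. passing the config's `multiclass` as the dataset's `multi_class` argument) is an illustrative assumption, not part of this commit:

# Hypothetical sketch: load the config above and build the dataset from it.
import datasets  # noqa: F401 -- registers HighQualityFallDataset and its transforms
from mmengine.config import Config
from mmaction.registry import DATASETS

cfg = Config.fromfile("configs/datasets/ds_uniformsample_existencelabel.py")
dataset = DATASETS.build(
    dict(
        type=cfg.type,
        sampling_strategy=cfg.sampling_strategy,
        label_strategy=cfg.label_strategy,
        ann_file=cfg.ann_file,
        pipeline=cfg.pipeline,
        multi_class=cfg.multiclass,  # assumed mapping; the config key is `multiclass`
        num_classes=cfg.num_classes,
        test_mode=cfg.test_mode,
    )
)
print(len(dataset))  # one entry per sampled clip across all annotated videos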
101 changes: 101 additions & 0 deletions configs/models/videomaev2.py
@@ -0,0 +1,101 @@
_base_ = ["../../mmaction2/configs/_base_/default_runtime.py"]

custom_imports = dict(imports="datasets", allow_failed_imports=False)
work_dir = "work_dirs/videomaev2"
launcher = "none"


# model settings
model = dict(
type="Recognizer3D",
backbone=dict(
type="VisionTransformer",
img_size=224,
patch_size=16,
embed_dims=384,
depth=12,
num_heads=6,
mlp_ratio=4,
qkv_bias=True,
num_frames=16,
norm_cfg=dict(type="LN", eps=1e-6),
),
cls_head=dict(
type="TimeSformerHead",
num_classes=3,
in_channels=384,
average_clips="prob",
multi_class=True,
),
# TODO: update this to fit our dataset
data_preprocessor=dict(
type="ActionDataPreprocessor",
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
format_shape="NCTHW",
),
)

# dataset settings
dataset_type = "HighQualityFallDataset"
ann_file_train = "tests/test_data/test_annotation.csv"

train_pipeline = [
dict(type="DecordInit"),
dict(type="ClipVideo"),
dict(type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1),
dict(type="DecordDecode"),
dict(type="Resize", scale=(-1, 224)),
dict(type="RandomResizedCrop"),
dict(type="Resize", scale=(224, 224), keep_ratio=False),
dict(type="Flip", flip_ratio=0.5),
dict(type="FormatShape", input_format="NCTHW"),
dict(type="PackActionInputs"),
]

train_dataloader = dict(
batch_size=1,
num_workers=8,
persistent_workers=False,
sampler=dict(type="DefaultSampler", shuffle=False),
dataset=dict(
type=dataset_type,
sampling_strategy=dict(type="UniformSampling", clip_len=10),
label_strategy=dict(
type="ExistenceLabel",
label_description=dict(
names=["fall", "lying", "other"],
start_timestamp_names=["fall_start", "lying_start"],
end_timestamp_names=["fall_end", "lying_end"],
visible_names=["fall_visible", "lying_visible"],
other_class=2,
),
),
ann_file=ann_file_train,
pipeline=train_pipeline,
multi_class=True,
num_classes=3,
),
)

train_cfg = dict(type="EpochBasedTrainLoop", max_epochs=5, val_interval=0)

param_scheduler = dict(
type="MultiStepLR", # Decays the learning rate once the number of epoch reaches one of the milestones
begin=0, # Step at which to start updating the learning rate
end=100, # Step at which to stop updating the learning rate
by_epoch=True, # Whether the scheduled learning rate is updated by epochs
milestones=[40, 80], # Steps to decay the learning rate
gamma=0.1,
)

optim_wrapper = dict(  # Config of optimizer wrapper
    type="OptimWrapper",  # Name of optimizer wrapper; switch to AmpOptimWrapper to enable mixed-precision training
    optimizer=dict(  # Config of optimizer. Supports all PyTorch optimizers: https://pytorch.org/docs/stable/optim.html#algorithms
        type="SGD",  # Name of optimizer
        lr=0.01,  # Learning rate
        momentum=0.9,  # Momentum factor
        weight_decay=0.0001,  # Weight decay
    ),
    clip_grad=dict(max_norm=40, norm_type=2),  # Config of gradient clipping
)
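
For context, a hedged sketch of how a config like this is typically launched with mmengine's `Runner`; the entry point and explicit import are assumptions, not part of this commit:

# Hypothetical training launch, equivalent in spirit to mmaction2's tools/train.py.
import datasets  # noqa: F401 -- registers the custom dataset before building
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile("configs/models/videomaev2.py")
runner = Runner.from_cfg(cfg)  # builds model, dataloader, and loops from the dicts above
runner.train()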
15 changes: 15 additions & 0 deletions containers/c3se_job_container.def
@@ -0,0 +1,15 @@
Bootstrap: localimage
From: /apps/containers/PyTorch/PyTorch-2.1.0-NGC-23.09.sif


%files
containers/requirements.txt
/mimer/NOBACKUP/groups/naiss2023-22-1160/human-fall-detection/mmaction2

%post
export DEBIAN_FRONTEND="noninteractive" && apt-get update -y && apt-get install -y python3-opencv
pip install -r containers/requirements.txt
mim install mmengine mmcv mmdet mmpose
cd /mimer/NOBACKUP/groups/naiss2023-22-1160/human-fall-detection/mmaction2 && pip install -v -e .
    # For some reason a conflicting version of OpenCV ends up installed; remove it
pip uninstall -y opencv
5 changes: 5 additions & 0 deletions containers/requirements.txt
@@ -0,0 +1,5 @@
# PyTorch is preinstalled in the image
openpyxl>=3.0
openmim>=0.3
ffmpeg-python>=0.2
dvc[s3]
5 changes: 5 additions & 0 deletions datasets/__init__.py
@@ -0,0 +1,5 @@
from .high_quality_fall_dataset import HighQualityFallDataset
from .transforms import * # noqa: F401, F403


__all__ = ["HighQualityFallDataset"]
118 changes: 118 additions & 0 deletions datasets/high_quality_fall_dataset.py
@@ -0,0 +1,118 @@
from typing import Callable, List, Optional, Union

import pandas as pd
from mmaction.datasets import BaseActionDataset
from mmaction.registry import DATASETS
from mmaction.utils import ConfigType
from mmengine.fileio import exists

from datasets.transforms.label_strategy import LabelStrategy
from datasets.transforms.sampling_strategy import SamplingStrategy
from registry import LABEL_STRATEGIES, SAMPLING_STRATEGIES


@DATASETS.register_module()
class HighQualityFallDataset(BaseActionDataset):
"""HighQualityFallDataset dataset for action recognition.
The dataset loads raw videos and applies specified transforms to return
a dict containing the frame tensors and other information.
It will sample clips from longer videos according to a `SamplingStrategy`
and add an `interval` key to the resulting dict.
This key may be processed by the `ClipVideo` transform.
The ann_file is a CSV file with the following columns:
- video_path: relative path to the video from the data prefix
- fall_start: timestamp in seconds of the start of the fall
- fall_end: timestamp in seconds of the end of the fall
- lying_start: timestamp in seconds of the start of the lying
- lying_end: timestamp in seconds of the end of the lying
- length: length of the video in seconds
- fall_visible: boolean indicating whether the fall is visible
on the video
- lying_visible: boolean indicating whether the lying is visible
on the video
Example of a annotation file:
| video_path | fall_start | fall_end | lying_start | lying_end | length |fall_visible | lying_visible |
|--------------------------------------------------|------------|----------|-------------|-----------|--------|-------------|---------------|
| data/Fall_Simulation_Data/videos/Fall30_Cam3.avi | 24.0 | 27.0 | 27.0 | 88.0 | 240.0 |True | True |
| data/Fall_Simulation_Data/videos/ADL16_Cam1.avi | | | | | 325 | | |
Args:
ann_file (str): Path to the annotation file.
sampling_strategy (SamplingStrategy): Strategy used to sample clips.
label_strategy (LabelStrategy): Strategy used to label clips.
pipeline (List[Union[dict, ConfigDict, Callable]]): A sequence of
data transforms. Should include a `ClipVideo` transform.
data_prefix (dict or ConfigDict): Path to a directory where videos
are held. Defaults to ``dict(video='')``.
multi_class (bool): Determines whether the dataset is a multi-class
dataset. Defaults to False.
num_classes (int, optional): Number of classes of the dataset, used in
multi-class datasets. Defaults to None.
start_index (int): Specify a start index for frames in consideration of
different filename format. However, when taking videos as input,
it should be set to 0, since frames loaded from videos count
from 0. Defaults to 0.
modality (str): Modality of data. Support ``'RGB'``, ``'Flow'``.
Defaults to ``'RGB'``.
test_mode (bool): Store True when building test or validation dataset.
Defaults to False."""

def __init__(
self,
ann_file: str,
sampling_strategy: SamplingStrategy | dict,
label_strategy: LabelStrategy | dict,
pipeline: List[Union[dict, Callable]],
data_prefix: ConfigType = dict(video=""),
multi_class: bool = False,
num_classes: Optional[int] = None,
start_index: int = 0,
modality: str = "RGB",
test_mode: bool = False,
**kwargs,
) -> None:
if isinstance(sampling_strategy, dict):
built_sampling_strategy = SAMPLING_STRATEGIES.build(sampling_strategy) # type: SamplingStrategy
else:
built_sampling_strategy = sampling_strategy
self.sampling_strategy = built_sampling_strategy
if isinstance(label_strategy, dict):
built_label_strategy = LABEL_STRATEGIES.build(label_strategy) # type: LabelStrategy
else:
built_label_strategy = label_strategy
self.label_strategy = built_label_strategy
super().__init__(
ann_file,
pipeline=pipeline,
data_prefix=data_prefix,
multi_class=multi_class,
num_classes=num_classes,
start_index=start_index,
modality=modality,
test_mode=test_mode,
)

def load_data_list(self) -> List[dict]:
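        """Load annotations from the CSV file and expand them into one
        entry per sampled clip."""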
        assert exists(self.ann_file), f"Annotation file {self.ann_file} does not exist."
annotations = pd.read_csv(self.ann_file)
data_list = []
for _, annotation in annotations.iterrows():
sampled_clips = self.sampling_strategy.sample(annotation)
labels = [
self.label_strategy.label(annotation, clip) for clip in sampled_clips
]

for clip, label in zip(sampled_clips, labels):
data_list.append(
{
"filename": annotation["video_path"],
"label": label,
"interval": clip,
}
)
return data_list
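
A minimal direct-construction sketch, assuming the test annotation file added in this commit and the strategies registered under `datasets`; the entry shape in the closing comment mirrors `load_data_list`, while the label format depends on the configured `LabelStrategy`:

# Hypothetical usage -- strategy configs mirror the test config in this commit.
from datasets import HighQualityFallDataset
from datasets.transforms.label_strategy import HQFD_LABEL_DESCRIPTION

dataset = HighQualityFallDataset(
    ann_file="tests/test_data/test_annotation.csv",
    sampling_strategy=dict(type="UniformSampling", clip_len=10),
    label_strategy=dict(
        type="ExistenceLabel", label_description=HQFD_LABEL_DESCRIPTION
    ),
    pipeline=[dict(type="DecordInit"), dict(type="ClipVideo")],
    multi_class=True,
    num_classes=3,
)
# Each load_data_list entry looks like:
# {"filename": "<video path>", "label": <strategy-dependent>, "interval": (start_s, end_s)}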
3 changes: 3 additions & 0 deletions datasets/transforms/__init__.py
@@ -0,0 +1,3 @@
from .clip_video import ClipVideo

__all__ = ["ClipVideo"]
42 changes: 42 additions & 0 deletions datasets/transforms/clip_video.py
@@ -0,0 +1,42 @@
from typing import Dict
from mmaction.registry import TRANSFORMS # type: ignore
from mmcv.transforms import BaseTransform # type: ignore


@TRANSFORMS.register_module()
class ClipVideo(BaseTransform):
"""Clip a video to a given interval.
Does not affect the video_reader. Just sets the `total_frames` and
`start_index` keys.
If a `start_index` key is already present, it will be offset.
Required Keys:
- `interval`: a tuple of two floats,
the start and end of the interval in seconds.
- `avg_fps`
Modified Keys:
- `start_index`
- `total_frames`
"""

def transform(self, results: Dict) -> Dict:
"""Perform the `ClipVideo` transformation.
Args:
results (dict): The result dict.
Returns:
dict: The result dict.
"""
interval = results["interval"]
fps = results["avg_fps"]
offset = results["start_index"] if "start_index" in results else 0
start_frame = int(interval[0] * fps) + offset
end_frame = int(interval[1] * fps) + offset
results["start_index"] = start_frame
results["total_frames"] = end_frame - start_frame
return results
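
A worked example of the frame arithmetic above, with illustrative numbers (a sketch, not a test from this commit):

# A 10 s interval starting at 24.0 s of a 30 fps video, no prior start_index offset:
# start_frame = int(24.0 * 30) + 0 = 720; end_frame = int(34.0 * 30) + 0 = 1020.
results = {"interval": (24.0, 34.0), "avg_fps": 30.0, "start_index": 0}
results = ClipVideo().transform(results)
assert results["start_index"] == 720
assert results["total_frames"] == 300  # 1020 - 720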