Commit 6636bdb

Merge branch 'main' into 9-preprocessing-cutup-script

Grutschus committed Nov 22, 2023
2 parents 574e687 + 3bfe75c
Showing 26 changed files with 807 additions and 2 deletions.
1 change: 1 addition & 0 deletions .dvc/config
@@ -1,6 +1,7 @@
[core]
remote = storage
autostage = true
hardlink_lock = true
['remote "storage"']
url = s3://human-fall-detection/
endpointurl = https://s3.tebi.io
3 changes: 3 additions & 0 deletions .gitignore
@@ -158,3 +158,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Container files
**.sif
1 change: 1 addition & 0 deletions .gitmodules
@@ -1,3 +1,4 @@
[submodule "mmaction2"]
path = mmaction2
url = git@github.com:Grutschus/mmaction2.git
branch = custom_features
16 changes: 16 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,16 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Debug Tests",
"type": "python",
"request": "launch",
"program": "${file}",
"purpose": [
"debug-test"
],
"console": "integratedTerminal",
"justMyCode": false
}
]
}
11 changes: 10 additions & 1 deletion .vscode/settings.json
@@ -1,5 +1,14 @@
{
"ruff.format.args": [
"--config=pyproject.toml"
]
],
"mypy-type-checker.args": [
"--config-file=pyproject.toml"
],
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"jupyter.debugJustMyCode": false
}
22 changes: 22 additions & 0 deletions configs/datasets/ds_uniformsample_existencelabel.py
@@ -0,0 +1,22 @@
from datasets.transforms.label_strategy import HQFD_LABEL_DESCRIPTION

custom_imports = dict(imports="datasets", allow_failed_imports=False)
type = "HighQualityFallDataset"
sampling_strategy = dict(type="UniformSampling", clip_len=10)
label_strategy = dict(type="ExistenceLabel", label_description=HQFD_LABEL_DESCRIPTION)
ann_file = "tests/test_data/test_annotation.csv"
pipeline = [
dict(type="DecordInit"),
dict(type="ClipVideo"),
dict(type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1),
dict(type="DecordDecode"),
dict(type="Resize", scale=(-1, 224)),
dict(type="RandomResizedCrop"),
dict(type="Resize", scale=(224, 224), keep_ratio=False),
dict(type="Flip", flip_ratio=0.5),
dict(type="FormatShape", input_format="NCTHW"),
dict(type="PackActionInputs"),
] # type: ignore
multiclass = True
num_classes = 3
test_mode = True
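
A minimal consumption sketch for this config, assuming mmengine's `Config` loader and that the repository root is on `PYTHONPATH`; the keyword mapping (e.g. passing the config's `multiclass` as the dataset's `multi_class` argument) is an illustrative assumption, not part of this commit:

# Hypothetical sketch: load the config above and build the dataset from it.
import datasets  # noqa: F401 -- registers HighQualityFallDataset and its transforms
from mmengine.config import Config
from mmaction.registry import DATASETS

cfg = Config.fromfile("configs/datasets/ds_uniformsample_existencelabel.py")
dataset = DATASETS.build(
    dict(
        type=cfg.type,
        sampling_strategy=cfg.sampling_strategy,
        label_strategy=cfg.label_strategy,
        ann_file=cfg.ann_file,
        pipeline=cfg.pipeline,
        multi_class=cfg.multiclass,  # assumed mapping; the config key is `multiclass`
        num_classes=cfg.num_classes,
        test_mode=cfg.test_mode,
    )
)
print(len(dataset))  # one entry per sampled clip across all annotated videos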
101 changes: 101 additions & 0 deletions configs/models/videomaev2.py
@@ -0,0 +1,101 @@
_base_ = ["../../mmaction2/configs/_base_/default_runtime.py"]

custom_imports = dict(imports="datasets", allow_failed_imports=False)
work_dir = "work_dirs/videomaev2"
launcher = "none"


# model settings
model = dict(
type="Recognizer3D",
backbone=dict(
type="VisionTransformer",
img_size=224,
patch_size=16,
embed_dims=384,
depth=12,
num_heads=6,
mlp_ratio=4,
qkv_bias=True,
num_frames=16,
norm_cfg=dict(type="LN", eps=1e-6),
),
cls_head=dict(
type="TimeSformerHead",
num_classes=3,
in_channels=384,
average_clips="prob",
multi_class=True,
),
# TODO: update this to fit our dataset
data_preprocessor=dict(
type="ActionDataPreprocessor",
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
format_shape="NCTHW",
),
)

# dataset settings
dataset_type = "HighQualityFallDataset"
ann_file_train = "tests/test_data/test_annotation.csv"

train_pipeline = [
dict(type="DecordInit"),
dict(type="ClipVideo"),
dict(type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1),
dict(type="DecordDecode"),
dict(type="Resize", scale=(-1, 224)),
dict(type="RandomResizedCrop"),
dict(type="Resize", scale=(224, 224), keep_ratio=False),
dict(type="Flip", flip_ratio=0.5),
dict(type="FormatShape", input_format="NCTHW"),
dict(type="PackActionInputs"),
]

train_dataloader = dict(
batch_size=1,
num_workers=8,
persistent_workers=False,
sampler=dict(type="DefaultSampler", shuffle=False),
dataset=dict(
type=dataset_type,
sampling_strategy=dict(type="UniformSampling", clip_len=10),
label_strategy=dict(
type="ExistenceLabel",
label_description=dict(
names=["fall", "lying", "other"],
start_timestamp_names=["fall_start", "lying_start"],
end_timestamp_names=["fall_end", "lying_end"],
visible_names=["fall_visible", "lying_visible"],
other_class=2,
),
),
ann_file=ann_file_train,
pipeline=train_pipeline,
multi_class=True,
num_classes=3,
),
)

train_cfg = dict(type="EpochBasedTrainLoop", max_epochs=5, val_interval=0)

param_scheduler = dict(
type="MultiStepLR", # Decays the learning rate once the number of epoch reaches one of the milestones
begin=0, # Step at which to start updating the learning rate
end=100, # Step at which to stop updating the learning rate
by_epoch=True, # Whether the scheduled learning rate is updated by epochs
milestones=[40, 80], # Steps to decay the learning rate
gamma=0.1,
)

optim_wrapper = dict(  # Config of optimizer wrapper
    type="OptimWrapper",  # Name of optimizer wrapper; switch to AmpOptimWrapper to enable mixed-precision training
    optimizer=dict(  # Config of optimizer. Supports all PyTorch optimizers: https://pytorch.org/docs/stable/optim.html#algorithms
        type="SGD",  # Name of optimizer
        lr=0.01,  # Learning rate
        momentum=0.9,  # Momentum factor
        weight_decay=0.0001,  # Weight decay
    ),
    clip_grad=dict(max_norm=40, norm_type=2),  # Config of gradient clipping
)
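
For context, a hedged sketch of how a config like this is typically launched with mmengine's `Runner`; the entry point and explicit import are assumptions, not part of this commit:

# Hypothetical training launch, equivalent in spirit to mmaction2's tools/train.py.
import datasets  # noqa: F401 -- registers the custom dataset before building
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile("configs/models/videomaev2.py")
runner = Runner.from_cfg(cfg)  # builds model, dataloader, and loops from the dicts above
runner.train()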
15 changes: 15 additions & 0 deletions containers/c3se_job_container.def
@@ -0,0 +1,15 @@
Bootstrap: localimage
From: /apps/containers/PyTorch/PyTorch-2.1.0-NGC-23.09.sif


%files
containers/requirements.txt
/mimer/NOBACKUP/groups/naiss2023-22-1160/human-fall-detection/mmaction2

%post
export DEBIAN_FRONTEND="noninteractive" && apt-get update -y && apt-get install -y python3-opencv
pip install -r containers/requirements.txt
mim install mmengine mmcv mmdet mmpose
cd /mimer/NOBACKUP/groups/naiss2023-22-1160/human-fall-detection/mmaction2 && pip install -v -e .
    # For some reason a conflicting version of OpenCV ends up installed; remove it
pip uninstall -y opencv
5 changes: 5 additions & 0 deletions containers/requirements.txt
@@ -0,0 +1,5 @@
# PyTorch is preinstalled in the image
openpyxl>=3.0
openmim>=0.3
ffmpeg-python>=0.2
dvc[s3]
5 changes: 5 additions & 0 deletions datasets/__init__.py
@@ -0,0 +1,5 @@
from .high_quality_fall_dataset import HighQualityFallDataset
from .transforms import * # noqa: F401, F403


__all__ = ["HighQualityFallDataset"]
118 changes: 118 additions & 0 deletions datasets/high_quality_fall_dataset.py
@@ -0,0 +1,118 @@
from typing import Callable, List, Optional, Union

import pandas as pd
from mmaction.datasets import BaseActionDataset
from mmaction.registry import DATASETS
from mmaction.utils import ConfigType
from mmengine.fileio import exists

from datasets.transforms.label_strategy import LabelStrategy
from datasets.transforms.sampling_strategy import SamplingStrategy
from registry import LABEL_STRATEGIES, SAMPLING_STRATEGIES


@DATASETS.register_module()
class HighQualityFallDataset(BaseActionDataset):
"""HighQualityFallDataset dataset for action recognition.
The dataset loads raw videos and applies specified transforms to return
a dict containing the frame tensors and other information.
It will sample clips from longer videos according to a `SamplingStrategy`
and add an `interval` key to the resulting dict.
This key may be processed by the `ClipVideo` transform.
The ann_file is a CSV file with the following columns:
- video_path: relative path to the video from the data prefix
- fall_start: timestamp in seconds of the start of the fall
- fall_end: timestamp in seconds of the end of the fall
- lying_start: timestamp in seconds of the start of the lying
- lying_end: timestamp in seconds of the end of the lying
- length: length of the video in seconds
- fall_visible: boolean indicating whether the fall is visible
on the video
- lying_visible: boolean indicating whether the lying is visible
on the video
Example of a annotation file:
| video_path | fall_start | fall_end | lying_start | lying_end | length |fall_visible | lying_visible |
|--------------------------------------------------|------------|----------|-------------|-----------|--------|-------------|---------------|
| data/Fall_Simulation_Data/videos/Fall30_Cam3.avi | 24.0 | 27.0 | 27.0 | 88.0 | 240.0 |True | True |
| data/Fall_Simulation_Data/videos/ADL16_Cam1.avi | | | | | 325 | | |
Args:
ann_file (str): Path to the annotation file.
sampling_strategy (SamplingStrategy): Strategy used to sample clips.
label_strategy (LabelStrategy): Strategy used to label clips.
pipeline (List[Union[dict, ConfigDict, Callable]]): A sequence of
data transforms. Should include a `ClipVideo` transform.
data_prefix (dict or ConfigDict): Path to a directory where videos
are held. Defaults to ``dict(video='')``.
multi_class (bool): Determines whether the dataset is a multi-class
dataset. Defaults to False.
num_classes (int, optional): Number of classes of the dataset, used in
multi-class datasets. Defaults to None.
start_index (int): Specify a start index for frames in consideration of
different filename format. However, when taking videos as input,
it should be set to 0, since frames loaded from videos count
from 0. Defaults to 0.
modality (str): Modality of data. Support ``'RGB'``, ``'Flow'``.
Defaults to ``'RGB'``.
test_mode (bool): Store True when building test or validation dataset.
Defaults to False."""

def __init__(
self,
ann_file: str,
sampling_strategy: SamplingStrategy | dict,
label_strategy: LabelStrategy | dict,
pipeline: List[Union[dict, Callable]],
data_prefix: ConfigType = dict(video=""),
multi_class: bool = False,
num_classes: Optional[int] = None,
start_index: int = 0,
modality: str = "RGB",
test_mode: bool = False,
**kwargs,
) -> None:
if isinstance(sampling_strategy, dict):
built_sampling_strategy = SAMPLING_STRATEGIES.build(sampling_strategy) # type: SamplingStrategy
else:
built_sampling_strategy = sampling_strategy
self.sampling_strategy = built_sampling_strategy
if isinstance(label_strategy, dict):
built_label_strategy = LABEL_STRATEGIES.build(label_strategy) # type: LabelStrategy
else:
built_label_strategy = label_strategy
self.label_strategy = built_label_strategy
super().__init__(
ann_file,
pipeline=pipeline,
data_prefix=data_prefix,
multi_class=multi_class,
num_classes=num_classes,
start_index=start_index,
modality=modality,
test_mode=test_mode,
)

def load_data_list(self) -> List[dict]:
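        """Load annotations from the CSV file and expand them into one
        entry per sampled clip."""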
        assert exists(self.ann_file), f"Annotation file {self.ann_file} does not exist."
annotations = pd.read_csv(self.ann_file)
data_list = []
for _, annotation in annotations.iterrows():
sampled_clips = self.sampling_strategy.sample(annotation)
labels = [
self.label_strategy.label(annotation, clip) for clip in sampled_clips
]

for clip, label in zip(sampled_clips, labels):
data_list.append(
{
"filename": annotation["video_path"],
"label": label,
"interval": clip,
}
)
return data_list
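
A minimal direct-construction sketch, assuming the test annotation file added in this commit and the strategies registered under `datasets`; the entry shape in the closing comment mirrors `load_data_list`, while the label format depends on the configured `LabelStrategy`:

# Hypothetical usage -- strategy configs mirror the test config in this commit.
from datasets import HighQualityFallDataset
from datasets.transforms.label_strategy import HQFD_LABEL_DESCRIPTION

dataset = HighQualityFallDataset(
    ann_file="tests/test_data/test_annotation.csv",
    sampling_strategy=dict(type="UniformSampling", clip_len=10),
    label_strategy=dict(
        type="ExistenceLabel", label_description=HQFD_LABEL_DESCRIPTION
    ),
    pipeline=[dict(type="DecordInit"), dict(type="ClipVideo")],
    multi_class=True,
    num_classes=3,
)
# Each load_data_list entry looks like:
# {"filename": "<video path>", "label": <strategy-dependent>, "interval": (start_s, end_s)}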
3 changes: 3 additions & 0 deletions datasets/transforms/__init__.py
@@ -0,0 +1,3 @@
from .clip_video import ClipVideo

__all__ = ["ClipVideo"]
42 changes: 42 additions & 0 deletions datasets/transforms/clip_video.py
@@ -0,0 +1,42 @@
from typing import Dict
from mmaction.registry import TRANSFORMS # type: ignore
from mmcv.transforms import BaseTransform # type: ignore


@TRANSFORMS.register_module()
class ClipVideo(BaseTransform):
"""Clip a video to a given interval.
Does not affect the video_reader. Just sets the `total_frames` and
`start_index` keys.
If a `start_index` key is already present, it will be offset.
Required Keys:
- `interval`: a tuple of two floats,
the start and end of the interval in seconds.
- `avg_fps`
Modified Keys:
- `start_index`
- `total_frames`
"""

def transform(self, results: Dict) -> Dict:
"""Perform the `ClipVideo` transformation.
Args:
results (dict): The result dict.
Returns:
dict: The result dict.
"""
interval = results["interval"]
fps = results["avg_fps"]
offset = results["start_index"] if "start_index" in results else 0
start_frame = int(interval[0] * fps) + offset
end_frame = int(interval[1] * fps) + offset
results["start_index"] = start_frame
results["total_frames"] = end_frame - start_frame
return results
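
A worked example of the frame arithmetic above, with illustrative numbers (a sketch, not a test from this commit):

# A 10 s interval starting at 24.0 s of a 30 fps video, no prior start_index offset:
# start_frame = int(24.0 * 30) + 0 = 720; end_frame = int(34.0 * 30) + 0 = 1020.
results = {"interval": (24.0, 34.0), "avg_fps": 30.0, "start_index": 0}
results = ClipVideo().transform(results)
assert results["start_index"] == 720
assert results["total_frames"] == 300  # 1020 - 720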