Merge pull request #42 from Grutschus/40-experiments

40 experiments
Grutschus · Dec 10, 2023 · 0457f4a · 0457f4a
2 parents fd62a4e + 0c9b189
commit 0457f4a
Show file tree

Hide file tree

Showing 18 changed files with 2,594 additions and 360 deletions.
diff --git a/configs/datasets/high-quality-fall_runner_k400-hyperparams.py b/configs/datasets/high-quality-fall_runner_k400-hyperparams.py
@@ -0,0 +1,113 @@
+"""Base `Runner` config for high-quality-fall dataset."""
+
+dataset_type = "HighQualityFallDataset"
+
+label_strategy = dict(
+ type="PriorityLabel",
+ label_description=dict(
+ names=["fall", "lying", "other"],
+ start_timestamp_names=["fall_start", "lying_start"],
+ end_timestamp_names=["fall_end", "lying_end"],
+ visible_names=["fall_visible", "lying_visible"],
+ other_class=2,
+ ),
+)
+
+sampling_strategy = dict(type="UniformSampling", clip_len=10)
+
+
+# TRAIN
+ann_file_train = "data/Fall_Simulation_Data/annotations_train.csv"
+
+# TODO: Add shape comments
+# TODO: Think about augmentation steps
+train_pipeline = [
+ dict(type="DecordInit"),
+ dict(type="ClipVideo"),
+ dict(type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1),
+ dict(type="DecordDecode"),
+ dict(type="Resize", scale=(-1, 224)),
+ dict(type="RandomCrop", size=224),
+ dict(type="Resize", scale=(224, 224), keep_ratio=False),
+ dict(type="Flip", flip_ratio=0.5),
+ dict(type="FormatShape", input_format="NCTHW"),
+ dict(type="PackActionInputs"),
+]
+
+train_dataloader = dict(
+ batch_size=12, # From VideoMAEv2 repo
+ num_workers=8,
+ persistent_workers=False,
+ sampler=dict(type="DefaultSampler", shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ sampling_strategy=sampling_strategy,
+ label_strategy=label_strategy,
+ ann_file=ann_file_train,
+ pipeline=train_pipeline,
+ num_classes=3,
+ # indices=100,
+ ),
+)
+
+# VALIDATION
+ann_file_val = "data/Fall_Simulation_Data/annotations_val.csv"
+
+val_pipeline = [
+ dict(type="DecordInit"),
+ dict(type="ClipVideo"),
+ dict(
+ type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1, test_mode=True
+ ),
+ dict(type="DecordDecode"),
+ dict(type="Resize", scale=(-1, 224)),
+ dict(type="CenterCrop", crop_size=224), # From VideoMAEv2 repo
+ dict(type="FormatShape", input_format="NCTHW"),
+ dict(type="PackActionInputs"),
+]
+
+# val_dataloader = train_dataloader
+val_dataloader = dict(
+ batch_size=12, # From VideoMAEv2 repo
+ num_workers=8,
+ persistent_workers=False,
+ sampler=dict(type="DefaultSampler", shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ sampling_strategy=sampling_strategy,
+ label_strategy=label_strategy,
+ ann_file=ann_file_val,
+ pipeline=val_pipeline,
+ num_classes=3,
+ ),
+)
+
+# TEST
+ann_file_test = "data/Fall_Simulation_Data/annotations_test.csv"
+
+test_pipeline = [
+ dict(type="DecordInit"),
+ dict(
+ type="SampleFrames", clip_len=16, frame_interval=4, num_clips=5, test_mode=True
+ ), # From VideoMAEv2 repo
+ dict(type="DecordDecode"),
+ dict(type="Resize", scale=(-1, 224)),
+ dict(type="ThreeCrop", crop_size=224), # From VideoMAEv2 repo
+ dict(type="FormatShape", input_format="NCTHW"),
+ dict(type="PackActionInputs"),
+]
+
+test_dataloader = dict(
+ batch_size=1, # From VideoMAEv2 repo
+ num_workers=8,
+ persistent_workers=False,
+ sampler=dict(type="DefaultSampler", shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ sampling_strategy=sampling_strategy,
+ label_strategy=label_strategy,
+ ann_file=ann_file_test,
+ pipeline=test_pipeline,
+ num_classes=3,
+ ),
+)
diff --git a/configs/experiments/overfitting_run.py b/configs/experiments/overfitting_run.py
@@ -1,5 +1,5 @@
 _base_ = [
- "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py"
+ "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py"
 ]
 
 EXPERIMENT_NAME = "overfitting_run"
@@ -10,6 +10,7 @@
 
 # Overrides
 train_dataloader = dict(
+ batch_size=1,
  sampler=dict(type="DefaultSampler", shuffle=False),
  dataset=dict(
  indices=100,
@@ -19,11 +20,14 @@
 ann_file_val = "data/Fall_Simulation_Data/annotations_train.csv"
 
 val_dataloader = dict(
+ num_workers=0,
+ persistent_workers=False,
+ batch_size=1,
  dataset=dict(
  ann_file=ann_file_val,
  indices=100,
  ),
 )
 
 default_hooks = dict(checkpoint=dict(interval=0))
-custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=10)]
+custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=1)]
diff --git a/...it-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py b/...it-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py
@@ -0,0 +1,78 @@
+_base_ = [
+ "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py"
+]
+
+EXPERIMENT_NAME = "vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams"
+visualizer = dict(
+ vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/")
+)
+work_dir = f"experiments/{EXPERIMENT_NAME}"
+
+# Overrides
+default_hooks = dict(checkpoint=dict(interval=1))
+
+# 1487 samples in val -> 92 batches per node -> We want around 10 images
+custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=300)]
+
+# Use ViT-B/16
+model = dict(
+ backbone=dict(embed_dims=768, depth=12, num_heads=12),
+ cls_head=dict(in_channels=768),
+)
+load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"
+
+# Use frame_interval 8
+train_pipeline = [
+ dict(type="DecordInit"),
+ dict(type="ClipVideo"),
+ dict(
+ type="SampleFrames", clip_len=16, frame_interval=8, num_clips=1
+ ), # This has changed
+ dict(type="DecordDecode"),
+ dict(type="Resize", scale=(-1, 224)),
+ dict(type="RandomCrop", size=224),
+ dict(type="Resize", scale=(224, 224), keep_ratio=False),
+ dict(type="Flip", flip_ratio=0.5),
+ dict(type="FormatShape", input_format="NCTHW"),
+ dict(type="PackActionInputs"),
+]
+
+
+# Use Gaussian sampling
+train_dataloader = dict(
+ dataset=dict(
+ sampling_strategy=dict(
+ type="GaussianSampling",
+ clip_len=5,
+ fallback_sampler=dict(
+ type="UniformSampling", clip_len=5, stride=5, overlap=False
+ ),
+ ),
+ drop_ratios=[0.0, 0.0, 0.30],
+ pipeline=train_pipeline,
+ )
+)
+# Gaussian sampling is train-only: it requires labels, and we cannot have a valid
+# validation if we use labels in the preprocessing. Val only gets uniform 5s clips and frame interval 8.
+
+val_pipeline = [
+ dict(type="DecordInit"),
+ dict(type="ClipVideo"),
+ dict(
+ type="SampleFrames", clip_len=16, frame_interval=8, num_clips=1, test_mode=True
+ ),
+ dict(type="DecordDecode"),
+ dict(type="Resize", scale=(-1, 224)),
+ dict(type="CenterCrop", crop_size=224), # From VideoMAEv2 repo
+ dict(type="FormatShape", input_format="NCTHW"),
+ dict(type="PackActionInputs"),
+]
+
+val_dataloader = dict(
+ dataset=dict(
+ sampling_strategy=dict(
+ type="UniformSampling", clip_len=5, stride=0, overlap=False
+ ),
+ pipeline=val_pipeline,
+ ),
+)
diff --git a/...xperiments/vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py b/...xperiments/vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py
@@ -0,0 +1,47 @@
+_base_ = [
+ "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py"
+]
+
+EXPERIMENT_NAME = (
+ "vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams"
+)
+visualizer = dict(
+ vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/")
+)
+work_dir = f"experiments/{EXPERIMENT_NAME}"
+
+# Overrides
+default_hooks = dict(checkpoint=dict(interval=1))
+
+# 1487 samples in val -> 92 batches per node -> We want around 10 images
+custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=300)]
+
+# Use ViT-B/16
+model = dict(
+ backbone=dict(embed_dims=768, depth=12, num_heads=12),
+ cls_head=dict(in_channels=768),
+)
+load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"
+
+# Use Gaussian sampling
+train_dataloader = dict(
+ dataset=dict(
+ sampling_strategy=dict(
+ type="GaussianSampling",
+ clip_len=5,
+ fallback_sampler=dict(
+ type="UniformSampling", clip_len=5, stride=5, overlap=False
+ ),
+ ),
+ drop_ratios=[0.0, 0.0, 0.30],
+ )
+)
+# Gaussian sampling is train-only: it requires labels, and we cannot have a valid
+# validation if we use labels in the preprocessing. Val below only switches to uniform 5s clips.
+val_dataloader = dict(
+ dataset=dict(
+ sampling_strategy=dict(
+ type="UniformSampling", clip_len=5, stride=0, overlap=False
+ ),
+ ),
+)
diff --git a/configs/experiments/vit-b_gaussian-sampling_priority-labeling_k400-hyperparams.py b/configs/experiments/vit-b_gaussian-sampling_priority-labeling_k400-hyperparams.py
@@ -0,0 +1,29 @@
+_base_ = [
+ "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py"
+]
+
+EXPERIMENT_NAME = "vit-b_gaussian-sampling_priority-labeling_k400-hyperparams"
+visualizer = dict(
+ vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/")
+)
+work_dir = f"experiments/{EXPERIMENT_NAME}"
+
+# Overrides
+default_hooks = dict(checkpoint=dict(interval=1))
+
+# 1487 samples in val -> 92 batches per node -> We want around 10 images
+custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=150)]
+
+# Use ViT-B/16
+model = dict(
+ backbone=dict(embed_dims=768, depth=12, num_heads=12),
+ cls_head=dict(in_channels=768),
+)
+load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"
+
+# Use Gaussian sampling
+train_dataloader = dict(
+ dataset=dict(sampling_strategy=dict(type="GaussianSampling", clip_len=10))
+)
+# We are not changing the val/test dataloaders since gaussian sampling requires labels
+# and we cannot have a valid validation if we use labels in the preprocessing
diff --git a/configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams.py b/configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams.py
@@ -0,0 +1,30 @@
+_base_ = [
+ "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py"
+]
+
+EXPERIMENT_NAME = "vit-b_gaussian-sampling_priority-labeling_paper-hyperparams"
+visualizer = dict(
+ vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/")
+)
+work_dir = f"experiments/{EXPERIMENT_NAME}"
+
+# Overrides
+default_hooks = dict(checkpoint=dict(interval=3))
+
+# 1487 samples in val -> 372 per node -> 124 batches per node -> We want around 10 images
+# -> Interval = 124 / 10 ~= 12 (interval=10 is used below)
+custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=10)]
+
+# Use ViT-B/16
+model = dict(
+ backbone=dict(embed_dims=768, depth=12, num_heads=12),
+ cls_head=dict(in_channels=768),
+)
+load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"
+
+# Use Gaussian sampling
+train_dataloader = dict(
+ dataset=dict(sampling_strategy=dict(type="GaussianSampling", clip_len=10))
+)
+# We are not changing the val/test dataloaders since gaussian sampling requires labels
+# and we cannot have a valid validation if we use labels in the preprocessing
diff --git a/...periments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss.py b/...periments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss.py
@@ -0,0 +1,40 @@
+_base_ = [
+ "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py"
+]
+
+EXPERIMENT_NAME = (
+ "vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss"
+)
+visualizer = dict(
+ vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/")
+)
+work_dir = f"experiments/{EXPERIMENT_NAME}"
+
+# Overrides
+default_hooks = dict(checkpoint=dict(interval=3))
+
+# 1487 samples in val -> 372 per node -> 124 batches per node -> We want around 10 images
+# -> Interval = 124 / 10 ~= 12 (interval=10 is used below)
+custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=10)]
+
+# Use ViT-B/16
+# Add weighted CE loss
+# weight_for_class_i = total_samples / (num_samples_in_class_i * num_classes)
+model = dict(
+ backbone=dict(embed_dims=768, depth=12, num_heads=12),
+ cls_head=dict(
+ in_channels=768,
+ loss_cls=dict(
+ type="CrossEntropyLoss",
+ class_weight=[26.38235294117647, 37.901408450704224, 3.7168508287292816],
+ ),
+ ),
+)
+load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"
+
+# Use Gaussian sampling
+train_dataloader = dict(
+ dataset=dict(sampling_strategy=dict(type="GaussianSampling", clip_len=10))
+)
+# We are not changing the val/test dataloaders since gaussian sampling requires labels
+# and we cannot have a valid validation if we use labels in the preprocessing