Skip to content

Commit

Permalink
Merge pull request #42 from Grutschus/40-experiments
Browse files Browse the repository at this point in the history
40 experiments
  • Loading branch information
Grutschus committed Dec 10, 2023
2 parents fd62a4e + 0c9b189 commit 0457f4a
Show file tree
Hide file tree
Showing 18 changed files with 2,594 additions and 360 deletions.
113 changes: 113 additions & 0 deletions configs/datasets/high-quality-fall_runner_k400-hyperparams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Base `Runner` config for high-quality-fall dataset."""

dataset_type = "HighQualityFallDataset"

# Three-class labelling; index 2 ("other") is passed as `other_class`.
label_strategy = dict(
    type="PriorityLabel",
    label_description=dict(
        names=["fall", "lying", "other"],
        start_timestamp_names=["fall_start", "lying_start"],
        end_timestamp_names=["fall_end", "lying_end"],
        visible_names=["fall_visible", "lying_visible"],
        other_class=2,
    ),
)

# Shared by the train, val and test datasets below.
# NOTE(review): clip_len is presumably given in seconds -- confirm against the
# sampling strategy implementation.
sampling_strategy = dict(type="UniformSampling", clip_len=10)


# TRAIN
ann_file_train = "data/Fall_Simulation_Data/annotations_train.csv"

# TODO: Add shape comments
# TODO: Think about augmentation steps
train_pipeline = [
    dict(type="DecordInit"),
    dict(type="ClipVideo"),
    dict(type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1),
    dict(type="DecordDecode"),
    dict(type="Resize", scale=(-1, 224)),
    dict(type="RandomCrop", size=224),
    dict(type="Resize", scale=(224, 224), keep_ratio=False),
    dict(type="Flip", flip_ratio=0.5),
    dict(type="FormatShape", input_format="NCTHW"),
    dict(type="PackActionInputs"),
]

train_dataloader = dict(
    batch_size=12,  # From VideoMAEv2 repo
    num_workers=8,
    persistent_workers=False,
    sampler=dict(type="DefaultSampler", shuffle=True),
    dataset=dict(
        type=dataset_type,
        sampling_strategy=sampling_strategy,
        label_strategy=label_strategy,
        ann_file=ann_file_train,
        pipeline=train_pipeline,
        num_classes=3,
        # indices=100,
    ),
)

# VALIDATION
ann_file_val = "data/Fall_Simulation_Data/annotations_val.csv"

val_pipeline = [
    dict(type="DecordInit"),
    dict(type="ClipVideo"),
    dict(
        type="SampleFrames",
        clip_len=16,
        frame_interval=4,
        num_clips=1,
        test_mode=True,
    ),
    dict(type="DecordDecode"),
    dict(type="Resize", scale=(-1, 224)),
    dict(type="CenterCrop", crop_size=224),  # From VideoMAEv2 repo
    dict(type="FormatShape", input_format="NCTHW"),
    dict(type="PackActionInputs"),
]

# val_dataloader = train_dataloader
val_dataloader = dict(
    batch_size=12,  # From VideoMAEv2 repo
    num_workers=8,
    persistent_workers=False,
    sampler=dict(type="DefaultSampler", shuffle=False),
    dataset=dict(
        type=dataset_type,
        sampling_strategy=sampling_strategy,
        label_strategy=label_strategy,
        ann_file=ann_file_val,
        pipeline=val_pipeline,
        num_classes=3,
    ),
)

# TEST
ann_file_test = "data/Fall_Simulation_Data/annotations_test.csv"

test_pipeline = [
    dict(type="DecordInit"),
    # Added for consistency with the train/val pipelines: the test dataset uses
    # the same `sampling_strategy`, so its samples need the same clip handling.
    # NOTE(review): assumes ClipVideo restricts frame sampling to the sampled
    # clip -- confirm against the transform implementation.
    dict(type="ClipVideo"),
    dict(
        type="SampleFrames",
        clip_len=16,
        frame_interval=4,
        num_clips=5,
        test_mode=True,
    ),  # From VideoMAEv2 repo
    dict(type="DecordDecode"),
    dict(type="Resize", scale=(-1, 224)),
    dict(type="ThreeCrop", crop_size=224),  # From VideoMAEv2 repo
    dict(type="FormatShape", input_format="NCTHW"),
    dict(type="PackActionInputs"),
]

test_dataloader = dict(
    batch_size=1,  # From VideoMAEv2 repo
    num_workers=8,
    persistent_workers=False,
    sampler=dict(type="DefaultSampler", shuffle=False),
    dataset=dict(
        type=dataset_type,
        sampling_strategy=sampling_strategy,
        label_strategy=label_strategy,
        ann_file=ann_file_test,
        pipeline=test_pipeline,
        num_classes=3,
    ),
)
8 changes: 6 additions & 2 deletions configs/experiments/overfitting_run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
_base_ = [
"../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py"
"../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py"
]

EXPERIMENT_NAME = "overfitting_run"
Expand All @@ -10,6 +10,7 @@

# Overrides
train_dataloader = dict(
batch_size=1,
sampler=dict(type="DefaultSampler", shuffle=False),
dataset=dict(
indices=100,
Expand All @@ -19,11 +20,14 @@
ann_file_val = "data/Fall_Simulation_Data/annotations_train.csv"

val_dataloader = dict(
num_workers=0,
persistent_workers=False,
batch_size=1,
dataset=dict(
ann_file=ann_file_val,
indices=100,
),
)

default_hooks = dict(checkpoint=dict(interval=0))
custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=10)]
custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=1)]
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Experiment: ViT-B/16, frame interval 8, Gaussian-sampled 5s training clips
# with drop ratios, priority labelling, K400-hyperparameter base config.
_base_ = [
    "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py",
]

EXPERIMENT_NAME = (
    "vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams"
)
visualizer = {
    "vis_backends": {"save_dir": f"experiments/tensorboard/{EXPERIMENT_NAME}/"},
}
work_dir = f"experiments/{EXPERIMENT_NAME}"

# --- Overrides on top of the base config ---
default_hooks = {"checkpoint": {"interval": 1}}

# 1487 samples in val -> 92 batches per node -> We want around 10 images
# NOTE(review): interval=300 is larger than the 92 batches quoted above --
# confirm the unit the hook counts in and the intended value.
custom_hooks = [
    {"type": "CustomVisualizationHook", "enable": True, "interval": 300},
]

# Swap the base backbone dimensions for ViT-B/16 and load matching weights.
model = {
    "backbone": {"embed_dims": 768, "depth": 12, "num_heads": 12},
    "cls_head": {"in_channels": 768},
}
load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"

# Training pipeline; the only change is frame_interval=8 in SampleFrames.
train_pipeline = [
    {"type": "DecordInit"},
    {"type": "ClipVideo"},
    {"type": "SampleFrames", "clip_len": 16, "frame_interval": 8, "num_clips": 1},
    {"type": "DecordDecode"},
    {"type": "Resize", "scale": (-1, 224)},
    {"type": "RandomCrop", "size": 224},
    {"type": "Resize", "scale": (224, 224), "keep_ratio": False},
    {"type": "Flip", "flip_ratio": 0.5},
    {"type": "FormatShape", "input_format": "NCTHW"},
    {"type": "PackActionInputs"},
]

# Gaussian sampling for training, with a uniform sampler as fallback.
train_dataloader = {
    "dataset": {
        "sampling_strategy": {
            "type": "GaussianSampling",
            "clip_len": 5,
            "fallback_sampler": {
                "type": "UniformSampling",
                "clip_len": 5,
                "stride": 5,
                "overlap": False,
            },
        },
        "drop_ratios": [0.0, 0.0, 0.30],
        "pipeline": train_pipeline,
    },
}

# Gaussian sampling requires labels, and a validation that uses labels in the
# preprocessing would not be valid. Validation therefore stays on uniform
# sampling; only the clip length and frame interval are overridden here.
# The test dataloader is not overridden in this file.
val_pipeline = [
    {"type": "DecordInit"},
    {"type": "ClipVideo"},
    {
        "type": "SampleFrames",
        "clip_len": 16,
        "frame_interval": 8,
        "num_clips": 1,
        "test_mode": True,
    },
    {"type": "DecordDecode"},
    {"type": "Resize", "scale": (-1, 224)},
    {"type": "CenterCrop", "crop_size": 224},  # From VideoMAEv2 repo
    {"type": "FormatShape", "input_format": "NCTHW"},
    {"type": "PackActionInputs"},
]

val_dataloader = {
    "dataset": {
        "sampling_strategy": {
            "type": "UniformSampling",
            "clip_len": 5,
            "stride": 0,
            "overlap": False,
        },
        "pipeline": val_pipeline,
    },
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Experiment: ViT-B/16, Gaussian-sampled 5s training clips with drop ratios,
# priority labelling, K400-hyperparameter base config.
_base_ = [
    "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py",
]

EXPERIMENT_NAME = "vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams"
visualizer = {
    "vis_backends": {"save_dir": f"experiments/tensorboard/{EXPERIMENT_NAME}/"},
}
work_dir = f"experiments/{EXPERIMENT_NAME}"

# --- Overrides on top of the base config ---
default_hooks = {"checkpoint": {"interval": 1}}

# 1487 samples in val -> 92 batches per node -> We want around 10 images
# NOTE(review): interval=300 is larger than the 92 batches quoted above --
# confirm the unit the hook counts in and the intended value.
custom_hooks = [
    {"type": "CustomVisualizationHook", "enable": True, "interval": 300},
]

# Swap the base backbone dimensions for ViT-B/16 and load matching weights.
model = {
    "backbone": {"embed_dims": 768, "depth": 12, "num_heads": 12},
    "cls_head": {"in_channels": 768},
}
load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"

# Gaussian sampling for training, with a uniform sampler as fallback.
train_dataloader = {
    "dataset": {
        "sampling_strategy": {
            "type": "GaussianSampling",
            "clip_len": 5,
            "fallback_sampler": {
                "type": "UniformSampling",
                "clip_len": 5,
                "stride": 5,
                "overlap": False,
            },
        },
        "drop_ratios": [0.0, 0.0, 0.30],
    },
}

# Gaussian sampling requires labels, and a validation that uses labels in the
# preprocessing would not be valid. Validation therefore stays on uniform
# sampling; only its clip length is overridden here. The test dataloader is
# not overridden in this file.
val_dataloader = {
    "dataset": {
        "sampling_strategy": {
            "type": "UniformSampling",
            "clip_len": 5,
            "stride": 0,
            "overlap": False,
        },
    },
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Experiment: ViT-B/16 with Gaussian clip sampling for training, priority
# labelling, K400-hyperparameter base config.
_base_ = [
    "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py",
]

EXPERIMENT_NAME = "vit-b_gaussian-sampling_priority-labeling_k400-hyperparams"
visualizer = {
    "vis_backends": {"save_dir": f"experiments/tensorboard/{EXPERIMENT_NAME}/"},
}
work_dir = f"experiments/{EXPERIMENT_NAME}"

# --- Overrides on top of the base config ---
default_hooks = {"checkpoint": {"interval": 1}}

# 1487 samples in val -> 92 batches per node -> We want around 10 images
# NOTE(review): interval=150 is larger than the 92 batches quoted above --
# confirm the unit the hook counts in and the intended value.
custom_hooks = [
    {"type": "CustomVisualizationHook", "enable": True, "interval": 150},
]

# Swap the base backbone dimensions for ViT-B/16 and load matching weights.
model = {
    "backbone": {"embed_dims": 768, "depth": 12, "num_heads": 12},
    "cls_head": {"in_channels": 768},
}
load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"

# Gaussian sampling is applied to training only. The val/test dataloaders are
# left unchanged: Gaussian sampling requires labels, and a validation that
# uses labels in the preprocessing would not be valid.
train_dataloader = {
    "dataset": {
        "sampling_strategy": {"type": "GaussianSampling", "clip_len": 10},
    },
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Experiment: ViT-B/16 with Gaussian clip sampling for training, priority
# labelling, paper-hyperparameter ("base") model config.
_base_ = [
    "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py",
]

EXPERIMENT_NAME = "vit-b_gaussian-sampling_priority-labeling_paper-hyperparams"
visualizer = {
    "vis_backends": {"save_dir": f"experiments/tensorboard/{EXPERIMENT_NAME}/"},
}
work_dir = f"experiments/{EXPERIMENT_NAME}"

# --- Overrides on top of the base config ---
default_hooks = {"checkpoint": {"interval": 3}}

# 1487 samples in val -> 372 per node -> 124 batches per node -> We want around 10 images
# -> Interval = 124 / 10 = 12
# NOTE(review): the configured interval is 10, not the 12 derived above --
# confirm which value is intended.
custom_hooks = [
    {"type": "CustomVisualizationHook", "enable": True, "interval": 10},
]

# Swap the base backbone dimensions for ViT-B/16 and load matching weights.
model = {
    "backbone": {"embed_dims": 768, "depth": 12, "num_heads": 12},
    "cls_head": {"in_channels": 768},
}
load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"

# Gaussian sampling is applied to training only. The val/test dataloaders are
# left unchanged: Gaussian sampling requires labels, and a validation that
# uses labels in the preprocessing would not be valid.
train_dataloader = {
    "dataset": {
        "sampling_strategy": {"type": "GaussianSampling", "clip_len": 10},
    },
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Experiment: ViT-B/16 with Gaussian clip sampling for training, priority
# labelling, paper-hyperparameter ("base") model config and a class-weighted
# cross-entropy loss.
_base_ = [
    "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py",
]

EXPERIMENT_NAME = "vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss"
visualizer = {
    "vis_backends": {"save_dir": f"experiments/tensorboard/{EXPERIMENT_NAME}/"},
}
work_dir = f"experiments/{EXPERIMENT_NAME}"

# --- Overrides on top of the base config ---
default_hooks = {"checkpoint": {"interval": 3}}

# 1487 samples in val -> 372 per node -> 124 batches per node -> We want around 10 images
# -> Interval = 124 / 10 = 12
# NOTE(review): the configured interval is 10, not the 12 derived above --
# confirm which value is intended.
custom_hooks = [
    {"type": "CustomVisualizationHook", "enable": True, "interval": 10},
]

# Use ViT-B/16 and attach a weighted CE loss to the classification head.
# weight_for_class_i = total_samples / (num_samples_in_class_i * num_classes)
model = {
    "backbone": {"embed_dims": 768, "depth": 12, "num_heads": 12},
    "cls_head": {
        "in_channels": 768,
        "loss_cls": {
            "type": "CrossEntropyLoss",
            "class_weight": [
                26.38235294117647,
                37.901408450704224,
                3.7168508287292816,
            ],
        },
    },
}
load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"

# Gaussian sampling is applied to training only. The val/test dataloaders are
# left unchanged: Gaussian sampling requires labels, and a validation that
# uses labels in the preprocessing would not be valid.
train_dataloader = {
    "dataset": {
        "sampling_strategy": {"type": "GaussianSampling", "clip_len": 10},
    },
}
Loading

0 comments on commit 0457f4a

Please sign in to comment.