Skip to content

Commit

Permalink
Experiment with revised lr (#58)
Browse files Browse the repository at this point in the history
* added av to apptainer

* fixed lr schedule

* renamed best experiment

* reduced number of workers due to OOM

* added config for reruns

* added resume flags to job scripts

* added safeguard against index out of bounds

* added test runs

* added model tests

* added latest experiment runs

* added high temp resolution config

* reduced num of workers

* ran high temp resolution exp
  • Loading branch information
Grutschus committed Jan 17, 2024
1 parent f314be4 commit e068ebb
Show file tree
Hide file tree
Showing 12 changed files with 163 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
# val_dataloader = train_dataloader
val_dataloader = dict(
batch_size=12, # From VideoMAEv2 repo
num_workers=8,
num_workers=6,
persistent_workers=False,
sampler=dict(type="DefaultSampler", shuffle=False),
dataset=dict(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Experiment config (see EXPERIMENT_NAME): ViT-B/16, frame interval 8,
# 5 s uniformly cut clips, drop_ratios=[0.0, 0.0, 0.30].
# Every setting that is not overridden here comes from the base config below.
_base_ = [
    "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py"
]

EXPERIMENT_NAME = "high_temporal_resolution_frame-int-8_cutup-5s-clips-30-drop_fixed_lr"

# Tensorboard logs and the work dir are both keyed by the experiment name.
visualizer = dict(
    vis_backends=dict(
        save_dir=f"model_tests/tensorboard/{EXPERIMENT_NAME}",
    ),
)
work_dir = f"model_tests/{EXPERIMENT_NAME}"

# ---------------------------------------------------------------------------
# Overrides of the base config
# ---------------------------------------------------------------------------

# Checkpoint hook interval set to 1.
default_hooks = dict(checkpoint=dict(interval=1))

# The visualization hook is kept here for reference but is disabled.
# Sizing note: 1487 samples in val -> 92 batches per node -> around 10 images wanted.
# custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=300)]

# Backbone/head dimensions for ViT-B/16, plus the matching pretrained weights.
model = dict(
    backbone=dict(
        embed_dims=768,
        depth=12,
        num_heads=12,
    ),
    cls_head=dict(in_channels=768),
)
load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth"

# ---------------------------------------------------------------------------
# Training data
# ---------------------------------------------------------------------------

# Training pipeline; the SampleFrames step (frame_interval=8) is the part that
# differs for this experiment.
train_pipeline = [
    dict(type="DecordInit"),
    dict(type="ClipVideo"),
    dict(
        type="SampleFrames",
        clip_len=16,
        frame_interval=8,
        num_clips=1,
    ),
    dict(type="DecordDecode"),
    dict(type="Resize", scale=(-1, 224)),
    dict(type="RandomCrop", size=224),
    dict(type="Resize", scale=(224, 224), keep_ratio=False),
    dict(type="Flip", flip_ratio=0.5),
    dict(type="FormatShape", input_format="NCTHW"),
    dict(type="PackActionInputs"),
]

# "Cutup" sampling: clips of clip_len=5 produced by UniformSampling.
train_dataloader = dict(
    dataset=dict(
        sampling_strategy=dict(type="UniformSampling", clip_len=5),
        drop_ratios=[0.0, 0.0, 0.30],
        pipeline=train_pipeline,
    ),
)

# ---------------------------------------------------------------------------
# Validation data
# ---------------------------------------------------------------------------
# Validation and test clips are cut with UniformSampling as well. Gaussian
# sampling requires labels, and a validation that uses labels in its
# preprocessing would not be valid.

val_pipeline = [
    dict(type="DecordInit"),
    dict(type="ClipVideo"),
    dict(
        type="SampleFrames",
        clip_len=16,
        frame_interval=8,
        num_clips=1,
        test_mode=True,
    ),
    dict(type="DecordDecode"),
    dict(type="Resize", scale=(-1, 224)),
    # CenterCrop setting taken from the VideoMAEv2 repo.
    dict(type="CenterCrop", crop_size=224),
    dict(type="FormatShape", input_format="NCTHW"),
    dict(type="PackActionInputs"),
]

val_dataloader = dict(
    dataset=dict(
        sampling_strategy=dict(
            type="UniformSampling",
            clip_len=5,
            stride=0,
            overlap=False,
        ),
        pipeline=val_pipeline,
    ),
)

# ---------------------------------------------------------------------------
# Test data
# ---------------------------------------------------------------------------

test_pipeline = [
    dict(type="DecordInit"),
    dict(type="ClipVideo"),
    # num_clips=5 and ThreeCrop follow the VideoMAEv2 repo.
    dict(
        type="SampleFrames",
        clip_len=16,
        frame_interval=8,
        num_clips=5,
        test_mode=True,
    ),
    dict(type="DecordDecode"),
    dict(type="Resize", scale=(-1, 224)),
    dict(type="ThreeCrop", crop_size=224),
    dict(type="FormatShape", input_format="NCTHW"),
    dict(type="PackActionInputs"),
]

test_dataloader = dict(
    num_workers=2,
    sampler=dict(type="DefaultSampler", shuffle=False),
    dataset=dict(
        pipeline=test_pipeline,
        sampling_strategy=dict(
            type="UniformSampling",
            clip_len=5,
            stride=1,
            overlap=True,
        ),
    ),
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
"../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py"
]

EXPERIMENT_NAME = "vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams"
EXPERIMENT_NAME = "frame-int-8_gaussian-sampling-5s-clips-30-drop_fixed_lr"
visualizer = dict(
vis_backends=dict(save_dir=f"model_tests/tensorboard/{EXPERIMENT_NAME}")
)
work_dir = f"experiments/{EXPERIMENT_NAME}"
work_dir = f"model_tests/{EXPERIMENT_NAME}"

# Overrides
default_hooks = dict(checkpoint=dict(interval=1))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
convert_to_iter_based=True,
eta_min=1e-6,
begin=5,
end=35, # TODO: Repeat most important experiments with this corrected
end=90,
),
]

Expand Down
3 changes: 2 additions & 1 deletion containers/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ ffmpeg-python>=0.2
dvc[s3]
dvclive>=3.3
tensorboard>=2.15
moviepy>=1.0
moviepy>=1.0
av>=11.0
4 changes: 4 additions & 0 deletions datasets/transforms/sampling_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ def sample(self, annotation: pd.Series) -> List[IntervalInSeconds]:

sample_list = []
for sample in samples:
if sample < (self.clip_len / 2) or sample > (
annotation["length"] - (self.clip_len / 2)
):
continue
start = max(0, sample - self.clip_len / 2)
end = min(annotation["length"], sample + self.clip_len / 2)
sample_list.append((start, end))
Expand Down
6 changes: 3 additions & 3 deletions experiments.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: e36ce5c5dd248684edb13f4602312b36.dir
size: 29342240865
nfiles: 114
- md5: 4f930a358ed1b9f9be761e679b989aa5.dir
size: 36389694277
nfiles: 140
hash: md5
path: experiments
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# SLURM job: run mmaction2's tools/test.py on a saved checkpoint of the
# "frame-int-8_gaussian-sampling-5s-clips-30-drop_fixed_lr" experiment.
# Resources: one node with four T4 GPUs, 24 h time limit.
#SBATCH -A NAISS2023-22-1160 -p alvis
#SBATCH -N 1 --gpus-per-node=T4:4
#SBATCH --time=24:00:00

# Positional arguments to test.py are the config file, then the checkpoint.
# PYTHONPATH is set to the current working directory; the command substitution
# is quoted so that a path containing whitespace is not word-split.
# One process per GPU is started through torch.distributed.launch.
apptainer exec \
    --env PYTHONPATH="$(pwd)" \
    containers/c3se_job_container.sif \
    python -m torch.distributed.launch --nproc_per_node=4 \
    mmaction2/tools/test.py \
    configs/experiments/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py \
    experiments/frame-int-8_gaussian-sampling-5s-clips-30-drop_fixed_lr/best_acc_unweighted_average_f1_epoch_38.pth \
    --work-dir model_tests/frame-int-8_gaussian-sampling-5s-clips-30-drop_fixed_lr \
    --dump model_tests/frame-int-8_gaussian-sampling-5s-clips-30-drop_fixed_lr/predictions.pkl \
    --show-dir model_tests/frame-int-8_gaussian-sampling-5s-clips-30-drop_fixed_lr/visualizations \
    --interval 10 \
    --launcher pytorch
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# SLURM job: run mmaction2's tools/test.py on a saved checkpoint of the
# "frame-int-8_cutup-5s-clips-30-drop_fixed_lr" experiment and write the
# results under model_tests/high_temporal_resolution_*.
# Resources: one node with four T4 GPUs, 24 h time limit.
#SBATCH -A NAISS2023-22-1160 -p alvis
#SBATCH -N 1 --gpus-per-node=T4:4
#SBATCH --time=24:00:00

# Positional arguments to test.py are the config file, then the checkpoint.
# PYTHONPATH is set to the current working directory; the command substitution
# is quoted so that a path containing whitespace is not word-split.
# NOTE(review): the config passed below is the plain cutup config, while all
# outputs go to the high_temporal_resolution_* directory. Confirm that this is
# the intended config for the high temporal resolution run.
apptainer exec \
    --env PYTHONPATH="$(pwd)" \
    containers/c3se_job_container.sif \
    python -m torch.distributed.launch --nproc_per_node=4 \
    mmaction2/tools/test.py \
    configs/experiments/vit-b_frame-int-8_cutup-5s-clips-30-drop_priority-labeling_k400-hyperparams.py \
    experiments/frame-int-8_cutup-5s-clips-30-drop_fixed_lr/best_acc_unweighted_average_f1_epoch_30.pth \
    --work-dir model_tests/high_temporal_resolution_frame-int-8_cutup-5s-clips-30-drop_fixed_lr \
    --dump model_tests/high_temporal_resolution_frame-int-8_cutup-5s-clips-30-drop_fixed_lr/predictions.pkl \
    --show-dir model_tests/high_temporal_resolution_frame-int-8_cutup-5s-clips-30-drop_fixed_lr/visualizations \
    --interval 2000 \
    --launcher pytorch
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# SLURM job: run mmaction2's tools/train.py with the cutup experiment config,
# passing "--resume auto".
# Resources: one node with a single A40 GPU, 48 h time limit.
#SBATCH -A NAISS2023-22-1160 -p alvis
#SBATCH -N 1 --gpus-per-node=A40:1
#SBATCH --time=48:00:00

# PYTHONPATH is set to the current working directory; the command substitution
# is quoted so that a path containing whitespace is not word-split.
apptainer exec \
    --env PYTHONPATH="$(pwd)" \
    containers/c3se_job_container.sif \
    python mmaction2/tools/train.py \
    configs/experiments/vit-b_frame-int-8_cutup-5s-clips-30-drop_priority-labeling_k400-hyperparams.py \
    --resume auto
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ apptainer exec \
--env PYTHONPATH=$(pwd) \
containers/c3se_job_container.sif \
python mmaction2/tools/train.py \
configs/experiments/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py
configs/experiments/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py \
--resume auto
6 changes: 3 additions & 3 deletions model_tests.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: ea5466afdb49cf3effcf204a4bc52bd5.dir
size: 4737669843
nfiles: 63856
- md5: bee28e9198acbeb39b6498f3d20f6c08.dir
size: 9864631908
nfiles: 132513
hash: md5
path: model_tests

0 comments on commit e068ebb

Please sign in to comment.