diff --git a/configs/datasets/high-quality-fall_runner_k400-hyperparams.py b/configs/datasets/high-quality-fall_runner_k400-hyperparams.py new file mode 100644 index 0000000..29bddfd --- /dev/null +++ b/configs/datasets/high-quality-fall_runner_k400-hyperparams.py @@ -0,0 +1,113 @@ +"""Base `Runner` config for high-quality-fall dataset.""" + +dataset_type = "HighQualityFallDataset" + +label_strategy = dict( + type="PriorityLabel", + label_description=dict( + names=["fall", "lying", "other"], + start_timestamp_names=["fall_start", "lying_start"], + end_timestamp_names=["fall_end", "lying_end"], + visible_names=["fall_visible", "lying_visible"], + other_class=2, + ), +) + +sampling_strategy = dict(type="UniformSampling", clip_len=10) + + +# TRAIN +ann_file_train = "data/Fall_Simulation_Data/annotations_train.csv" + +# TODO: Add shape comments +# TODO: Think about augmentation steps +train_pipeline = [ + dict(type="DecordInit"), + dict(type="ClipVideo"), + dict(type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1), + dict(type="DecordDecode"), + dict(type="Resize", scale=(-1, 224)), + dict(type="RandomCrop", size=224), + dict(type="Resize", scale=(224, 224), keep_ratio=False), + dict(type="Flip", flip_ratio=0.5), + dict(type="FormatShape", input_format="NCTHW"), + dict(type="PackActionInputs"), +] + +train_dataloader = dict( + batch_size=12, # From VideoMAEv2 repo + num_workers=8, + persistent_workers=False, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=dataset_type, + sampling_strategy=sampling_strategy, + label_strategy=label_strategy, + ann_file=ann_file_train, + pipeline=train_pipeline, + num_classes=3, + # indices=100, + ), +) + +# VALIDATION +ann_file_val = "data/Fall_Simulation_Data/annotations_val.csv" + +val_pipeline = [ + dict(type="DecordInit"), + dict(type="ClipVideo"), + dict( + type="SampleFrames", clip_len=16, frame_interval=4, num_clips=1, test_mode=True + ), + dict(type="DecordDecode"), + dict(type="Resize", 
scale=(-1, 224)), + dict(type="CenterCrop", crop_size=224), # From VideoMAEv2 repo + dict(type="FormatShape", input_format="NCTHW"), + dict(type="PackActionInputs"), +] + +# val_dataloader = train_dataloader +val_dataloader = dict( + batch_size=12, # From VideoMAEv2 repo + num_workers=8, + persistent_workers=False, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=dataset_type, + sampling_strategy=sampling_strategy, + label_strategy=label_strategy, + ann_file=ann_file_val, + pipeline=val_pipeline, + num_classes=3, + ), +) + +# TEST +ann_file_test = "data/Fall_Simulation_Data/annotations_test.csv" + +test_pipeline = [ + dict(type="DecordInit"), + dict( + type="SampleFrames", clip_len=16, frame_interval=4, num_clips=5, test_mode=True + ), # From VideoMAEv2 repo + dict(type="DecordDecode"), + dict(type="Resize", scale=(-1, 224)), + dict(type="ThreeCrop", crop_size=224), # From VideoMAEv2 repo + dict(type="FormatShape", input_format="NCTHW"), + dict(type="PackActionInputs"), +] + +test_dataloader = dict( + batch_size=1, # From VideoMAEv2 repo + num_workers=8, + persistent_workers=False, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=dataset_type, + sampling_strategy=sampling_strategy, + label_strategy=label_strategy, + ann_file=ann_file_test, + pipeline=test_pipeline, + num_classes=3, + ), +) diff --git a/configs/experiments/overfitting_run.py b/configs/experiments/overfitting_run.py index 06c3afc..06800f4 100644 --- a/configs/experiments/overfitting_run.py +++ b/configs/experiments/overfitting_run.py @@ -1,5 +1,5 @@ _base_ = [ - "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py" + "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py" ] EXPERIMENT_NAME = "overfitting_run" @@ -10,6 +10,7 @@ # Overrides train_dataloader = dict( + batch_size=1, sampler=dict(type="DefaultSampler", shuffle=False), dataset=dict( indices=100, @@ -19,6 +20,9 @@ 
ann_file_val = "data/Fall_Simulation_Data/annotations_train.csv" val_dataloader = dict( + num_workers=0, + persistent_workers=False, + batch_size=1, dataset=dict( ann_file=ann_file_val, indices=100, @@ -26,4 +30,4 @@ ) default_hooks = dict(checkpoint=dict(interval=0)) -custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=10)] +custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=1)] diff --git a/configs/experiments/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py b/configs/experiments/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py new file mode 100644 index 0000000..a1bf91a --- /dev/null +++ b/configs/experiments/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py @@ -0,0 +1,78 @@ +_base_ = [ + "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py" +] + +EXPERIMENT_NAME = "vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams" +visualizer = dict( + vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/") +) +work_dir = f"experiments/{EXPERIMENT_NAME}" + +# Overrides +default_hooks = dict(checkpoint=dict(interval=1)) + +# 1487 samples in val -> 92 batches per node -> We want around 10 images +custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=300)] + +# Use ViT-B/16 +model = dict( + backbone=dict(embed_dims=768, depth=12, num_heads=12), + cls_head=dict(in_channels=768), +) +load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth" + +# Use frame_interval 8 +train_pipeline = [ + dict(type="DecordInit"), + dict(type="ClipVideo"), + dict( + type="SampleFrames", clip_len=16, frame_interval=8, num_clips=1 + ), # This has changed + dict(type="DecordDecode"), + dict(type="Resize", scale=(-1, 224)), + dict(type="RandomCrop", size=224), + 
dict(type="Resize", scale=(224, 224), keep_ratio=False), + dict(type="Flip", flip_ratio=0.5), + dict(type="FormatShape", input_format="NCTHW"), + dict(type="PackActionInputs"), +] + + +# Use Gaussian sampling +train_dataloader = dict( + dataset=dict( + sampling_strategy=dict( + type="GaussianSampling", + clip_len=5, + fallback_sampler=dict( + type="UniformSampling", clip_len=5, stride=5, overlap=False + ), + ), + drop_ratios=[0.0, 0.0, 0.30], + pipeline=train_pipeline, + ) +) +# The val dataloader keeps uniform sampling (only clip_len/stride and the pipeline change) since gaussian sampling requires labels +# and we cannot have a valid validation if we use labels in the preprocessing + +val_pipeline = [ + dict(type="DecordInit"), + dict(type="ClipVideo"), + dict( + type="SampleFrames", clip_len=16, frame_interval=8, num_clips=1, test_mode=True + ), + dict(type="DecordDecode"), + dict(type="Resize", scale=(-1, 224)), + dict(type="CenterCrop", crop_size=224), # From VideoMAEv2 repo + dict(type="FormatShape", input_format="NCTHW"), + dict(type="PackActionInputs"), +] + +val_dataloader = dict( + dataset=dict( + sampling_strategy=dict( + type="UniformSampling", clip_len=5, stride=0, overlap=False + ), + pipeline=val_pipeline, + ), +) diff --git a/configs/experiments/vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py b/configs/experiments/vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py new file mode 100644 index 0000000..b1d81d7 --- /dev/null +++ b/configs/experiments/vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py @@ -0,0 +1,47 @@ +_base_ = [ + "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py" +] + +EXPERIMENT_NAME = ( + "vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams" +) +visualizer = dict( + vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/") +) +work_dir = f"experiments/{EXPERIMENT_NAME}" + +# Overrides +default_hooks =
dict(checkpoint=dict(interval=1)) + +# 1487 samples in val -> 92 batches per node -> We want around 10 images +custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=300)] + +# Use ViT-B/16 +model = dict( + backbone=dict(embed_dims=768, depth=12, num_heads=12), + cls_head=dict(in_channels=768), +) +load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth" + +# Use Gaussian sampling +train_dataloader = dict( + dataset=dict( + sampling_strategy=dict( + type="GaussianSampling", + clip_len=5, + fallback_sampler=dict( + type="UniformSampling", clip_len=5, stride=5, overlap=False + ), + ), + drop_ratios=[0.0, 0.0, 0.30], + ) +) +# The val dataloader keeps uniform sampling (only clip_len/stride change) since gaussian sampling requires labels +# and we cannot have a valid validation if we use labels in the preprocessing +val_dataloader = dict( + dataset=dict( + sampling_strategy=dict( + type="UniformSampling", clip_len=5, stride=0, overlap=False + ), + ), +) diff --git a/configs/experiments/vit-b_gaussian-sampling_priority-labeling_k400-hyperparams.py b/configs/experiments/vit-b_gaussian-sampling_priority-labeling_k400-hyperparams.py new file mode 100644 index 0000000..3cc96a5 --- /dev/null +++ b/configs/experiments/vit-b_gaussian-sampling_priority-labeling_k400-hyperparams.py @@ -0,0 +1,29 @@ +_base_ = [ + "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py" +] + +EXPERIMENT_NAME = "vit-b_gaussian-sampling_priority-labeling_k400-hyperparams" +visualizer = dict( + vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/") +) +work_dir = f"experiments/{EXPERIMENT_NAME}" + +# Overrides +default_hooks = dict(checkpoint=dict(interval=1)) + +# 1487 samples in val -> 92 batches per node -> We want around 10 images +custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=150)] + +# Use ViT-B/16 +model = dict( + backbone=dict(embed_dims=768, depth=12, num_heads=12),
+ cls_head=dict(in_channels=768), +) +load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth" + +# Use Gaussian sampling +train_dataloader = dict( + dataset=dict(sampling_strategy=dict(type="GaussianSampling", clip_len=10)) +) +# We are not changing the val/test dataloaders since gaussian sampling requires labels +# and we cannot have a valid validation if we use labels in the preprocessing diff --git a/configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams.py b/configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams.py new file mode 100644 index 0000000..03352ac --- /dev/null +++ b/configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams.py @@ -0,0 +1,30 @@ +_base_ = [ + "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py" +] + +EXPERIMENT_NAME = "vit-b_gaussian-sampling_priority-labeling_paper-hyperparams" +visualizer = dict( + vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/") +) +work_dir = f"experiments/{EXPERIMENT_NAME}" + +# Overrides +default_hooks = dict(checkpoint=dict(interval=3)) + +# 1487 samples in val -> 372 per node -> 124 batches per node -> We want around 10 images +# -> Interval = 124 / 10 = 12 +custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=10)] + +# Use ViT-B/16 +model = dict( + backbone=dict(embed_dims=768, depth=12, num_heads=12), + cls_head=dict(in_channels=768), +) +load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth" + +# Use Gaussian sampling +train_dataloader = dict( + dataset=dict(sampling_strategy=dict(type="GaussianSampling", clip_len=10)) +) +# We are not changing the val/test dataloaders since gaussian sampling requires labels +# and we cannot have a valid validation if we use labels in the preprocessing diff --git 
a/configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss.py b/configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss.py new file mode 100644 index 0000000..e892cf2 --- /dev/null +++ b/configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss.py @@ -0,0 +1,40 @@ +_base_ = [ + "../models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py" +] + +EXPERIMENT_NAME = ( + "vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss" +) +visualizer = dict( + vis_backends=dict(save_dir=f"experiments/tensorboard/{EXPERIMENT_NAME}/") +) +work_dir = f"experiments/{EXPERIMENT_NAME}" + +# Overrides +default_hooks = dict(checkpoint=dict(interval=3)) + +# 1487 samples in val -> 372 per node -> 124 batches per node -> We want around 10 images +# -> Interval = 124 / 10 = 12 +custom_hooks = [dict(type="CustomVisualizationHook", enable=True, interval=10)] + +# Use ViT-B/16 +# Add weighted CE loss +# weight_for_class_i = total_samples / (num_samples_in_class_i * num_classes) +model = dict( + backbone=dict(embed_dims=768, depth=12, num_heads=12), + cls_head=dict( + in_channels=768, + loss_cls=dict( + type="CrossEntropyLoss", + class_weight=[26.38235294117647, 37.901408450704224, 3.7168508287292816], + ), + ), +) +load_from = "weights/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth" + +# Use Gaussian sampling +train_dataloader = dict( + dataset=dict(sampling_strategy=dict(type="GaussianSampling", clip_len=10)) +) +# We are not changing the val/test dataloaders since gaussian sampling requires labels +# and we cannot have a valid validation if we use labels in the preprocessing diff --git a/configs/models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py b/configs/models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py new file mode 
100644 index 0000000..cbcf715 --- /dev/null +++ b/configs/models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_k400-hyperparams.py @@ -0,0 +1,416 @@ +_base_ = [ + "../default_runtime.py", + "../datasets/high-quality-fall_runner_k400-hyperparams.py", +] + +# Finetuning parameters are from VideoMAEv2 repo +# https://github.com/OpenGVLab/VideoMAEv2/blob/master/scripts/finetune/vit_b_k400_ft.sh + + +# ViT-S-P16 +model = dict( + type="Recognizer3D", + backbone=dict( + type="VisionTransformer", + img_size=224, + patch_size=16, + embed_dims=384, + depth=12, + num_heads=6, + mlp_ratio=4, + qkv_bias=True, + num_frames=16, + norm_cfg=dict(type="LN", eps=1e-6), + drop_path_rate=0.3, # From VideoMAEv2 repo + ), + cls_head=dict( + type="TimeSformerHead", + num_classes=3, + in_channels=384, + average_clips="prob", + topk=(1,), + ), + data_preprocessor=dict( + type="ActionDataPreprocessor", + mean=[102.17311096191406, 98.78225708007812, 92.68714141845703], + std=[58.04566192626953, 57.004024505615234, 57.3704948425293], + format_shape="NCTHW", + ), +) + +# Loading weights +load_from = "weights/vit-small-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-25c748fd.pth" + +# TRAINING CONFIG +train_cfg = dict(type="EpochBasedTrainLoop", max_epochs=90, val_interval=1) + +# TODO: Think about fine-tuning param scheduler +param_scheduler = [ + dict( + type="LinearLR", + by_epoch=True, + convert_to_iter_based=True, + start_factor=1e-3, + end_factor=1, + begin=0, + end=5, + ), # From VideoMAEv2 repo - Warmup + dict( + type="CosineAnnealingLR", + by_epoch=True, + convert_to_iter_based=True, + eta_min=1e-6, + begin=5, + end=35, + ), +] + +auto_scale_lr = dict(enable=True, base_batch_size=256) + +# Layer Decay and Weight Decay module configs +vit_b_layer_decay_75_custom_keys = { + "backbone.patch_embed.projection.weight": { + "lr_mult": 0.023757264018058777, + "decay_mult": 1, + }, + "backbone.patch_embed.projection.bias": { + "lr_mult": 0.023757264018058777, + 
"decay_mult": 0, + }, + "backbone.blocks.0.norm1.weight": {"lr_mult": 0.03167635202407837, "decay_mult": 0}, + "backbone.blocks.0.norm1.bias": {"lr_mult": 0.03167635202407837, "decay_mult": 0}, + "backbone.blocks.0.attn.q_bias": {"lr_mult": 0.03167635202407837, "decay_mult": 0}, + "backbone.blocks.0.attn.v_bias": {"lr_mult": 0.03167635202407837, "decay_mult": 0}, + "backbone.blocks.0.attn.proj.bias": { + "lr_mult": 0.03167635202407837, + "decay_mult": 0, + }, + "backbone.blocks.0.norm2.weight": {"lr_mult": 0.03167635202407837, "decay_mult": 0}, + "backbone.blocks.0.norm2.bias": {"lr_mult": 0.03167635202407837, "decay_mult": 0}, + "backbone.blocks.0.mlp.layers.0.0.bias": { + "lr_mult": 0.03167635202407837, + "decay_mult": 0, + }, + "backbone.blocks.0.mlp.layers.1.bias": { + "lr_mult": 0.03167635202407837, + "decay_mult": 0, + }, + "backbone.blocks.0.attn.qkv.weight": { + "lr_mult": 0.03167635202407837, + "decay_mult": 1, + }, + "backbone.blocks.0.attn.proj.weight": { + "lr_mult": 0.03167635202407837, + "decay_mult": 1, + }, + "backbone.blocks.0.mlp.layers.0.0.weight": { + "lr_mult": 0.03167635202407837, + "decay_mult": 1, + }, + "backbone.blocks.0.mlp.layers.1.weight": { + "lr_mult": 0.03167635202407837, + "decay_mult": 1, + }, + "backbone.blocks.1.norm1.weight": {"lr_mult": 0.04223513603210449, "decay_mult": 0}, + "backbone.blocks.1.norm1.bias": {"lr_mult": 0.04223513603210449, "decay_mult": 0}, + "backbone.blocks.1.attn.q_bias": {"lr_mult": 0.04223513603210449, "decay_mult": 0}, + "backbone.blocks.1.attn.v_bias": {"lr_mult": 0.04223513603210449, "decay_mult": 0}, + "backbone.blocks.1.attn.proj.bias": { + "lr_mult": 0.04223513603210449, + "decay_mult": 0, + }, + "backbone.blocks.1.norm2.weight": {"lr_mult": 0.04223513603210449, "decay_mult": 0}, + "backbone.blocks.1.norm2.bias": {"lr_mult": 0.04223513603210449, "decay_mult": 0}, + "backbone.blocks.1.mlp.layers.0.0.bias": { + "lr_mult": 0.04223513603210449, + "decay_mult": 0, + }, + 
"backbone.blocks.1.mlp.layers.1.bias": { + "lr_mult": 0.04223513603210449, + "decay_mult": 0, + }, + "backbone.blocks.1.attn.qkv.weight": { + "lr_mult": 0.04223513603210449, + "decay_mult": 1, + }, + "backbone.blocks.1.attn.proj.weight": { + "lr_mult": 0.04223513603210449, + "decay_mult": 1, + }, + "backbone.blocks.1.mlp.layers.0.0.weight": { + "lr_mult": 0.04223513603210449, + "decay_mult": 1, + }, + "backbone.blocks.1.mlp.layers.1.weight": { + "lr_mult": 0.04223513603210449, + "decay_mult": 1, + }, + "backbone.blocks.2.norm1.weight": { + "lr_mult": 0.056313514709472656, + "decay_mult": 0, + }, + "backbone.blocks.2.norm1.bias": {"lr_mult": 0.056313514709472656, "decay_mult": 0}, + "backbone.blocks.2.attn.q_bias": {"lr_mult": 0.056313514709472656, "decay_mult": 0}, + "backbone.blocks.2.attn.v_bias": {"lr_mult": 0.056313514709472656, "decay_mult": 0}, + "backbone.blocks.2.attn.proj.bias": { + "lr_mult": 0.056313514709472656, + "decay_mult": 0, + }, + "backbone.blocks.2.norm2.weight": { + "lr_mult": 0.056313514709472656, + "decay_mult": 0, + }, + "backbone.blocks.2.norm2.bias": {"lr_mult": 0.056313514709472656, "decay_mult": 0}, + "backbone.blocks.2.mlp.layers.0.0.bias": { + "lr_mult": 0.056313514709472656, + "decay_mult": 0, + }, + "backbone.blocks.2.mlp.layers.1.bias": { + "lr_mult": 0.056313514709472656, + "decay_mult": 0, + }, + "backbone.blocks.2.attn.qkv.weight": { + "lr_mult": 0.056313514709472656, + "decay_mult": 1, + }, + "backbone.blocks.2.attn.proj.weight": { + "lr_mult": 0.056313514709472656, + "decay_mult": 1, + }, + "backbone.blocks.2.mlp.layers.0.0.weight": { + "lr_mult": 0.056313514709472656, + "decay_mult": 1, + }, + "backbone.blocks.2.mlp.layers.1.weight": { + "lr_mult": 0.056313514709472656, + "decay_mult": 1, + }, + "backbone.blocks.3.norm1.weight": {"lr_mult": 0.07508468627929688, "decay_mult": 0}, + "backbone.blocks.3.norm1.bias": {"lr_mult": 0.07508468627929688, "decay_mult": 0}, + "backbone.blocks.3.attn.q_bias": {"lr_mult": 
0.07508468627929688, "decay_mult": 0}, + "backbone.blocks.3.attn.v_bias": {"lr_mult": 0.07508468627929688, "decay_mult": 0}, + "backbone.blocks.3.attn.proj.bias": { + "lr_mult": 0.07508468627929688, + "decay_mult": 0, + }, + "backbone.blocks.3.norm2.weight": {"lr_mult": 0.07508468627929688, "decay_mult": 0}, + "backbone.blocks.3.norm2.bias": {"lr_mult": 0.07508468627929688, "decay_mult": 0}, + "backbone.blocks.3.mlp.layers.0.0.bias": { + "lr_mult": 0.07508468627929688, + "decay_mult": 0, + }, + "backbone.blocks.3.mlp.layers.1.bias": { + "lr_mult": 0.07508468627929688, + "decay_mult": 0, + }, + "backbone.blocks.3.attn.qkv.weight": { + "lr_mult": 0.07508468627929688, + "decay_mult": 1, + }, + "backbone.blocks.3.attn.proj.weight": { + "lr_mult": 0.07508468627929688, + "decay_mult": 1, + }, + "backbone.blocks.3.mlp.layers.0.0.weight": { + "lr_mult": 0.07508468627929688, + "decay_mult": 1, + }, + "backbone.blocks.3.mlp.layers.1.weight": { + "lr_mult": 0.07508468627929688, + "decay_mult": 1, + }, + "backbone.blocks.4.norm1.weight": {"lr_mult": 0.1001129150390625, "decay_mult": 0}, + "backbone.blocks.4.norm1.bias": {"lr_mult": 0.1001129150390625, "decay_mult": 0}, + "backbone.blocks.4.attn.q_bias": {"lr_mult": 0.1001129150390625, "decay_mult": 0}, + "backbone.blocks.4.attn.v_bias": {"lr_mult": 0.1001129150390625, "decay_mult": 0}, + "backbone.blocks.4.attn.proj.bias": { + "lr_mult": 0.1001129150390625, + "decay_mult": 0, + }, + "backbone.blocks.4.norm2.weight": {"lr_mult": 0.1001129150390625, "decay_mult": 0}, + "backbone.blocks.4.norm2.bias": {"lr_mult": 0.1001129150390625, "decay_mult": 0}, + "backbone.blocks.4.mlp.layers.0.0.bias": { + "lr_mult": 0.1001129150390625, + "decay_mult": 0, + }, + "backbone.blocks.4.mlp.layers.1.bias": { + "lr_mult": 0.1001129150390625, + "decay_mult": 0, + }, + "backbone.blocks.4.attn.qkv.weight": { + "lr_mult": 0.1001129150390625, + "decay_mult": 1, + }, + "backbone.blocks.4.attn.proj.weight": { + "lr_mult": 0.1001129150390625, + 
"decay_mult": 1, + }, + "backbone.blocks.4.mlp.layers.0.0.weight": { + "lr_mult": 0.1001129150390625, + "decay_mult": 1, + }, + "backbone.blocks.4.mlp.layers.1.weight": { + "lr_mult": 0.1001129150390625, + "decay_mult": 1, + }, + "backbone.blocks.5.norm1.weight": {"lr_mult": 0.13348388671875, "decay_mult": 0}, + "backbone.blocks.5.norm1.bias": {"lr_mult": 0.13348388671875, "decay_mult": 0}, + "backbone.blocks.5.attn.q_bias": {"lr_mult": 0.13348388671875, "decay_mult": 0}, + "backbone.blocks.5.attn.v_bias": {"lr_mult": 0.13348388671875, "decay_mult": 0}, + "backbone.blocks.5.attn.proj.bias": {"lr_mult": 0.13348388671875, "decay_mult": 0}, + "backbone.blocks.5.norm2.weight": {"lr_mult": 0.13348388671875, "decay_mult": 0}, + "backbone.blocks.5.norm2.bias": {"lr_mult": 0.13348388671875, "decay_mult": 0}, + "backbone.blocks.5.mlp.layers.0.0.bias": { + "lr_mult": 0.13348388671875, + "decay_mult": 0, + }, + "backbone.blocks.5.mlp.layers.1.bias": { + "lr_mult": 0.13348388671875, + "decay_mult": 0, + }, + "backbone.blocks.5.attn.qkv.weight": {"lr_mult": 0.13348388671875, "decay_mult": 1}, + "backbone.blocks.5.attn.proj.weight": { + "lr_mult": 0.13348388671875, + "decay_mult": 1, + }, + "backbone.blocks.5.mlp.layers.0.0.weight": { + "lr_mult": 0.13348388671875, + "decay_mult": 1, + }, + "backbone.blocks.5.mlp.layers.1.weight": { + "lr_mult": 0.13348388671875, + "decay_mult": 1, + }, + "backbone.blocks.6.norm1.weight": {"lr_mult": 0.177978515625, "decay_mult": 0}, + "backbone.blocks.6.norm1.bias": {"lr_mult": 0.177978515625, "decay_mult": 0}, + "backbone.blocks.6.attn.q_bias": {"lr_mult": 0.177978515625, "decay_mult": 0}, + "backbone.blocks.6.attn.v_bias": {"lr_mult": 0.177978515625, "decay_mult": 0}, + "backbone.blocks.6.attn.proj.bias": {"lr_mult": 0.177978515625, "decay_mult": 0}, + "backbone.blocks.6.norm2.weight": {"lr_mult": 0.177978515625, "decay_mult": 0}, + "backbone.blocks.6.norm2.bias": {"lr_mult": 0.177978515625, "decay_mult": 0}, + 
"backbone.blocks.6.mlp.layers.0.0.bias": { + "lr_mult": 0.177978515625, + "decay_mult": 0, + }, + "backbone.blocks.6.mlp.layers.1.bias": {"lr_mult": 0.177978515625, "decay_mult": 0}, + "backbone.blocks.6.attn.qkv.weight": {"lr_mult": 0.177978515625, "decay_mult": 1}, + "backbone.blocks.6.attn.proj.weight": {"lr_mult": 0.177978515625, "decay_mult": 1}, + "backbone.blocks.6.mlp.layers.0.0.weight": { + "lr_mult": 0.177978515625, + "decay_mult": 1, + }, + "backbone.blocks.6.mlp.layers.1.weight": { + "lr_mult": 0.177978515625, + "decay_mult": 1, + }, + "backbone.blocks.7.norm1.weight": {"lr_mult": 0.2373046875, "decay_mult": 0}, + "backbone.blocks.7.norm1.bias": {"lr_mult": 0.2373046875, "decay_mult": 0}, + "backbone.blocks.7.attn.q_bias": {"lr_mult": 0.2373046875, "decay_mult": 0}, + "backbone.blocks.7.attn.v_bias": {"lr_mult": 0.2373046875, "decay_mult": 0}, + "backbone.blocks.7.attn.proj.bias": {"lr_mult": 0.2373046875, "decay_mult": 0}, + "backbone.blocks.7.norm2.weight": {"lr_mult": 0.2373046875, "decay_mult": 0}, + "backbone.blocks.7.norm2.bias": {"lr_mult": 0.2373046875, "decay_mult": 0}, + "backbone.blocks.7.mlp.layers.0.0.bias": {"lr_mult": 0.2373046875, "decay_mult": 0}, + "backbone.blocks.7.mlp.layers.1.bias": {"lr_mult": 0.2373046875, "decay_mult": 0}, + "backbone.blocks.7.attn.qkv.weight": {"lr_mult": 0.2373046875, "decay_mult": 1}, + "backbone.blocks.7.attn.proj.weight": {"lr_mult": 0.2373046875, "decay_mult": 1}, + "backbone.blocks.7.mlp.layers.0.0.weight": { + "lr_mult": 0.2373046875, + "decay_mult": 1, + }, + "backbone.blocks.7.mlp.layers.1.weight": {"lr_mult": 0.2373046875, "decay_mult": 1}, + "backbone.blocks.8.norm1.weight": {"lr_mult": 0.31640625, "decay_mult": 0}, + "backbone.blocks.8.norm1.bias": {"lr_mult": 0.31640625, "decay_mult": 0}, + "backbone.blocks.8.attn.q_bias": {"lr_mult": 0.31640625, "decay_mult": 0}, + "backbone.blocks.8.attn.v_bias": {"lr_mult": 0.31640625, "decay_mult": 0}, + "backbone.blocks.8.attn.proj.bias": {"lr_mult": 
0.31640625, "decay_mult": 0}, + "backbone.blocks.8.norm2.weight": {"lr_mult": 0.31640625, "decay_mult": 0}, + "backbone.blocks.8.norm2.bias": {"lr_mult": 0.31640625, "decay_mult": 0}, + "backbone.blocks.8.mlp.layers.0.0.bias": {"lr_mult": 0.31640625, "decay_mult": 0}, + "backbone.blocks.8.mlp.layers.1.bias": {"lr_mult": 0.31640625, "decay_mult": 0}, + "backbone.blocks.8.attn.qkv.weight": {"lr_mult": 0.31640625, "decay_mult": 1}, + "backbone.blocks.8.attn.proj.weight": {"lr_mult": 0.31640625, "decay_mult": 1}, + "backbone.blocks.8.mlp.layers.0.0.weight": {"lr_mult": 0.31640625, "decay_mult": 1}, + "backbone.blocks.8.mlp.layers.1.weight": {"lr_mult": 0.31640625, "decay_mult": 1}, + "backbone.blocks.9.norm1.weight": {"lr_mult": 0.421875, "decay_mult": 0}, + "backbone.blocks.9.norm1.bias": {"lr_mult": 0.421875, "decay_mult": 0}, + "backbone.blocks.9.attn.q_bias": {"lr_mult": 0.421875, "decay_mult": 0}, + "backbone.blocks.9.attn.v_bias": {"lr_mult": 0.421875, "decay_mult": 0}, + "backbone.blocks.9.attn.proj.bias": {"lr_mult": 0.421875, "decay_mult": 0}, + "backbone.blocks.9.norm2.weight": {"lr_mult": 0.421875, "decay_mult": 0}, + "backbone.blocks.9.norm2.bias": {"lr_mult": 0.421875, "decay_mult": 0}, + "backbone.blocks.9.mlp.layers.0.0.bias": {"lr_mult": 0.421875, "decay_mult": 0}, + "backbone.blocks.9.mlp.layers.1.bias": {"lr_mult": 0.421875, "decay_mult": 0}, + "backbone.blocks.9.attn.qkv.weight": {"lr_mult": 0.421875, "decay_mult": 1}, + "backbone.blocks.9.attn.proj.weight": {"lr_mult": 0.421875, "decay_mult": 1}, + "backbone.blocks.9.mlp.layers.0.0.weight": {"lr_mult": 0.421875, "decay_mult": 1}, + "backbone.blocks.9.mlp.layers.1.weight": {"lr_mult": 0.421875, "decay_mult": 1}, + "backbone.blocks.10.norm1.weight": {"lr_mult": 0.5625, "decay_mult": 0}, + "backbone.blocks.10.norm1.bias": {"lr_mult": 0.5625, "decay_mult": 0}, + "backbone.blocks.10.attn.q_bias": {"lr_mult": 0.5625, "decay_mult": 0}, + "backbone.blocks.10.attn.v_bias": {"lr_mult": 0.5625, "decay_mult": 
0}, + "backbone.blocks.10.attn.proj.bias": {"lr_mult": 0.5625, "decay_mult": 0}, + "backbone.blocks.10.norm2.weight": {"lr_mult": 0.5625, "decay_mult": 0}, + "backbone.blocks.10.norm2.bias": {"lr_mult": 0.5625, "decay_mult": 0}, + "backbone.blocks.10.mlp.layers.0.0.bias": {"lr_mult": 0.5625, "decay_mult": 0}, + "backbone.blocks.10.mlp.layers.1.bias": {"lr_mult": 0.5625, "decay_mult": 0}, + "backbone.blocks.10.attn.qkv.weight": {"lr_mult": 0.5625, "decay_mult": 1}, + "backbone.blocks.10.attn.proj.weight": {"lr_mult": 0.5625, "decay_mult": 1}, + "backbone.blocks.10.mlp.layers.0.0.weight": {"lr_mult": 0.5625, "decay_mult": 1}, + "backbone.blocks.10.mlp.layers.1.weight": {"lr_mult": 0.5625, "decay_mult": 1}, + "backbone.blocks.11.norm1.weight": {"lr_mult": 0.75, "decay_mult": 0}, + "backbone.blocks.11.norm1.bias": {"lr_mult": 0.75, "decay_mult": 0}, + "backbone.blocks.11.attn.q_bias": {"lr_mult": 0.75, "decay_mult": 0}, + "backbone.blocks.11.attn.v_bias": {"lr_mult": 0.75, "decay_mult": 0}, + "backbone.blocks.11.attn.proj.bias": {"lr_mult": 0.75, "decay_mult": 0}, + "backbone.blocks.11.norm2.weight": {"lr_mult": 0.75, "decay_mult": 0}, + "backbone.blocks.11.norm2.bias": {"lr_mult": 0.75, "decay_mult": 0}, + "backbone.blocks.11.mlp.layers.0.0.bias": {"lr_mult": 0.75, "decay_mult": 0}, + "backbone.blocks.11.mlp.layers.1.bias": {"lr_mult": 0.75, "decay_mult": 0}, + "backbone.blocks.11.attn.qkv.weight": {"lr_mult": 0.75, "decay_mult": 1}, + "backbone.blocks.11.attn.proj.weight": {"lr_mult": 0.75, "decay_mult": 1}, + "backbone.blocks.11.mlp.layers.0.0.weight": {"lr_mult": 0.75, "decay_mult": 1}, + "backbone.blocks.11.mlp.layers.1.weight": {"lr_mult": 0.75, "decay_mult": 1}, + "backbone.fc_norm.weight": {"lr_mult": 1.0, "decay_mult": 0}, + "backbone.fc_norm.bias": {"lr_mult": 1.0, "decay_mult": 0}, + "cls_head.fc_cls.bias": {"lr_mult": 1.0, "decay_mult": 0}, + "cls_head.fc_cls.weight": {"lr_mult": 1.0, "decay_mult": 1}, +} + + +optim_wrapper = dict( + type="AmpOptimWrapper", 
# Automatic Mixed Precision may speed up training + optimizer=dict( + type="AdamW", # From VideoMAEv2 repo + lr=7e-4, # From VideoMAEv2 repo + weight_decay=0.05, # From VideoMAEv2 repo + betas=(0.9, 0.999), # From VideoMAEv2 repo + ), + paramwise_cfg=dict(custom_keys=vit_b_layer_decay_75_custom_keys), + # clip_grad=dict(max_norm=5, norm_type=2), # From VideoMAEv2 repo +) + +# VALIDATION CONFIG +val_evaluator = dict( + type="AddAccMetric", + metric_list=( + "unweighted_average_f1", + "per_class_f1", + "per_class_precision", + "per_class_recall", + ), +) +val_cfg = dict(type="ValLoop") + + +# TEST CONFIG +test_evaluator = dict( + type="AddAccMetric", + metric_list=( + "unweighted_average_f1", + "per_class_f1", + "per_class_precision", + "per_class_recall", + ), +) +test_cfg = dict(type="TestLoop") diff --git a/experiments.dvc b/experiments.dvc index 98f6b2a..ff1b824 100644 --- a/experiments.dvc +++ b/experiments.dvc @@ -1,6 +1,6 @@ outs: -- md5: 004d25dfcdbec8b9a95e429079227b93.dir - size: 1022832432 - nfiles: 9 +- md5: 55075530cd6a7d51a35547b6eebafda0.dir + size: 21652483819 + nfiles: 97 hash: md5 path: experiments diff --git a/job_scripts/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.sh b/job_scripts/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.sh new file mode 100644 index 0000000..07c096e --- /dev/null +++ b/job_scripts/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +#SBATCH -A NAISS2023-22-1160 -p alvis +#SBATCH -N 1 --gpus-per-node=A40:1 +#SBATCH --time=48:00:00 + +apptainer exec \ + --env PYTHONPATH=$(pwd) \ + containers/c3se_job_container.sif \ + python mmaction2/tools/train.py \ + configs/experiments/vit-b_frame-int-8_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py \ No newline at end of file diff --git
a/job_scripts/vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.sh b/job_scripts/vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.sh new file mode 100644 index 0000000..0fafc47 --- /dev/null +++ b/job_scripts/vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +#SBATCH -A NAISS2023-22-1160 -p alvis +#SBATCH -N 1 --gpus-per-node=A40:1 +#SBATCH --time=48:00:00 + +apptainer exec \ + --env PYTHONPATH=$(pwd) \ + containers/c3se_job_container.sif \ + python mmaction2/tools/train.py \ + configs/experiments/vit-b_gaussian-sampling-5s-clips-30-drop_priority-labeling_k400-hyperparams.py \ No newline at end of file diff --git a/job_scripts/vit-b_gaussian-sampling_priority-labeling_k400-hyperparams.sh b/job_scripts/vit-b_gaussian-sampling_priority-labeling_k400-hyperparams.sh new file mode 100644 index 0000000..fc47fdd --- /dev/null +++ b/job_scripts/vit-b_gaussian-sampling_priority-labeling_k400-hyperparams.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +#SBATCH -A NAISS2023-22-1160 -p alvis +#SBATCH -N 1 --gpus-per-node=A40:1 +#SBATCH --time=24:00:00 + +apptainer exec \ + --env PYTHONPATH=$(pwd) \ + containers/c3se_job_container.sif \ + python mmaction2/tools/train.py \ + configs/experiments/vit-b_gaussian-sampling_priority-labeling_k400-hyperparams.py \ + --resume auto \ No newline at end of file diff --git a/job_scripts/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams.sh b/job_scripts/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams.sh new file mode 100644 index 0000000..ce03cdb --- /dev/null +++ b/job_scripts/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +#SBATCH -A NAISS2023-22-1160 -p alvis +#SBATCH -N 1 --gpus-per-node=A40:4 +#SBATCH --time=24:00:00 + +apptainer exec \ + --env PYTHONPATH=$(pwd) \ + containers/c3se_job_container.sif \ + python -m torch.distributed.launch 
--nproc_per_node=4 \ + mmaction2/tools/train.py \ + configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams.py \ + --launcher pytorch --resume auto \ No newline at end of file diff --git a/job_scripts/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss.sh b/job_scripts/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss.sh new file mode 100644 index 0000000..0603a02 --- /dev/null +++ b/job_scripts/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +#SBATCH -A NAISS2023-22-1160 -p alvis +#SBATCH -N 1 --gpus-per-node=A40:4 +#SBATCH --time=24:00:00 + +apptainer exec \ + --env PYTHONPATH=$(pwd) \ + containers/c3se_job_container.sif \ + python -m torch.distributed.launch --nproc_per_node=4 \ + mmaction2/tools/train.py \ + configs/experiments/vit-b_gaussian-sampling_priority-labeling_paper-hyperparams_weighted-ce-loss.py \ + --launcher pytorch \ No newline at end of file diff --git a/notebooks/custom_keys_optimizer.ipynb b/notebooks/custom_keys_optimizer.ipynb new file mode 100644 index 0000000..31557cb --- /dev/null +++ b/notebooks/custom_keys_optimizer.ipynb @@ -0,0 +1,938 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Custom Keys Optimizer\n", + "\n", + "Here we create the custom keys dictionary for the runner config.\n", + "It is necessary to get layer decay." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "LAYER_DECAY = 0.75\n", + "MODEL_DEPTH = 12\n", + "BASE_WEIGHT_DECAY = 0.05" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "12/04 22:32:34 - mmengine - \u001b[4m\u001b[97mINFO\u001b[0m - \n", + "------------------------------------------------------------\n", + "System environment:\n", + " sys.platform: darwin\n", + " Python: 3.10.13 | packaged by conda-forge | (main, Oct 26 2023, 18:09:17) [Clang 16.0.6 ]\n", + " CUDA available: False\n", + " numpy_random_seed: 104644062\n", + " GCC: Apple clang version 15.0.0 (clang-1500.0.40.1)\n", + " PyTorch: 2.1.1\n", + " PyTorch compiling details: PyTorch built with:\n", + " - GCC 4.2\n", + " - C++ Version: 201703\n", + " - clang 13.1.6\n", + " - LAPACK is enabled (usually provided by MKL)\n", + " - NNPACK is enabled\n", + " - CPU capability usage: NO AVX\n", + " - Build settings: BLAS_INFO=accelerate, BUILD_TYPE=Release, CXX_COMPILER=/Applications/Xcode_13.3.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOROCTRACER -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_PYTORCH_METAL_EXPORT -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DUSE_COREML_DELEGATE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=braced-scalar-init -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wvla-extension -Wnewline-eof -Winconsistent-missing-override -Winconsistent-missing-destructor-override -Wno-range-loop-analysis -Wno-pass-failed -Wsuggest-override -Wno-error=pedantic 
-Wno-error=old-style-cast -Wno-error=inconsistent-missing-override -Wno-error=inconsistent-missing-destructor-override -Wconstant-conversion -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-missing-braces -Wunused-lambda-capture -Qunused-arguments -fcolor-diagnostics -faligned-new -Wno-unused-but-set-variable -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -DUSE_MPS -Wno-unused-private-field -Wno-missing-braces, LAPACK_INFO=accelerate, TORCH_DISABLE_GPU_ASSERTS=OFF, TORCH_VERSION=2.1.1, USE_CUDA=0, USE_CUDNN=OFF, USE_EIGEN_FOR_BLAS=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=OFF, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=OFF, USE_NNPACK=ON, USE_OPENMP=OFF, USE_ROCM=OFF, \n", + "\n", + " TorchVision: 0.16.1\n", + " OpenCV: 4.8.1\n", + " MMEngine: 0.10.1\n", + "\n", + "Runtime environment:\n", + " cudnn_benchmark: False\n", + " mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}\n", + " dist_cfg: {'backend': 'nccl'}\n", + " seed: 104644062\n", + " Distributed launcher: none\n", + " Distributed training: False\n", + " GPU number: 1\n", + "------------------------------------------------------------\n", + "\n", + "12/04 22:32:34 - mmengine - \u001b[4m\u001b[97mINFO\u001b[0m - Config:\n", + "ann_file_test = 'data/Fall_Simulation_Data/annotations_test.csv'\n", + "ann_file_train = 'data/Fall_Simulation_Data/annotations_train.csv'\n", + "ann_file_val = 'data/Fall_Simulation_Data/annotations_val.csv'\n", + "custom_hooks = [\n", + " dict(enable=True, type='CustomVisualizationHook'),\n", + "]\n", + "custom_imports = dict(\n", + " allow_failed_imports=False,\n", + " imports=[\n", + " 'datasets',\n", + " 'evaluation',\n", + " 'visualization',\n", + " ])\n", + "dataset_type = 'HighQualityFallDataset'\n", + "default_hooks = dict(\n", + " checkpoint=dict(\n", + " by_epoch=True,\n", + " interval=3,\n", + " max_keep_ckpts=3,\n", + " save_best='auto',\n", + " type='CheckpointHook'),\n", + " 
logger=dict(type='LoggerHook'),\n", + " param_scheduler=dict(type='ParamSchedulerHook'),\n", + " runtime_info=dict(type='RuntimeInfoHook'),\n", + " sampler_seed=dict(type='DistSamplerSeedHook'),\n", + " sync_buffers=dict(type='SyncBuffersHook'),\n", + " timer=dict(type='IterTimerHook'))\n", + "default_scope = 'mmaction'\n", + "env_cfg = dict(\n", + " cudnn_benchmark=False,\n", + " dist_cfg=dict(backend='nccl'),\n", + " mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))\n", + "label_strategy = dict(\n", + " label_description=dict(\n", + " end_timestamp_names=[\n", + " 'fall_end',\n", + " 'lying_end',\n", + " ],\n", + " names=[\n", + " 'fall',\n", + " 'lying',\n", + " 'other',\n", + " ],\n", + " other_class=2,\n", + " start_timestamp_names=[\n", + " 'fall_start',\n", + " 'lying_start',\n", + " ],\n", + " visible_names=[\n", + " 'fall_visible',\n", + " 'lying_visible',\n", + " ]),\n", + " type='PriorityLabel')\n", + "launcher = 'none'\n", + "load_from = 'weights/vit-small-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-25c748fd.pth'\n", + "log_level = 'INFO'\n", + "log_processor = dict(by_epoch=True, type='LogProcessor', window_size=10)\n", + "model = dict(\n", + " backbone=dict(\n", + " depth=12,\n", + " drop_path_rate=0.3,\n", + " embed_dims=384,\n", + " img_size=224,\n", + " mlp_ratio=4,\n", + " norm_cfg=dict(eps=1e-06, type='LN'),\n", + " num_frames=16,\n", + " num_heads=6,\n", + " patch_size=16,\n", + " qkv_bias=True,\n", + " type='VisionTransformer'),\n", + " cls_head=dict(\n", + " average_clips='prob',\n", + " in_channels=384,\n", + " num_classes=3,\n", + " topk=(1, ),\n", + " type='TimeSformerHead'),\n", + " data_preprocessor=dict(\n", + " format_shape='NCTHW',\n", + " mean=[\n", + " 102.17311096191406,\n", + " 98.78225708007812,\n", + " 92.68714141845703,\n", + " ],\n", + " std=[\n", + " 58.04566192626953,\n", + " 57.004024505615234,\n", + " 57.3704948425293,\n", + " ],\n", + " type='ActionDataPreprocessor'),\n", + " 
type='Recognizer3D')\n", + "optim_wrapper = dict(\n", + " clip_grad=dict(max_norm=5, norm_type=2),\n", + " optimizer=dict(\n", + " betas=(\n", + " 0.9,\n", + " 0.999,\n", + " ), lr=0.001, type='AdamW', weight_decay=0.1),\n", + " type='AmpOptimWrapper')\n", + "param_scheduler = [\n", + " dict(\n", + " begin=0,\n", + " by_epoch=True,\n", + " convert_to_iter_based=True,\n", + " end=5,\n", + " end_factor=1,\n", + " start_factor=0.001,\n", + " type='LinearLR'),\n", + " dict(\n", + " begin=5,\n", + " by_epoch=True,\n", + " convert_to_iter_based=True,\n", + " end=35,\n", + " eta_min=1e-06,\n", + " type='CosineAnnealingLR'),\n", + "]\n", + "resume = False\n", + "sampling_strategy = dict(clip_len=10, type='UniformSampling')\n", + "test_cfg = dict(type='TestLoop')\n", + "test_dataloader = dict(\n", + " batch_size=3,\n", + " dataset=dict(\n", + " ann_file='data/Fall_Simulation_Data/annotations_test.csv',\n", + " label_strategy=dict(\n", + " label_description=dict(\n", + " end_timestamp_names=[\n", + " 'fall_end',\n", + " 'lying_end',\n", + " ],\n", + " names=[\n", + " 'fall',\n", + " 'lying',\n", + " 'other',\n", + " ],\n", + " other_class=2,\n", + " start_timestamp_names=[\n", + " 'fall_start',\n", + " 'lying_start',\n", + " ],\n", + " visible_names=[\n", + " 'fall_visible',\n", + " 'lying_visible',\n", + " ]),\n", + " type='PriorityLabel'),\n", + " num_classes=3,\n", + " pipeline=[\n", + " dict(type='DecordInit'),\n", + " dict(\n", + " clip_len=16,\n", + " frame_interval=4,\n", + " num_clips=5,\n", + " test_mode=True,\n", + " type='SampleFrames'),\n", + " dict(type='DecordDecode'),\n", + " dict(scale=(\n", + " -1,\n", + " 224,\n", + " ), type='Resize'),\n", + " dict(crop_size=224, type='ThreeCrop'),\n", + " dict(input_format='NCTHW', type='FormatShape'),\n", + " dict(type='PackActionInputs'),\n", + " ],\n", + " sampling_strategy=dict(clip_len=10, type='UniformSampling'),\n", + " type='HighQualityFallDataset'),\n", + " num_workers=8,\n", + " persistent_workers=True,\n", + " 
sampler=dict(shuffle=False, type='DefaultSampler'))\n", + "test_evaluator = dict(\n", + " metric_list=(\n", + " 'unweighted_average_f1',\n", + " 'per_class_f1',\n", + " 'per_class_precision',\n", + " 'per_class_recall',\n", + " ),\n", + " type='AddAccMetric')\n", + "test_pipeline = [\n", + " dict(type='DecordInit'),\n", + " dict(\n", + " clip_len=16,\n", + " frame_interval=4,\n", + " num_clips=5,\n", + " test_mode=True,\n", + " type='SampleFrames'),\n", + " dict(type='DecordDecode'),\n", + " dict(scale=(\n", + " -1,\n", + " 224,\n", + " ), type='Resize'),\n", + " dict(crop_size=224, type='ThreeCrop'),\n", + " dict(input_format='NCTHW', type='FormatShape'),\n", + " dict(type='PackActionInputs'),\n", + "]\n", + "train_cfg = dict(max_epochs=35, type='EpochBasedTrainLoop', val_interval=1)\n", + "train_dataloader = dict(\n", + " batch_size=3,\n", + " dataset=dict(\n", + " ann_file='data/Fall_Simulation_Data/annotations_train.csv',\n", + " label_strategy=dict(\n", + " label_description=dict(\n", + " end_timestamp_names=[\n", + " 'fall_end',\n", + " 'lying_end',\n", + " ],\n", + " names=[\n", + " 'fall',\n", + " 'lying',\n", + " 'other',\n", + " ],\n", + " other_class=2,\n", + " start_timestamp_names=[\n", + " 'fall_start',\n", + " 'lying_start',\n", + " ],\n", + " visible_names=[\n", + " 'fall_visible',\n", + " 'lying_visible',\n", + " ]),\n", + " type='PriorityLabel'),\n", + " num_classes=3,\n", + " pipeline=[\n", + " dict(type='DecordInit'),\n", + " dict(type='ClipVideo'),\n", + " dict(\n", + " clip_len=16,\n", + " frame_interval=4,\n", + " num_clips=1,\n", + " type='SampleFrames'),\n", + " dict(type='DecordDecode'),\n", + " dict(scale=(\n", + " -1,\n", + " 224,\n", + " ), type='Resize'),\n", + " dict(size=224, type='RandomCrop'),\n", + " dict(keep_ratio=False, scale=(\n", + " 224,\n", + " 224,\n", + " ), type='Resize'),\n", + " dict(flip_ratio=0.5, type='Flip'),\n", + " dict(input_format='NCTHW', type='FormatShape'),\n", + " dict(type='PackActionInputs'),\n", + " 
],\n", + " sampling_strategy=dict(clip_len=10, type='UniformSampling'),\n", + " type='HighQualityFallDataset'),\n", + " num_workers=8,\n", + " persistent_workers=True,\n", + " sampler=dict(shuffle=True, type='DefaultSampler'))\n", + "train_pipeline = [\n", + " dict(type='DecordInit'),\n", + " dict(type='ClipVideo'),\n", + " dict(clip_len=16, frame_interval=4, num_clips=1, type='SampleFrames'),\n", + " dict(type='DecordDecode'),\n", + " dict(scale=(\n", + " -1,\n", + " 224,\n", + " ), type='Resize'),\n", + " dict(size=224, type='RandomCrop'),\n", + " dict(keep_ratio=False, scale=(\n", + " 224,\n", + " 224,\n", + " ), type='Resize'),\n", + " dict(flip_ratio=0.5, type='Flip'),\n", + " dict(input_format='NCTHW', type='FormatShape'),\n", + " dict(type='PackActionInputs'),\n", + "]\n", + "val_cfg = dict(type='ValLoop')\n", + "val_dataloader = dict(\n", + " batch_size=3,\n", + " dataset=dict(\n", + " ann_file='data/Fall_Simulation_Data/annotations_val.csv',\n", + " label_strategy=dict(\n", + " label_description=dict(\n", + " end_timestamp_names=[\n", + " 'fall_end',\n", + " 'lying_end',\n", + " ],\n", + " names=[\n", + " 'fall',\n", + " 'lying',\n", + " 'other',\n", + " ],\n", + " other_class=2,\n", + " start_timestamp_names=[\n", + " 'fall_start',\n", + " 'lying_start',\n", + " ],\n", + " visible_names=[\n", + " 'fall_visible',\n", + " 'lying_visible',\n", + " ]),\n", + " type='PriorityLabel'),\n", + " num_classes=3,\n", + " pipeline=[\n", + " dict(type='DecordInit'),\n", + " dict(type='ClipVideo'),\n", + " dict(\n", + " clip_len=16,\n", + " frame_interval=4,\n", + " num_clips=1,\n", + " test_mode=True,\n", + " type='SampleFrames'),\n", + " dict(type='DecordDecode'),\n", + " dict(scale=(\n", + " -1,\n", + " 224,\n", + " ), type='Resize'),\n", + " dict(crop_size=224, type='CenterCrop'),\n", + " dict(input_format='NCTHW', type='FormatShape'),\n", + " dict(type='PackActionInputs'),\n", + " ],\n", + " sampling_strategy=dict(clip_len=10, type='UniformSampling'),\n", + " 
type='HighQualityFallDataset'),\n", + " num_workers=8,\n", + " persistent_workers=True,\n", + " sampler=dict(shuffle=False, type='DefaultSampler'))\n", + "val_evaluator = dict(\n", + " metric_list=(\n", + " 'unweighted_average_f1',\n", + " 'per_class_f1',\n", + " 'per_class_precision',\n", + " 'per_class_recall',\n", + " ),\n", + " type='AddAccMetric')\n", + "val_pipeline = [\n", + " dict(type='DecordInit'),\n", + " dict(type='ClipVideo'),\n", + " dict(\n", + " clip_len=16,\n", + " frame_interval=4,\n", + " num_clips=1,\n", + " test_mode=True,\n", + " type='SampleFrames'),\n", + " dict(type='DecordDecode'),\n", + " dict(scale=(\n", + " -1,\n", + " 224,\n", + " ), type='Resize'),\n", + " dict(crop_size=224, type='CenterCrop'),\n", + " dict(input_format='NCTHW', type='FormatShape'),\n", + " dict(type='PackActionInputs'),\n", + "]\n", + "vis_backends = dict(\n", + " save_dir='experiments/tensorboard', type='TensorboardVisBackend')\n", + "visualizer = dict(\n", + " type='ActionVisualizer',\n", + " vis_backends=dict(\n", + " save_dir='experiments/tensorboard', type='TensorboardVisBackend'))\n", + "work_dir = 'experiments'\n", + "\n", + "12/04 22:32:34 - mmengine - \u001b[4m\u001b[97mINFO\u001b[0m - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.\n", + "12/04 22:32:34 - mmengine - \u001b[4m\u001b[97mINFO\u001b[0m - Hooks will be executed in the following order:\n", + "before_run:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(BELOW_NORMAL) LoggerHook \n", + " -------------------- \n", + "before_train:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "before_train_epoch:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(NORMAL ) DistSamplerSeedHook \n", + " -------------------- \n", + "before_train_iter:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL 
) IterTimerHook \n", + " -------------------- \n", + "after_train_iter:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(BELOW_NORMAL) LoggerHook \n", + "(LOW ) ParamSchedulerHook \n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "after_train_epoch:\n", + "(NORMAL ) IterTimerHook \n", + "(NORMAL ) SyncBuffersHook \n", + "(LOW ) ParamSchedulerHook \n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "before_val:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + " -------------------- \n", + "before_val_epoch:\n", + "(NORMAL ) IterTimerHook \n", + "(NORMAL ) SyncBuffersHook \n", + " -------------------- \n", + "before_val_iter:\n", + "(NORMAL ) IterTimerHook \n", + " -------------------- \n", + "after_val_iter:\n", + "(NORMAL ) IterTimerHook \n", + "(NORMAL ) CustomVisualizationHook \n", + "(BELOW_NORMAL) LoggerHook \n", + " -------------------- \n", + "after_val_epoch:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(BELOW_NORMAL) LoggerHook \n", + "(LOW ) ParamSchedulerHook \n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "after_val:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + " -------------------- \n", + "after_train:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "before_test:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + " -------------------- \n", + "before_test_epoch:\n", + "(NORMAL ) IterTimerHook \n", + " -------------------- \n", + "before_test_iter:\n", + "(NORMAL ) IterTimerHook \n", + " -------------------- \n", + "after_test_iter:\n", + "(NORMAL ) IterTimerHook \n", + "(NORMAL ) CustomVisualizationHook \n", + "(BELOW_NORMAL) LoggerHook \n", + " -------------------- \n", + "after_test_epoch:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(BELOW_NORMAL) LoggerHook \n", + " -------------------- \n", + "after_test:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + " 
-------------------- \n", + "after_run:\n", + "(BELOW_NORMAL) LoggerHook \n", + " -------------------- \n" + ] + } + ], + "source": [ + "from mmengine.runner import Runner\n", + "from mmengine.config import Config\n", + "\n", + "runner_cfg = Config.fromfile(\n", + " \"configs/models/vit-s-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_base.py\"\n", + ")\n", + "runner = Runner.from_cfg(runner_cfg)\n", + "model = runner.model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Functions from the VideoMAE repo\n", + "\n", + "\n", + "def get_num_layer_for_vit(var_name, num_max_layer):\n", + " if var_name in (\"backbone.cls_token\", \"backbone.mask_token\", \"backbone.pos_embed\"):\n", + " return 0\n", + " elif var_name.startswith(\"backbone.patch_embed\"):\n", + " return 0\n", + " elif var_name.startswith(\"backbone.rel_pos_bias\"):\n", + " return num_max_layer - 1\n", + " elif var_name.startswith(\"backbone.blocks\"):\n", + " layer_id = int(var_name.split(\".\")[2])\n", + " return layer_id + 1\n", + " else:\n", + " return num_max_layer - 1\n", + "\n", + "\n", + "class LayerDecayValueAssigner(object):\n", + " def __init__(self, values):\n", + " self.values = values\n", + "\n", + " def get_scale(self, layer_id):\n", + " return self.values[layer_id]\n", + "\n", + " def get_layer_id(self, var_name):\n", + " return get_num_layer_for_vit(var_name, len(self.values))\n", + "\n", + "\n", + "def get_parameter_groups(\n", + " model, weight_decay=1e-5, skip_list=(), get_num_layer=None, get_layer_scale=None\n", + "):\n", + " parameter_group_names = {}\n", + " parameter_group_vars = {}\n", + "\n", + " for name, param in model.named_parameters():\n", + " if not param.requires_grad:\n", + " continue # frozen weights\n", + " if (\n", + " len(param.shape) == 1\n", + " or name.endswith(\".bias\")\n", + " or name.endswith(\".scale\")\n", + " or name in skip_list\n", + " ):\n", + " group_name = \"no_decay\"\n", + " 
this_weight_decay = 0.0\n", + " else:\n", + " group_name = \"decay\"\n", + " this_weight_decay = weight_decay\n", + " if get_num_layer is not None:\n", + " layer_id = get_num_layer(name)\n", + " group_name = \"layer_%d_%s\" % (layer_id, group_name)\n", + " else:\n", + " layer_id = None\n", + "\n", + " if group_name not in parameter_group_names:\n", + " if get_layer_scale is not None:\n", + " scale = get_layer_scale(layer_id)\n", + " else:\n", + " scale = 1.0\n", + "\n", + " parameter_group_names[group_name] = {\n", + " \"weight_decay\": this_weight_decay,\n", + " \"params\": [],\n", + " \"lr_scale\": scale,\n", + " }\n", + " parameter_group_vars[group_name] = {\n", + " \"weight_decay\": this_weight_decay,\n", + " \"params\": [],\n", + " \"lr_scale\": scale,\n", + " }\n", + "\n", + " parameter_group_vars[group_name][\"params\"].append(param)\n", + " parameter_group_names[group_name][\"params\"].append(name)\n", + "\n", + " return parameter_group_names" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the parameter groups from VideoMAE\n", + "\n", + "assigner = LayerDecayValueAssigner(\n", + " list(LAYER_DECAY ** (MODEL_DEPTH + 1 - i) for i in range(MODEL_DEPTH + 2))\n", + ")\n", + "\n", + "groups = get_parameter_groups(\n", + " model,\n", + " BASE_WEIGHT_DECAY,\n", + " get_num_layer=assigner.get_layer_id,\n", + " get_layer_scale=assigner.get_scale,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'backbone.patch_embed.projection.weight': {'lr_mult': 0.023757264018058777,\n", + " 'decay_mult': 1},\n", + " 'backbone.patch_embed.projection.bias': {'lr_mult': 0.023757264018058777,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.0.norm1.weight': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.0.norm1.bias': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 0},\n", + " 
'backbone.blocks.0.attn.q_bias': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.0.attn.v_bias': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.0.attn.proj.bias': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.0.norm2.weight': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.0.norm2.bias': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.0.mlp.layers.0.0.bias': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.0.mlp.layers.1.bias': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.0.attn.qkv.weight': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.0.attn.proj.weight': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.0.mlp.layers.0.0.weight': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.0.mlp.layers.1.weight': {'lr_mult': 0.03167635202407837,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.1.norm1.weight': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.1.norm1.bias': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.1.attn.q_bias': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.1.attn.v_bias': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.1.attn.proj.bias': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.1.norm2.weight': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.1.norm2.bias': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.1.mlp.layers.0.0.bias': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.1.mlp.layers.1.bias': 
{'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.1.attn.qkv.weight': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.1.attn.proj.weight': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.1.mlp.layers.0.0.weight': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.1.mlp.layers.1.weight': {'lr_mult': 0.04223513603210449,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.2.norm1.weight': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.2.norm1.bias': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.2.attn.q_bias': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.2.attn.v_bias': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.2.attn.proj.bias': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.2.norm2.weight': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.2.norm2.bias': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.2.mlp.layers.0.0.bias': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.2.mlp.layers.1.bias': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.2.attn.qkv.weight': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.2.attn.proj.weight': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.2.mlp.layers.0.0.weight': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.2.mlp.layers.1.weight': {'lr_mult': 0.056313514709472656,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.3.norm1.weight': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.3.norm1.bias': {'lr_mult': 
0.07508468627929688,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.3.attn.q_bias': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.3.attn.v_bias': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.3.attn.proj.bias': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.3.norm2.weight': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.3.norm2.bias': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.3.mlp.layers.0.0.bias': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.3.mlp.layers.1.bias': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.3.attn.qkv.weight': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.3.attn.proj.weight': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.3.mlp.layers.0.0.weight': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.3.mlp.layers.1.weight': {'lr_mult': 0.07508468627929688,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.4.norm1.weight': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.4.norm1.bias': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.4.attn.q_bias': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.4.attn.v_bias': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.4.attn.proj.bias': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.4.norm2.weight': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.4.norm2.bias': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.4.mlp.layers.0.0.bias': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 0},\n", + 
" 'backbone.blocks.4.mlp.layers.1.bias': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.4.attn.qkv.weight': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.4.attn.proj.weight': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.4.mlp.layers.0.0.weight': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.4.mlp.layers.1.weight': {'lr_mult': 0.1001129150390625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.5.norm1.weight': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.5.norm1.bias': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.5.attn.q_bias': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.5.attn.v_bias': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.5.attn.proj.bias': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.5.norm2.weight': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.5.norm2.bias': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.5.mlp.layers.0.0.bias': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.5.mlp.layers.1.bias': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.5.attn.qkv.weight': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.5.attn.proj.weight': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.5.mlp.layers.0.0.weight': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.5.mlp.layers.1.weight': {'lr_mult': 0.13348388671875,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.6.norm1.weight': {'lr_mult': 0.177978515625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.6.norm1.bias': {'lr_mult': 0.177978515625, 
'decay_mult': 0},\n", + " 'backbone.blocks.6.attn.q_bias': {'lr_mult': 0.177978515625, 'decay_mult': 0},\n", + " 'backbone.blocks.6.attn.v_bias': {'lr_mult': 0.177978515625, 'decay_mult': 0},\n", + " 'backbone.blocks.6.attn.proj.bias': {'lr_mult': 0.177978515625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.6.norm2.weight': {'lr_mult': 0.177978515625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.6.norm2.bias': {'lr_mult': 0.177978515625, 'decay_mult': 0},\n", + " 'backbone.blocks.6.mlp.layers.0.0.bias': {'lr_mult': 0.177978515625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.6.mlp.layers.1.bias': {'lr_mult': 0.177978515625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.6.attn.qkv.weight': {'lr_mult': 0.177978515625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.6.attn.proj.weight': {'lr_mult': 0.177978515625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.6.mlp.layers.0.0.weight': {'lr_mult': 0.177978515625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.6.mlp.layers.1.weight': {'lr_mult': 0.177978515625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.7.norm1.weight': {'lr_mult': 0.2373046875, 'decay_mult': 0},\n", + " 'backbone.blocks.7.norm1.bias': {'lr_mult': 0.2373046875, 'decay_mult': 0},\n", + " 'backbone.blocks.7.attn.q_bias': {'lr_mult': 0.2373046875, 'decay_mult': 0},\n", + " 'backbone.blocks.7.attn.v_bias': {'lr_mult': 0.2373046875, 'decay_mult': 0},\n", + " 'backbone.blocks.7.attn.proj.bias': {'lr_mult': 0.2373046875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.7.norm2.weight': {'lr_mult': 0.2373046875, 'decay_mult': 0},\n", + " 'backbone.blocks.7.norm2.bias': {'lr_mult': 0.2373046875, 'decay_mult': 0},\n", + " 'backbone.blocks.7.mlp.layers.0.0.bias': {'lr_mult': 0.2373046875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.7.mlp.layers.1.bias': {'lr_mult': 0.2373046875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.7.attn.qkv.weight': {'lr_mult': 0.2373046875,\n", + " 'decay_mult': 1},\n", + " 
'backbone.blocks.7.attn.proj.weight': {'lr_mult': 0.2373046875,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.7.mlp.layers.0.0.weight': {'lr_mult': 0.2373046875,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.7.mlp.layers.1.weight': {'lr_mult': 0.2373046875,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.8.norm1.weight': {'lr_mult': 0.31640625, 'decay_mult': 0},\n", + " 'backbone.blocks.8.norm1.bias': {'lr_mult': 0.31640625, 'decay_mult': 0},\n", + " 'backbone.blocks.8.attn.q_bias': {'lr_mult': 0.31640625, 'decay_mult': 0},\n", + " 'backbone.blocks.8.attn.v_bias': {'lr_mult': 0.31640625, 'decay_mult': 0},\n", + " 'backbone.blocks.8.attn.proj.bias': {'lr_mult': 0.31640625, 'decay_mult': 0},\n", + " 'backbone.blocks.8.norm2.weight': {'lr_mult': 0.31640625, 'decay_mult': 0},\n", + " 'backbone.blocks.8.norm2.bias': {'lr_mult': 0.31640625, 'decay_mult': 0},\n", + " 'backbone.blocks.8.mlp.layers.0.0.bias': {'lr_mult': 0.31640625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.8.mlp.layers.1.bias': {'lr_mult': 0.31640625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.8.attn.qkv.weight': {'lr_mult': 0.31640625, 'decay_mult': 1},\n", + " 'backbone.blocks.8.attn.proj.weight': {'lr_mult': 0.31640625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.8.mlp.layers.0.0.weight': {'lr_mult': 0.31640625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.8.mlp.layers.1.weight': {'lr_mult': 0.31640625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.9.norm1.weight': {'lr_mult': 0.421875, 'decay_mult': 0},\n", + " 'backbone.blocks.9.norm1.bias': {'lr_mult': 0.421875, 'decay_mult': 0},\n", + " 'backbone.blocks.9.attn.q_bias': {'lr_mult': 0.421875, 'decay_mult': 0},\n", + " 'backbone.blocks.9.attn.v_bias': {'lr_mult': 0.421875, 'decay_mult': 0},\n", + " 'backbone.blocks.9.attn.proj.bias': {'lr_mult': 0.421875, 'decay_mult': 0},\n", + " 'backbone.blocks.9.norm2.weight': {'lr_mult': 0.421875, 'decay_mult': 0},\n", + " 'backbone.blocks.9.norm2.bias': {'lr_mult': 
0.421875, 'decay_mult': 0},\n", + " 'backbone.blocks.9.mlp.layers.0.0.bias': {'lr_mult': 0.421875,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.9.mlp.layers.1.bias': {'lr_mult': 0.421875, 'decay_mult': 0},\n", + " 'backbone.blocks.9.attn.qkv.weight': {'lr_mult': 0.421875, 'decay_mult': 1},\n", + " 'backbone.blocks.9.attn.proj.weight': {'lr_mult': 0.421875, 'decay_mult': 1},\n", + " 'backbone.blocks.9.mlp.layers.0.0.weight': {'lr_mult': 0.421875,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.9.mlp.layers.1.weight': {'lr_mult': 0.421875,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.10.norm1.weight': {'lr_mult': 0.5625, 'decay_mult': 0},\n", + " 'backbone.blocks.10.norm1.bias': {'lr_mult': 0.5625, 'decay_mult': 0},\n", + " 'backbone.blocks.10.attn.q_bias': {'lr_mult': 0.5625, 'decay_mult': 0},\n", + " 'backbone.blocks.10.attn.v_bias': {'lr_mult': 0.5625, 'decay_mult': 0},\n", + " 'backbone.blocks.10.attn.proj.bias': {'lr_mult': 0.5625, 'decay_mult': 0},\n", + " 'backbone.blocks.10.norm2.weight': {'lr_mult': 0.5625, 'decay_mult': 0},\n", + " 'backbone.blocks.10.norm2.bias': {'lr_mult': 0.5625, 'decay_mult': 0},\n", + " 'backbone.blocks.10.mlp.layers.0.0.bias': {'lr_mult': 0.5625,\n", + " 'decay_mult': 0},\n", + " 'backbone.blocks.10.mlp.layers.1.bias': {'lr_mult': 0.5625, 'decay_mult': 0},\n", + " 'backbone.blocks.10.attn.qkv.weight': {'lr_mult': 0.5625, 'decay_mult': 1},\n", + " 'backbone.blocks.10.attn.proj.weight': {'lr_mult': 0.5625, 'decay_mult': 1},\n", + " 'backbone.blocks.10.mlp.layers.0.0.weight': {'lr_mult': 0.5625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.10.mlp.layers.1.weight': {'lr_mult': 0.5625,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.11.norm1.weight': {'lr_mult': 0.75, 'decay_mult': 0},\n", + " 'backbone.blocks.11.norm1.bias': {'lr_mult': 0.75, 'decay_mult': 0},\n", + " 'backbone.blocks.11.attn.q_bias': {'lr_mult': 0.75, 'decay_mult': 0},\n", + " 'backbone.blocks.11.attn.v_bias': {'lr_mult': 0.75, 'decay_mult': 
0},\n", + " 'backbone.blocks.11.attn.proj.bias': {'lr_mult': 0.75, 'decay_mult': 0},\n", + " 'backbone.blocks.11.norm2.weight': {'lr_mult': 0.75, 'decay_mult': 0},\n", + " 'backbone.blocks.11.norm2.bias': {'lr_mult': 0.75, 'decay_mult': 0},\n", + " 'backbone.blocks.11.mlp.layers.0.0.bias': {'lr_mult': 0.75, 'decay_mult': 0},\n", + " 'backbone.blocks.11.mlp.layers.1.bias': {'lr_mult': 0.75, 'decay_mult': 0},\n", + " 'backbone.blocks.11.attn.qkv.weight': {'lr_mult': 0.75, 'decay_mult': 1},\n", + " 'backbone.blocks.11.attn.proj.weight': {'lr_mult': 0.75, 'decay_mult': 1},\n", + " 'backbone.blocks.11.mlp.layers.0.0.weight': {'lr_mult': 0.75,\n", + " 'decay_mult': 1},\n", + " 'backbone.blocks.11.mlp.layers.1.weight': {'lr_mult': 0.75, 'decay_mult': 1},\n", + " 'backbone.fc_norm.weight': {'lr_mult': 1.0, 'decay_mult': 0},\n", + " 'backbone.fc_norm.bias': {'lr_mult': 1.0, 'decay_mult': 0},\n", + " 'cls_head.fc_cls.bias': {'lr_mult': 1.0, 'decay_mult': 0},\n", + " 'cls_head.fc_cls.weight': {'lr_mult': 1.0, 'decay_mult': 1}}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Convert the parameter groups to the format used by mmaction\n", + "\n", + "custom_keys = {}\n", + "for _, group in groups.items():\n", + " decay_mult = 0 if group[\"weight_decay\"] == 0 else 1\n", + " params = group[\"params\"]\n", + " lr_mult = group[\"lr_scale\"]\n", + " for param in params:\n", + " custom_keys[param] = {\"lr_mult\": lr_mult, \"decay_mult\": decay_mult}\n", + "\n", + "custom_keys" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "human-fall-detection", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff 
--git a/notebooks/dataset_label_analysis.ipynb b/notebooks/dataset_label_analysis.ipynb index dbc73a3..d4d6287 100644 --- a/notebooks/dataset_label_analysis.ipynb +++ b/notebooks/dataset_label_analysis.ipynb @@ -1,353 +1,837 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Label Analysis\n", - "\n", - "In this notebook we analyse the datsets and label distributions we get for different settings for sampling and labeling strategy." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "%reload_ext autoreload\n", - "%autoreload 2\n", - "import re\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from datasets import HighQualityFallDataset\n", - "from datasets.transforms.label_strategy import HQFD_LABEL_DESCRIPTION, PriorityLabel\n", - "from datasets.transforms.sampling_strategy import GaussianSampling, UniformSampling" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
filenamelabelintervalsample_idxmodalitystart_indexlabel_namevideo_category
0data/Fall_Simulation_Data/videos/ADL17_Cam1.avi2(30.0, 40.0)0RGB0OtherADL
1data/Fall_Simulation_Data/videos/ADL17_Cam1.avi2(50.0, 60.0)1RGB0OtherADL
2data/Fall_Simulation_Data/videos/ADL17_Cam1.avi2(60.0, 70.0)2RGB0OtherADL
3data/Fall_Simulation_Data/videos/ADL17_Cam1.avi2(90.0, 100.0)3RGB0OtherADL
4data/Fall_Simulation_Data/videos/ADL17_Cam1.avi2(160.0, 170.0)4RGB0OtherADL
\n", - "
" - ], - "text/plain": [ - " filename label interval \\\n", - "0 data/Fall_Simulation_Data/videos/ADL17_Cam1.avi 2 (30.0, 40.0) \n", - "1 data/Fall_Simulation_Data/videos/ADL17_Cam1.avi 2 (50.0, 60.0) \n", - "2 data/Fall_Simulation_Data/videos/ADL17_Cam1.avi 2 (60.0, 70.0) \n", - "3 data/Fall_Simulation_Data/videos/ADL17_Cam1.avi 2 (90.0, 100.0) \n", - "4 data/Fall_Simulation_Data/videos/ADL17_Cam1.avi 2 (160.0, 170.0) \n", - "\n", - " sample_idx modality start_index label_name video_category \n", - "0 0 RGB 0 Other ADL \n", - "1 1 RGB 0 Other ADL \n", - "2 2 RGB 0 Other ADL \n", - "3 3 RGB 0 Other ADL \n", - "4 4 RGB 0 Other ADL " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ANN_FILE = \"data/Fall_Simulation_Data/annotations.csv\"\n", - "\n", - "uniform_sampling = UniformSampling(clip_len=10, stride=0, overlap=False)\n", - "gaussian_sampling = GaussianSampling(\n", - " clip_len=10, n_samples_per_sec=None, fallback_sampler=None, std=None\n", - ")\n", - "label_strategy = PriorityLabel(\n", - " label_description=HQFD_LABEL_DESCRIPTION,\n", - " threshold=0.0,\n", - " absolute_threshold=False,\n", - " priority=[0, 1, 2],\n", - ")\n", - "\n", - "hqfd = HighQualityFallDataset(\n", - " ann_file=ANN_FILE,\n", - " sampling_strategy=gaussian_sampling,\n", - " label_strategy=label_strategy,\n", - " pipeline=[],\n", - " num_classes=3,\n", - " test_mode=False,\n", - " drop_ratios=[0.0, 0.0, 0.75],\n", - ")\n", - "\n", - "df_hqfd = pd.DataFrame(list(hqfd))\n", - "class_names = [\"Fall\", \"Lying\", \"Other\"]\n", - "df_hqfd[\"label_name\"] = df_hqfd[\"label\"].apply(lambda x: class_names[x])\n", - "\n", - "\n", - "def extract_category(filename):\n", - " match = re.search(r\"(ADL|Fall)\", filename.split(\"/\")[-1])\n", - " return match.group(1) if match else None\n", - "\n", - "\n", - "df_hqfd[\"video_category\"] = df_hqfd[\"filename\"].apply(extract_category)\n", - "\n", - "df_hqfd.head()" - ] - }, - { - 
"cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_label_dist(df_hqfd):\n", - " display(\"---- Label Distribution ----\")\n", - " display(df_hqfd[\"label_name\"].value_counts().sort_index() / len(df_hqfd))\n", - "\n", - " display(\"---- Label Counts ----\")\n", - " display(df_hqfd[\"label_name\"].value_counts().sort_index())\n", - " df_hqfd[\"label_name\"].value_counts().sort_index().plot(kind=\"bar\")\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'---- Label Distribution ----'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "label_name\n", - "Fall 0.288679\n", - "Lying 0.204657\n", - "Other 0.506664\n", - "Name: count, dtype: float64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'---- Label Counts ----'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "label_name\n", - "Fall 1711\n", - "Lying 1213\n", - "Other 3003\n", - "Name: count, dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot_label_dist(df_hqfd)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'---- Label Distribution ----'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "label_name\n", - "Fall 0.525330\n", - "Lying 0.372429\n", - "Other 0.102241\n", - "Name: count, dtype: float64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'---- Label Counts ----'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "label_name\n", - "Fall 1711\n", - "Lying 1213\n", - "Other 333\n", - "Name: count, dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot_label_dist(df_hqfd[df_hqfd[\"video_category\"] == \"Fall\"])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "human-fall-detection", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Label Analysis\n", + "\n", + "In this notebook we analyse the datsets and label distributions we get for different settings for sampling and labeling strategy." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2\n", + "import re\n", + "import shutil\n", + "from pathlib import Path\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from datasets import HighQualityFallDataset\n", + "from datasets.transforms.label_strategy import HQFD_LABEL_DESCRIPTION, PriorityLabel\n", + "from datasets.transforms.sampling_strategy import GaussianSampling, UniformSampling" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from mmaction.datasets.transforms import (\n", + " DecordInit,\n", + " DecordDecode,\n", + " SampleFrames,\n", + " Resize,\n", + " RandomCrop,\n", + " Flip,\n", + " FormatShape,\n", + " PackActionInputs,\n", + " CenterCrop,\n", + ")\n", + "from datasets.transforms import ClipVideo\n", + "\n", + "# Train pipeline\n", + "train_pipeline = [\n", + " DecordInit(),\n", + " ClipVideo(),\n", + " SampleFrames(clip_len=16, frame_interval=8, num_clips=1),\n", + " DecordDecode(),\n", + " 
Resize(scale=(-1, 224)),\n", + " RandomCrop(size=224),\n", + " Resize(scale=(224, 224), keep_ratio=False),\n", + " Flip(flip_ratio=0.5),\n", + " FormatShape(input_format=\"NCTHW\"),\n", + " PackActionInputs(),\n", + "]\n", + "\n", + "val_pipeline = [\n", + " DecordInit(),\n", + " ClipVideo(),\n", + " SampleFrames(clip_len=16, frame_interval=8, num_clips=1, test_mode=True),\n", + " DecordDecode(),\n", + " Resize(scale=(-1, 224)),\n", + " CenterCrop(crop_size=224),\n", + " FormatShape(input_format=\"NCTHW\"),\n", + " PackActionInputs(),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "val_ann_file = \"data/Fall_Simulation_Data/annotations_val.csv\"\n", + "train_ann_file = \"data/Fall_Simulation_Data/annotations_train.csv\"\n", + "\n", + "np.random.seed(42)\n", + "\n", + "gaussian_sampling = GaussianSampling(\n", + " clip_len=5,\n", + " n_samples_per_sec=None,\n", + " fallback_sampler=UniformSampling(clip_len=5, stride=5, overlap=False),\n", + " std=None,\n", + ")\n", + "label_strategy = PriorityLabel(\n", + " label_description=HQFD_LABEL_DESCRIPTION,\n", + " threshold=0,\n", + " absolute_threshold=False,\n", + " priority=[0, 1, 2],\n", + ")\n", + "\n", + "uniform_sampling = UniformSampling(clip_len=5, stride=0, overlap=False)\n", + "val_hqfd = HighQualityFallDataset(\n", + " ann_file=val_ann_file,\n", + " sampling_strategy=uniform_sampling,\n", + " label_strategy=label_strategy,\n", + " pipeline=val_pipeline,\n", + " num_classes=3,\n", + " test_mode=True,\n", + ")\n", + "\n", + "train_hqfd = HighQualityFallDataset(\n", + " ann_file=train_ann_file,\n", + " sampling_strategy=gaussian_sampling,\n", + " label_strategy=label_strategy,\n", + " pipeline=train_pipeline,\n", + " num_classes=3,\n", + " test_mode=False,\n", + " drop_ratios=[0.0, 0.0, 0.30],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def analyse_dataset(ds):\n", 
+ " df_hqfd = convert_ds_to_df(ds)\n", + " display(\"------- Full Dataset -------\")\n", + " plot_label_dist(df_hqfd)\n", + " display(\"------- Only Fall videos -------\")\n", + " plot_label_dist(df_hqfd[df_hqfd[\"video_category\"] == \"Fall\"])\n", + "\n", + "\n", + "def convert_ds_to_df(ds: HighQualityFallDataset):\n", + " np.random.seed(42)\n", + " df_hqfd = pd.DataFrame(ds.load_data_list())\n", + " class_names = [\"Fall\", \"Lying\", \"Other\"]\n", + " df_hqfd[\"label_name\"] = df_hqfd[\"label\"].apply(lambda x: class_names[x])\n", + "\n", + " def extract_category(filename):\n", + " match = re.search(r\"(ADL|Fall)\", filename.split(\"/\")[-1])\n", + " return match.group(1) if match else None\n", + "\n", + " df_hqfd[\"video_category\"] = df_hqfd[\"filename\"].apply(extract_category)\n", + "\n", + " return df_hqfd\n", + "\n", + "\n", + "def plot_label_dist(df_hqfd):\n", + " display(\"---- Dataset Size ----\")\n", + " display(len(df_hqfd))\n", + "\n", + " display(\"---- Label Distribution ----\")\n", + " display(df_hqfd[\"label_name\"].value_counts().sort_index() / len(df_hqfd))\n", + "\n", + " display(\"---- Label Counts ----\")\n", + " display(df_hqfd[\"label_name\"].value_counts().sort_index())\n", + " df_hqfd[\"label_name\"].value_counts().sort_index().plot(kind=\"bar\")\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train DS" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'------- Full Dataset -------'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Dataset Size ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "10876" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Label Distribution ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, 
+ { + "data": { + "text/plain": [ + "label_name\n", + "Fall 0.146837\n", + "Lying 0.194189\n", + "Other 0.658974\n", + "Name: count, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Label Counts ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "label_name\n", + "Fall 1597\n", + "Lying 2112\n", + "Other 7167\n", + "Name: count, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'------- Only Fall videos -------'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Dataset Size ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "5532" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Label Distribution ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "label_name\n", + "Fall 0.288684\n", + "Lying 0.381779\n", + "Other 0.329537\n", + "Name: count, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Label Counts ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "label_name\n", + "Fall 1597\n", + "Lying 2112\n", + "Other 1823\n", + "Name: count, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "analyse_dataset(train_hqfd)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Val DS" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'------- Full Dataset -------'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Dataset Size ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "2985" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Label Distribution ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "label_name\n", + "Fall 0.013400\n", + "Lying 0.157789\n", + "Other 0.828811\n", + "Name: count, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Label Counts ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "label_name\n", + "Fall 40\n", + "Lying 471\n", + "Other 2474\n", + "Name: count, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'------- Only Fall videos -------'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Dataset Size ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "852" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Label Distribution ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "label_name\n", + "Fall 0.046948\n", + "Lying 0.552817\n", + "Other 0.400235\n", + "Name: count, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'---- Label Counts ----'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "label_name\n", + "Fall 40\n", + "Lying 471\n", + "Other 341\n", + "Name: count, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "analyse_dataset(val_hqfd)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of Samples" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from mmaction.visualization import ActionVisualizer\n", + "\n", + "vis = ActionVisualizer()\n", + "\n", + "\n", + "def get_random_sample_from_class(ds: HighQualityFallDataset, label: int, n: int = 1):\n", + " np.random.seed(42)\n", + " dl = pd.DataFrame(ds.load_data_list())\n", + " idx = dl[dl[\"label\"] == label].index.to_list()\n", + " idx = np.random.choice(idx, size=n, replace=False)\n", + " return idx\n", + "\n", + "\n", + "def render_sample_videos(path: str = \"tmp\"):\n", + " from ipywidgets import Output, GridspecLayout\n", + " from IPython import display\n", + "\n", + " videos = sorted(Path(path).glob(\"*.gif\"))\n", + " grid = GridspecLayout((len(videos) // 4) + 1, 4)\n", + "\n", + " for i, video in enumerate(videos):\n", + " out = Output()\n", + " with out:\n", + " display.display(display.Image(video))\n", + " grid[i // 4, i % 4] = out\n", + "\n", + " display.display(grid)\n", + "\n", + "\n", + "def generate_sample_videos(\n", + " ds: HighQualityFallDataset,\n", + " idx: int | list[int],\n", + " out_path: str | None = None,\n", + " render: bool = True,\n", + "):\n", + " if isinstance(idx, int):\n", + " idx = [idx]\n", + " vis = ActionVisualizer()\n", + " for i in idx:\n", + " # video [B, C, T, H, W] -> [T, H, W, C]\n", + " video = ds[i][\"inputs\"].squeeze(0).permute(1, 2, 3, 0).numpy().astype(np.uint8)\n", + " # Convert to RGB\n", + " video = video[..., ::-1]\n", + " vis.add_datasample(\n", + " name=i,\n", + " video=video,\n", + " data_sample=ds[i][\"data_samples\"],\n", + " draw_gt=True,\n", + " show_frames=True,\n", + " out_type=\"gif\",\n", + " out_path=f\"tmp/{i}.gif\" if out_path is None else out_path + f\"/{i}.gif\",\n", + " 
)\n", + " if render:\n", + " render_sample_videos()\n", + " shutil.rmtree(\"tmp\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Validation Vis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Class 0 Samples in Val" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ece263d38db14c2f996cc4766a5201d8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "GridspecLayout(children=(Output(layout=Layout(grid_area='widget001')), Output(layout=Layout(grid_area='widget0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = get_random_sample_from_class(val_hqfd, 0, 20)\n", + "generate_sample_videos(val_hqfd, idx)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Class 1 Val Samples" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9f65ac016a0447d1b751e327a0ae8305", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "GridspecLayout(children=(Output(layout=Layout(grid_area='widget001')), Output(layout=Layout(grid_area='widget0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = get_random_sample_from_class(val_hqfd, 1, 20)\n", + "generate_sample_videos(val_hqfd, idx)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Class 2 Val" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cce622ef085647279b2cd1419a3168d8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "GridspecLayout(children=(Output(layout=Layout(grid_area='widget001')), 
Output(layout=Layout(grid_area='widget0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = get_random_sample_from_class(val_hqfd, 2, 20)\n", + "generate_sample_videos(val_hqfd, idx)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train Vis" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1a841bc7b69f4fb9baa6fda4dec06068", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "GridspecLayout(children=(Output(layout=Layout(grid_area='widget001')), Output(layout=Layout(grid_area='widget0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = get_random_sample_from_class(train_hqfd, 0, 20)\n", + "generate_sample_videos(train_hqfd, idx)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c0d9d8651a6e4a8295d90dd4599e9813", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "GridspecLayout(children=(Output(layout=Layout(grid_area='widget001')), Output(layout=Layout(grid_area='widget0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = get_random_sample_from_class(train_hqfd, 1, 20)\n", + "generate_sample_videos(train_hqfd, idx)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7bdcf6b03db04a5bbe6aada25554ae4f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "GridspecLayout(children=(Output(layout=Layout(grid_area='widget001')), Output(layout=Layout(grid_area='widget0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = get_random_sample_from_class(train_hqfd, 2, 
20)\n", + "generate_sample_videos(train_hqfd, idx)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Class weight calculation\n", + "\n", + "weight_for_class_i = total_samples / (num_samples_in_class_i * num_classes)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'df_hqfd' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/tillgrutschus/Library/CloudStorage/OneDrive-Personal/Documents/Arbeit und Beruf/Uppsala/Project in Data Science/human-fall-detection/notebooks/dataset_label_analysis.ipynb Cell 24\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 2\u001b[0m num_classes \u001b[39m=\u001b[39m \u001b[39m3\u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(num_classes):\n\u001b[0;32m----> 4\u001b[0m total_samples \u001b[39m=\u001b[39m \u001b[39mlen\u001b[39m(df_hqfd)\n\u001b[1;32m 5\u001b[0m num_samples_in_class_i \u001b[39m=\u001b[39m \u001b[39mlen\u001b[39m(df_hqfd[df_hqfd[\u001b[39m\"\u001b[39m\u001b[39mlabel\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m==\u001b[39m i])\n\u001b[1;32m 6\u001b[0m weights\u001b[39m.\u001b[39mappend(total_samples \u001b[39m/\u001b[39m num_samples_in_class_i \u001b[39m*\u001b[39m num_classes)\n", + "\u001b[0;31mNameError\u001b[0m: name 'df_hqfd' is not defined" + ] + } + ], + "source": [ + "weights = []\n", + "num_classes = 3\n", + "for i in range(num_classes):\n", + " total_samples = len(df_hqfd)\n", + " num_samples_in_class_i = len(df_hqfd[df_hqfd[\"label\"] == i])\n", + " weights.append(total_samples / num_samples_in_class_i * num_classes)\n", + "\n", + "display(weights)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "human-fall-detection", 
+ "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/visualization/visualization_hook.py b/visualization/visualization_hook.py index e5be66e..257e2e6 100644 --- a/visualization/visualization_hook.py +++ b/visualization/visualization_hook.py @@ -53,7 +53,7 @@ def _draw_samples( elif "frame_dir" in data_sample: sample_name = osp.basename(data_sample.get("frame_dir")) else: - sample_name = str(sample_id) + sample_name = f"visualization/{str(sample_id)}" draw_args = self.draw_args if self.out_dir is not None: diff --git a/weights.dvc b/weights.dvc index 7bd26e2..1e03786 100644 --- a/weights.dvc +++ b/weights.dvc @@ -1,6 +1,6 @@ outs: -- md5: 48821704004601bbd361b199c730da94.dir - size: 394932752 - nfiles: 2 +- md5: 0ca76cf4d3f1320299c140bb8713267c.dir + size: 568033822 + nfiles: 3 hash: md5 path: weights