CONFIG ├── train │ └── seed: 1112 │ interval: step │ monitor: val/accuracy │ mode: max │ ema: 0.0 │ test: false │ debug: false │ ignore_warnings: false │ state: │ mode: null │ n_context: 0 │ n_context_eval: 0 │ ckpt: null │ disable_dataset: false │ validate_at_start: false │ pretrained_model_path: null │ pretrained_model_strict_load: true │ pretrained_model_state_hook: │ _name_: null │ post_init_hook: │ _name_: null │ layer_decay: │ _name_: null │ decay: 0.7 │ optimizer_param_grouping: │ bias_weight_decay: false │ normalization_weight_decay: false │ remove_test_loader_in_eval: true │ global_batch_size: 1024 │ ├── tolerance │ └── logdir: ./resume │ id: null │ ├── wandb │ └── None ├── trainer │ └── _target_: pytorch_lightning.Trainer │ devices: 4 │ accelerator: gpu │ accumulate_grad_batches: 2 │ max_epochs: 305 │ gradient_clip_val: 0.0 │ log_every_n_steps: 10 │ limit_train_batches: 1.0 │ limit_val_batches: 1.0 │ precision: 16 │ replace_sampler_ddp: false │ ├── loader │ └── batch_size: 128 │ num_workers: 12 │ pin_memory: true │ drop_last: true │ batch_size_eval: 128 │ batch_size_test: 128 │ persistent_workers: true │ ├── dataset │ └── _name_: imagenet │ data_dirname: imagenet │ cache_dir: None │ image_size: 224 │ val_split: 0.0 │ shuffle: true │ num_aug_repeats: 3 │ num_gpus: 4 │ loader_fft: false │ train_transforms: │ _target_: timm.data.create_transform │ input_size: 224 │ is_training: true │ auto_augment: rand-m9-mstd0.5-inc1 │ interpolation: random │ re_prob: 0.25 │ re_mode: pixel │ val_transforms: │ _target_: timm.data.create_transform │ input_size: 224 │ interpolation: bicubic │ crop_pct: 0.9 │ test_transforms: │ _target_: timm.data.create_transform │ input_size: 224 │ interpolation: bicubic │ crop_pct: 0.9 │ mixup: │ _target_: src.dataloaders.utils.timm_mixup.TimmMixup │ mixup_alpha: 0.8 │ cutmix_alpha: 1.0 │ ├── task │ └── _name_: base │ loss: │ _name_: soft_cross_entropy │ label_smoothing: 0.1 │ metrics: │ - accuracy │ - accuracy@5 │ - accuracy@10 │ torchmetrics: null │ loss_val: │ _name_: cross_entropy │ ├── optimizer │ └── _name_: adamw │ lr: 0.0002 │ weight_decay: 0.01 │ betas: │ - 0.9 │ - 0.999 │ ├── scheduler │ └── _name_: cosine_warmup_timm │ t_in_epochs: false │ t_initial: 300 │ lr_min: 1.0e-05 │ warmup_lr_init: 1.0e-06 │ warmup_t: 10 │ ├── callbacks │ └── learning_rate_monitor: │ logging_interval: step │ timer: │ step: true │ inter_step: false │ epoch: true │ val: true │ params: │ total: true │ trainable: true │ fixed: true │ model_checkpoint: │ monitor: val/accuracy │ mode: max │ save_top_k: 1 │ save_last: true │ dirpath: checkpoints/ │ filename: val/accuracy │ auto_insert_metric_name: false │ verbose: true │ ├── model │ └── layer: │ _name_: hyena │ l_max: 1024 │ order: 2 │ filter_order: 64 │ num_heads: 1 │ inner_factor: 1 │ num_blocks: 1 │ fused_bias_fc: false │ outer_mixing: false │ dropout: 0.0 │ filter_dropout: 0.0 │ filter_cls: hyena-filter │ post_order_ffn: false │ jit_filter: false │ short_filter_order: 5 │ activation: id │ return_state: true │ filter_args: │ emb_dim: 33 │ order: 128 │ fused_fft_conv: false │ lr: 0.0002 │ lr_pos_emb: 1.0e-05 │ dropout: 0.0 │ w: 1 │ wd: 0 │ bias: true │ normalized: false │ num_inner_mlps: 1 │ _name_: vit_b_16 │ patch_size: 16 │ d_model: 768 │ dropout: 0.0 │ drop_path_rate: 0.1 │ depth: 12 │ expand: 4 │ norm: layer │ use_pos_embed: false │ use_cls_token: false │ layer_reps: 1 │ img_size: 224 │ num_classes: 1000 │ track_norms: false │ ├── encoder │ └── None └── decoder └── None