CONFIG
├── train
│   └── seed: 1112                                                                                                                                                                                                 
│       interval: step                                                                                                                                                                                             
│       monitor: val/accuracy                                                                                                                                                                                      
│       mode: max                                                                                                                                                                                                  
│       ema: 0.0                                                                                                                                                                                                   
│       test: false                                                                                                                                                                                                
│       debug: false                                                                                                                                                                                               
│       ignore_warnings: false                                                                                                                                                                                     
│       state:                                                                                                                                                                                                     
│         mode: null                                                                                                                                                                                               
│         n_context: 0                                                                                                                                                                                             
│         n_context_eval: 0                                                                                                                                                                                        
│       ckpt: null                                                                                                                                                                                                 
│       disable_dataset: false                                                                                                                                                                                     
│       validate_at_start: false                                                                                                                                                                                   
│       pretrained_model_path: null                                                                                                                                                                                
│       pretrained_model_strict_load: true                                                                                                                                                                         
│       pretrained_model_state_hook:                                                                                                                                                                               
│         _name_: null                                                                                                                                                                                             
│       post_init_hook:                                                                                                                                                                                            
│         _name_: null                                                                                                                                                                                             
│       layer_decay:                                                                                                                                                                                               
│         _name_: null                                                                                                                                                                                             
│         decay: 0.7                                                                                                                                                                                               
│       optimizer_param_grouping:                                                                                                                                                                                  
│         bias_weight_decay: false                                                                                                                                                                                 
│         normalization_weight_decay: false                                                                                                                                                                        
│       remove_test_loader_in_eval: true                                                                                                                                                                           
│       global_batch_size: 1024                                                                                                                                                                                    
│                                                                                                                                                                                                                  
├── tolerance
│   └── logdir: ./resume                                                                                                                                                                                           
│       id: null                                                                                                                                                                                                   
│                                                                                                                                                                                                                  
├── wandb
│   └── None                                                                                                                                                                                                       
├── trainer
│   └── _target_: pytorch_lightning.Trainer                                                                                                                                                                        
│       devices: 4                                                                                                                                                                                                 
│       accelerator: gpu                                                                                                                                                                                           
│       accumulate_grad_batches: 2                                                                                                                                                                                 
│       max_epochs: 305                                                                                                                                                                                            
│       gradient_clip_val: 0.0                                                                                                                                                                                     
│       log_every_n_steps: 10                                                                                                                                                                                      
│       limit_train_batches: 1.0                                                                                                                                                                                   
│       limit_val_batches: 1.0                                                                                                                                                                                     
│       precision: 16                                                                                                                                                                                              
│       replace_sampler_ddp: false                                                                                                                                                                                 
│                                                                                                                                                                                                                  
├── loader
│   └── batch_size: 128                                                                                                                                                                                            
│       num_workers: 12                                                                                                                                                                                            
│       pin_memory: true                                                                                                                                                                                           
│       drop_last: true                                                                                                                                                                                            
│       batch_size_eval: 128                                                                                                                                                                                       
│       batch_size_test: 128                                                                                                                                                                                       
│       persistent_workers: true                                                                                                                                                                                   
│                                                                                                                                                                                                                  
├── dataset
│   └── _name_: imagenet                                                                                                                                                                                           
│       data_dirname: imagenet                                                                                                                                                                                     
│       cache_dir: None                                                                                                                                                                                            
│       image_size: 224                                                                                                                                                                                            
│       val_split: 0.0                                                                                                                                                                                             
│       shuffle: true                                                                                                                                                                                              
│       num_aug_repeats: 3                                                                                                                                                                                         
│       num_gpus: 4                                                                                                                                                                                                
│       loader_fft: false                                                                                                                                                                                          
│       train_transforms:                                                                                                                                                                                          
│         _target_: timm.data.create_transform                                                                                                                                                                     
│         input_size: 224                                                                                                                                                                                          
│         is_training: true                                                                                                                                                                                        
│         auto_augment: rand-m9-mstd0.5-inc1                                                                                                                                                                       
│         interpolation: random                                                                                                                                                                                    
│         re_prob: 0.25                                                                                                                                                                                            
│         re_mode: pixel                                                                                                                                                                                           
│       val_transforms:                                                                                                                                                                                            
│         _target_: timm.data.create_transform                                                                                                                                                                     
│         input_size: 224                                                                                                                                                                                          
│         interpolation: bicubic                                                                                                                                                                                   
│         crop_pct: 0.9                                                                                                                                                                                            
│       test_transforms:                                                                                                                                                                                           
│         _target_: timm.data.create_transform                                                                                                                                                                     
│         input_size: 224                                                                                                                                                                                          
│         interpolation: bicubic                                                                                                                                                                                   
│         crop_pct: 0.9                                                                                                                                                                                            
│       mixup:                                                                                                                                                                                                     
│         _target_: src.dataloaders.utils.timm_mixup.TimmMixup                                                                                                                                                     
│         mixup_alpha: 0.8                                                                                                                                                                                         
│         cutmix_alpha: 1.0                                                                                                                                                                                        
│                                                                                                                                                                                                                  
├── task
│   └── _name_: base                                                                                                                                                                                               
│       loss:                                                                                                                                                                                                      
│         _name_: soft_cross_entropy                                                                                                                                                                               
│         label_smoothing: 0.1                                                                                                                                                                                     
│       metrics:                                                                                                                                                                                                   
│       - accuracy                                                                                                                                                                                                 
│       - accuracy@5                                                                                                                                                                                               
│       - accuracy@10                                                                                                                                                                                              
│       torchmetrics: null                                                                                                                                                                                         
│       loss_val:                                                                                                                                                                                                  
│         _name_: cross_entropy                                                                                                                                                                                    
│                                                                                                                                                                                                                  
├── optimizer
│   └── _name_: adamw                                                                                                                                                                                              
│       lr: 0.0002                                                                                                                                                                                                 
│       weight_decay: 0.01                                                                                                                                                                                         
│       betas:                                                                                                                                                                                                     
│       - 0.9                                                                                                                                                                                                      
│       - 0.999                                                                                                                                                                                                    
│                                                                                                                                                                                                                  
├── scheduler
│   └── _name_: cosine_warmup_timm                                                                                                                                                                                 
│       t_in_epochs: false                                                                                                                                                                                         
│       t_initial: 300                                                                                                                                                                                             
│       lr_min: 1.0e-05                                                                                                                                                                                            
│       warmup_lr_init: 1.0e-06                                                                                                                                                                                    
│       warmup_t: 10                                                                                                                                                                                               
│                                                                                                                                                                                                                  
├── callbacks
│   └── learning_rate_monitor:                                                                                                                                                                                     
│         logging_interval: step                                                                                                                                                                                   
│       timer:                                                                                                                                                                                                     
│         step: true                                                                                                                                                                                               
│         inter_step: false                                                                                                                                                                                        
│         epoch: true                                                                                                                                                                                              
│         val: true                                                                                                                                                                                                
│       params:                                                                                                                                                                                                    
│         total: true                                                                                                                                                                                              
│         trainable: true                                                                                                                                                                                          
│         fixed: true                                                                                                                                                                                              
│       model_checkpoint:                                                                                                                                                                                          
│         monitor: val/accuracy                                                                                                                                                                                    
│         mode: max                                                                                                                                                                                                
│         save_top_k: 1                                                                                                                                                                                            
│         save_last: true                                                                                                                                                                                          
│         dirpath: checkpoints/                                                                                                                                                                                    
│         filename: val/accuracy                                                                                                                                                                                   
│         auto_insert_metric_name: false                                                                                                                                                                           
│         verbose: true                                                                                                                                                                                            
│                                                                                                                                                                                                                  
├── model
│   └── layer:                                                                                                                                                                                                     
│         _name_: hyena                                                                                                                                                                                            
│         l_max: 1024                                                                                                                                                                                              
│         order: 2                                                                                                                                                                                                 
│         filter_order: 64                                                                                                                                                                                         
│         num_heads: 1                                                                                                                                                                                             
│         inner_factor: 1                                                                                                                                                                                          
│         num_blocks: 1                                                                                                                                                                                            
│         fused_bias_fc: false                                                                                                                                                                                     
│         outer_mixing: false                                                                                                                                                                                      
│         dropout: 0.0                                                                                                                                                                                             
│         filter_dropout: 0.0                                                                                                                                                                                      
│         filter_cls: hyena-filter                                                                                                                                                                                 
│         post_order_ffn: false                                                                                                                                                                                    
│         jit_filter: false                                                                                                                                                                                        
│         short_filter_order: 5                                                                                                                                                                                    
│         activation: id                                                                                                                                                                                           
│         return_state: true                                                                                                                                                                                       
│         filter_args:                                                                                                                                                                                             
│           emb_dim: 33                                                                                                                                                                                            
│           order: 128                                                                                                                                                                                             
│           fused_fft_conv: false                                                                                                                                                                                  
│           lr: 0.0002                                                                                                                                                                                             
│           lr_pos_emb: 1.0e-05                                                                                                                                                                                    
│           dropout: 0.0                                                                                                                                                                                           
│           w: 1                                                                                                                                                                                                   
│           wd: 0                                                                                                                                                                                                  
│           bias: true                                                                                                                                                                                             
│           normalized: false                                                                                                                                                                                      
│           num_inner_mlps: 1                                                                                                                                                                                      
│       _name_: vit_b_16                                                                                                                                                                                           
│       patch_size: 16                                                                                                                                                                                             
│       d_model: 768                                                                                                                                                                                               
│       dropout: 0.0                                                                                                                                                                                               
│       drop_path_rate: 0.1                                                                                                                                                                                        
│       depth: 12                                                                                                                                                                                                  
│       expand: 4                                                                                                                                                                                                  
│       norm: layer                                                                                                                                                                                                
│       use_pos_embed: false                                                                                                                                                                                       
│       use_cls_token: false                                                                                                                                                                                       
│       layer_reps: 1                                                                                                                                                                                              
│       img_size: 224                                                                                                                                                                                              
│       num_classes: 1000                                                                                                                                                                                          
│       track_norms: false                                                                                                                                                                                         
│                                                                                                                                                                                                                  
├── encoder
│   └── None                                                                                                                                                                                                       
└── decoder
    └── None