---
###
# Casanovo configuration.
# Blank entries (written explicitly as "null" below) are interpreted as "None".
# Parameters that can be modified when running inference with Casanovo,
# i.e. denovo and eval modes in the command line interface, are marked with
# "(I)". Other parameters shouldn't be changed unless a new Casanovo model
# is being trained.
###

# Random seed to ensure reproducible results.
random_seed: 454

# Spectrum processing options.
# Number of the most intense peaks to retain, any remaining peaks are
# discarded.
n_peaks: 150
# Min peak m/z allowed, peaks with smaller m/z are discarded.
min_mz: 50.0
# Max peak m/z allowed, peaks with larger m/z are discarded.
max_mz: 2500.0
# Min peak intensity allowed, less intense peaks are discarded.
min_intensity: 0.01
# Max absolute m/z difference allowed when removing the precursor peak.
remove_precursor_tol: 2.0  # Da
# Max precursor charge allowed, spectra with larger charge are skipped.
max_charge: 10
# Max absolute difference allowed with respect to observed precursor m/z (I).
# Predictions outside the tolerance range are assigned a negative peptide
# score.
precursor_mass_tol: 50  # ppm
# Isotopes to consider when comparing predicted and observed precursor
# m/z's (I).
isotope_error_range: [0, 1]
# The minimum length of predicted peptides (I).
min_peptide_len: 6

# Model architecture options.
# Dimensionality of latent representations, i.e. peak embeddings.
dim_model: 512
# Number of attention heads.
n_head: 8
# Dimensionality of fully connected layers.
dim_feedforward: 1024
# Number of transformer layers in spectrum encoder and peptide decoder.
n_layers: 9
# Dropout rate for model weights.
dropout: 0.0
# Number of dimensions to use for encoding peak intensity.
# Projected up to ``dim_model`` by default and summed with the peak m/z
# encoding.
dim_intensity: null
# Option to provide a pre-trained spectrum encoder when training.
# Trained from scratch by default.
custom_encoder: null
# Max decoded peptide length.
max_length: 100
# Amino acid and modification vocabulary to use.
# Keys are residue/modification tokens, values are the masses listed for them.
residues:
  "G": 57.021464
  "A": 71.037114
  "S": 87.032028
  "P": 97.052764
  "V": 99.068414
  "T": 101.047670
  "C+57.021": 160.030649  # 103.009185 + 57.021464
  "L": 113.084064
  "I": 113.084064
  "N": 114.042927
  "D": 115.026943
  "Q": 128.058578
  "K": 128.094963
  "E": 129.042593
  "M": 131.040485
  "H": 137.058912
  "F": 147.068414
  "R": 156.101111
  "Y": 163.063329
  "W": 186.079313
  # Amino acid modifications.
  "M+15.995": 147.035400  # Met oxidation: 131.040485 + 15.994915
  "N+0.984": 115.026943  # Asn deamidation: 114.042927 + 0.984016
  "Q+0.984": 129.042594  # Gln deamidation: 128.058578 + 0.984016
  # N-terminal modifications.
  "+42.011": 42.010565  # Acetylation
  "+43.006": 43.005814  # Carbamylation
  "-17.027": -17.026549  # NH3 loss
  # NOTE(review): 43.005814 - 17.026549 = 25.979265, which is 0.001 Da below
  # the value listed here. Left unchanged on purpose; confirm which is intended
  # before editing.
  "+43.006-17.027": 25.980265  # Carbamylation and NH3 loss

# Logging frequency in training steps.
n_log: 1
# Tensorboard object to keep track of training metrics.
tb_summarywriter: "tb_logs"
# Number of warmup iterations for learning rate scheduler.
# Written without "_" digit separators, which only YAML 1.1 loaders accept as
# part of an integer.
warmup_iters: 100000
# Max number of iterations for learning rate scheduler.
max_iters: 600000
# Learning rate for weight updates during training.
# Written with an explicit decimal point: the YAML 1.1 float pattern requires
# one, so a dot-less exponent form may be loaded as a string instead.
learning_rate: 5.0e-4
# Regularization term for weight updates.
weight_decay: 1.0e-5

# Training/inference options.
# Number of spectra in one training batch.
train_batch_size: 32
# Number of spectra in one inference batch (I).
predict_batch_size: 1024
# Number of beams used in beam search (I).
n_beams: 1
# Number of PSMs for each spectrum (I).
top_match: 1
# Object for logging training progress.
logger: null
# Max number of training epochs.
max_epochs: 30
# Number of validation steps to run before training begins.
num_sanity_val_steps: 0
# Set to "false" to further train a pre-trained Casanovo model.
train_from_scratch: true
# Save model checkpoints during training.
save_model: true
# Path to saved checkpoints.
model_save_folder_path: ""
# Set to "false" to save the PyTorch model instance.
save_weights_only: true
# Model validation and checkpointing frequency in training steps.
every_n_train_steps: 30
# Disable usage of a GPU (including Apple MPS).
no_gpu: false