# pin the GaNDLF version range this config is compatible with
version:
{
minimum: 0.1.3-dev,
maximum: 0.1.3-dev # this should NOT be made a variable, but should be tested after every tag is created
}
## Choose the model parameters here
model:
{
dimension: 3, # the dimension of the model and dataset: defines dimensionality of computations
base_filters: 30, # Set base filters: number of filters present in the initial module of the U-Net convolution; for IncU-Net, keep this divisible by 4
architecture: resunet, # options: unet, resunet, deep_resunet, deep_unet, light_resunet, light_unet, fcn, uinc, vgg, densenet
norm_type: batch, # options: batch, instance, or none (only for VGG); used for all networks
final_layer: softmax, # can be sigmoid, softmax, or none (none == regression/logits)
# sigmoid_input_multiplier: 1.0, # this is used during sigmoid, and defaults to 1.0
class_list: [0,1,2,4], # Set the list of labels the model should train on and predict
# class_list: '[*range(0,100,1)]' # a range of values from 0 to 99 with a step of 1 will be created; customize as needed, but ensure this is defined as a string, as it will be passed through the 'eval' function
# class_list: '[0,1||2||3,1||4,4]', # combinatorial training - this constructs a one-hot encoded mask using logical operands between the specified annotations. Note that double '|' or '&' should be used (not single) to avoid Python parsing issues
ignore_label_validation: 0, # this is the index in class_list whose performance is ignored during validation metric calculation
amp: False, # whether to use Automatic Mixed Precision for your operations - options: True, False
# num_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types from the CSV
# save_at_every_epoch: True, # allows you to save the model at every epoch
# print_summary: True, # prints the summary of the model before training; defaults to True
## densenet models have the following optional parameters:
# growth_rate (int) - how many filters to add each layer (k in paper)
# num_init_features (int) - the number of filters to learn in the first convolution layer
# bn_size (int) - multiplicative factor for the number of bottleneck layers (i.e., bn_size * k features in the bottleneck layer)
# drop_rate (float) - dropout rate after each dense layer
# num_classes (int) - number of classification classes
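## a commented sketch of the densenet options above, placed here under 'model' (values are illustrative, not verified defaults):
# growth_rate: 32,
# num_init_features: 64,
# bn_size: 4,
# drop_rate: 0.2,
# num_classes: 2,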
## unet_multilayer, unetr, transunet have the following optional parameter:
# depth (int) - the number of encoder/decoder layers
## imagenet_unet has the following optional parameter:
# pretrained (bool) - if True (default), uses the pretrained imagenet weights
# final_layer - one of ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"]
# encoder_name (str) - the name of the encoder to use, pick from https://github.com/qubvel/segmentation_models.pytorch#encoders
# decoder_use_batchnorm (str) - whether to use batch norm (True/False) or 'inplace'; this overrides 'norm_type', see https://github.com/qubvel/segmentation_models.pytorch/blob/master/segmentation_models_pytorch/decoders/unet/model.py
# decoder_attention_type (str) - the decoder attention type, see https://github.com/qubvel/segmentation_models.pytorch/blob/master/segmentation_models_pytorch/decoders/unet/model.py
# encoder_depth (int) - the depth of the encoder, also picked up from 'depth'
# decoder_channels (list) - a list of the number of channels for each decoder layer; should have the same length as 'encoder_depth'
# converter_type (str) - either acs (targets ACSConv) or conv3d (targets nn.Conv3d) or soft (targets SoftACSConv with learnable weights, default); see https://doi.org/10.1109/JBHI.2021.3049452
# the following parameters can be used to convert the "imagenet_unet" model to a classifier/regression network; they only take effect when the "problem_type" is identified as something other than segmentation.
# - pooling (str): One of "max", "avg"; default is "avg"
# - dropout (float): Dropout factor in [0, 1); default is 0.2
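## a commented sketch combining the imagenet_unet options above (values are illustrative assumptions, not verified defaults):
# pretrained: True,
# encoder_name: resnet34, # pick from the segmentation_models.pytorch encoder list linked above
# encoder_depth: 5,
# decoder_channels: [256, 128, 64, 32, 16], # same length as 'encoder_depth'
# pooling: avg, # only used when the problem_type is not segmentation
# dropout: 0.2, # only used when the problem_type is not segmentation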
}
## metrics to evaluate the validation performance
metrics:
- dice # segmentation
# - hausdorff # hausdorff 100 percentile, segmentation
# - hausdorff95 # hausdorff 95 percentile, segmentation
# - mse # regression/classification
# - accuracy # classification ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/accuracy.html
# - classification_accuracy # classification
# - balanced_accuracy # classification ## more details https://scikit-learn.org/stable/modules/generated/sklearn.metrics.balanced_accuracy_score.html
# - per_label_accuracy # used for classification
# - f1 # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/f1_score.html
# - precision # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/precision.html
# - recall # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/recall.html
# - iou # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/jaccard_index.html
## this customizes the inference, primarily used for segmentation outputs
inference_mechanism: {
grid_aggregator_overlap: crop, # this determines how overlapping patch predictions are aggregated in the output; should be either 'crop' or 'average' - https://torchio.readthedocs.io/patches/patch_inference.html#grid-aggregator
patch_overlap: 0, # amount of overlap of patches during inference, defaults to 0; see https://torchio.readthedocs.io/patches/patch_inference.html#gridsampler
}
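## an alternative inference_mechanism sketch that averages overlapping patch predictions instead of cropping
## (the overlap value is illustrative; see the torchio GridSampler link above):
# inference_mechanism: {
#   grid_aggregator_overlap: average,
#   patch_overlap: 16,
# }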
# this enables or disables lazy loading - setting to True reads all data into memory once during data loading, improving
# I/O at the expense of memory consumption
in_memory: False
# if enabled, resize/resample operations in `data_preprocessing` will save files to disk instead of directly getting read into memory as tensors
memory_save_mode: False
# this will save the generated masks for validation and testing data for qualitative analysis
save_output: False
# this will save the patches used during training for qualitative analysis
save_training: False
# Set the modality: rad for radiology, path for histopathology
modality: rad
## Patch size during training - 2D patch for breast images since third dimension is not patched
patch_size: [144,144,64]
# uniform: UniformSampler or label: LabelSampler
patch_sampler: uniform
# patch_sampler: label
# patch_sampler:
# {
# type: label,
# enable_padding: True,
# padding_mode: symmetric, # for options, see 'mode' in https://numpy.org/doc/stable/reference/generated/numpy.pad.html
# biased_sampling: True, # adds additional sampling probability of labels based on "sampling_weights" key; only gets invoked when using label sampler. If not present, gets calculated using the same mechanism as weighted_loss
# }
# If enabled, this parameter pads images and labels when label sampler is used
enable_padding: False
# Number of epochs
num_epochs: 100
# Set the patience - the number of epochs to wait for the performance metric to improve before exiting the training loop; defaults to the number of epochs
patience: 50
# Set the batch size
batch_size: 1
# gradient clipping mode - options: norm, value, agc
clip_mode: norm
# gradient clipping value
clip_grad: 0.1
## Set the initial learning rate
learning_rate: 0.001
# Learning rate scheduler - options: "triangle", "triangle_modified", "exp", "step", "reduce-on-plateau", "cosineannealing", "triangular", "triangular2", "exp_range"
# triangle/triangle_modified use LambdaLR, while triangular/triangular2/exp_range use CyclicLR
scheduler:
{
type: triangle,
min_lr: 0.00001,
max_lr: 1,
}
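## an alternative scheduler sketch using one of the CyclicLR-backed options listed above
## (only 'type', 'min_lr' and 'max_lr' are assumed here, mirroring the active example):
# scheduler:
# {
#   type: triangular,
#   min_lr: 0.00001,
#   max_lr: 0.001,
# }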
# Set the loss function - options (lower-case only): dc (dice only), dc_log (-log of dice), ce (cross-entropy), dcce (sum of dice and ce), focal/dc_focal, mcc/mcc_log, mse (mean squared error), ...
# mse is the MSE defined by torch and can take a 'reduction' variable; see https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html#torch.nn.MSELoss
# focal is the focal loss and can take 2 variables: gamma and size_average
# use mse for regression/classification problems and dice-based losses for segmentation
loss_function: dc
# this parameter weights the loss to handle imbalanced losses better
weighted_loss: True # generates new keys "class_weights" and "penalty_weights" that handle the aggregate weights of the class and penalties per label, respectively
#loss_function:
# {
# 'mse':{
# 'reduction': 'mean' # see https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html#torch.nn.MSELoss for all options
# }
# }
#loss_function:
# {
# 'focal':{
# 'gamma': 1.0
# }
# }
# Which optimizer do you want to use - sgd, asgd, adam, adamw, adamax, sparseadam, rprop, adadelta, adagrad, rmsprop,
# each has its own options and functionalities, which are initialized with defaults; see GANDLF.optimizers.wrap_torch for details
optimizer: adam
## this parameter controls the nested training process
# performs randomized k-fold cross-validation, see https://en.wikipedia.org/wiki/Cross-validation_(statistics) for details
# split is performed using sklearn's KFold method: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html
# to train on a single fold, use '-' before the fold number to make the number of folds "negative" -- NOT recommended
nested_training:
{
stratified: False, # this will perform stratified k-fold cross-validation but only with offline data splitting, see https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html
testing: 5, # this controls the number of testing data folds for final model evaluation; to disable, use '1' [NOT recommended]
validation: 5 # this controls the number of validation data folds to be used for model *selection* during training (not used for back-propagation)
}
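## a commented sketch of single-fold training using the 'negative' fold convention described above (NOT recommended):
# nested_training:
# {
#   testing: 1, # disables the testing split
#   validation: -5 # the '-' makes the fold count negative, so a single fold is used for training
# }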
## pre-processing
# this constructs an order of transformations, which is applied to all images in the data loader
# order: all_methods_as_specified_in_dict --> normalize [normalization methods always applied at the end]
# 'to_canonical': change the image to canonical orientation, see https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.ToCanonical
# 'rgba2rgb': convert images from rgba to rgb
# 'threshold': performs intensity thresholding; i.e., if x[i] < min: x[i] = 0; and if x[i] > max: x[i] = 0
# 'clip': performs intensity clipping; i.e., if x[i] < min: x[i] = min; and if x[i] > max: x[i] = max
# 'threshold'/'clip': if either min/max is not defined, it is taken as the minimum/maximum of the image, respectively
# 'normalize': performs z-score normalization: https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.ZNormalization
# 'normalize_positive': performs z-score normalization with mean and std-dev calculated only on pixels > 0
# 'normalize_nonZero': performs z-score normalization with mean and std-dev calculated only on non-zero pixels
# 'normalize_nonZero_masked': performs z-score normalization with mean and std-dev calculated on non-zero pixels and applied only to the masked region
# 'crop_external_zero_planes': crops all non-zero planes from input tensor to reduce image search space
# 'resample: resolution: X,Y,Z': resample the voxel resolution: https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.Resample
# 'resample: resolution: X': resample the voxel resolution in an isotropic manner: https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.Resample
# resize the image(s) and mask (this should be greater than or equal to patch_size); resize is done ONLY when resample is not defined -- WARNING: resizing images on the fly forces them to be fully loaded into memory, which dramatically increases RAM usage
# 'resize_image' resizes the image and mask BEFORE applying any other operation
# 'resize_patch' resizes the image and mask AFTER extracting the patch
data_preprocessing:
{
# 'histogram_matching':{
# 'target': '/path/to/target/image.nii.gz', # this is the target image to which the histogram of the current image is matched; if this is not defined, histogram equalization is performed on the entire image with an equal ramp of [-1,1]
# 'num_hist_level': 1024, # number of histogram levels
# 'num_match_points': 16, # number of matching points for histogram matching
# },
# 'histogram_equalization':{ # this performs global histogram equalization using the same logic as 'histogram_matching', just without the target
# 'num_hist_level': 1024, # number of histogram levels
# 'num_match_points': 16, # number of matching points for histogram matching
# },
# 'adaptive_histogram_equalization', # this performs Power Law Adaptive Histogram Equalization using https://simpleitk.org/doxygen/latest/html/classitk_1_1simple_1_1AdaptiveHistogramEqualizationImageFilter.html
'threshold':{
'min': 10,
'max': 75
},
# 'clip':{
# 'min': 10,
# 'max': 75
# },
'normalize',
# 'normalize_positive', # this performs z-score normalization only on pixels > 0
# 'normalize_nonZero', # this performs z-score normalization only on non-zero pixels
# 'normalize_nonZero_masked', # this performs z-score normalization only on masked region
'resample':{
'resolution': [1,2,3]
},
'resample_min':{
'resolution': 1, # this will be the maximum spacing (translates to minimum resolution) across all axes
},
#'resize_image': [128,128], # this is generally not recommended, as it changes image properties in unexpected ways
#'resize_patch': [128,128], # this is generally not recommended, as it changes image properties in unexpected ways
'crop_external_zero_planes', # this will crop all zero-valued planes across all axes
'crop': [64,64,64], # this will crop the image by removing specified number of pixels; see https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.Crop
'centercrop': [64,64,64], # this will crop the image to the specified size from the center of image; see https://torchio.readthedocs.io/transforms/preprocessing.html#croporpad
## histogram matching algorithms
# 'histogram_matching':{
# 'target': '/path/to/template.nii.gz', # if this is absent, global histogram equalization takes place
# # 'target': 'adaptive', # this will perform adaptive histogram matching using https://simpleitk.org/doxygen/latest/html/classitk_1_1simple_1_1AdaptiveHistogramEqualizationImageFilter.html
# },
## stain normalization algorithms
# 'stain_normalization':{
# 'target': '/path/to/target.png', # this is required
# 'extractor': 'vahadane', # can be either vahadane, ruifrok or macenko; defaults to ruifrok
# }
## rescale image
# 'rescale':{
# 'in_min_max': [15,125], # intensity range of the input image to rescale from; defaults to min/max of the image
# 'out_min_max': [0,1], # desired output intensity range, defaults to [0,1]
# 'percentiles': [5,95], # percentile values of the input image that will be mapped to the output range, defaults to [0,100]
# }
}
## various data augmentation techniques
# options: affine, elastic, downsample, motion, kspace, bias, blur, gaussianNoise, swap
# keep/edit as needed
# all transforms: https://torchio.readthedocs.io/transforms/transforms.html
# 'kspace': one of ghosting or spiking is picked (randomly) for augmentation
# the 'probability' sub-parameter sets the probability of the particular augmentation being applied during training (this is always 1 for normalize and resampling)
data_augmentation:
{
default_probability: 1.0, # keeping probability 1.0 to ensure that all augmentations are applied
'affine':{ # for options, see https://torchio.readthedocs.io/transforms/augmentation.html#randomaffine
'scales': [0.5, 1.5],
'degrees': 25,
'translation': 2,
},
'elastic': # for options, see https://torchio.readthedocs.io/transforms/augmentation.html#randomelasticdeformation
{
'num_control_points': 7,
'max_displacement': 0.1,
'locked_borders': 2,
},
'kspace':{
'probability': 1
},
'motion':{
'probability': 1
},
'bias',
'blur', # this is a gaussian blur and can take 'std' as a sub-parameter; the default 'std' is [0, 0.015 * std(image)]
## example of blur with specific std range
# 'blur': {
# 'std': [0, 1] # example std-dev range, for details, see https://torchio.readthedocs.io/transforms/augmentation.html#torchio.transforms.RandomBlur
# },
'noise': { # for details, see https://torchio.readthedocs.io/transforms/augmentation.html#torchio.transforms.RandomNoise
'mean': 0, # default mean
'std': [0, 1] # default std-dev range
},
'noise_var', # this is a random noise and can take 'std' and 'mean' as sub-parameters; the default 'std' is [0, 0.015 * std(image)]
'gamma',
'swap':{
'patch_size': 15, # patch size for swapping; if a single number is provided, the same number is used for all axes
'num_iterations': 50, # number of times that two patches will be swapped, defaults to 100
},
'flip':{
'axis': [0,1,2] # one or more axes can be put here; if this isn't defined, all axes are considered
},
'anisotropic':{
'axis': [0,1],
'downsampling': [2,2.5]
},
'rotate_90':{ # explicitly rotate image by 90 degrees
'axis': [0,2] # one or more axes can be put here; if this isn't defined, all axes are considered
},
'rotate_180', # explicitly rotate image by 180 degrees; if 'axis' isn't defined, default is [1,2,3]
'colorjitter':{ # this is used to apply the ColorJitter transform from torch - only used for rgb images
'brightness': [0,1], # optional: needs to be between [0,1]
'contrast': [0,0.75], # optional: needs to be between [0,1]
'saturation': [0,0.5], # optional: needs to be between [0,1]
'hue': [-0.25,0.25], # optional: needs to be between [-0.5,0.5] for range and [0,1] for a single value
},
'hed_transform':{
'haematoxylin_bias_range': [-0.1, 0.1],
'eosin_bias_range': [-0.1, 0.1],
'dab_bias_range': [-0.1, 0.1],
'haematoxylin_sigma_range': [-0.1, 0.1],
'eosin_sigma_range': [-0.1, 0.1],
'dab_sigma_range': [-0.1, 0.1],
'cutoff_range': [0.01, 0.99],
}
}
# ## post-processing steps - only applied before output labels are saved
# data_postprocessing:
# {
# 'fill_holes', # this will fill holes in the image
# 'mapping': {0: 0, 1: 1, 2: 4}, # this will map the labels to a new set of labels, useful to convert labels from combinatorial training (i.e., combined segmentation labels)
# }
## parallel training on HPC - this is the command to prepend when submitting to a high performance computing
# cluster for parallel computing during multi-fold training
# not used for single fold training
# this gets passed before the training_loop, so ensure enough memory is provided along with other parameters
# that your HPC would expect
# ${outputDir} will be changed to the outputDir you pass in CLI + '/${fold_number}'
# ensure that the correct location of the virtual environment is getting invoked, otherwise it would pick up the system python, which might not have all dependencies
# parallel_compute_command: 'qsub -b y -l gpu -l h_vmem=32G -cwd -o ${outputDir}/\$JOB_ID.stdout -e ${outputDir}/\$JOB_ID.stderr `pwd`/sge_wrapper _correct_location_of_virtual_environment_/venv/bin/python'
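## a hypothetical SLURM analogue of the SGE example above (the srun flags are assumptions; adapt to your cluster):
# parallel_compute_command: 'srun --gres=gpu:1 --mem=32G --output=${outputDir}/%j.stdout --error=${outputDir}/%j.stderr _correct_location_of_virtual_environment_/venv/bin/python'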
## queue configuration - https://torchio.readthedocs.io/data/patch_training.html?#queue
# this determines the maximum number of patches that can be stored in the queue. Using a large number means that the queue needs to be filled less often, but more CPU memory is needed to store the patches
q_max_length: 40
# this determines the number of patches to extract from each volume. A small number of patches ensures a large variability in the queue, but training will be slower
q_samples_per_volume: 5
# this determines the number of subprocesses to use for data loading; '0' means the main process is used
q_num_workers: 2 # scale this according to available CPU resources
# used for debugging
q_verbose: False