Implement optuna hyperparameter optimization #278

Closed
wants to merge 11 commits
4 changes: 3 additions & 1 deletion .github/workflows/github-actions-ci.yml
@@ -26,4 +26,6 @@ jobs:
python GDL.py mode=sampling
python GDL.py mode=train
python GDL.py mode=inference
python GDL.py mode=evaluate
python GDL.py mode=evaluate
# hyperparameter optimization with optuna
python GDL.py mode=train general.max_epochs=1 --multirun
3 changes: 2 additions & 1 deletion GDL.py
@@ -58,7 +58,7 @@ def run_gdl(cfg: DictConfig) -> None:
start_time = time.time()
# Read the task and execute it
task = get_method(f"{cfg.mode}_{cfg.general.task}.main")
task(cfg)
metric = task(cfg)

# Add git hash from current commit to parameters.
with open_dict(cfg):
@@ -75,6 +75,7 @@ def run_gdl(cfg: DictConfig) -> None:
"Elapsed time: {:.2f}s".format(time.time() - start_time) +
"\n" + "-" * len(msg) + "\n"
)
return metric
# ------------------------------------


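This change is what wires GDL into Hydra's Optuna sweeper: when the run is launched with --multirun, the sweeper uses the value returned by the Hydra entry point as the objective for each trial. A minimal sketch of that contract is below; the function body and config keys are illustrative placeholders, not GDL's actual training code.

```python
# Sketch of the return-value contract used by hydra-optuna-sweeper.
# With --multirun, each trial calls the decorated function on a sampled config
# and records the returned float as the trial's objective value.
import hydra
from omegaconf import DictConfig


def run_task(cfg: DictConfig) -> float:
    """Stand-in for the real training task; would return e.g. a validation IoU."""
    return float(cfg.training.lr)  # placeholder objective


@hydra.main(config_path="config", config_name="gdl_config_template")
def run_gdl(cfg: DictConfig) -> float:
    metric = run_task(cfg)   # the task returns the metric to optimize
    return metric            # the sweeper optimizes this per hydra.sweeper.direction


if __name__ == "__main__":
    run_gdl()
```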
2 changes: 2 additions & 0 deletions config/gdl_config_template.yaml
@@ -13,6 +13,8 @@ defaults:
- hydra: default
- override hydra/hydra_logging: colorlog # enable color logging to make it pretty
- override hydra/job_logging: colorlog # enable color logging to make it pretty
- override hydra/sweeper: optuna
- override hydra/sweeper/sampler: tpe
- _self_

general:
30 changes: 30 additions & 0 deletions config/hydra/default.yaml
@@ -5,6 +5,36 @@ sweep:
dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S}
subdir: ${hydra.job.num}

# for hyperparameter optimization with Optuna: https://hydra.cc/docs/next/plugins/optuna_sweeper/
sweeper:
sampler:
seed: 123
direction: maximize
study_name: run_gdl
storage: null
n_trials: 3
n_jobs: 1

search_space:
training.lr:
type: float
log: True
low: 1e-7
high: 0.01
loss:
type: categorical
choices: ["binary/softbce", "binary/lovasz", "binary/dice"]
model:
type: categorical
choices: ['deeplabv3_pretrained']
optimizer:
type: categorical
choices: ['adam', 'adabound']
# GPU memory is not released between runs, so the max used RAM threshold must be 100% or the GPU will be excluded
training.max_used_ram:
type: categorical
choices: [100]

# you can set here environment variables that are universal for all users
# for system specific variables (like data paths) it's better to use .env file!
job:
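For context, each entry in the search_space above maps to an Optuna suggestion: a log-scaled float for training.lr and categorical choices for loss, model, optimizer, and training.max_used_ram. A rough hand-written equivalent in plain Optuna is sketched below; the sweeper builds and runs this automatically, and train_and_eval is a hypothetical stand-in for one GDL training run.

```python
# Sketch of how the Hydra search_space above maps onto Optuna's suggest API.
import optuna


def train_and_eval(lr: float, loss: str, model: str, optimizer: str,
                   max_used_ram: int) -> float:
    """Hypothetical placeholder for one training run; returns the metric to maximize."""
    return 0.0


def objective(trial: optuna.Trial) -> float:
    lr = trial.suggest_float("training.lr", 1e-7, 0.01, log=True)  # type: float, log: True
    loss = trial.suggest_categorical("loss", ["binary/softbce", "binary/lovasz", "binary/dice"])
    model = trial.suggest_categorical("model", ["deeplabv3_pretrained"])
    optimizer = trial.suggest_categorical("optimizer", ["adam", "adabound"])
    max_used_ram = trial.suggest_categorical("training.max_used_ram", [100])
    return train_and_eval(lr, loss, model, optimizer, max_used_ram)


study = optuna.create_study(direction="maximize",
                            sampler=optuna.samplers.TPESampler(seed=123))
study.optimize(objective, n_trials=3)
```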
@@ -1,5 +1,5 @@
# @package _global_
model:
model_name: deeplabv3_resnet101
model_name: deeplabv3_pretrained
[Review comment, Collaborator] I think that keeping the encoder in the name can be nice

pretrained: True

@@ -1,5 +1,5 @@
# @package _global_
model:
model_name: ternausnet
model_name: deeplabv3_resnet101_dualhead
pretrained: True

15 changes: 0 additions & 15 deletions config/model/fastrcnn.yaml

This file was deleted.

5 changes: 0 additions & 5 deletions config/model/fcn_resnet101.yaml

This file was deleted.

5 changes: 0 additions & 5 deletions config/model/inception.yaml

This file was deleted.

@@ -16,8 +16,7 @@ global:
number_of_bands: 4
modalities: RGBN
# Model must be in the follow list:
# unet, unetsmall, checkpointed_unet, ternausnet,
# fcn_resnet101, deeplabv3_resnet101
# unet, unetsmall, checkpointed_unet, deeplabv3_resnet101
model_name: deeplabv3_resnet101
bucket_name: # name of the S3 bucket where data is stored. Leave blank if using local files
task: segmentation # Task to perform. Either segmentation or classification
6 changes: 6 additions & 0 deletions config/optimizer/adabound.yaml
@@ -0,0 +1,6 @@
# @package _global_
optimizer:
optimizer_name: 'adabound'
class_name: utils.adabound.AdaBound
params:
lr: ${training.lr}
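The class_name/params layout implies the optimizer is built dynamically from its dotted import path. GDL's actual instantiation code is not part of this diff, so the helper below is only a sketch of that pattern; the utils.adabound.AdaBound path is taken from the config above.

```python
# Sketch: instantiate an optimizer from a dotted class path and a params mapping,
# as the adabound.yaml entry above implies. Purely illustrative.
import importlib
from typing import Any, Dict, Iterable

import torch


def build_optimizer(class_name: str,
                    model_params: Iterable[torch.nn.Parameter],
                    params: Dict[str, Any]) -> torch.optim.Optimizer:
    module_path, cls_name = class_name.rsplit(".", 1)   # e.g. "utils.adabound", "AdaBound"
    cls = getattr(importlib.import_module(module_path), cls_name)
    return cls(model_params, **params)


# Example usage (assumes utils/adabound.py provides AdaBound, as the config states):
# model = torch.nn.Linear(4, 2)
# opt = build_optimizer("utils.adabound.AdaBound", model.parameters(), {"lr": 1e-4})
```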
4 changes: 2 additions & 2 deletions config/training/default_training.yaml
@@ -1,9 +1,9 @@
# @package _global_
training:
num_gpus: 1
batch_size: 1
batch_size: 2 # models with batchnorm require batch size > 1
eval_batch_size:
batch_metrics:
batch_metrics: 1
lr: 0.0001
max_epochs: ${general.max_epochs}
min_epochs: ${general.min_epochs}
166 changes: 0 additions & 166 deletions gdl_hyperopt_template.py
@@ -1,166 +0,0 @@
"""Hyperparamater optimization for GDL using hyperopt

This is a template for using hyperopt with GDL. The my_space variable currently needs to
be modified here, as well as GDL config modification logic within the objective_with_args
function.

"""

import argparse
from pathlib import Path
import pickle
from functools import partial
import pprint
import numpy as np

from ruamel_yaml import YAML
import mlflow
import torch
# ToDo: Add hyperopt to GDL requirements
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

from train_segmentation import main as train_main

# This is the hyperparameter space to explore
my_space = {'model_name': hp.choice('model_name', ['unet_pretrained', 'deeplabv3_resnet101']),
'loss_fn': hp.choice('loss_fn', ['CrossEntropy', 'Lovasz', 'Duo']),
'optimizer': hp.choice('optimizer', ['adam', 'adabound']),
'learning_rate': hp.loguniform('learning_rate', np.log(1e-7), np.log(0.1))}


def read_parameters(param_file):
"""Read and return parameters in .yaml file
Args:
param_file: Full file path of the parameters file
Returns:
YAML (Ruamel) CommentedMap dict-like object
"""
yaml = YAML()
with open(param_file) as yamlfile:
params = yaml.load(yamlfile)
return params


def get_latest_mlrun(params):
"""Get latest mlflow run

:param params: gdl parameters dictionary
:return: mlflow run object
"""

tracking_uri = params['global']['mlflow_uri']
mlflow.set_tracking_uri(tracking_uri)
mlexp = mlflow.get_experiment_by_name(params['global']['mlflow_experiment_name'])
exp_id = mlexp.experiment_id
try:
run_ids = ([x.run_id for x in mlflow.list_run_infos(
exp_id, max_results=1, order_by=["tag.release DESC"])])
except AttributeError:
mlflow_client = mlflow.tracking.MlflowClient(tracking_uri=tracking_uri)
run_ids = [x.run_id for x in mlflow_client.list_run_infos(exp_id, run_view_type=3)[0:1]]
mlrun = mlflow.get_run(run_ids[0])
return mlrun


def objective_with_args(hparams, params, config_path):
"""Objective function for hyperopt

This function edits the GDL parameters and runs a training.

:param hparams: arguments provided by hyperopt selection from hyperparameter space
:param params: gdl parameters dictionary
:param config_path: path to gdl configuration file
:return: loss dictionary for hyperopt
"""

# ToDo: This is dependent on the specific structure of the GDL config file
params['global']['model_name'] = hparams['model_name']
# params['training']['target_size'] = hparams['target_size']
params['training']['loss_fn '] = hparams['loss_fn']
params['training']['optimizer'] = hparams['optimizer']
params['training']['learning_rate'] = hparams['learning_rate']

try:
mlrun = get_latest_mlrun(params)
run_name_split = mlrun.data.tags['mlflow.runName'].split('_')
params['global']['mlflow_run_name'] = run_name_split[0] + f'_{int(run_name_split[1]) + 1}'
except:
pass
train_main(params, config_path)
torch.cuda.empty_cache()

mlflow.end_run()
mlrun = get_latest_mlrun(params)

# ToDo: Probably need some cleanup to avoid accumulating results on disk

# ToDo: This loss should be configurable
return {'loss': -mlrun.data.metrics['tst_iou'], 'status': STATUS_OK}


def trials_to_csv(trials, csv_pth):
"""hyperopt trials to CSV

:param trials: hyperopt trials object
"""

params = sorted(list(trials.vals.keys()))
csv_str = ''
for param in params:
csv_str += f'{param}, '
csv_str = csv_str + 'loss' + '\n'

for i in range(len(trials.trials)):
for param in params:
if my_space[param].name == 'switch':
csv_str += f'{my_space[param].pos_args[trials.vals[param][i] + 1].obj}, '
else:
csv_str += f'{trials.vals[param][i]}, '
csv_str = csv_str + f'{trials.results[i]["loss"]}' + '\n'

# ToDo: Customize where the csv output is
with open(csv_pth, 'w') as csv_obj:
csv_obj.write(csv_str)


def main(params, config_path):
# ToDo: Customize where the trials file is
# ToDo: Customize where the trials file is
root_path = Path(params['global']['assets_path'])
pkl_file = root_path.joinpath('hyperopt_trials.pkl')
csv_file = root_path.joinpath('hyperopt_results.csv')
if pkl_file.is_file():
trials = pickle.load(open(pkl_file, "rb"))
else:
trials = Trials()

objective = partial(objective_with_args, params=params, config_path=config_path)

n = 0
while n < params['global']['hyperopt_runs']:
best = fmin(objective,
space=my_space,
algo=tpe.suggest,
trials=trials,
max_evals=n + params['global']['hyperopt_delta'])
n += params['global']['hyperopt_delta']
pickle.dump(trials, open(pkl_file, "wb"))

# ToDo: Cleanup the output
pprint.pprint(trials.vals)
pprint.pprint(trials.results)
for key, val in best.items():
if my_space[key].name == 'switch':
best[key] = my_space[key].pos_args[val + 1].obj
pprint.pprint(best)
print(trials.best_trial['result'])
trials_to_csv(trials, csv_file)


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Geo Deep Learning hyperopt')
parser.add_argument('param_file', type=str, help='Path of gdl config file')
args = parser.parse_args()
gdl_params = read_parameters(args.param_file)
gdl_params['self'] = {'config_file': args.param_file}
main(gdl_params, Path(args.param_file))
5 changes: 5 additions & 0 deletions inference_segmentation.py
@@ -389,6 +389,10 @@ def main(params: dict) -> None:
debug=debug)

pred = pred[np.newaxis, :, :].astype(np.uint8)

if debug and not np.any(pred):
logging.error(f"Only background values were predicted. There may be a problem with the model")

inf_meta.update({"driver": "GTiff",
"height": pred.shape[1],
"width": pred.shape[2],
@@ -403,6 +407,7 @@
temp_file.unlink()
except OSError as e:
logging.warning(f'File Error: {temp_file, e.strerror}')

if raster_to_vec:
start_vec = time.time()
inference_vec = working_folder.joinpath(local_img.parent.name,
3 changes: 0 additions & 3 deletions models/README.md
@@ -1,14 +1,11 @@
## **Models available**

## Train from Scratch
- [Inception V3](https://arxiv.org/pdf/1512.00567.pdf)
- [Ternausnet](https://arxiv.org/abs/1801.05746.pdf)
- [Unet](https://arxiv.org/abs/1505.04597.pdf)
- Unet small (less deep version of [Unet](https://arxiv.org/abs/1505.04597.pdf))
- Checkpointed Unet (same as [Unet](https://arxiv.org/abs/1505.04597.pdf) small, but uses less GPU memory and recomputes data during the backward pass)

## Pre-trained (torch vision by default pretrained on coco dataset)
- [FCN with backbone resnet101](https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf)
- [Deeplabv3 with backbone resnet101](https://arxiv.org/abs/1706.05587.pdf)
- Experimental: [Deeplabv3 with backbone resnet101](https://arxiv.org/abs/1706.05587.pdf) adapted for RGB-NIR (4 Bands)
