Major and Minor updates to GDL #206

Merged Oct 5, 2021 (42 commits)
Changes from 36 commits
ac80352
Adding files to create a docker image
ychoquet Mar 18, 2021
31f4a64
Merge branch 'develop' of https://github.com/victorlazio109/geo-deep-…
ychoquet Mar 18, 2021
052a985
Merge pull request #1 from ychoquet/develop
valhassan Mar 22, 2021
63cd6a0
- arbitrary band support
Apr 8, 2021
069e030
- url assets support added specifically for inference
Apr 9, 2021
4a10cc3
- support for arbitrary number of bands
Apr 15, 2021
9e87901
- fixed! support for arbitrary number of bands
Apr 16, 2021
7409695
- cuda device fix
May 26, 2021
fa7c1d7
- added vectorisation
Jun 3, 2021
fb58e3c
- added module collections
Jul 2, 2021
55f83f0
- added new sample creation script
Jul 5, 2021
200d48f
- fixed tst set data allocation
Jul 6, 2021
a8a095a
Merge branch 'develop' of https://github.com/NRCan/geo-deep-learning …
Jul 8, 2021
e77f8a1
- changes to inference.py
Jul 9, 2021
0a25fbc
- minor fix for debugging
Jul 12, 2021
81ed707
- minor fix
Jul 12, 2021
9b4368b
- fix num_classes/class weight mismatch
Jul 12, 2021
c1233a4
- added sample creation by sensorID filter
Jul 13, 2021
4fae864
- minor fix, get_num_samples function
Jul 13, 2021
3f4d3fb
- stratification trial fix
Jul 14, 2021
8a38726
- debug print statements added
Jul 15, 2021
d7e8d35
removed debug print statements
Jul 15, 2021
e060066
- hyperopt template modified
Jul 20, 2021
ec21084
- minor change, file location
Jul 20, 2021
1db54e6
- commented to solve file permission errors
Jul 21, 2021
f7c06f0
- target_size modified to solve out of memory issues
Jul 21, 2021
c1d21b4
- temporary template added for training on HPC
Jul 21, 2021
fc4ab9f
Merge remote-tracking branch 'origin/develop' into develop
Jul 21, 2021
469000e
- minor fix
Jul 22, 2021
33bca73
- temporary fix for
Jul 22, 2021
d40bc0f
- updated
Jul 22, 2021
30f43b8
- added pretrained weights param Hpc, local
Jul 22, 2021
8cd4407
- model space added
Jul 22, 2021
59443ec
- minor fix to input tensor mismatch
Jul 23, 2021
2e8758d
- reference comment added dice_loss.py
Aug 12, 2021
c69b616
Merge branch 'develop' of https://github.com/NRCan/geo-deep-learning …
Aug 12, 2021
636e084
minor fixes: addressing code review
Sep 3, 2021
4d39157
fixes and new features:
Sep 3, 2021
427fcaf
- fix: param dict passed explicitly to avoid global calls.
Sep 8, 2021
563191f
- fix, clipped raster
Sep 10, 2021
7fa444d
- minor fixes: removed dask, added time checks
Sep 24, 2021
9fde1b1
Fixed suggested changes by reviewers
Oct 5, 2021
18 changes: 18 additions & 0 deletions Dockerfile
@@ -0,0 +1,18 @@
FROM continuumio/miniconda3

WORKDIR /app

# Create the environment:
COPY environment.yml .
RUN conda env create -f environment.yml

# Make RUN commands use the new environment:
SHELL ["conda", "run", "-n", "geo_deep_env", "/bin/bash", "-c"]

# Make sure the environment is activated:
RUN echo "Make sure flask is installed:"
RUN python -c "import flask"

# The code to run when container is started:
#COPY run.py .
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "geo_deep_env", "python"]
19 changes: 13 additions & 6 deletions README.md
@@ -32,7 +32,7 @@ The final step in the process is to assign every pixel in the original image a v
## **Requirement**
This project comprises a set of commands to be run at a shell command prompt. Examples used here are for a bash shell in an Ubuntu GNU/Linux environment.

- [Python 3.6](https://www.python.org/downloads/release/python-360/), see the full list of dependencies in [requirements.txt](requirements.txt)
- [Python 3.7.6](https://www.python.org/downloads/release/python-376/), see the full list of dependencies in [environment.yml](environment.yml)
- [mlflow](https://mlflow.org/)
- [minicanda](https://docs.conda.io/en/latest/miniconda.html) (highly recommended)
Contributor: typo : miniconda

- nvidia GPU (highly recommended)
@@ -43,13 +43,20 @@ This project comprises a set of commands to be run at a shell command prompt. E
These steps are for a workstation on Ubuntu 18.04 using miniconda.
Set and activate your python environment with the following commands:
```shell
conda create -n gpu_ENV python=3.6 -c pytorch pytorch torchvision
conda activate gpu_ENV
conda install --override-channels -c main -c conda-forge ruamel_yaml h5py fiona rasterio geopandas scikit-image scikit-learn tqdm
conda env create -f environment.yml
conda activate geo_deep_env
conda install -c fastai nvidia-ml-py3
conda install mlflow segmentation-models-pytorch
```
> For Windows OS:
##### For Docker
Collaborator: thanks, that's nice!

Move to the geo-deep-learning directory and use either of the following commands:
```shell
docker build .
# or
docker-compose build
```

##### For Windows OS:
> - You will have to convert the environment.yml file to requirements.txt; most packages need a pip install.
> - Install rasterio, fiona and gdal first, before installing the rest. We've experienced some [installation issues](https://github.com/conda-forge/gdal-feedstock/issues/213) with those libraries.
> - Mlflow should be installed using pip rather than conda, as mentioned [here](https://github.com/mlflow/mlflow/issues/1951)

50 changes: 25 additions & 25 deletions config/travis_CI/environment.yml
@@ -127,29 +127,29 @@ dependencies:
- zlib=1.2.11=h516909a_1010
- zstd=1.4.9=ha95c52a_0
- pip:
- affine==2.3.0
- attrs==20.3.0
- click-plugins==1.1.1
- cligj==0.7.1
- cycler==0.10.0
- decorator==4.4.2
- efficientnet-pytorch==0.6.3
- fiona==1.8.18
- geopandas==0.9.0
- imageio==2.9.0
- kiwisolver==1.3.1
- matplotlib==3.4.0
- munch==2.5.0
- networkx==2.5
- pretrainedmodels==0.7.4
- pyproj==3.0.1
- pywavelets==1.1.1
- rasterio==1.2.1
- scikit-image==0.18.1
- segmentation-models-pytorch==0.1.3
- shapely==1.7.1
- snuggs==1.4.7
- tifffile==2021.3.17
- timm==0.3.2
- ttach==0.0.3
prefix: /home/remi/miniconda3/envs/ci_env
10 changes: 10 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,10 @@
version: '3'
services:
geo-deep-learning:
build: .
environment:
- PYTHONPATH=.
volumes:
- .:/app
entrypoint: python
command: --version
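
A usage sketch for the compose service (the service name and python entrypoint come from the file above; the script and config path are illustrative):

```shell
# With no arguments this runs the default command, i.e. python --version:
docker-compose run --rm geo-deep-learning
# Extra arguments replace `command` but keep the python entrypoint:
docker-compose run --rm geo-deep-learning images_to_samples.py path/to/config.yaml
```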
163 changes: 163 additions & 0 deletions gdl_hyperopt_HPC.py
@@ -0,0 +1,163 @@
"""Hyperparamater optimization for GDL using hyperopt

This is a template for using hyperopt with GDL. The my_space variable currently needs to
be modified here, as well as GDL config modification logic within the objective_with_args
function.

"""
Collaborator: Can you put a section in the README.md on how to launch it pls :)
Author: will be removed


import argparse
from pathlib import Path
import pickle
from functools import partial
import pprint
import numpy as np

import mlflow
import torch
# ToDo: Add hyperopt to GDL requirements
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

from utils.readers import read_parameters
from train_segmentation import main as train_main

# This is the hyperparameter space to explore
# my_space = {'target_size': hp.choice('target_size', [512, 636]),
# 'model_name': hp.choice('model_name', ['unet_pretrained', 'deeplabv3_resnet101']),
# 'loss_fn': hp.choice('loss_fn', ['CrossEntropy', 'Lovasz', 'Duo']),
# 'optimizer': hp.choice('optimizer', ['adam', 'adabound']),
# 'learning_rate': hp.loguniform('learning_rate', np.log(1e-7), np.log(0.1))}

my_space = {'loss_fn': hp.choice('loss_fn', ['CrossEntropy', 'Lovasz', 'Duo']),
            'optimizer': hp.choice('optimizer', ['adam', 'adabound']),
            'model_name': hp.choice('model_name', ['unet_pretrained', 'deeplabv3_resnet101']),
            'learning_rate': hp.loguniform('learning_rate', np.log(1e-7), np.log(0.1))}

Collaborator: Can that dict be a yaml or a csv? Maybe in a future PR otherwise.
Author: this template will be removed; it is experiment-specific and supposed to live on a private branch.
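
A minimal sketch of the reviewer's suggestion (hypothetical, not part of this PR): the search space could live in a YAML file and be mapped onto hyperopt expressions at load time.

```python
# Hypothetical: build my_space from a YAML spec such as
#   loss_fn:       {type: choice, options: [CrossEntropy, Lovasz, Duo]}
#   learning_rate: {type: loguniform, low: 1.0e-7, high: 0.1}
import numpy as np
import yaml
from hyperopt import hp

def space_from_yaml(path):
    """Map a YAML hyperparameter spec onto hyperopt search expressions."""
    with open(path) as f:
        spec = yaml.safe_load(f)
    space = {}
    for name, cfg in spec.items():
        if cfg['type'] == 'choice':
            space[name] = hp.choice(name, cfg['options'])
        elif cfg['type'] == 'loguniform':
            space[name] = hp.loguniform(name, np.log(cfg['low']), np.log(cfg['high']))
        else:
            raise ValueError(f"unsupported search type: {cfg['type']}")
    return space
```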


def get_latest_mlrun(params):
"""Get latest mlflow run

:param params: gdl parameters dictionary
:return: mlflow run object
"""

tracking_uri = params['global']['mlflow_uri']
mlflow.set_tracking_uri(tracking_uri)
mlexp = mlflow.get_experiment_by_name(params['global']['mlflow_experiment_name'])
exp_id = mlexp.experiment_id
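# mlflow.list_run_infos is tried at the module level first; mlflow versions
# that lack it raise AttributeError, so we fall back to the MlflowClient API.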
try:
run_ids = ([x.run_id for x in mlflow.list_run_infos(
exp_id, max_results=1, order_by=["tag.release DESC"])])
except AttributeError:
mlflow_client = mlflow.tracking.MlflowClient(tracking_uri=tracking_uri)
run_ids = [x.run_id for x in mlflow_client.list_run_infos(exp_id, run_view_type=3)[0:1]]
mlrun = mlflow.get_run(run_ids[0])
return mlrun


def objective_with_args(hparams, params, config_path):
"""Objective function for hyperopt

This function edits the GDL parameters and runs a training.

:param hparams: arguments provided by hyperopt selection from hyperparameter space
:param params: gdl parameters dictionary
:param config_path: path to gdl configuration file
:return: loss dictionary for hyperopt
"""

# ToDo: This is dependent on the specific structure of the GDL config file
params['global']['model_name'] = hparams['model_name']
params['training']['loss_fn'] = hparams['loss_fn']
params['training']['optimizer'] = hparams['optimizer']
params['training']['learning_rate'] = hparams['learning_rate']

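# Increment the mlflow run-name suffix from the latest run; on a first run
# (no experiment or run exists yet) keep the name already in the config.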
try:
mlrun = get_latest_mlrun(params)
run_name_split = mlrun.data.tags['mlflow.runName'].split('_')
params['global']['mlflow_run_name'] = run_name_split[0] + f'_{int(run_name_split[1]) + 1}'
except:
pass

if params['global']['model_name'] == "unet_pretrained":
params['training']['state_dict_path'] = params['training']['dict_unet']
elif params['global']['model_name'] == "deeplabv3_resnet101":
params['training']['state_dict_path'] = params['training']['dict_deeplab']

train_main(params, config_path)
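# release cached GPU memory between evaluations so successive trials start clean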
torch.cuda.empty_cache()

mlflow.end_run()
mlrun = get_latest_mlrun(params)

# ToDo: Probably need some cleanup to avoid accumulating results on disk

# ToDo: This loss should be configurable
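# hyperopt minimizes the objective, so the higher-is-better test IoU is negated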
return {'loss': -mlrun.data.metrics['tst_iou'], 'status': STATUS_OK}


def trials_to_csv(trials, csv_pth):
"""hyperopt trials to CSV

:param trials: hyperopt trials object
    :param csv_pth: path of the output CSV file
"""

params = sorted(list(trials.vals.keys()))
csv_str = ''
for param in params:
csv_str += f'{param}, '
csv_str = csv_str + 'loss' + '\n'

for i in range(len(trials.trials)):
for param in params:
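# hp.choice nodes are named 'switch'; pos_args[0] is the index expression and
# the options start at pos_args[1], hence the +1 when decoding the stored
# index back to its literal value for the CSV.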
if my_space[param].name == 'switch':
csv_str += f'{my_space[param].pos_args[trials.vals[param][i] + 1].obj}, '
else:
csv_str += f'{trials.vals[param][i]}, '
csv_str = csv_str + f'{trials.results[i]["loss"]}' + '\n'

# ToDo: Customize where the csv output is
with open(csv_pth, 'w') as csv_obj:
csv_obj.write(csv_str)


def main(params, config_path):
# ToDo: Customize where the trials file is
root_path = Path(params['global']['assets_path'])
pkl_file = root_path.joinpath('hyperopt_trials.pkl')
csv_file = root_path.joinpath('hyperopt_results.csv')
if pkl_file.is_file():
trials = pickle.load(open(pkl_file, "rb"))
else:
trials = Trials()

objective = partial(objective_with_args, params=params, config_path=config_path)

n = 0
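# Run fmin in batches of hyperopt_delta evaluations, pickling the Trials
# object after each batch; fmin resumes from the loaded trials, and
# max_evals counts total evaluations across restarts.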
while n < params['global']['hyperopt_runs']:
best = fmin(objective,
space=my_space,
algo=tpe.suggest,
trials=trials,
max_evals=n + params['global']['hyperopt_delta'])
n += params['global']['hyperopt_delta']
pickle.dump(trials, open(pkl_file, "wb"))

# ToDo: Cleanup the output
pprint.pprint(trials.vals)
pprint.pprint(trials.results)
for key, val in best.items():
if my_space[key].name == 'switch':
best[key] = my_space[key].pos_args[val + 1].obj
pprint.pprint(best)
print(trials.best_trial['result'])
trials_to_csv(trials, csv_file)


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Geo Deep Learning hyperopt')
parser.add_argument('param_file', type=str, help='Path of gdl config file')
args = parser.parse_args()
gdl_params = read_parameters(args.param_file)
gdl_params['self'] = {'config_file': args.param_file}
main(gdl_params, Path(args.param_file))
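
Launch sketch for this script (single-argument CLI per the argparse block above; the config path is illustrative, and the same CLI applies to gdl_hyperopt_template.py below):

```shell
python gdl_hyperopt_HPC.py path/to/gdl_config.yaml
```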
46 changes: 26 additions & 20 deletions gdl_hyperopt_template.py
@@ -11,6 +11,7 @@
import pickle
from functools import partial
import pprint
import numpy as np

import mlflow
import torch
@@ -21,12 +22,10 @@
from train_segmentation import main as train_main

# This is the hyperparameter space to explore
my_space = {'target_size': hp.choice('target_size', [128, 256]),
'model_name': hp.choice('model_name', ['unet', 'deeplabv3+_pretrained']),
'permanent_water_weight': hp.uniform('permanent_water_weight', 1.0, 10.0),
'rivers_weight': hp.uniform('rivers_weight', 1.0, 10.0),
'flood_weight': hp.uniform('flood_weight', 1.0, 10.0),
'noise': hp.choice('noise', [0.0, 1.0])}
my_space = {'model_name': hp.choice('model_name', ['unet_pretrained', 'deeplabv3_resnet101']),
            'loss_fn': hp.choice('loss_fn', ['CrossEntropy', 'Lovasz', 'Duo']),
            'optimizer': hp.choice('optimizer', ['adam', 'adabound']),
            'learning_rate': hp.loguniform('learning_rate', np.log(1e-7), np.log(0.1))}

Collaborator: Same comment as for the HPC one.
Author: this would be tackled in a future PR.


def get_latest_mlrun(params):
@@ -62,20 +61,23 @@ def objective_with_args(hparams, params, config_path):
"""

# ToDo: This is dependent on the specific structure of the GDL config file
params['training']['target_size'] = hparams['target_size']
params['global']['model_name'] = hparams['model_name']
# ToDo: Should adjust batch size as a function of model and target size...
params['training']['class_weights'] = [1.0, hparams['permanent_water_weight'], hparams['rivers_weight'],
                                       hparams['flood_weight']]
params['training']['augmentation']['noise'] = hparams['noise']

Contributor: A (hyperopt) template should not have parameters clearly linked to a specific application, e.g. floods like in this case. "specific structure of the GDL config file" indeed ... We may need a structure of YAML files as a function of application that a template could make use of.
# params['training']['target_size'] = hparams['target_size']
params['training']['loss_fn'] = hparams['loss_fn']
params['training']['optimizer'] = hparams['optimizer']
params['training']['learning_rate'] = hparams['learning_rate']

try:
mlrun = get_latest_mlrun(params)
run_name_split = mlrun.data.tags['mlflow.runName'].split('_')
params['global']['mlflow_run_name'] = run_name_split[0] + f'_{int(run_name_split[1])+1}'
params['global']['mlflow_run_name'] = run_name_split[0] + f'_{int(run_name_split[1]) + 1}'
except:
pass
Collaborator: why try/except statement here? If necessary, maybe it should be narrowed down to catch only known errors. Not a priority.
Author: can be worked on, previous work from an intern


if params['global']['model_name'] == "unet_pretrained":
params['training']['state_dict_path'] = params['training']['dict_unet']
elif params['global']['model_name'] == "deeplabv3_resnet101":
params['training']['state_dict_path'] = params['training']['dict_deeplab']
Collaborator: the dict_[model] parameter comes from where? Is it an output of a previous run? Maybe a comment or two would help here.
Author: This will be taken off, it is unique to my special use case of hyperopt.

train_main(params, config_path)
torch.cuda.empty_cache()

@@ -88,7 +90,7 @@ def objective_with_args(hparams, params, config_path):
return {'loss': -mlrun.data.metrics['tst_iou'], 'status': STATUS_OK}


def trials_to_csv(trials):
def trials_to_csv(trials, csv_pth):
"""hyperopt trials to CSV

:param trials: hyperopt trials object
@@ -109,14 +111,18 @@ def trials_to_csv(trials):
csv_str = csv_str + f'{trials.results[i]["loss"]}' + '\n'

# ToDo: Customize where the csv output is
with open('hyperopt_results.csv', 'w') as csv_obj:
with open(csv_pth, 'w') as csv_obj:
csv_obj.write(csv_str)


def main(params, config_path):
# ToDo: Customize where the trials file is
if Path('hyperopt_trials.pkl').is_file():
trials = pickle.load(open("hyperopt_trials.pkl", "rb"))
# ToDo: Customize where the trials file is
root_path = Path(params['global']['assets_path'])
pkl_file = root_path.joinpath('hyperopt_trials.pkl')
csv_file = root_path.joinpath('hyperopt_results.csv')
if pkl_file.is_file():
trials = pickle.load(open(pkl_file, "rb"))
else:
trials = Trials()

@@ -128,19 +134,19 @@ def main(params, config_path):
space=my_space,
algo=tpe.suggest,
trials=trials,
max_evals=n+params['global']['hyperopt_delta'])
max_evals=n + params['global']['hyperopt_delta'])
n += params['global']['hyperopt_delta']
pickle.dump(trials, open("hyperopt_trials.pkl", "wb"))
pickle.dump(trials, open(pkl_file, "wb"))

# ToDo: Cleanup the output
pprint.pprint(trials.vals)
pprint.pprint(trials.results)
for key, val in best.items():
if my_space[key].name == 'switch':
best[key] = my_space[key].pos_args[val+1].obj
best[key] = my_space[key].pos_args[val + 1].obj
pprint.pprint(best)
print(trials.best_trial['result'])
trials_to_csv(trials)
trials_to_csv(trials, csv_file)


if __name__ == '__main__':