Major and Minor updates to GDL #206

Merged: 42 commits, Oct 5, 2021 (changes shown from 38 commits)

Commits (42):
ac80352  Adding files to create a docker image (ychoquet, Mar 18, 2021)
31f4a64  Merge branch 'develop' of https://github.com/victorlazio109/geo-deep-… (ychoquet, Mar 18, 2021)
052a985  Merge pull request #1 from ychoquet/develop (valhassan, Mar 22, 2021)
63cd6a0  - arbitrary band support (Apr 8, 2021)
069e030  - url assets support added specifically for inference (Apr 9, 2021)
4a10cc3  - support for arbitrary number of bands (Apr 15, 2021)
9e87901  - fixed! support for arbitrary number of bands (Apr 16, 2021)
7409695  - cuda device fix (May 26, 2021)
fa7c1d7  - added vectorisation (Jun 3, 2021)
fb58e3c  - added module collections (Jul 2, 2021)
55f83f0  - added new sample creation script (Jul 5, 2021)
200d48f  - fixed tst set data allocation (Jul 6, 2021)
a8a095a  Merge branch 'develop' of https://github.com/NRCan/geo-deep-learning … (Jul 8, 2021)
e77f8a1  - changes to inference.py (Jul 9, 2021)
0a25fbc  - minor fix for debugging (Jul 12, 2021)
81ed707  - minor fix (Jul 12, 2021)
9b4368b  - fix num_classes/class weight mismatch (Jul 12, 2021)
c1233a4  - added sample creation by sensorID filter (Jul 13, 2021)
4fae864  - minor fix, get_num_samples function (Jul 13, 2021)
3f4d3fb  - stratification trial fix (Jul 14, 2021)
8a38726  - debug print statements added (Jul 15, 2021)
d7e8d35  removed debug print statements (Jul 15, 2021)
e060066  - hyperopt template modified (Jul 20, 2021)
ec21084  - minor change, file location (Jul 20, 2021)
1db54e6  - commented to solve file permission errors (Jul 21, 2021)
f7c06f0  - target_size modified to solve out of memory issues (Jul 21, 2021)
c1d21b4  - temporary template added for training on HPC (Jul 21, 2021)
fc4ab9f  Merge remote-tracking branch 'origin/develop' into develop (Jul 21, 2021)
469000e  - minor fix (Jul 22, 2021)
33bca73  - temporary fix for (Jul 22, 2021)
d40bc0f  - updated (Jul 22, 2021)
30f43b8  - added pretrained weights param Hpc, local (Jul 22, 2021)
8cd4407  - model space added (Jul 22, 2021)
59443ec  - minor fix to input tensor mismatch (Jul 23, 2021)
2e8758d  - reference comment added dice_loss.py (Aug 12, 2021)
c69b616  Merge branch 'develop' of https://github.com/NRCan/geo-deep-learning … (Aug 12, 2021)
636e084  minor fixes: addressing code review (Sep 3, 2021)
4d39157  fixes and new features: (Sep 3, 2021)
427fcaf  - fix: param dict passed explicitly to avoid global calls. (Sep 8, 2021)
563191f  - fix, clipped raster (Sep 10, 2021)
7fa444d  - minor fixes: removed dask, added time checks (Sep 24, 2021)
9fde1b1  Fixed suggested changes by reviewers (Oct 5, 2021)
18 changes: 18 additions & 0 deletions Dockerfile
@@ -0,0 +1,18 @@
FROM continuumio/miniconda3

WORKDIR /app

# Create the environment:
COPY environment.yml .
RUN conda env create -f environment.yml

# Make RUN commands use the new environment:
SHELL ["conda", "run", "-n", "geo_deep_env", "/bin/bash", "-c"]

# Make sure the environment is activated:
RUN echo "Make sure flask is installed:"
RUN python -c "import flask"

# The code to run when container is started:
#COPY run.py .
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "geo_deep_env", "python"]
21 changes: 14 additions & 7 deletions README.md
@@ -32,9 +32,9 @@ The final step in the process is to assign every pixel in the original image a v
## **Requirement**
This project comprises a set of commands to be run at a shell command prompt. Examples used here are for a bash shell in an Ubuntu GNU/Linux environment.

- [Python 3.6](https://www.python.org/downloads/release/python-360/), see the full list of dependencies in [requirements.txt](requirements.txt)
- [Python 3.7.6](https://www.python.org/downloads/release/python-376/), see the full list of dependencies in [environment.yml](environment.yml)
- [mlflow](https://mlflow.org/)
- [minicanda](https://docs.conda.io/en/latest/miniconda.html) (highly recommended)
- [miniconda](https://docs.conda.io/en/latest/miniconda.html) (highly recommended)
- nvidia GPU (highly recommended)

> The system can be used on your workstation or cluster and on [AWS](https://aws.amazon.com/).
@@ -43,13 +43,20 @@ This project comprises a set of commands to be run at a shell command prompt. E
These steps are for a workstation running Ubuntu 18.04 with miniconda.
Set and activate your Python environment with the following commands:
```shell
conda create -n gpu_ENV python=3.6 -c pytorch pytorch torchvision
conda activate gpu_ENV
conda install --override-channels -c main -c conda-forge ruamel_yaml h5py fiona rasterio geopandas scikit-image scikit-learn tqdm
conda env create -f environment.yml
conda activate geo_deep_env
conda install -c fastai nvidia-ml-py3
conda install mlflow segmentation-models-pytorch
```
> For Windows OS:
##### For Docker
> Review comment (Collaborator): thanks, that's nice.

Move to the geo-deep-learning directory and use either of the following commands:
```shell
docker build .
# or
docker-compose build
```

##### For Windows OS:
> - You will have to convert the environment.yml file to requirements.txt, as most packages need pip install (a conversion sketch follows below).
> - Install rasterio, fiona and gdal first, before installing the rest. We've experienced some [installation issues](https://github.com/conda-forge/gdal-feedstock/issues/213) with those libraries.
> - Mlflow should be installed using pip rather than conda, as mentioned [here](https://github.com/mlflow/mlflow/issues/1951)
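
A minimal sketch of the environment.yml-to-requirements.txt conversion mentioned in the first bullet above (assuming PyYAML is available; conda build strings and conda-vs-pip package-name mismatches are not handled):

```python
import yaml  # PyYAML

with open('environment.yml') as f:
    env = yaml.safe_load(f)

reqs = []
for dep in env.get('dependencies', []):
    if isinstance(dep, str):             # conda spec, e.g. "zlib=1.2.11=h516909a_1010"
        name, version = (dep.split('=') + [None])[:2]
        reqs.append(f'{name}=={version}' if version else name)
    elif isinstance(dep, dict):          # the nested "- pip:" sub-list
        reqs.extend(dep.get('pip', []))

with open('requirements.txt', 'w') as f:
    f.write('\n'.join(reqs) + '\n')
```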

50 changes: 25 additions & 25 deletions config/travis_CI/environment.yml
@@ -127,29 +127,29 @@ dependencies:
- zlib=1.2.11=h516909a_1010
- zstd=1.4.9=ha95c52a_0
- pip:
  # this hunk re-indents the pip section; the 25 entries themselves are unchanged
  - affine==2.3.0
  - attrs==20.3.0
  - click-plugins==1.1.1
  - cligj==0.7.1
  - cycler==0.10.0
  - decorator==4.4.2
  - efficientnet-pytorch==0.6.3
  - fiona==1.8.18
  - geopandas==0.9.0
  - imageio==2.9.0
  - kiwisolver==1.3.1
  - matplotlib==3.4.0
  - munch==2.5.0
  - networkx==2.5
  - pretrainedmodels==0.7.4
  - pyproj==3.0.1
  - pywavelets==1.1.1
  - rasterio==1.2.1
  - scikit-image==0.18.1
  - segmentation-models-pytorch==0.1.3
  - shapely==1.7.1
  - snuggs==1.4.7
  - tifffile==2021.3.17
  - timm==0.3.2
  - ttach==0.0.3
prefix: /home/remi/miniconda3/envs/ci_env
10 changes: 10 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,10 @@
version: '3'
services:
geo-deep-learning:
build: .
environment:
- PYTHONPATH=.
volumes:
- .:/app
entrypoint: python
command: --version
46 changes: 26 additions & 20 deletions gdl_hyperopt_template.py
@@ -11,6 +11,7 @@
import pickle
from functools import partial
import pprint
import numpy as np

import mlflow
import torch
@@ -21,12 +22,10 @@
from train_segmentation import main as train_main

# This is the hyperparameter space to explore
my_space = {'target_size': hp.choice('target_size', [128, 256]),
            'model_name': hp.choice('model_name', ['unet', 'deeplabv3+_pretrained']),
            'permanent_water_weight': hp.uniform('permanent_water_weight', 1.0, 10.0),
            'rivers_weight': hp.uniform('rivers_weight', 1.0, 10.0),
            'flood_weight': hp.uniform('flood_weight', 1.0, 10.0),
            'noise': hp.choice('noise', [0.0, 1.0])}
my_space = {'model_name': hp.choice('model_name', ['unet_pretrained', 'deeplabv3_resnet101']),
            'loss_fn': hp.choice('loss_fn', ['CrossEntropy', 'Lovasz', 'Duo']),
            'optimizer': hp.choice('optimizer', ['adam', 'adabound']),
            'learning_rate': hp.loguniform('learning_rate', np.log(1e-7), np.log(0.1))}

> Review comment (Collaborator): Same comment as the HPC one.
> Author: This will be tackled in a future PR.
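
To sanity-check a search space like this before spending GPU time, hyperopt can draw random configurations from it. A minimal sketch (the space is copied from above; the sampling code is illustrative):

```python
import numpy as np
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

my_space = {'model_name': hp.choice('model_name', ['unet_pretrained', 'deeplabv3_resnet101']),
            'loss_fn': hp.choice('loss_fn', ['CrossEntropy', 'Lovasz', 'Duo']),
            'optimizer': hp.choice('optimizer', ['adam', 'adabound']),
            'learning_rate': hp.loguniform('learning_rate', np.log(1e-7), np.log(0.1))}

# hp.choice picks one of the listed options; hp.loguniform samples
# log-uniformly between the bounds (here 1e-7 to 0.1).
for _ in range(3):
    print(sample(my_space))
```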


def get_latest_mlrun(params):
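
The body of get_latest_mlrun is collapsed in this diff. Given how its result is used below (mlrun.data.tags, mlrun.data.metrics), one plausible shape, sketched against the standard mlflow client API (the config keys used here are assumptions, not taken from the PR):

```python
from mlflow.tracking import MlflowClient

def get_latest_mlrun(params):
    """Sketch only: return the most recently started run of the configured experiment."""
    client = MlflowClient(tracking_uri=params['global'].get('mlflow_uri'))
    experiment = client.get_experiment_by_name(params['global']['mlflow_experiment_name'])
    runs = client.search_runs([experiment.experiment_id])
    return max(runs, key=lambda run: run.info.start_time)
```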
@@ -62,20 +61,23 @@ def objective_with_args(hparams, params, config_path):
"""

# ToDo: This is dependent on the specific structure of the GDL config file
    params['training']['target_size'] = hparams['target_size']
    params['global']['model_name'] = hparams['model_name']
    # ToDo: Should adjust batch size as a function of model and target size...
    params['training']['class_weights'] = [1.0, hparams['permanent_water_weight'], hparams['rivers_weight'],
                                           hparams['flood_weight']]

> Review comment (Contributor): A (hyperopt) template should not have parameters clearly linked to a specific application, e.g. floods like in this case. "Specific structure of the GDL config file" indeed ... We may need a structure of YAML files as a function of application that a template could make use of.

    params['training']['augmentation']['noise'] = hparams['noise']
    # params['training']['target_size'] = hparams['target_size']
    params['training']['loss_fn'] = hparams['loss_fn']
    params['training']['optimizer'] = hparams['optimizer']
    params['training']['learning_rate'] = hparams['learning_rate']

try:
mlrun = get_latest_mlrun(params)
run_name_split = mlrun.data.tags['mlflow.runName'].split('_')
params['global']['mlflow_run_name'] = run_name_split[0] + f'_{int(run_name_split[1])+1}'
params['global']['mlflow_run_name'] = run_name_split[0] + f'_{int(run_name_split[1]) + 1}'
except:
pass
> Review comment (Collaborator): Why a try/except here? If necessary, it should maybe be narrowed down to catch only known errors. Not a priority.
> Author: Can be worked on; previous work from an intern.


if params['global']['model_name'] == "unet_pretrained":
params['training']['state_dict_path'] = params['training']['dict_unet']
elif params['global']['model_name'] == "deeplabv3_resnet101":
params['training']['state_dict_path'] = params['training']['dict_deeplab']
> Review comment (Collaborator): Where does the dict_[model] parameter come from? Is it an output of a previous run? A comment or two would help here.
> Author: This will be taken off; it is unique to my special use case of hyperopt.

train_main(params, config_path)
torch.cuda.empty_cache()

@@ -88,7 +90,7 @@ def objective_with_args(hparams, params, config_path):
return {'loss': -mlrun.data.metrics['tst_iou'], 'status': STATUS_OK}


def trials_to_csv(trials):
def trials_to_csv(trials, csv_pth):
"""hyperopt trials to CSV

:param trials: hyperopt trials object
@@ -109,14 +111,18 @@ def trials_to_csv(trials):
csv_str = csv_str + f'{trials.results[i]["loss"]}' + '\n'

# ToDo: Customize where the csv output is
with open('hyperopt_results.csv', 'w') as csv_obj:
with open(csv_pth, 'w') as csv_obj:
csv_obj.write(csv_str)


def main(params, config_path):
# ToDo: Customize where the trials file is
if Path('hyperopt_trials.pkl').is_file():
trials = pickle.load(open("hyperopt_trials.pkl", "rb"))
# ToDo: Customize where the trials file is
root_path = Path(params['global']['assets_path'])
pkl_file = root_path.joinpath('hyperopt_trials.pkl')
csv_file = root_path.joinpath('hyperopt_results.csv')
if pkl_file.is_file():
trials = pickle.load(open(pkl_file, "rb"))
else:
trials = Trials()

@@ -128,19 +134,19 @@
space=my_space,
algo=tpe.suggest,
trials=trials,
max_evals=n+params['global']['hyperopt_delta'])
max_evals=n + params['global']['hyperopt_delta'])
n += params['global']['hyperopt_delta']
pickle.dump(trials, open("hyperopt_trials.pkl", "wb"))
pickle.dump(trials, open(pkl_file, "wb"))

# ToDo: Cleanup the output
pprint.pprint(trials.vals)
pprint.pprint(trials.results)
for key, val in best.items():
if my_space[key].name == 'switch':
best[key] = my_space[key].pos_args[val+1].obj
best[key] = my_space[key].pos_args[val + 1].obj
pprint.pprint(best)
print(trials.best_trial['result'])
trials_to_csv(trials)
trials_to_csv(trials, csv_file)


if __name__ == '__main__':
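
The incremental fmin/pickle loop in main() above can be exercised in isolation. A self-contained sketch with a toy objective (all names and values here are illustrative, not from the PR):

```python
import pickle
from pathlib import Path

from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

space = {'x': hp.uniform('x', -2.0, 2.0)}

def objective(hparams):
    # Toy objective with its minimum at x = 1.
    return {'loss': (hparams['x'] - 1.0) ** 2, 'status': STATUS_OK}

pkl_file = Path('toy_trials.pkl')
trials = pickle.loads(pkl_file.read_bytes()) if pkl_file.is_file() else Trials()

# fmin resumes from whatever the Trials object already holds, so max_evals
# is a cumulative total, matching the hyperopt_delta pattern above.
n = len(trials.trials)
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            trials=trials, max_evals=n + 10)
pkl_file.write_bytes(pickle.dumps(trials))
print(best, trials.best_trial['result'])
```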
13 changes: 7 additions & 6 deletions images_to_samples.py
@@ -212,7 +212,8 @@ def samples_preparation(in_img_array,
metadata_idx = append_to_dataset(samples_file["metadata"], repr(image_metadata))

if overlap > 25:
logging.warning("high overlap >25%, note that automatic train/val split creates very similar samples in both sets")
logging.warning(
"high overlap >25%, note that automatic train/val split creates very similar samples in both sets")
dist_samples = round(sample_size * (1 - (overlap / 100)))
added_samples = 0
excl_samples = 0
@@ -247,6 +248,7 @@ def samples_preparation(in_img_array,
# Stratification bias
if (stratd is not None) and (dataset == 'trn'):
tile_size = target.size
u, count = np.unique(target, return_counts=True)
> Review comment (Collaborator): thanks for fixing this bug.

tile_counts = {x: y for x, y in zip(u, count)}
tile_props = {x: y / tile_size for x, y in zip(u, count)}
for key in tile_props.keys():
@@ -268,8 +270,8 @@ def samples_preparation(in_img_array,
for key, val in tile_props.items()}
distances_val = {key: np.abs(val - stratd['val']['total_props'][key])
for key, val in tile_props.items()}
dist_trn = np.mean(np.array(list(distances_trn.values()))**2)
dist_val = np.mean(np.array(list(distances_val.values()))**2)
dist_trn = np.mean(np.array(list(distances_trn.values())) ** 2)
dist_val = np.mean(np.array(list(distances_val.values())) ** 2)
dist = dist_val - dist_trn
stratification_bias = stratd['strat_factor'] * np.sign(dist)
else:
@@ -311,7 +313,7 @@ def samples_preparation(in_img_array,
final_dataset = 'val' if val else dataset
logging.debug(f'Dset={final_dataset}, '
f'Added samps={added_samples}/{len(_tqdm) * len(range(0, w, dist_samples))}, '
f'Excld samps={excl_samples}/{len(_tqdm) * len(range(0, w, dist_samples))}, '
f'Excld samps={excl_samples}/{len(_tqdm) * len(range(0, w, dist_samples))}, '
f'Target annot perc={100 - target_background_percent:.1f}')

if added_samples == 0:
@@ -365,7 +367,7 @@ def main(params):
:param params: (dict) Parameters found in the yaml config file.
"""
start_time = time.time()

# mlflow logging
mlflow_uri = get_key_def('mlflow_uri', params['global'], default="./mlruns")
experiment_name = get_key_def('mlflow_experiment_name', params['global'], default='gdl-training', expected_type=str)
@@ -416,7 +418,6 @@ def main(params):
else:
stratd = None


# add git hash from current commit to parameters if available. Parameters will be saved to hdf5s
params['global']['git_hash'] = get_git_hash()

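
The stratification-bias computation in samples_preparation above can be illustrated in isolation. A toy sketch mirroring that logic (the stratd proportions and the tile are made up):

```python
import numpy as np

# Running class proportions accumulated so far for each set (illustrative values).
stratd = {'trn': {'total_props': {0: 0.80, 1: 0.20}},
          'val': {'total_props': {0: 0.70, 1: 0.30}},
          'strat_factor': 100.0}

target = np.array([[0, 0, 1, 1],
                   [0, 1, 1, 1]])  # toy tile of class labels
u, count = np.unique(target, return_counts=True)
tile_props = {x: y / target.size for x, y in zip(u, count)}  # {0: 0.375, 1: 0.625}

# Mean squared distance between this tile's class mix and each set's running mix.
dist_trn = np.mean(np.array([abs(v - stratd['trn']['total_props'][k])
                             for k, v in tile_props.items()]) ** 2)
dist_val = np.mean(np.array([abs(v - stratd['val']['total_props'][k])
                             for k, v in tile_props.items()]) ** 2)

# The sign of (dist_val - dist_trn) steers the tile toward the set whose
# class mix it would balance better; strat_factor scales that push.
stratification_bias = stratd['strat_factor'] * np.sign(dist_val - dist_trn)
print(dist_trn, dist_val, stratification_bias)  # 0.180625 0.105625 -100.0
```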