From 35611fe31419ea31530cc6c6e33ae0bc549e9373 Mon Sep 17 00:00:00 2001
From: Simon Pfreundschuh
Date: Wed, 4 Mar 2020 10:05:08 +0100
Subject: [PATCH 01/19] Reworking qrnn.

---
 typhon/retrieval/__init__.py               |   2 +-
 typhon/retrieval/qrnn/backends/__init__.py |   0
 typhon/retrieval/qrnn/backends/keras.py    | 801 +++++++++++++++++++++
 typhon/retrieval/qrnn/backends/pytorch.py  | 345 +++++++++
 typhon/retrieval/qrnn/qrnn.py              | 143 +---
 5 files changed, 1178 insertions(+), 113 deletions(-)
 create mode 100644 typhon/retrieval/qrnn/backends/__init__.py
 create mode 100644 typhon/retrieval/qrnn/backends/keras.py
 create mode 100644 typhon/retrieval/qrnn/backends/pytorch.py

diff --git a/typhon/retrieval/__init__.py b/typhon/retrieval/__init__.py
index 09fdccb5..ff43f188 100644
--- a/typhon/retrieval/__init__.py
+++ b/typhon/retrieval/__init__.py
@@ -5,4 +5,4 @@
 from .common import *  # noqa
 from .oem import *  # noqa
-from .spareice import *  # noqa
+#from .spareice import *  # noqa
diff --git a/typhon/retrieval/qrnn/backends/__init__.py b/typhon/retrieval/qrnn/backends/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/typhon/retrieval/qrnn/backends/keras.py b/typhon/retrieval/qrnn/backends/keras.py
new file mode 100644
index 00000000..df5fec2f
--- /dev/null
+++ b/typhon/retrieval/qrnn/backends/keras.py
@@ -0,0 +1,801 @@
+"""
+Keras backend for QRNNs
+=======================
+
+This module implements the Keras backend for QRNNs.
+"""
+import copy
+import logging
+import os
+import pickle
+
+import numpy as np
+from scipy.interpolate import CubicSpline
+
+# Keras Imports
+try:
+    import keras
+    from keras.models import Sequential, clone_model
+    from keras.layers import Dense, Activation, Dropout
+    from keras.optimizers import SGD
+except ImportError:
+    raise ImportError(
+        "Could not import the required Keras modules. The QRNN "
+        "implementation was developed for use with Keras version 2.0.9.")
+
+################################################################################
+# Loss Functions
+################################################################################
+
+import keras.backend as K
+
+logger = logging.getLogger(__name__)
+
+def skewed_absolute_error(y_true, y_pred, tau):
+    """
+    The quantile loss function for a given quantile tau:
+
+    L(y_true, y_pred) = (tau - I(y_pred < y_true)) * (y_pred - y_true)
+
+    Where I is the indicator function.
+    """
+    dy = y_pred - y_true
+    return K.mean((1.0 - tau) * K.relu(dy) + tau * K.relu(-dy), axis=-1)
+
+
+def quantile_loss(y_true, y_pred, taus):
+    """
+    The quantile loss for a list of quantiles. Sums up the error contributions
+    from each of the quantile loss functions.
+    """
+    e = skewed_absolute_error(
+        K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0])
+    for i, tau in enumerate(taus[1:]):
+        e += skewed_absolute_error(K.flatten(y_true),
+                                   K.flatten(y_pred[:, i + 1]),
+                                   tau)
+    return e
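As a sanity check, the pinball loss implemented above can be reproduced in plain NumPy, independently of Keras. The following sketch (an editorial example, not part of the patch) verifies that for tau = 0.5 the loss reduces to half the mean absolute error:

    import numpy as np

    def pinball_loss(y_true, y_pred, tau):
        """Quantile loss for a single quantile tau in [0, 1]."""
        dy = y_pred - y_true
        # (1 - tau) penalizes over-predictions, tau penalizes under-predictions.
        return np.mean((1.0 - tau) * np.maximum(dy, 0.0)
                       + tau * np.maximum(-dy, 0.0))

    y_true = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.5, 1.5, 3.5])
    assert np.isclose(pinball_loss(y_true, y_pred, 0.5),
                      0.5 * np.mean(np.abs(y_pred - y_true)))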
+
+
+class QuantileLoss:
+    """
+    Wrapper class for the quantile error loss function. A class is used here
+    to allow the implementation of a custom `__repr__` function, so that the
+    loss function object can be easily loaded using `keras.models.load_model`.
+
+    Attributes:
+
+        quantiles: List of quantiles that should be estimated with
+                   this loss function.
+
+    """
+
+    def __init__(self, quantiles):
+        self.__name__ = "QuantileLoss"
+        self.quantiles = quantiles
+
+    def __call__(self, y_true, y_pred):
+        return quantile_loss(y_true, y_pred, self.quantiles)
+
+    def __repr__(self):
+        return "QuantileLoss(" + repr(self.quantiles) + ")"
+
+################################################################################
+# Keras Interface Classes
+################################################################################
+
+class TrainingGenerator:
+    """
+    This Keras sample generator takes the noise-free training data
+    and adds independent Gaussian noise to each of the components
+    of the input.
+
+    Attributes:
+
+        x_train: The training input, i.e. the brightness temperatures
+                 measured by the satellite.
+        y_train: The training output, i.e. the value of the retrieval
+                 quantity.
+        x_mean: A vector containing the mean of each input component.
+        x_sigma: A vector containing the standard deviation of each
+                 component.
+        batch_size: The size of a training batch.
+    """
+
+    def __init__(self, x_train, x_mean, x_sigma, y_train, sigma_noise, batch_size):
+        self.bs = batch_size
+
+        self.x_train = x_train
+        self.x_mean = x_mean
+        self.x_sigma = x_sigma
+        self.y_train = y_train
+        self.sigma_noise = sigma_noise
+
+        self.indices = np.random.permutation(x_train.shape[0])
+        self.i = 0
+
+    def __iter__(self):
+        logger.info("iter...")
+        return self
+
+    def __next__(self):
+        inds = self.indices[np.arange(self.i * self.bs,
+                                      (self.i + 1) * self.bs)
+                            % self.indices.size]
+        x_batch = np.copy(self.x_train[inds, :])
+        if self.sigma_noise is not None:
+            x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise
+        x_batch = (x_batch - self.x_mean) / self.x_sigma
+        y_batch = self.y_train[inds]
+
+        self.i = self.i + 1
+
+        # Shuffle training set after each epoch.
+        if self.i % (self.x_train.shape[0] // self.bs) == 0:
+            self.indices = np.random.permutation(self.x_train.shape[0])
+
+        return (x_batch, y_batch)
+
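A sketch of drawing a single batch from this generator (an editorial example, not part of the patch; shapes and noise level are arbitrary):

    import numpy as np

    x = np.random.rand(1000, 5)          # hypothetical training inputs
    y = np.random.rand(1000, 1)          # hypothetical training targets
    gen = TrainingGenerator(x,
                            np.mean(x, axis=0, keepdims=True),
                            np.std(x, axis=0, keepdims=True),
                            y, sigma_noise=0.1, batch_size=128)
    x_batch, y_batch = next(gen)         # noisy, normalized batch of 128 samples
    assert x_batch.shape == (128, 5)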
+class AdversarialTrainingGenerator:
+    """
+    This Keras sample generator takes the noise-free training data
+    and adds independent Gaussian noise to each of the components
+    of the input. In addition, after a short warm-up phase, half of
+    each batch is replaced by adversarial examples computed with the
+    fast gradient sign method.
+
+    Attributes:
+
+        x_train: The training input, i.e. the brightness temperatures
+                 measured by the satellite.
+        y_train: The training output, i.e. the value of the retrieval
+                 quantity.
+        x_mean: A vector containing the mean of each input component.
+        x_sigma: A vector containing the standard deviation of each
+                 component.
+        batch_size: The size of a training batch.
+        input_gradients: Keras function computing the gradients of the
+                 loss with respect to the network input.
+        eps: The scaling factor for the adversarial perturbation.
+    """
+
+    def __init__(self,
+                 x_train,
+                 x_mean,
+                 x_sigma,
+                 y_train,
+                 sigma_noise,
+                 batch_size,
+                 input_gradients,
+                 eps):
+        self.bs = batch_size
+
+        self.x_train = x_train
+        self.x_mean = x_mean
+        self.x_sigma = x_sigma
+        self.y_train = y_train
+        self.sigma_noise = sigma_noise
+
+        self.indices = np.random.permutation(x_train.shape[0])
+        self.i = 0
+
+        self.input_gradients = input_gradients
+        self.eps = eps
+
+    def __iter__(self):
+        logger.info("iter...")
+        return self
+
+    def __next__(self):
+
+        if self.i == 0:
+            inds = np.random.randint(0, self.x_train.shape[0], self.bs)
+
+            x_batch = np.copy(self.x_train[inds, :])
+            if self.sigma_noise is not None:
+                x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise
+
+            x_batch = (x_batch - self.x_mean) / self.x_sigma
+            y_batch = self.y_train[inds]
+
+        else:
+
+            bs2 = self.bs // 2
+            inds = np.random.randint(0, self.x_train.shape[0], bs2)
+
+            x_batch = np.zeros((self.bs, self.x_train.shape[1]))
+            y_batch = np.zeros((self.bs, 1))
+
+            x_batch[:bs2, :] = np.copy(self.x_train[inds, :])
+            if self.sigma_noise is not None:
+                x_batch[:bs2, :] += np.random.randn(bs2, self.x_train.shape[1]) \
+                                    * self.sigma_noise
+            x_batch[:bs2, :] = (x_batch[:bs2, :] - self.x_mean) / self.x_sigma
+            y_batch[:bs2, :] = self.y_train[inds].reshape(-1, 1)
+            x_batch[bs2:, :] = x_batch[:bs2, :]
+            y_batch[bs2:, :] = y_batch[:bs2, :]
+
+            # Perturb the second half of the batch along the gradient of the
+            # loss once the network has seen a few batches.
+            if self.i > 10:
+                grads = self.input_gradients(
+                    [x_batch[:bs2, :], y_batch[:bs2, :], [1.0]])[0]
+                x_batch[bs2:, :] += self.eps * np.sign(grads)
+
+        self.i = self.i + 1
+        return (x_batch, y_batch)
+
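The adversarial half-batch above is a fast gradient sign method (FGSM) perturbation. Stripped of the batching logic, the update is simply the following (an editorial sketch, not part of the patch; `grad_loss_wrt_x` stands in for the gradient function compiled in `fit`):

    import numpy as np

    def fgsm_perturb(x, grad_loss_wrt_x, eps):
        # Move each input a distance eps in the direction that locally
        # increases the loss the most.
        return x + eps * np.sign(grad_loss_wrt_x(x))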
+
+# TODO: Make y-noise argument optional
+class ValidationGenerator:
+    """
+    This Keras sample generator is similar to the training generator,
+    except that it returns the whole validation set and doesn't perform
+    any randomization.
+
+    Attributes:
+
+        x_val: The validation input, i.e. the brightness temperatures
+               measured by the satellite.
+        y_val: The validation output, i.e. the value of the retrieval
+               quantity.
+        x_mean: A vector containing the mean of each input component.
+        x_sigma: A vector containing the standard deviation of each
+                 component.
+    """
+
+    def __init__(self, x_val, x_mean, x_sigma, y_val, sigma_noise):
+        self.x_val = x_val
+        self.x_mean = x_mean
+        self.x_sigma = x_sigma
+
+        self.y_val = y_val
+
+        self.sigma_noise = sigma_noise
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        x_val = np.copy(self.x_val)
+        if self.sigma_noise is not None:
+            x_val += np.random.randn(*self.x_val.shape) * self.sigma_noise
+        x_val = (x_val - self.x_mean) / self.x_sigma
+        return (x_val, self.y_val)
+
+
+class LRDecay(keras.callbacks.Callback):
+    """
+    The LRDecay class implements the Keras callback interface and reduces
+    the learning rate when the validation loss has stopped improving.
+
+    Attributes:
+
+        lr_decay: The factor c > 1.0 by which the learning rate is
+                  reduced.
+        lr_minimum: The training is stopped when this learning rate
+                    is reached.
+        convergence_steps: The number of epochs without validation loss
+                           reduction required to reduce the learning rate.
+
+    """
+
+    def __init__(self, model, lr_decay, lr_minimum, convergence_steps):
+        self.model = model
+        self.lr_decay = lr_decay
+        self.lr_minimum = lr_minimum
+        self.convergence_steps = convergence_steps
+        self.steps = 0
+
+    def on_train_begin(self, logs={}):
+        self.losses = []
+        self.steps = 0
+        self.min_loss = 1e30
+
+    def on_epoch_end(self, epoch, logs={}):
+        self.losses += [logs.get('val_loss')]
+        if not self.losses[-1] < self.min_loss:
+            self.steps = self.steps + 1
+        else:
+            self.steps = 0
+        if self.steps > self.convergence_steps:
+            lr = keras.backend.get_value(self.model.optimizer.lr)
+            lr_new = lr / self.lr_decay
+            keras.backend.set_value(self.model.optimizer.lr, lr_new)
+            self.steps = 0
+            logger.info("\n Reduced learning rate to " + str(lr_new))
+
+            if lr_new < self.lr_minimum:
+                self.model.stop_training = True
+
+        self.min_loss = min(self.min_loss, self.losses[-1])
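For reference, recent Keras versions ship a built-in callback with essentially this behavior. A roughly equivalent configuration (an editorial sketch, not part of the patch; note that ReduceLROnPlateau treats min_lr as a floor rather than a stopping criterion) would be:

    from keras.callbacks import ReduceLROnPlateau

    lr_callback = ReduceLROnPlateau(monitor="val_loss",
                                    factor=0.5,    # corresponds to lr_decay = 2.0
                                    patience=10,   # corresponds to convergence_steps
                                    min_lr=1e-6)   # corresponds to lr_minimum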
+
+################################################################################
+# QRNN
+################################################################################
+
+class KerasQRNN:
+    r"""
+    Quantile Regression Neural Network (QRNN)
+
+    This class implements quantile regression neural networks and can be used
+    to estimate quantiles of the posterior distribution of remote sensing
+    retrievals.
+
+    Internally, the QRNN uses a feed-forward neural network that is trained
+    to minimize the quantile loss function
+
+    .. math::
+            \mathcal{L}_\tau(y_\tau, y_\text{true}) =
+            \begin{cases} \tau |y_\tau - y_\text{true}| & \text{ if } y_\tau < y_\text{true} \\
+            (1 - \tau) |y_\tau - y_\text{true}| & \text{ otherwise, }\end{cases}
+
+    where :math:`y_\text{true}` is the true value of the retrieval quantity
+    and :math:`y_\tau` is the predicted quantile. The neural network
+    has one output neuron for each quantile to estimate.
+
+    For the training, this implementation provides custom data generators that
+    can be used to add Gaussian noise to the training data as well as adversarial
+    training using the fast gradient sign method.
+
+    This implementation also provides functionality to use an ensemble of networks
+    instead of just a single network to predict the quantiles.
+
+    .. note:: For the QRNN I am using :math:`x` to denote the input vector and
+              :math:`y` to denote the output. While this is opposed to typical
+              inverse problem notation, it is in line with machine learning
+              notation and felt more natural for the implementation. If this
+              annoys you, I am sorry. But the other way round it would have
+              annoyed other people and in particular me.
+
+    Attributes:
+
+        input_dim (int):
+            The input dimension of the neural network, i.e. the dimension of the
+            measurement vector.
+
+        quantiles (numpy.array):
+            The 1D-array containing the quantiles :math:`\tau \in [0, 1]` that the
+            network learns to predict.
+
+        depth (int):
+            The number of layers in the network excluding the input layer.
+
+        width (int):
+            The width of the hidden layers in the network.
+
+        activation (str):
+            The name of the activation functions to use in the hidden layers
+            of the network.
+
+        models (list of keras.models.Sequential):
+            The ensemble of Keras neural networks used for the quantile regression
+            neural network.
+    """
+
+    def __init__(self,
+                 input_dim,
+                 quantiles,
+                 model=(3, 128, "relu"),
+                 ensemble_size=1,
+                 **kwargs):
+        """
+        Create a QRNN model.
+
+        Arguments:
+
+            input_dim(int): The dimension of the measurement space, i.e. the
+                            number of elements in a single measurement vector y.
+
+            quantiles(np.array): 1D-array containing the quantiles to estimate of
+                                 the posterior distribution. Given as fractions
+                                 within the range [0, 1].
+
+            model(tuple or keras.models.Model): A tuple
+                        `(depth, width, activation)` describing the architecture
+                        of the fully-connected network to create, or a Keras
+                        model to use directly. Here depth is the number of hidden
+                        layers (default 3), width the number of neurons in each
+                        hidden layer (default 128) and activation the name of the
+                        activation function to use in the hidden layers (default
+                        "relu", for rectified linear unit; see the Keras
+                        documentation for available functions).
+
+            ensemble_size(int): The number of networks in the ensemble.
+
+            **kwargs: Additional keyword arguments are passed to the constructor
+                      calls of the `keras.layers.Dense` hidden layers, which can
+                      for example be used to add regularization. For more info
+                      consult the Keras documentation.
+        """
+        self.input_dim = input_dim
+        self.quantiles = np.array(quantiles)
+
+        if type(model) is tuple:
+            depth = model[0]
+            width = model[1]
+            activation = model[2]
+
+            model = Sequential()
+            if depth == 0:
+                model.add(Dense(input_dim=input_dim,
+                                units=len(quantiles),
+                                activation=None))
+            else:
+                model.add(Dense(input_dim=input_dim,
+                                units=width,
+                                activation=activation))
+                for i in range(depth - 2):
+                    model.add(Dense(units=width,
+                                    activation=activation,
+                                    **kwargs))
+                model.add(Dense(units=len(quantiles), activation=None))
+        elif type(model) is Model:
+            pass
+        else:
+            raise Exception("The provided model is neither a suitable "
+                            "architecture tuple nor a keras model.")
+
+        self.models = [clone_model(model) for i in range(ensemble_size)]
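A minimal construction sketch (an editorial example, not part of the patch; quantile values are arbitrary):

    import numpy as np

    quantiles = np.array([0.05, 0.25, 0.5, 0.75, 0.95])
    qrnn = KerasQRNN(input_dim=5, quantiles=quantiles,
                     model=(3, 128, "relu"), ensemble_size=2)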
+
+
+
+    def __fit_params__(self, kwargs):
+        at = kwargs.pop("adversarial_training", False)
+        dat = kwargs.pop("delta_at", 0.01)
+        batch_size = kwargs.pop("batch_size", 512)
+        convergence_epochs = kwargs.pop("convergence_epochs", 10)
+        initial_learning_rate = kwargs.pop('initial_learning_rate', 0.01)
+        learning_rate_decay = kwargs.pop('learning_rate_decay', 2.0)
+        learning_rate_minimum = kwargs.pop('learning_rate_minimum', 1e-6)
+        maximum_epochs = kwargs.pop("maximum_epochs", 200)
+        training_split = kwargs.pop("training_split", 0.9)
+        return at, dat, batch_size, convergence_epochs, initial_learning_rate, \
+            learning_rate_decay, learning_rate_minimum, maximum_epochs, \
+            training_split, kwargs
+
+    def cross_validation(self,
+                         x_train,
+                         y_train,
+                         sigma_noise=None,
+                         n_folds=5,
+                         s=None,
+                         **kwargs):
+        r"""
+        Perform n-fold cross validation.
+
+        This function trains the network n times on different subsets of the
+        provided training data, always keeping a fraction of 1/n samples apart
+        for testing. Performance for each of the networks is evaluated and mean
+        and standard deviation for all folds are returned. This is to reduce
+        the influence of random fluctuations of the network performance during
+        hyperparameter tuning.
+
+        Arguments:
+
+            x_train(numpy.array): Array of shape :code:`(m, n)` containing the
+                                  m n-dimensional training inputs.
+
+            y_train(numpy.array): Array of shape :code:`(m, 1)` containing the
+                                  m training outputs.
+
+            sigma_noise(None, float, np.array): If not `None`, this value is used
+                                                to scale the Gaussian noise
+                                                that is added to each training
+                                                batch. If `None`, no noise is
+                                                added.
+
+            n_folds(int): Number of folds to perform for the cross validation.
+
+            s(callable, None): Performance metric for the fold. If not None,
+                               this should be a function object taking as
+                               arguments :code:`(y_test, y_pred)`, i.e. the
+                               expected output for the given fold :code:`y_test`
+                               and the predicted output :code:`y_pred`. The
+                               returned value is taken as performance metric.
+
+            **kwargs: Additional keyword arguments are passed on to the :code:`fit`
+                      method that is called for each fold.
+        """
+
+        n = x_train.shape[0]
+        n_test = n // n_folds
+        inds = np.random.permutation(np.arange(0, n))
+
+        results = []
+
+        # Nomenclature is a bit difficult here:
+        # Each cross validation fold has its own training,
+        # validation and test set. The size of the test set
+        # is the number of provided training samples divided by the
+        # number of folds. The rest is used as training and internal
+        # validation set according to the chosen training_split
+        # ratio.
+
+        for i in range(n_folds):
+            for m in self.models:
+                m.reset_states()
+
+            # Indices to use for training, including the training data and the
+            # internal validation set used to monitor convergence.
+            inds_train = np.append(np.arange(0, i * n_test),
+                                   np.arange(min((i + 1) * n_test, n), n))
+            inds_train = inds[inds_train]
+            # Indices used to evaluate the performance of the model.
+            inds_test = np.arange(i * n_test, (i + 1) * n_test)
+            inds_test = inds[inds_test]
+
+            x_test_fold = x_train[inds_test, :]
+            y_test_fold = y_train[inds_test]
+
+            # Splitting training and validation set.
+            x_train_fold = x_train[inds_train, :]
+            y_train_fold = y_train[inds_train]
+
+            self.fit(x_train_fold, y_train_fold,
+                     sigma_noise, **kwargs)
+
+            # Evaluation on this fold's test set.
+            if s:
+                y_pred = self.predict(x_test_fold)
+                results += [s(y_pred, y_test_fold)]
+            else:
+                loss = self.models[0].evaluate(
+                    (x_test_fold - self.x_mean) / self.x_sigma,
+                    y_test_fold)
+                logger.info(loss)
+                results += [loss]
+        logger.info(results)
+        results = np.array(results)
+        logger.info(results)
+        return (np.mean(results, axis=0), np.std(results, axis=0))
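The fold bookkeeping above is compact; a standalone sketch of the same index arithmetic (an editorial example, not part of the patch; n and n_folds arbitrary):

    import numpy as np

    n, n_folds = 10, 5
    n_test = n // n_folds
    inds = np.random.permutation(n)
    for i in range(n_folds):
        # Test fold i, and everything outside it for training/validation.
        inds_test = inds[np.arange(i * n_test, (i + 1) * n_test)]
        inds_train = inds[np.append(np.arange(0, i * n_test),
                                    np.arange(min((i + 1) * n_test, n), n))]
        # Each sample appears in exactly one test fold.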
+
+    def fit(self,
+            x_train,
+            y_train,
+            sigma_noise=None,
+            x_val=None,
+            y_val=None,
+            **kwargs):
+        r"""
+        Train the QRNN on given training data.
+
+        The training uses an internal validation set to monitor training
+        progress. This can be either split at random from the training
+        data (see `training_split` argument) or provided explicitly
+        using the `x_val` and `y_val` parameters.
+
+        Training of the QRNN is performed using stochastic gradient descent
+        (SGD). The learning rate is adaptively reduced during training when
+        the loss on the internal validation set has not been reduced for a
+        certain number of epochs.
+
+        Two data augmentation techniques can be used to improve the
+        calibration of the QRNN's predictions. The first one adds Gaussian
+        noise to training batches before they are passed to the network.
+        The noise statistics are defined using the `sigma_noise` argument.
+        The second one is adversarial training. If adversarial training is
+        used, half of each training batch consists of adversarial samples
+        generated using the fast gradient sign method. The strength of the
+        perturbation is controlled using the `delta_at` parameter.
+
+        During one epoch, each training sample is passed once to the network.
+        Their order is randomized between epochs.
+
+        Arguments:
+
+            x_train(np.array): Array of shape `(n, m)` containing n training
+                               samples of dimension m.
+
+            y_train(np.array): Array of shape `(n, )` containing the training
+                               output corresponding to the training data in
+                               `x_train`.
+
+            sigma_noise(None, float, np.array): If not `None`, this value is used
+                                                to scale the Gaussian noise
+                                                that is added to each training
+                                                batch. If `None`, no noise is
+                                                added.
+
+            x_val(np.array): Array of shape :code:`(n', m)` containing n' validation
+                             inputs that will be used to monitor training loss. Must
+                             be provided in unison with :code:`y_val` or otherwise
+                             will be ignored.
+
+            y_val(np.array): Array of shape :code:`(n')` containing n' validation
+                             outputs corresponding to the inputs in :code:`x_val`.
+                             Must be provided in unison with :code:`x_val` or
+                             otherwise will be ignored.
+
+            adversarial_training(Bool): Whether or not to use adversarial training.
+                                        `False` by default.
+
+            delta_at(float): Perturbation factor for the fast gradient sign method
+                             determining the strength of the adversarial training
+                             perturbation. `0.01` by default.
+
+            batch_size(int): The batch size to use during training. Defaults to `512`.
+
+            convergence_epochs(int): The number of epochs without decrease in
+                                     validation loss before the learning rate
+                                     is reduced. Defaults to `10`.
+
+            initial_learning_rate(float): The initial value for the learning
+                                          rate. Defaults to `0.01`.
+
+            learning_rate_decay(float): The factor by which to reduce the
+                                        learning rate after no improvement
+                                        on the internal validation set was
+                                        observed for `convergence_epochs`
+                                        epochs. Defaults to `2.0`.
+
+            learning_rate_minimum(float): The minimum learning rate at which
+                                          the training is terminated. Defaults
+                                          to `1e-6`.
+
+            maximum_epochs(int): The maximum number of epochs to perform if
+                                 training does not terminate before. Defaults
+                                 to `200`.
+
+            training_split(float): The fraction `0 < ts < 1.0` of the samples
+                                   to use for training, with the remainder kept
+                                   as internal validation set. Defaults to `0.9`.
+
+        """
+        if not (x_train.shape[1] == self.input_dim):
+            raise Exception("Training input must have the same extent along "
+                            "dimension 1 as input_dim ("
+                            + str(self.input_dim) + ").")
+
+        if not (y_train.shape[1] == 1):
+            raise Exception("Currently only scalar retrieval targets are"
+                            " supported.")
+
+        x_mean = np.mean(x_train, axis=0, keepdims=True)
+        x_sigma = np.std(x_train, axis=0, keepdims=True)
+        self.x_mean = x_mean
+        self.x_sigma = x_sigma
+
+        # Handle parameters
+        # at:  adversarial training
+        # bs:  batch size
+        # ce:  convergence epochs
+        # ilr: initial learning rate
+        # lrd: learning rate decay
+        # lrm: learning rate minimum
+        # me:  maximum number of epochs
+        # ts:  split ratio of training set
+        at, dat, bs, ce, ilr, lrd, lrm, me, ts, kwargs = self.__fit_params__(
+            kwargs)
+
+        # Split training and validation set if x_val or y_val
+        # are not provided.
+        n = x_train.shape[0]
+        n_train = n
+        if x_val is None and y_val is None:
+            n_train = round(ts * n)
+            n_val = n - n_train
+            inds = np.random.permutation(n)
+            x_val = x_train[inds[n_train:], :]
+            y_val = y_train[inds[n_train:]]
+            x_train = x_train[inds[:n_train], :]
+            y_train = y_train[inds[:n_train]]
+        loss = QuantileLoss(self.quantiles)
+
+        self.custom_objects = {loss.__name__: loss}
+        for model in self.models:
+            optimizer = SGD(lr=ilr)
+            model.compile(loss=loss, optimizer=optimizer)
+
+            if at:
+                inputs = [model.input, model.targets[0],
+                          model.sample_weights[0]]
+                input_gradients = K.function(
+                    inputs, K.gradients(model.total_loss, model.input))
+                training_generator = AdversarialTrainingGenerator(x_train,
+                                                                  self.x_mean,
+                                                                  self.x_sigma,
+                                                                  y_train,
+                                                                  sigma_noise,
+                                                                  bs,
+                                                                  input_gradients,
+                                                                  dat)
+            else:
+                training_generator = TrainingGenerator(x_train, self.x_mean, self.x_sigma,
+                                                       y_train, sigma_noise, bs)
+            validation_generator = ValidationGenerator(x_val, self.x_mean, self.x_sigma,
+                                                       y_val, sigma_noise)
+            lr_callback = LRDecay(model, lrd, lrm, ce)
+            model.fit_generator(training_generator, steps_per_epoch=n_train // bs,
+                                epochs=me, validation_data=validation_generator,
+                                validation_steps=1, callbacks=[lr_callback])
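Continuing the construction sketch above, a complete training call on synthetic data (an editorial example, not part of the patch; all hyperparameters at their defaults except the ones shown):

    x_train = np.random.rand(1000, 5)
    y_train = np.random.rand(1000, 1)
    qrnn.fit(x_train, y_train, sigma_noise=0.1,
             batch_size=256, maximum_epochs=50)
    y_quantiles = qrnn.predict(x_train[:10])   # shape (10, 5): one column per quantile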
" \ + "Please make sure all files created during the"\ + " saving are in this folder.") + f.close() + return qrnn + + def __getstate__(self): + dct = copy.copy(self.__dict__) + dct.pop("models") + return dct + + def __setstate__(self, state): + self.__dict__ = state + self.models = None diff --git a/typhon/retrieval/qrnn/backends/pytorch.py b/typhon/retrieval/qrnn/backends/pytorch.py new file mode 100644 index 00000000..4fe5ce58 --- /dev/null +++ b/typhon/retrieval/qrnn/backends/pytorch.py @@ -0,0 +1,345 @@ +""" +models +====== + +This module provides an implementation of quantile regression neural networks (QRNNs) +using the pytorch deep learning framework. +""" +import torch +import numpy as np +from torch import nn +from torch import optim +from tqdm.auto import tqdm + +activations = {"elu" : nn.ELU, + "hardshrink" : nn.Hardshrink, + "hardtanh" : nn.Hardtanh, + "PReLU" : nn.PReLU, + "SELU" : nn.SELU, + "CELU" : nn.CELU, + "Sigmoid" : nn.Sigmoid, + "Softmin" : nn.Softmin} + +class QuantileLoss: + r""" + The quantile loss function + + This function object implements the quantile loss defined as + + + .. math:: + + \mathcal{L}(y_\text{pred}, y_\text{true}) = + \begin{cases} + \tau \cdot |y_\text{pred} - y_\text{true}| & , y_\text{pred} < y_\text{true} \\ + (1 - \tau) \cdot |y_\text{pred} - y_\text{true}| & , \text{otherwise} + \end{cases} + + + as a training criterion for the training of neural networks. The loss criterion + expects a vector :math:`\mathbf{y}_\tau` of predicted quantiles and the observed + value :math:`y`. The loss for a single training sample is computed by summing the losses + corresponding to each quantiles. The loss for a batch of training samples is + computed by taking the mean over all samples in the batch. + """ + def __init__(self, + quantiles, + mask = -1.0): + """ + Create an instance of the quantile loss function with the given quantiles. + + Arguments: + quantiles: Array or iterable containing the quantiles to be estimated. + """ + self.quantiles = torch.tensor(quantiles).float() + self.n_quantiles = len(quantiles) + self.mask = np.float32(mask) + + def to(self, device): + self.quantiles = self.quantiles.to(device) + + def __call__(self, y_pred, y_true): + """ + Compute the mean quantile loss for given inputs. + + Arguments: + y_pred: N-tensor containing the predicted quantiles along the last + dimension + y_true: (N-1)-tensor containing the true y values corresponding to + the predictions in y_pred + + Returns: + The mean quantile loss. + """ + dy = y_pred - y_true + n = self.quantiles.size()[0] + qs = self.quantiles.reshape((n,) + (1, ) * max(len(dy.size()) - 2, 0)) + l = torch.where(dy >= 0.0, + (1.0 - qs) * dy, + (-qs) * dy) + if not self.mask is None: + l = torch.where(y_true == self.mask, torch.zeros_like(l), l) + return l.mean() + +################################################################################ +# QRNN +################################################################################ + +class PytorchQRNN: + """ + Quantile regression neural network (QRNN) + + This class implements QRNNs as a fully-connected network with + a given number of layers. + """ + def __init__(self, + input_dimension, + quantiles, + model = (3, 128, "relu"), + depth = 3, + width = 128, + activation = nn.ReLU): + """ + Arguments: + input_dimension(int): The number of input features. + quantiles(array): Array of the quantiles to predict. + depth(int): Number of layers of the network. + width(int): The number of neurons in the inner layers of the network. 
+
+################################################################################
+# QRNN
+################################################################################
+
+class PytorchQRNN:
+    """
+    Quantile regression neural network (QRNN)
+
+    This class implements QRNNs as a fully-connected network with
+    a given number of layers.
+    """
+    def __init__(self,
+                 input_dimension,
+                 quantiles,
+                 model = (3, 128, "relu"),
+                 depth = 3,
+                 width = 128,
+                 activation = nn.ReLU):
+        """
+        Arguments:
+            input_dimension(int): The number of input features.
+            quantiles(array): Array of the quantiles to predict.
+            model: Tuple (depth, width, activation) describing the architecture
+                of the network to create, or an instance of torch.nn.Module
+                to use directly.
+            depth(int): Number of layers of the network.
+            width(int): The number of neurons in the inner layers of the network.
+            activation: The activation function to use for the inner layers of the network.
+        """
+        self.input_dimension = input_dimension
+        self.quantiles = np.array(quantiles)
+        self.depth = depth
+        self.width = width
+        self.activation = activation
+        self.criterion = QuantileLoss(self.quantiles)
+
+        n_quantiles = len(quantiles)
+
+        if type(model) is tuple:
+            depth = model[0]
+            width = model[1]
+            act = model[2]
+            if type(act) is str:
+                if act in activations:
+                    act = activations[act]
+                else:
+                    raise ValueError("{} is not one of the available "
+                                     "activations ({}) to use in a pytorch "
+                                     "network.".format(act,
+                                                       list(activations.keys())))
+
+            self.model = nn.Sequential()
+            self.model.add_module("fc_0", nn.Linear(input_dimension, width))
+            self.model.add_module("act_0", act())
+            for i in range(1, depth - 1):
+                self.model.add_module("fc_{}".format(i), nn.Linear(width, width))
+                self.model.add_module("act_{}".format(i), act())
+            self.model.add_module("fc_{}".format(depth - 1), nn.Linear(width, n_quantiles))
+
+            # Keep the stored hyperparameters in sync with the architecture
+            # that was actually built.
+            self.depth = depth
+            self.width = width
+            self.activation = act
+        elif isinstance(model, nn.Module):
+            self.model = model
+        else:
+            raise ValueError("Provided model must either be a valid architecture"
+                             " tuple or an instance of pytorch.nn.Module.")
+
+        self.training_errors = []
+        self.validation_errors = []
+
+    def _make_adversarial_samples(self, x, y, eps):
+        self.model.zero_grad()
+        x.requires_grad = True
+        y_pred = self.model(x)
+        c = self.criterion(y_pred, y)
+        c.backward()
+        x_adv = x.detach() + eps * torch.sign(x.grad.detach())
+        return x_adv
+
+    def train(self,
+              training_data,
+              validation_data,
+              n_epochs = 1,
+              adversarial_training = False,
+              eps_adv = 1e-6,
+              lr = 1e-2,
+              momentum = 0.0,
+              gpu = False):
+        """
+        Train the network.
+
+        This trains the network for the given number of epochs using the provided
+        training and validation data.
+
+        If desired, the training can be augmented using adversarial training. In this
+        case the network is additionally trained with an adversarial batch of examples
+        in each step of the training.
+
+        Arguments:
+            training_data: pytorch dataloader providing the training data
+            validation_data: pytorch dataloader providing the validation data
+            n_epochs: the number of epochs to train the network for
+            adversarial_training: whether or not to use adversarial training
+            eps_adv: The scaling factor to use for adversarial training.
+        """
+        self.optimizer = optim.SGD(self.model.parameters(),
+                                   lr = lr,
+                                   momentum = momentum)
+
+        self.model.train()
+
+        if torch.cuda.is_available() and gpu:
+            dev = torch.device("cuda")
+        else:
+            dev = torch.device("cpu")
+        self.model.to(dev)
+
+        self.criterion.to(dev)
+
+        for i in range(n_epochs):
+
+            err = 0.0
+            n = 0
+            iterator = tqdm(enumerate(training_data), total = len(training_data))
+            for j, (x, y) in iterator:
+                x = x.to(dev)
+                y = y.to(dev)
+
+                shape = x.size()
+                y = y.reshape((shape[0], 1) + shape[2:])
+
+                self.optimizer.zero_grad()
+                y_pred = self.model(x)
+                c = self.criterion(y_pred, y)
+                c.backward()
+                self.optimizer.step()
+
+                err += c.item() * x.size()[0]
+                n += x.size()[0]
+
+                if adversarial_training:
+                    self.optimizer.zero_grad()
+                    x_adv = self._make_adversarial_samples(x, y, eps_adv)
+                    y_pred = self.model(x_adv)
+                    c = self.criterion(y_pred, y)
+                    c.backward()
+                    self.optimizer.step()
+
+                if j % 100 == 0:
+                    iterator.set_postfix({"Training error" : err / n})
+
+            # Save training error
+            self.training_errors.append(err / n)
+
+            val_err = 0.0
+            n = 0
+
+            for x, y in validation_data:
+                x = x.to(dev)
+                y = y.to(dev)
+
+                shape = x.size()
+                y = y.reshape((shape[0], 1) + shape[2:])
+
+                y_pred = self.model(x)
+                c = self.criterion(y_pred, y)
+
+                val_err += c.item() * x.size()[0]
+                n += x.size()[0]
+
+            self.validation_errors.append(val_err / n)
+        self.model.eval()
+
+    def predict(self, x):
+        """
+        Predict quantiles for given input.
+
+        Args:
+            x: 2D tensor containing the inputs for which to
+               predict the quantiles.
+
+        Returns:
+            tensor: 2D tensor containing the predicted quantiles along
+                the last dimension.
+        """
+        return self.model(x)
+
+    def calibration(self,
+                    data,
+                    gpu=False):
+        """
+        Computes the calibration of the predictions from the neural network.
+
+        Arguments:
+            data: torch dataloader object providing the data for which to compute
+                the calibration.
+
+        Returns:
+            (intervals, frequencies): Tuple containing the confidence intervals and
+                corresponding observed frequencies.
+        """
+
+        if gpu and torch.cuda.is_available():
+            dev = torch.device("cuda")
+        else:
+            dev = torch.device("cpu")
+        self.model.to(dev)
+
+        n_intervals = self.quantiles.size // 2
+        qs = self.quantiles
+        intervals = np.array([q_r - q_l for (q_l, q_r) in zip(qs, reversed(qs))])[:n_intervals]
+        counts = np.zeros(n_intervals)
+
+        total = 0.0
+
+        iterator = tqdm(data)
+        for x, y in iterator:
+            x = x.to(dev)
+            y = y.to(dev)
+            shape = x.size()
+            y = y.reshape((shape[0], 1) + shape[2:])
+
+            y_pred = self.predict(x)
+            y_pred = y_pred.cpu()
+            y = y.cpu()
+
+            for i in range(n_intervals):
+                l = y_pred[:, [i]]
+                r = y_pred[:, [-(i + 1)]]
+                counts[i] += np.logical_and(y >= l, y < r).sum()
+
+            total += np.prod(y.size())
+
+        return intervals[::-1], (counts / total)[::-1]
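If the network is well calibrated, the observed frequencies returned by `calibration` match the nominal sizes of the symmetric quantile intervals. A usage sketch (an editorial example, not part of the patch; `test_loader` is a hypothetical torch dataloader):

    intervals, frequencies = qrnn.calibration(test_loader)
    for ci, freq in zip(intervals, frequencies):
        # e.g. the (0.05, 0.95) quantile pair spans a 0.9 interval, which
        # should contain roughly 90 % of the observed values.
        print("{:.2f} interval -> observed coverage {:.2f}".format(ci, freq))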
+
+    def save(self, path):
+        """
+        Save QRNN to file.
+
+        Arguments:
+            path: The path of the file in which to store the QRNN.
+        """
+        torch.save({"input_dimension" : self.input_dimension,
+                    "quantiles" : self.quantiles,
+                    "width" : self.width,
+                    "depth" : self.depth,
+                    "activation" : self.activation,
+                    "network_state" : self.model.state_dict(),
+                    "optimizer_state" : self.optimizer.state_dict()},
+                   path)
+
+    @staticmethod
+    def load(path):
+        """
+        Load QRNN from file.
+
+        Arguments:
+            path: Path of the file where the QRNN was stored.
+        """
+        state = torch.load(path)
+        qrnn = PytorchQRNN(state["input_dimension"],
+                           state["quantiles"],
+                           model=(state["depth"],
+                                  state["width"],
+                                  state["activation"]))
+        qrnn.model.load_state_dict(state["network_state"])
+        # The optimizer is only created in train(), so its state can only be
+        # restored once a training run has been set up.
+        return qrnn
diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py
index 956c375b..9b1c46d9 100644
--- a/typhon/retrieval/qrnn/qrnn.py
+++ b/typhon/retrieval/qrnn/qrnn.py
@@ -369,13 +369,10 @@ class QRNN:
             The ensemble of Keras neural networks used for the quantile regression
             neural network.
     """
-
     def __init__(self,
                  input_dim,
                  quantiles,
-                 depth=3,
-                 width=128,
-                 activation="relu",
+                 model=(3, 128, "relu"),
                  ensemble_size=1,
                  **kwargs):
         """
@@ -408,39 +405,31 @@ def __init__(self,
         """
         self.input_dim = input_dim
         self.quantiles = np.array(quantiles)
-        self.depth = depth
-        self.width = width
-        self.activation = activation
-
-        model = Sequential()
-        if depth == 0:
-            model.add(Dense(input_dim=input_dim,
-                            units=len(quantiles),
-                            activation=None))
-        else:
-            model.add(Dense(input_dim=input_dim,
-                            units=width,
-                            activation=activation))
-            for i in range(depth - 2):
-                model.add(Dense(units=width,
-                                activation=activation,
-                                **kwargs))
-            model.add(Dense(units=len(quantiles), activation=None))
-        self.models = [clone_model(model) for i in range(ensemble_size)]
-
-    def __fit_params__(self, kwargs):
-        at = kwargs.pop("adversarial_training", False)
-        dat = kwargs.pop("delta_at", 0.01)
-        batch_size = kwargs.pop("batch_size", 512)
-        convergence_epochs = kwargs.pop("convergence_epochs", 10)
-        initial_learning_rate = kwargs.pop('initial_learning_rate', 0.01)
-        learning_rate_decay = kwargs.pop('learning_rate_decay', 2.0)
-        learning_rate_minimum = kwargs.pop('learning_rate_minimum', 1e-6)
-        maximum_epochs = kwargs.pop("maximum_epochs", 200)
-        training_split = kwargs.pop("training_split", 0.9)
-        return at, dat, batch_size, convergence_epochs, initial_learning_rate, \
-            learning_rate_decay, learning_rate_minimum, maximum_epochs, \
-            training_split, kwargs
+
+        try:
+            from typhon.retrieval.qrnn.backends.keras import KerasQRNN
+            model = KerasQRNN(input_dim, quantiles, model, ensemble_size)
+        except:
+            from typhon.retrieval.qrnn.backends.pytorch import PytorchQRNN
+            model = PytorchQRNN(input_dim, quantiles, model)
+
+
+
+        #try:
+        #    from typhon.retrieval.qrnn.backends.keras import KerasQRNN
+        #    model = KerasQRNN(input_dim, quantiles, model, ensemble_size)
+        #except:
+        #    try:
+        #        from typhon.retrieval.qrnn.backends.pytorch import QRNNPytorch
+        #        model = QRNNPytorch(input_dim, quantiles, model)
+        #    except:
+        #        model = None
+        #if model is None:
+        #    raise Exception("Could not create model backend. Please make sure "
+        #                    "that the provided architecture string is either a "
+        #                    "valid architecture tuple, a keras model or a "
+        #                    "pytorch module.")
+        self.model = model
 
     def cross_validation(self,
                          x_train,
@@ -539,13 +528,7 @@ def cross_validation(self,
         logger.info(results)
         return (np.mean(results, axis=0), np.std(results, axis=0))
 
-    def fit(self,
-            x_train,
-            y_train,
-            sigma_noise=None,
-            x_val=None,
-            y_val=None,
-            **kwargs):
+    def fit(self, *args, **kwargs):
         r"""
         Train the QRNN on given training data.
 
@@ -629,73 +612,11 @@ def fit(self,
                                    to 0.9.
""" - if not (x_train.shape[1] == self.input_dim): - raise Exception("Training input must have the same extent along" / - "dimension 1 as input_dim (" + str(self.input_dim) / - + ")") + self.model.fit(*args, **kwargs) - if not (y_train.shape[1] == 1): - raise Exception("Currently only scalar retrieval targets are" / - " supported.") + def train(self, *args, **kwargs): + self.model.train(*args, **kwargs) - x_mean = np.mean(x_train, axis=0, keepdims=True) - x_sigma = np.std(x_train, axis=0, keepdims=True) - self.x_mean = x_mean - self.x_sigma = x_sigma - - # Handle parameters - # at: adversarial training - # bs: batch size - # ce: convergence epochs - # ilr: initial learning rate - # lrd: learning rate decay - # lrm: learning rate minimum - # me: maximum number of epochs - # ts: split ratio of training set - at, dat, bs, ce, ilr, lrd, lrm, me, ts, kwargs = self.__fit_params__( - kwargs) - - # Split training and validation set if x_val or y_val - # are not provided. - n = x_train.shape[0] - n_train = n - if x_val is None and y_val is None: - n_train = round(ts * n) - n_val = n - n_train - inds = np.random.permutation(n) - x_val = x_train[inds[n_train:], :] - y_val = y_train[inds[n_train:]] - x_train = x_train[inds[:n_train], :] - y_train = y_train[inds[:n_train]] - loss = QuantileLoss(self.quantiles) - - self.custom_objects = {loss.__name__: loss} - for model in self.models: - optimizer = SGD(lr=ilr) - model.compile(loss=loss, optimizer=optimizer) - - if at: - inputs = [model.input, model.targets[0], - model.sample_weights[0]] - input_gradients = K.function( - inputs, K.gradients(model.total_loss, model.input)) - training_generator = AdversarialTrainingGenerator(x_train, - self.x_mean, - self.x_sigma, - y_train, - sigma_noise, - bs, - input_gradients, - dat) - else: - training_generator = TrainingGenerator(x_train, self.x_mean, self.x_sigma, - y_train, sigma_noise, bs) - validation_generator = ValidationGenerator(x_val, self.x_mean, self.x_sigma, - y_val, sigma_noise) - lr_callback = LRDecay(model, lrd, lrm, ce) - model.fit_generator(training_generator, steps_per_epoch=n_train // bs, - epochs=me, validation_data=validation_generator, - validation_steps=1, callbacks=[lr_callback]) def predict(self, x): r""" @@ -715,9 +636,7 @@ def predict(self, x): quantiles of the network. """ - predictions = np.stack( - [m.predict((x - self.x_mean) / self.x_sigma) for m in self.models]) - return np.mean(predictions, axis=0) + return self.model.predict(x) def cdf(self, x): r""" From 4307c1d95c6cde6d772e90e2e100501a74e46af4 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Tue, 17 Mar 2020 09:45:42 +0100 Subject: [PATCH 02/19] Wokring implementation of pytorch fully-connected model. 

From 4307c1d95c6cde6d772e90e2e100501a74e46af4 Mon Sep 17 00:00:00 2001
From: Simon Pfreundschuh
Date: Tue, 17 Mar 2020 09:45:42 +0100
Subject: [PATCH 02/19] Working implementation of pytorch fully-connected
 model.

---
 setup.py                                  |   2 +-
 typhon/retrieval/__init__.py              |   2 +-
 typhon/retrieval/qrnn/__init__.py         |   2 +-
 typhon/retrieval/qrnn/backends/keras.py   |  43 +-
 typhon/retrieval/qrnn/backends/pytorch.py |  11 +-
 typhon/retrieval/qrnn/models/keras.py     | 108 ++++
 typhon/retrieval/qrnn/models/pytorch.py   | 360 +++++++++++++
 typhon/retrieval/qrnn/qrnn.py             | 608 +++++-----------------
 8 files changed, 618 insertions(+), 518 deletions(-)
 create mode 100644 typhon/retrieval/qrnn/models/keras.py
 create mode 100644 typhon/retrieval/qrnn/models/pytorch.py

diff --git a/setup.py b/setup.py
index ff29fdb2..525618ed 100644
--- a/setup.py
+++ b/setup.py
@@ -86,7 +86,7 @@
         "numexpr",
         "numpy>=1.13",
         "pandas",
-        "scikit-image",
+        "scikit-image>=0.15",
         "scikit-learn",
         "scipy>=0.15.1",
         "setuptools>=0.7.2",
diff --git a/typhon/retrieval/__init__.py b/typhon/retrieval/__init__.py
index ff43f188..09fdccb5 100644
--- a/typhon/retrieval/__init__.py
+++ b/typhon/retrieval/__init__.py
@@ -5,4 +5,4 @@
 from .common import *  # noqa
 from .oem import *  # noqa
-#from .spareice import *  # noqa
+from .spareice import *  # noqa
diff --git a/typhon/retrieval/qrnn/__init__.py b/typhon/retrieval/qrnn/__init__.py
index bb15d24e..ae8c7421 100644
--- a/typhon/retrieval/qrnn/__init__.py
+++ b/typhon/retrieval/qrnn/__init__.py
@@ -4,4 +4,4 @@
 The implementation has been developed specifically for remote sensing
 applications and provides a high level interface allowing for simple training
 and evaluation of the QRNNs."""
-from typhon.retrieval.qrnn.qrnn import QRNN
+from typhon.retrieval.qrnn.qrnn import QRNN, set_backend
diff --git a/typhon/retrieval/qrnn/backends/keras.py b/typhon/retrieval/qrnn/backends/keras.py
index df5fec2f..4a7830fc 100644
--- a/typhon/retrieval/qrnn/backends/keras.py
+++ b/typhon/retrieval/qrnn/backends/keras.py
@@ -15,7 +15,7 @@
 # Keras Imports
 try:
     import keras
-    from keras.models import Sequential, clone_model
+    from keras.models import Sequential, clone_model, Model
     from keras.layers import Dense, Activation, Dropout
     from keras.optimizers import SGD
 except ImportError:
@@ -425,16 +425,14 @@ def __init__(self,
                                 activation=activation,
                                 **kwargs))
                 model.add(Dense(units=len(quantiles), activation=None))
-        elif type(model) is Model:
-            pass
+        elif isinstance(model, Model):
+            self.models = [clone_model(model) for i in range(ensemble_size)]
+        elif isinstance(model, list) and isinstance(model[0], Model):
+            self.models = model
         else:
             raise Exception("The provided model is neither a suitable "
                             "architecture tuple nor a keras model.")
 
-        self.models = [clone_model(model) for i in range(ensemble_size)]
-
-
-
     def __fit_params__(self, kwargs):
         at = kwargs.pop("adversarial_training", False)
         dat = kwargs.pop("delta_at", 0.01)
@@ -776,20 +774,23 @@ def load(path):
         filename = os.path.basename(path)
         dirname = os.path.dirname(path)
 
-        f = open(path, "rb")
-        qrnn = pickle.load(f)
-        qrnn.models = []
-        for mf in qrnn.model_files:
-            mf = os.path.basename(mf)
-            try:
-                mp = os.path.join(dirname, os.path.basename(mf))
-                qrnn.models += [keras.models.load_model(mp, qrnn.custom_objects)]
-            except:
-                raise Exception("Error loading the neural network models. " \
" \ - "Please make sure all files created during the"\ - " saving are in this folder.") - f.close() - return qrnn + with open(path, "rb") as f: + + qrnn = pickle.load(f) + qrnn.models = [] + for mf in qrnn.model_files: + mf = os.path.basename(mf) + try: + mp = os.path.join(dirname, os.path.basename(mf)) + qrnn.models += [keras.models.load_model(mp, qrnn.custom_objects)] + except: + raise Exception("Error loading the neural network models. " \ + "Please make sure all files created during the"\ + " saving are in this folder.") + keras_qrnn = KerasQRNN(qrnn.input_dim, qrnn.quantiles, qrnn.models) + keras_qrnn.x_mean = qrnn.x_mean + keras_qrnn.x_sigma = qrnn.x_sigma + return keras_qrnn def __getstate__(self): dct = copy.copy(self.__dict__) diff --git a/typhon/retrieval/qrnn/backends/pytorch.py b/typhon/retrieval/qrnn/backends/pytorch.py index 4fe5ce58..da1d6846 100644 --- a/typhon/retrieval/qrnn/backends/pytorch.py +++ b/typhon/retrieval/qrnn/backends/pytorch.py @@ -14,11 +14,12 @@ activations = {"elu" : nn.ELU, "hardshrink" : nn.Hardshrink, "hardtanh" : nn.Hardtanh, - "PReLU" : nn.PReLU, - "SELU" : nn.SELU, - "CELU" : nn.CELU, - "Sigmoid" : nn.Sigmoid, - "Softmin" : nn.Softmin} + "prelu" : nn.PReLU, + "selu" : nn.SELU, + "celu" : nn.CELU, + "sigmoid" : nn.Sigmoid, + "softplus" : nn.Softplus + "softmin" : nn.Softmin} class QuantileLoss: r""" diff --git a/typhon/retrieval/qrnn/models/keras.py b/typhon/retrieval/qrnn/models/keras.py new file mode 100644 index 00000000..58e511e8 --- /dev/null +++ b/typhon/retrieval/qrnn/models/keras.py @@ -0,0 +1,108 @@ +import numpy as np +from keras.models import Sequential +from keras.layers import Dense, Activation +import keras.backend as K +import logging + +################################################################################ +# Quantile loss +################################################################################ + +logger = logging.getLogger(__name__) + +def skewed_absolute_error(y_true, y_pred, tau): + """ + The quantile loss function for a given quantile tau: + + L(y_true, y_pred) = (tau - I(y_pred < y_true)) * (y_pred - y_true) + + Where I is the indicator function. + """ + dy = y_pred - y_true + return K.mean((1.0 - tau) * K.relu(dy) + tau * K.relu(-dy), axis=-1) + + +def quantile_loss(y_true, y_pred, taus): + """ + The quantiles loss for a list of quantiles. Sums up the error contribution + from the each of the quantile loss functions. + """ + e = skewed_absolute_error( + K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0]) + for i, tau in enumerate(taus[1:]): + e += skewed_absolute_error(K.flatten(y_true), + K.flatten(y_pred[:, i + 1]), + tau) + return e + + +class QuantileLoss: + """ + Wrapper class for the quantile error loss function. A class is used here + to allow the implementation of a custom `__repr` function, so that the + loss function object can be easily loaded using `keras.model.load`. + + Attributes: + + quantiles: List of quantiles that should be estimated with + this loss function. 
+
+    """
+
+    def __init__(self, quantiles):
+        self.__name__ = "QuantileLoss"
+        self.quantiles = quantiles
+
+    def __call__(self, y_true, y_pred):
+        return quantile_loss(y_true, y_pred, self.quantiles)
+
+    def __repr__(self):
+        return "QuantileLoss(" + repr(self.quantiles) + ")"
+
+
+class KerasModel:
+    def __init__(self):
+        pass
+
+    def train(self):
+        pass
+
+
+class FullyConnected(Sequential):
+    """
+    Convenience class to represent fully connected models with
+    a given number of input and output features and depth and
+    width of the hidden layers.
+    """
+    def __init__(self,
+                 input_dimension,
+                 quantiles,
+                 arch,
+                 activation = "relu"):
+        """
+        Create a fully-connected neural network.
+
+        Args:
+            input_dimension(int): Number of input features
+            quantiles(np.array): The quantiles to predict; one output
+                neuron is created for each of them.
+            arch(tuple): Tuple (d, w) of d, the number of hidden
+                layers in the network, and w, the width of the net-
+                work.
+            activation(str or None): Activation function to insert between
+                hidden layers. The given string is passed to the
+                keras.layers.Activation class. If None no activation function
+                is used.
+        """
+        quantiles = np.array(quantiles)
+        output_dimension = quantiles.size
+        if len(arch) == 0:
+            layers = [Dense(output_dimension, input_shape=(input_dimension,))]
+        else:
+            d, w = arch
+            layers = [Dense(w, input_shape=(input_dimension,))]
+            if activation is not None:
+                layers.append(Activation(activation))
+            for i in range(d - 1):
+                layers.append(Dense(w, input_shape=(w,)))
+                if activation is not None:
+                    layers.append(Activation(activation))
+            layers.append(Dense(output_dimension, input_shape=(w,)))
+        super().__init__(layers)
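A construction sketch for this convenience class (an editorial example, not part of the patch; dimensions arbitrary):

    import numpy as np

    net = FullyConnected(input_dimension=5,
                         quantiles=np.array([0.05, 0.5, 0.95]),
                         arch=(3, 128),
                         activation="relu")
    net.summary()    # 3 hidden layers of width 128, 3 output neurons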
diff --git a/typhon/retrieval/qrnn/models/pytorch.py b/typhon/retrieval/qrnn/models/pytorch.py
new file mode 100644
index 00000000..f77835f8
--- /dev/null
+++ b/typhon/retrieval/qrnn/models/pytorch.py
@@ -0,0 +1,360 @@
+"""
+models
+======
+
+This module provides an implementation of quantile regression neural networks (QRNNs)
+using the pytorch deep learning framework.
+"""
+import torch
+import numpy as np
+from torch import nn
+from torch import optim
+from tqdm.auto import tqdm
+
+activations = {"elu" : nn.ELU,
+               "hardshrink" : nn.Hardshrink,
+               "hardtanh" : nn.Hardtanh,
+               "prelu" : nn.PReLU,
+               "relu" : nn.ReLU,
+               "selu" : nn.SELU,
+               "celu" : nn.CELU,
+               "sigmoid" : nn.Sigmoid,
+               "softplus" : nn.Softplus,
+               "softmin" : nn.Softmin}
+
+
+def load(f):
+    model = torch.load(f)
+    return model
+
+################################################################################
+# Quantile loss
+################################################################################
+
+class QuantileLoss:
+    r"""
+    The quantile loss function
+
+    This function object implements the quantile loss defined as
+
+    .. math::
+
+        \mathcal{L}(y_\text{pred}, y_\text{true}) =
+        \begin{cases}
+        \tau \cdot |y_\text{pred} - y_\text{true}| & , y_\text{pred} < y_\text{true} \\
+        (1 - \tau) \cdot |y_\text{pred} - y_\text{true}| & , \text{otherwise}
+        \end{cases}
+
+    as a training criterion for the training of neural networks. The loss criterion
+    expects a vector :math:`\mathbf{y}_\tau` of predicted quantiles and the observed
+    value :math:`y`. The loss for a single training sample is computed by averaging
+    the losses corresponding to each of the quantiles. The loss for a batch of
+    training samples is computed by taking the mean over all samples in the batch.
+    """
+    def __init__(self,
+                 quantiles,
+                 mask = -1.0):
+        """
+        Create an instance of the quantile loss function with the given quantiles.
+
+        Arguments:
+            quantiles: Array or iterable containing the quantiles to be estimated.
+            mask: Value in y_true that marks samples to be excluded from
+                  the loss.
+        """
+        self.quantiles = torch.tensor(quantiles).float()
+        self.n_quantiles = len(quantiles)
+        self.mask = np.float32(mask)
+
+    def to(self, device):
+        self.quantiles = self.quantiles.to(device)
+
+    def __call__(self, y_pred, y_true):
+        """
+        Compute the mean quantile loss for given inputs.
+
+        Arguments:
+            y_pred: N-tensor containing the predicted quantiles along the last
+                dimension
+            y_true: (N-1)-tensor containing the true y values corresponding to
+                the predictions in y_pred
+
+        Returns:
+            The mean quantile loss.
+        """
+        dy = y_pred - y_true
+        n = self.quantiles.size()[0]
+        qs = self.quantiles.reshape((n,) + (1, ) * max(len(dy.size()) - 2, 0))
+        l = torch.where(dy >= 0.0,
+                        (1.0 - qs) * dy,
+                        (-qs) * dy)
+        if self.mask is not None:
+            l = torch.where(y_true == self.mask, torch.zeros_like(l), l)
+        return l.mean()
+
+################################################################################
+# QRNN
+################################################################################
+
+class PytorchModel:
+    """
+    Base class for pytorch QRNN models.
+
+    Implements the training, prediction and calibration functionality that is
+    shared by all QRNNs implemented as pytorch modules.
+    """
+    def __init__(self,
+                 input_dimension,
+                 quantiles):
+        """
+        Arguments:
+            input_dimension(int): The number of input features.
+            quantiles(array): Array of the quantiles to predict.
+        """
+        self.input_dimension = input_dimension
+        self.quantiles = np.array(quantiles)
+        self.criterion = QuantileLoss(self.quantiles)
+        self.training_errors = []
+        self.validation_errors = []
+
+    def _make_adversarial_samples(self, x, y, eps):
+        self.model.zero_grad()
+        x.requires_grad = True
+        y_pred = self.model(x)
+        c = self.criterion(y_pred, y)
+        c.backward()
+        x_adv = x.detach() + eps * torch.sign(x.grad.detach())
+        return x_adv
+
+    def train(self, *args, **kwargs):
+        """
+        Train the network.
+
+        This trains the network for the given number of epochs using the provided
+        training and validation data.
+
+        If desired, the training can be augmented using adversarial training. In this
+        case the network is additionally trained with an adversarial batch of examples
+        in each step of the training.
+
+        Arguments:
+            training_data: pytorch dataloader providing the training data
+            validation_data: pytorch dataloader providing the validation data
+            n_epochs: the number of epochs to train the network for
+            adversarial_training: whether or not to use adversarial training
+            eps_adv: The scaling factor to use for adversarial training.
+        """
+        # nn.Module.train() takes at most one positional argument (the mode
+        # flag); dispatch to it unless this is a full training call.
+        if len(args) < 2:
+            return nn.Sequential.train(self, *args)
+
+        training_data, validation_data = args
+        n_epochs = kwargs.get("n_epochs", 1)
+        adversarial_training = kwargs.get("adversarial_training", False)
+        eps_adv = kwargs.get("eps_adv", 1e-6)
+        lr = kwargs.get("lr", 1e-2)
+        momentum = kwargs.get("momentum", 0.0)
+        gpu = kwargs.get("gpu", False)
+
+        self.optimizer = optim.SGD(self.parameters(),
+                                   lr = lr,
+                                   momentum = momentum)
+        self.train()
+
+        if torch.cuda.is_available() and gpu:
+            dev = torch.device("cuda")
+        else:
+            dev = torch.device("cpu")
+        self.to(dev)
+
+        self.criterion.to(dev)
+
+        for i in range(n_epochs):
+
+            err = 0.0
+            n = 0
+            iterator = tqdm(enumerate(training_data), total = len(training_data))
+            for j, (x, y) in iterator:
+                x = x.to(dev)
+                y = y.to(dev)
+
+                shape = x.size()
+                shape = (shape[0], 1) + shape[2:]
+                y = y.reshape(shape)
+
+                self.optimizer.zero_grad()
+                y_pred = self(x)
+                c = self.criterion(y_pred, y)
+                c.backward()
+                self.optimizer.step()
+
+                err += c.item() * x.size()[0]
+                n += x.size()[0]
+
+                if adversarial_training:
+                    self.optimizer.zero_grad()
+                    x_adv = self._make_adversarial_samples(x, y, eps_adv)
+                    y_pred = self(x_adv)
+                    c = self.criterion(y_pred, y)
+                    c.backward()
+                    self.optimizer.step()
+
+                if j % 100 == 0:
+                    iterator.set_postfix({"Training error" : err / n})
+
+            # Save training error
+            self.training_errors.append(err / n)
+
+            val_err = 0.0
+            n = 0
+
+            for x, y in validation_data:
+                x = x.to(dev)
+                y = y.to(dev)
+
+                shape = x.size()
+                shape = (shape[0], 1) + shape[2:]
+                y = y.reshape(shape)
+
+                y_pred = self(x)
+                c = self.criterion(y_pred, y)
+
+                val_err += c.item() * x.size()[0]
+                n += x.size()[0]
+
+            self.validation_errors.append(val_err / n)
+        self.eval()
+
+    def predict(self, x):
+        """
+        Predict quantiles for given input.
+
+        Args:
+            x: 2D tensor containing the inputs for which to
+               predict the quantiles.
+
+        Returns:
+            tensor: 2D tensor containing the predicted quantiles along
+                the last dimension.
+        """
+        return self(x)
+
+    def calibration(self,
+                    data,
+                    gpu=False):
+        """
+        Computes the calibration of the predictions from the neural network.
+
+        Arguments:
+            data: torch dataloader object providing the data for which to compute
+                the calibration.
+
+        Returns:
+            (intervals, frequencies): Tuple containing the confidence intervals and
+                corresponding observed frequencies.
+        """
+
+        if gpu and torch.cuda.is_available():
+            dev = torch.device("cuda")
+        else:
+            dev = torch.device("cpu")
+        self.to(dev)
+
+        n_intervals = self.quantiles.size // 2
+        qs = self.quantiles
+        intervals = np.array([q_r - q_l for (q_l, q_r) in zip(qs, reversed(qs))])[:n_intervals]
+        counts = np.zeros(n_intervals)
+
+        total = 0.0
+
+        iterator = tqdm(data)
+        for x, y in iterator:
+            x = x.to(dev)
+            y = y.to(dev)
+            shape = x.size()
+            shape = (shape[0], 1) + shape[2:]
+            y = y.reshape(shape)
+
+            y_pred = self.predict(x)
+            y_pred = y_pred.cpu()
+            y = y.cpu()
+
+            for i in range(n_intervals):
+                l = y_pred[:, [i]]
+                r = y_pred[:, [-(i + 1)]]
+                counts[i] += np.logical_and(y >= l, y < r).sum()
+
+            total += np.prod(y.size())
+        return intervals[::-1], (counts / total)[::-1]
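A sketch of driving this training loop with standard pytorch data loaders (an editorial example, not part of the patch; data is synthetic):

    import numpy as np
    import torch
    from torch.utils.data import DataLoader, TensorDataset

    x = torch.rand(1000, 5)
    y = torch.rand(1000)
    training_data = DataLoader(TensorDataset(x, y), batch_size=128, shuffle=True)
    validation_data = DataLoader(TensorDataset(x, y), batch_size=128)

    model = FullyConnected(5, np.array([0.05, 0.5, 0.95]), (3, 128, "relu"))
    model.train(training_data, validation_data, n_epochs=5, lr=1e-2)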
+
+    def save(self, path):
+        """
+        Save QRNN to file.
+
+        Arguments:
+            path: The path of the file in which to store the QRNN.
+        """
+        torch.save({"input_dimension" : self.input_dimension,
+                    "quantiles" : self.quantiles,
+                    "arch" : self.arch,
+                    "network_state" : self.state_dict(),
+                    "optimizer_state" : self.optimizer.state_dict()},
+                   path)
+
+    @staticmethod
+    def load(path):
+        """
+        Load QRNN from file.
+
+        Arguments:
+            path: Path of the file where the QRNN was stored.
+        """
+        state = torch.load(path)
+        qrnn = FullyConnected(state["input_dimension"],
+                              state["quantiles"],
+                              state["arch"])
+        qrnn.load_state_dict(state["network_state"])
+        # The optimizer is only created in train(), so its state can only be
+        # restored once a training run has been set up.
+        return qrnn
+
+class FullyConnected(PytorchModel, nn.Sequential):
+    """
+    Convenience class to represent fully connected models with
+    a given number of input and output features and depth and
+    width of the hidden layers.
+    """
+    def __init__(self,
+                 input_dimension,
+                 quantiles,
+                 arch):
+
+        """
+        Create a fully-connected neural network.
+
+        Args:
+            input_dimension(int): Number of input features
+            quantiles(np.array): The quantiles to predict; one output
+                neuron is created for each of them.
+            arch(tuple): Tuple (d, w, act) of d, the number of hidden
+                layers in the network, w, the width of the network, and
+                act, the activation function to use between the hidden
+                layers.
+        """
+        PytorchModel.__init__(self, input_dimension, quantiles)
+        output_dimension = self.quantiles.size
+        self.arch = arch
+
+        if len(arch) == 0:
+            layers = [nn.Linear(input_dimension, output_dimension)]
+        else:
+            d, w, act = arch
+            if type(act) == str:
+                act = activations[act]
+            layers = [nn.Linear(input_dimension, w)]
+            if act is not None:
+                layers.append(act())
+            for i in range(d - 1):
+                layers.append(nn.Linear(w, w))
+                if act is not None:
+                    layers.append(act())
+            layers.append(nn.Linear(w, output_dimension))
+        nn.Sequential.__init__(self, *layers)
+
+    def save(self, f):
+        torch.save(self, f)
+
diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py
index 9b1c46d9..cf45647b 100644
--- a/typhon/retrieval/qrnn/qrnn.py
+++ b/typhon/retrieval/qrnn/qrnn.py
@@ -6,371 +6,115 @@
 import numpy as np
 from scipy.interpolate import CubicSpline
 
-# Keras Imports
-try:
-    import keras
-    from keras.models import Sequential, clone_model
-    from keras.layers import Dense, Activation, Dropout
-    from keras.optimizers import SGD
-except ImportError:
-    raise ImportError(
-        "Could not import the required Keras modules. The QRNN "
-        "implementation was developed for use with Keras version 2.0.9.")
-
-################################################################################
-# Loss Functions
-################################################################################
-
-import keras.backend as K
-
-
-logger = logging.getLogger(__name__)
-
-
-def skewed_absolute_error(y_true, y_pred, tau):
-    """
-    The quantile loss function for a given quantile tau:
-
-    L(y_true, y_pred) = (tau - I(y_pred < y_true)) * (y_pred - y_true)
-
-    Where I is the indicator function.
-    """
-    dy = y_pred - y_true
-    return K.mean((1.0 - tau) * K.relu(dy) + tau * K.relu(-dy), axis=-1)
-
-
-def quantile_loss(y_true, y_pred, taus):
-    """
-    The quantiles loss for a list of quantiles. Sums up the error contribution
-    from the each of the quantile loss functions.
-    """
-    e = skewed_absolute_error(
-        K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0])
-    for i, tau in enumerate(taus[1:]):
-        e += skewed_absolute_error(K.flatten(y_true),
-                                   K.flatten(y_pred[:, i + 1]),
-                                   tau)
-    return e
-
-
-class QuantileLoss:
-    """
-    Wrapper class for the quantile error loss function.
A class is used here - to allow the implementation of a custom `__repr` function, so that the - loss function object can be easily loaded using `keras.model.load`. - - Attributes: - - quantiles: List of quantiles that should be estimated with - this loss function. - - """ - - def __init__(self, quantiles): - self.__name__ = "QuantileLoss" - self.quantiles = quantiles - - def __call__(self, y_true, y_pred): - return quantile_loss(y_true, y_pred, self.quantiles) - - def __repr__(self): - return "QuantileLoss(" + repr(self.quantiles) + ")" - ################################################################################ -# Keras Interface Classes +# Set the backend ################################################################################ - -class TrainingGenerator: - """ - This Keras sample generator takes the noise-free training data - and adds independent Gaussian noise to each of the components - of the input. - - Attributes: - - x_train: The training input, i.e. the brightness temperatures - measured by the satellite. - y_train: The training output, i.e. the value of the retrieval - quantity. - x_mean: A vector containing the mean of each input component. - x_sigma: A vector containing the standard deviation of each - component. - batch_size: The size of a training batch. - """ - - def __init__(self, x_train, x_mean, x_sigma, y_train, sigma_noise, batch_size): - self.bs = batch_size - - self.x_train = x_train - self.x_mean = x_mean - self.x_sigma = x_sigma - self.y_train = y_train - self.sigma_noise = sigma_noise - - self.indices = np.random.permutation(x_train.shape[0]) - self.i = 0 - - def __iter__(self): - logger.info("iter...") - return self - - def __next__(self): - inds = self.indices[np.arange(self.i * self.bs, - (self.i + 1) * self.bs) - % self.indices.size] - x_batch = np.copy(self.x_train[inds, :]) - if not self.sigma_noise is None: - x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise - x_batch = (x_batch - self.x_mean) / self.x_sigma - y_batch = self.y_train[inds] - - self.i = self.i + 1 - - # Shuffle training set after each epoch. - if self.i % (self.x_train.shape[0] // self.bs) == 0: - self.indices = np.random.permutation(self.x_train.shape[0]) - - return (x_batch, y_batch) - -# TODO: Make y-noise argument optional - - -class AdversarialTrainingGenerator: - """ - This Keras sample generator takes the noise-free training data - and adds independent Gaussian noise to each of the components - of the input. - - Attributes: - - x_train: The training input, i.e. the brightness temperatures - measured by the satellite. - y_train: The training output, i.e. the value of the retrieval - quantity. - x_mean: A vector containing the mean of each input component. - x_sigma: A vector containing the standard deviation of each - component. - batch_size: The size of a training batch. 
- """ - - def __init__(self, - x_train, - x_mean, - x_sigma, - y_train, - sigma_noise, - batch_size, - input_gradients, - eps): - self.bs = batch_size - - self.x_train = x_train - self.x_mean = x_mean - self.x_sigma = x_sigma - self.y_train = y_train - self.sigma_noise = sigma_noise - - self.indices = np.random.permutation(x_train.shape[0]) - self.i = 0 - - # compile gradient function - bs2 = self.bs // 2 - - self.input_gradients = input_gradients - self.eps = eps - - def __iter__(self): - logger.info("iter...") - return self - - def __next__(self): - - if self.i == 0: - inds = np.random.randint(0, self.x_train.shape[0], self.bs) - - x_batch = np.copy(self.x_train[inds, :]) - if (self.sigma_noise): - x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise - - x_batch = (x_batch - self.x_mean) / self.x_sigma - y_batch = self.y_train[inds] - - else: - - bs2 = self.bs // 2 - inds = np.random.randint(0, self.x_train.shape[0], bs2) - - x_batch = np.zeros((self.bs, self.x_train.shape[1])) - y_batch = np.zeros((self.bs, 1)) - - x_batch[:bs2, :] = np.copy(self.x_train[inds, :]) - if (self.sigma_noise): - x_batch[:bs2, :] += np.random.randn(bs2, self.x_train.shape[1]) \ - * self.sigma_noise - x_batch[:bs2, :] = (x_batch[:bs2, :] - self.x_mean) / self.x_sigma - y_batch[:bs2, :] = self.y_train[inds].reshape(-1, 1) - x_batch[bs2:, :] = x_batch[:bs2, :] - y_batch[bs2:, :] = y_batch[:bs2, :] - - if (self.i > 10): - grads = self.input_gradients( - [x_batch[:bs2, :], y_batch[:bs2, :], [1.0]])[0] - x_batch[bs2:, :] += self.eps * np.sign(grads) - - self.i = self.i + 1 - return (x_batch, y_batch) - - -# TODO: Make y-noise argument optional -class ValidationGenerator: - """ - This Keras sample generator is similar to the training generator - only that it returns the whole validation set and doesn't perform - any randomization. - - Attributes: - - x_val: The validation input, i.e. the brightness temperatures - measured by the satellite. - y_val: The validation output, i.e. the value of the retrieval - quantity. - x_mean: A vector containing the mean of each input component. - x_sigma: A vector containing the standard deviation of each - component. 
- """ - - def __init__(self, x_val, x_mean, x_sigma, y_val, sigma_noise): - self.x_val = x_val - self.x_mean = x_mean - self.x_sigma = x_sigma - - self.y_val = y_val - - self.sigma_noise = sigma_noise - - def __iter__(self): - return self - - def __next__(self): - x_val = np.copy(self.x_val) - if not self.sigma_noise is None: - x_val += np.random.randn(*self.x_val.shape) * self.sigma_noise - x_val = (x_val - self.x_mean) / self.x_sigma - return (x_val, self.y_val) - - -class LRDecay(keras.callbacks.Callback): +try: + import typhon.retrieval.qrnn.models.keras as keras + backend = keras +except Exception as e: + raise e + try: + import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch + except: + raise Exception("Couldn't import neither Keras nor Pytorch " + "one of them must be available to use the QRNN" + " module.") + +def set_backend(name): + global backend + if name == "keras": + try: + import typhon.retrieval.qrnn.models.keras as keras + backend = keras + except Exception as e: + raise Exception("The following error occurred while trying " + " to import keras: ", e) + elif name == "pytorch": + try: + import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch + except Exception as e: + raise Exception("The following error occurred while trying " + " to import pytorch: ", e) + else: + raise Exception("\"{}\" is not a supported backend.".format(name)) + +def create_model(input_dim, + output_dim, + arch): """ - The LRDecay class implements the Keras callback interface and reduces - the learning rate according to validation loss reduction. - - Attributes: - - lr_decay: The factor c > 1.0 by which the learning rate is - reduced. - lr_minimum: The training is stopped when this learning rate - is reached. - convergence_steps: The number of epochs without validation loss - reduction required to reduce the learning rate. - + Creates a fully-connected neural network from a tuple + describing its architecture. + + Args: + input_dim(int): Number of input features. + output_dim(int): Number of output features. + arch: Tuple (d, w, a) containing the depth, i.e. number of + hidden layers, and width of the hidden layers, i. e. + the number of neurons in them, and the name of the + activation function as string. + Return: + Depending on the available backends, a fully-connected + keras or pytorch model, with the requested number of hidden + layers and neurons in them. 
""" - - def __init__(self, model, lr_decay, lr_minimum, convergence_steps): - self.model = model - self.lr_decay = lr_decay - self.lr_minimum = lr_minimum - self.convergence_steps = convergence_steps - self.steps = 0 - - def on_train_begin(self, logs={}): - self.losses = [] - self.steps = 0 - self.min_loss = 1e30 - - def on_epoch_end(self, epoch, logs={}): - self.losses += [logs.get('val_loss')] - if not self.losses[-1] < self.min_loss: - self.steps = self.steps + 1 - else: - self.steps = 0 - if self.steps > self.convergence_steps: - lr = keras.backend.get_value(self.model.optimizer.lr) - keras.backend.set_value( - self.model.optimizer.lr, lr / self.lr_decay) - self.steps = 0 - logger.info("\n Reduced learning rate to " + str(lr)) - - if lr < self.lr_minimum: - self.model.stop_training = True - - self.min_loss = min(self.min_loss, self.losses[-1]) + return backend.FullyConnected(input_dim, output_dim, arch) ################################################################################ -# QRNN +# QRNN class ################################################################################ - class QRNN: r""" Quantile Regression Neural Network (QRNN) - This class implements quantile regression neural networks and can be used - to estimate quantiles of the posterior distribution of remote sensing - retrievals. + This class provides a high-level implementation of quantile regression + neural networks. They can be used to estimate quantiles of the posterior + distribution of remote sensing retrievals. - Internally, the QRNN uses a feed-forward neural network that is trained - to minimize the quantile loss function + The QRNN uses a generic neural network model, that is trained to minimize + the quantile loss function .. math:: \mathcal{L}_\tau(y_\tau, y_{true}) = \begin{cases} (1 - \tau)|y_\tau - y_{true}| & \text{ if } y_\tau < y_\text{true} \\ \tau |y_\tau - y_\text{true}| & \text{ otherwise, }\end{cases} - where :math:`x_\text{true}` is the expected value of the retrieval quantity + where :math:`x_\text{true}` is the true value of the retrieval quantity and and :math:`x_\tau` is the predicted quantile. The neural network has one output neuron for each quantile to estimate. - For the training, this implementation provides custom data generators that - can be used to add Gaussian noise to the training data as well as adversarial - training using the fast gradient sign method. - - This implementation also provides functionality to use an ensemble of networks - instead of just a single network to predict the quantiles. - - .. note:: For the QRNN I am using :math:`x` to denote the input vector and - :math:`y` to denote the output. While this is opposed to typical - inverse problem notation, it is inline with machine learning - notation and felt more natural for the implementation. If this - annoys you, I am sorry. But the other way round it would have - annoyed other people and in particular me. + The QRNN class provides a generic QRNN implementation in the sense that it + does not assumed a fixed neural network infrastructure. Instead, this + functionality is off-loaded to a model object, which can be an arbitrary + regression model such as a fully-connected or a convolutional network. + A range of different models are provided in the typhon.retrieval.qrnn.models + module. This QRNN class just implements high-level operation on the QRNN + output while training and prediction are delegated to the model object. 
For + details on the respective implementation refer to the documentation of the + corresponding model class. + + .. note:: For the QRNN implementation :math:`x` is used to denote the input + vector and :math:`y` to denote the output. While this is opposed + to typical inverse problem notation, it is in line with machine + learning notation and felt more natural for the implementation. + If this annoys you, I am sorry. But the other way round it would + have annoyed other people and in particular me. Attributes: - input_dim (int): - The input dimension of the neural network, i.e. the dimension of the - measurement vector. - quantiles (numpy.array): The 1D-array containing the quantiles :math:`\tau \in [0, 1]` that the network learns to predict. - depth (int): - The number layers in the network excluding the input layer. - - width (int): - The width of the hidden layers in the network. - - activation (str): - The name of the activation functions to use in the hidden layers - of the network. - - models (list of keras.models.Sequential): - The ensemble of Keras neural networks used for the quantile regression - neural network. + model: The model object that implements the actual neural network + regressor. """ def __init__(self, - input_dim, + input_dimension, quantiles, model=(3, 128, "relu"), ensemble_size=1, @@ -379,57 +123,26 @@ def __init__(self, Create a QRNN model. Arguments: - input_dim(int): The dimension of the measurement space, i.e. the number - of elements in a single measurement vector y - + of elements in a single measurement vector y quantiles(np.array): 1D-array containing the quantiles to estimate of - the posterior distribution. Given as fractions - within the range [0, 1]. - - depth(int): The number of hidden layers in the neural network to - use for the regression. Default is 3, i.e. three hidden - plus input and output layer. - - width(int): The number of neurons in each hidden layer. - - activation(str): The name of the activation functions to use. Default - is "relu", for rectified linear unit. See - `this `_ link for - available functions. - - **kwargs: Additional keyword arguments are passed to the constructor - call `keras.layers.Dense` of the hidden layers, which can - for example be used to add regularization. For more info consult - `Keras documentation. `_ + the posterior distribution. Given as fractions + within the range [0, 1]. + model: A (possible trained) model instance or a tuple + :code:`(d, w, act)` describing the architecture of a + fully-connected neural network with d hidden layers + with w neurons and act activation functions. + ensemble_size: The size of the ensemble if an ensemble model + should be used. """ - self.input_dim = input_dim + self.input_dimension = input_dimension self.quantiles = np.array(quantiles) + self.backend = backend.__name__ - try: - from typhon.retrieval.qrnn.backends.keras import KerasQRNN - model = KerasQRNN(input_dim, quantiles, model, ensemble_size) - except: - from typhon.retrieval.qrnn.backends.pytorch import PytorchQRNN - model = PytorchQRNN(input_dim, quantiles, model, ensemble_size) - - - - #try: - # from typhon.retrieval.qrnn.backends.keras import KerasQRNN - # model = KerasQRNN(input_dim, quantiles, model, ensemble_size) - #except: - # try: - # from typhon.retrieval.qrnn.backends.pytorch import QRNNPytorch - # model = QRNNPytorch(input_dim, quantiles, model) - # except: - # model = None - #if model is None: - # raise Exception("Could not create model backend. 
Please make sure " - # "that the provided architecture string is either a " - # "valid architecture tuple, a keras model order a " - # "pytorch module.") - self.model = model + if type(model) == tuple: + self.model = backend.FullyConnected(self.input_dimension, + self.quantiles, + model) def cross_validation(self, x_train, @@ -529,95 +242,11 @@ def cross_validation(self, return (np.mean(results, axis=0), np.std(results, axis=0)) def fit(self, *args, **kwargs): - r""" - Train the QRNN on given training data. - - The training uses an internal validation set to monitor training - progress. This can be either split at random from the training - data (see `training_fraction` argument) or provided explicitly - using the `x_val` and `y_val` parameters - - Training of the QRNN is performed using stochastic gradient descent - (SGD). The learning rate is adaptively reduced during training when - the loss on the internal validation set has not been reduced for a - certain number of epochs. - - Two data augmentation techniques can be used to improve the - calibration of the QRNNs predictions. The first one adds Gaussian - noise to training batches before they are passed to the network. - The noise statistics are defined using the `sigma_noise` argument. - The second one is adversarial training. If adversarial training is - used, half of each training batch consists of adversarial samples - generated using the fast gradient sign method. The strength of the - perturbation is controlled using the `delta_at` parameter. - - During one epoch, each training sample is passed once to the network. - Their order is randomzied between epochs. - - Arguments: - - x_train(np.array): Array of shape `(n, m)` containing n training - samples of dimension m. - - y_train(np.array): Array of shape `(n, )` containing the training - output corresponding to the training data in - `x_train`. - - sigma_noise(None, float, np.array): If not `None` this value is used - to multiply the Gaussian noise - that is added to each training - batch. If None no noise is - added. - x_val(np.array): Array of shape :code:`(n', m)` containing n' validation - inputs that will be used to monitor training loss. Must - be provided in unison with :code:`y_val` or otherwise - will be ignored. - - y_val(np.array): Array of shape :code:`(n')` containing n' validation - outputs corresponding to the inputs in :code:`x_val`. - Must be provided in unison with :code:`x_val` or - otherwise will be ignored. - - adversarial_training(Bool): Whether or not to use adversarial training. - `False` by default. - - delta_at(flaot): Perturbation factor for the fast gradient sign method - determining the strength of the adversarial training - perturbation. `0.01` by default. - - batch_size(float): The batch size to use during training. Defaults to `512`. - - convergence_epochs(int): The number of epochs without decrease in - validation loss before the learning rate - is reduced. Defaults to `10`. - - initial_learning_rate(float): The inital value for the learning - rate. - - learning_rate_decay(float): The factor by which to reduce the - learning rate after no improvement - on the internal validation set was - observed for `convergence_epochs` - epochs. Defaults to `2.0`. - - learning_rate_minimum(float): The minimum learning rate at which - the training is terminated. Defaults - to `1e-6`. - - maximum_epochs(int): The maximum number of epochs to perform if - training does not terminate before. 
- - training_split(float): The ratio `0 < ts < 1.0` of the samples in - to be used as internal validation set. Defaults - to 0.9. - - """ self.model.fit(*args, **kwargs) def train(self, *args, **kwargs): self.model.train(*args, **kwargs) - def predict(self, x): r""" Predict quantiles of the conditional distribution P(y|x). @@ -679,6 +308,12 @@ def cdf(self, x): return y_pred, qs + def calibration(self, *args, **kwargs): + """ + Compute calibration curve for the given dataset. + """ + return self.model.calibration(*args, *kwargs) + def pdf(self, x, use_splines = False): r""" Approximate the posterior probability density function (PDF) for given @@ -891,17 +526,9 @@ def save(self, path): store the model. """ - f = open(path, "wb") - filename = os.path.basename(path) - name = os.path.splitext(filename)[0] - dirname = os.path.dirname(path) - - self.model_files = [] - for i, m in enumerate(self.models): - self.model_files += [name + "_model_" + str(i)] - m.save(os.path.join(dirname, self.model_files[i])) pickle.dump(self, f) + self.model.save(f) f.close() @staticmethod @@ -919,27 +546,30 @@ def load(path): The loaded QRNN object. """ - filename = os.path.basename(path) - dirname = os.path.dirname(path) - - f = open(path, "rb") - qrnn = pickle.load(f) - qrnn.models = [] - for mf in qrnn.model_files: - mf = os.path.basename(mf) - try: - mp = os.path.join(dirname, os.path.basename(mf)) - qrnn.models += [keras.models.load_model(mp, qrnn.custom_objects)] - except: - raise Exception("Error loading the neural network models. " \ - "Please make sure all files created during the"\ - " saving are in this folder.") - f.close() + import importlib + with open(path, 'rb') as f: + qrnn = pickle.load(f) + backend = importlib.import_module(qrnn.backend) + model = backend.load(f) + qrnn.model = model return qrnn + #try: + # from typhon.retrieval.qrnn.backends.keras import KerasQRNN, QuantileLoss + # globals()["QuantileLoss"] = QuantileLoss + # model = KerasQRNN.load(path) + # print(type(model)) + # qrnn = QRNN(model.input_dim, model.quantiles, model.models) + # qrnn.model = model + # globals().pop("QuantileLoss") + # return qrnn + + #except Exception as e: + # raise e + def __getstate__(self): dct = copy.copy(self.__dict__) - dct.pop("models") + dct.pop("model") return dct def __setstate__(self, state): From 18c117f7fbb5a9df20620377ad8bc9cc8de9584a Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Tue, 17 Mar 2020 10:26:50 +0100 Subject: [PATCH 03/19] Don't raise exception when keras can't be imported. --- typhon/retrieval/qrnn/qrnn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index cf45647b..93a802c4 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -14,7 +14,6 @@ import typhon.retrieval.qrnn.models.keras as keras backend = keras except Exception as e: - raise e try: import typhon.retrieval.qrnn.models.pytorch as pytorch backend = pytorch From acb9a44fdb9d7f4ff311b94409348df0ef333eb2 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Wed, 18 Mar 2020 09:15:05 +0100 Subject: [PATCH 04/19] Restructured documentation for retrieval sub-module. 
--- doc/typhon.retrieval.bmci.rst | 11 +++++ doc/typhon.retrieval.mcmc.rst | 11 +++++ doc/typhon.retrieval.oem.rst | 15 +++++++ doc/typhon.retrieval.qrnn.rst | 71 +++++++++++++++++++++++++++++++ doc/typhon.retrieval.scores.rst | 14 ++++++ doc/typhon.retrieval.spareice.rst | 11 +++++ 6 files changed, 133 insertions(+) create mode 100644 doc/typhon.retrieval.bmci.rst create mode 100644 doc/typhon.retrieval.mcmc.rst create mode 100644 doc/typhon.retrieval.oem.rst create mode 100644 doc/typhon.retrieval.qrnn.rst create mode 100644 doc/typhon.retrieval.scores.rst create mode 100644 doc/typhon.retrieval.spareice.rst diff --git a/doc/typhon.retrieval.bmci.rst b/doc/typhon.retrieval.bmci.rst new file mode 100644 index 00000000..c26d48f6 --- /dev/null +++ b/doc/typhon.retrieval.bmci.rst @@ -0,0 +1,11 @@ +Bayesian Monte Carlo Integration (BMCI) +======================================= + +.. automodule:: typhon.retrieval.bmci + +.. currentmodule:: typhon.retrieval.bmci + +.. autosummary:: + :toctree: generated + + BMCI diff --git a/doc/typhon.retrieval.mcmc.rst b/doc/typhon.retrieval.mcmc.rst new file mode 100644 index 00000000..7e2e700d --- /dev/null +++ b/doc/typhon.retrieval.mcmc.rst @@ -0,0 +1,11 @@ +Markov-Chain Monte Carlo (MCMC) +=============================== + +.. automodule:: typhon.retrieval.mcmc + +.. currentmodule:: typhon.retrieval.mcmc + +.. autosummary:: + :toctree: generated + + MCMC diff --git a/doc/typhon.retrieval.oem.rst b/doc/typhon.retrieval.oem.rst new file mode 100644 index 00000000..17795de0 --- /dev/null +++ b/doc/typhon.retrieval.oem.rst @@ -0,0 +1,15 @@ +Optimal estimation method (OEM) +=============================== + +.. automodule:: typhon.retrieval.oem + +.. currentmodule:: typhon.retrieval.oem + +.. autosummary:: + :toctree: generated + + error_covariance_matrix + averaging_kernel_matrix + retrieval_gain_matrix + smoothing_error + retrieval_noise diff --git a/doc/typhon.retrieval.qrnn.rst b/doc/typhon.retrieval.qrnn.rst new file mode 100644 index 00000000..4915733b --- /dev/null +++ b/doc/typhon.retrieval.qrnn.rst @@ -0,0 +1,71 @@ +Quantile regression neural networks (QRNNs) +=========================================== + +An implementation of quantile regression neural networks (QRNNs) developed +specifically for remote sensing applications providing a flexible +interface for simple training and evaluation of QRNNs. + +Overview +-------- + +The QRNN implementation consists of two-layers: + + - A high-level interface provided by the :py:class:`~typhon.retrieval.qrnn.QRNN` + class + - Backend-specific implementations of different neural network architectures + to be used as models by the high-level implementation + + +The QRNN class +-------------- + +The :py:class:`~typhon.retrieval.qrnn.QRNN` class provides the high-level interface for QRNNs. If you want +to train a fully-connected QRNN, this is all you need. The class itself +implments generic functionality related to the evaluation of QRNNs and the post +processing of results such as computing the PSD or the posterior mean. For the +rest it acts as a wrapper around its model attribute, which encapsules all +network- and DL-framework-specific code. + +API documentation +^^^^^^^^^^^^^^^^^ + +.. automodule:: typhon.retrieval.qrnn.qrnn +.. currentmodule:: typhon.retrieval.qrnn.qrnn +.. autosummary:: + :toctree: generated + + QRNN + +Backends +-------- + +Currently both `keras `_ and `pytorch `_ +are supported as backends for neural network. 
The QRNN implementation will +automatically use the one the is available on your system. If both are available +you can choose a specific backend using the :py:meth:`~typhon.retrieval.qrnn.set_backend` function. + + +API documentation +^^^^^^^^^^^^^^^^^ + +.. automodule:: typhon.retrieval.qrnn.qrnn +.. currentmodule:: typhon.retrieval.qrnn.qrnn +.. autosummary:: + :toctree: generated + + set_backend + +Neural network models +--------------------- + + +Pytorch +^^^^^^^ + +.. automodule:: typhon.retrieval.qrnn.models.pytorch +.. currentmodule:: typhon.retrieval.qrnn.models.pytorch +.. autosummary:: + :toctree: generated + + FullyConnected + diff --git a/doc/typhon.retrieval.scores.rst b/doc/typhon.retrieval.scores.rst new file mode 100644 index 00000000..0cd8290d --- /dev/null +++ b/doc/typhon.retrieval.scores.rst @@ -0,0 +1,14 @@ +Retrieval scores +================ + +.. automodule:: typhon.retrieval.scores + +.. currentmodule:: typhon.retrieval.scores + +.. autosummary:: + :toctree: generated + + mape + bias + quantile_score + mean_quantile_score diff --git a/doc/typhon.retrieval.spareice.rst b/doc/typhon.retrieval.spareice.rst new file mode 100644 index 00000000..79420cb8 --- /dev/null +++ b/doc/typhon.retrieval.spareice.rst @@ -0,0 +1,11 @@ +SpareIce +======== + +.. automodule:: typhon.retrieval.spareice + +.. currentmodule:: typhon.retrieval.spareice + +.. autosummary:: + :toctree: generated + + SPAREICE From 8dc3c35513e3905dc3667290cb389979d18d932b Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Wed, 18 Mar 2020 09:15:19 +0100 Subject: [PATCH 05/19] Restructured documentation for retrieval sub-module. --- doc/typhon.retrieval.rst | 96 ++++++------------------------- typhon/retrieval/qrnn/__init__.py | 10 ++-- typhon/retrieval/qrnn/qrnn.py | 48 +++++++++------- 3 files changed, 50 insertions(+), 104 deletions(-) diff --git a/doc/typhon.retrieval.rst b/doc/typhon.retrieval.rst index 46944282..3c487560 100644 --- a/doc/typhon.retrieval.rst +++ b/doc/typhon.retrieval.rst @@ -1,87 +1,29 @@ -retrieval -========= +The retrieval submodule contains implementations of different retrieval methods +as well as functions for the assessment of their performance. -.. automodule:: typhon.retrieval -.. currentmodule:: typhon.retrieval +Retrieval methods +================= -.. autosummary:: - :toctree: generated +.. toctree:: + :maxdepth: 1 -retrieval.bmci -============== + typhon.retrieval.bmci + typhon.retrieval.mcmc + typhon.retrieval.qrnn + typhon.retrieval.oem -.. automodule:: typhon.retrieval.bmci - -.. currentmodule:: typhon.retrieval.bmci - -.. autosummary:: - :toctree: generated - - BMCI - -retrieval.mcmc -============== - -.. automodule:: typhon.retrieval.mcmc - -.. currentmodule:: typhon.retrieval.mcmc - -.. autosummary:: - :toctree: generated - - MCMC - -retrieval.oem -============= - -.. automodule:: typhon.retrieval.oem - -.. currentmodule:: typhon.retrieval.oem - -.. autosummary:: - :toctree: generated - - error_covariance_matrix - averaging_kernel_matrix - retrieval_gain_matrix - smoothing_error - retrieval_noise - -retrieval.qrnn -============== - -.. automodule:: typhon.retrieval.qrnn - -.. currentmodule:: typhon.retrieval.qrnn - -.. autosummary:: - :toctree: generated - - QRNN - -retrieval.scores -================ - -.. automodule:: typhon.retrieval.scores +Retrieval products +================== -.. currentmodule:: typhon.retrieval.scores +.. toctree:: + :maxdepth: 1 -.. 
autosummary:: - :toctree: generated + typhon.retrieval.spareice - mape - bias - quantile_score - mean_quantile_score -retrieval.spareice +Utility functions ================== +.. toctree:: + :maxdepth: 1 -.. automodule:: typhon.retrieval.spareice - -.. currentmodule:: typhon.retrieval.spareice - -.. autosummary:: - :toctree: generated - - SPAREICE + typhon.retrieval.scores diff --git a/typhon/retrieval/qrnn/__init__.py b/typhon/retrieval/qrnn/__init__.py index ae8c7421..e37e9f8c 100644 --- a/typhon/retrieval/qrnn/__init__.py +++ b/typhon/retrieval/qrnn/__init__.py @@ -1,7 +1,7 @@ r""" -An implementation of quantile regression neural networks based on the -`Keras ` deep learning package. -The implementation has been developed specifically for remote sensing -applications and provides a high level interface allowing for simple -training and evaluation of the QRNNs.""" +Quantile regression neural networks (QRNNs) + +The module provides a flexible implementation of QRNNs for remote sensing +retrievals. +""" from typhon.retrieval.qrnn.qrnn import QRNN, set_backend diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 93a802c4..8f586957 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -23,6 +23,14 @@ " module.") def set_backend(name): + """ + Set the neural network package to use as backend. + + The currently available backend are "keras" and "pytorch". + + Args: + name(str): The name of the backend. + """ global backend if name == "keras": try: @@ -52,7 +60,7 @@ def create_model(input_dim, input_dim(int): Number of input features. output_dim(int): Number of output features. arch: Tuple (d, w, a) containing the depth, i.e. number of - hidden layers, and width of the hidden layers, i. e. + hidden layers width of the hidden layers, i. e. the number of neurons in them, and the name of the activation function as string. Return: @@ -71,11 +79,11 @@ class QRNN: Quantile Regression Neural Network (QRNN) This class provides a high-level implementation of quantile regression - neural networks. They can be used to estimate quantiles of the posterior + neural networks. It can be used to estimate quantiles of the posterior distribution of remote sensing retrievals. - The QRNN uses a generic neural network model, that is trained to minimize - the quantile loss function + The :class:`QRNN`` class uses an arbitrary neural network model, that is + trained to minimize the quantile loss function .. math:: \mathcal{L}_\tau(y_\tau, y_{true}) = @@ -87,28 +95,24 @@ class QRNN: has one output neuron for each quantile to estimate. The QRNN class provides a generic QRNN implementation in the sense that it - does not assumed a fixed neural network infrastructure. Instead, this + does not assume a fixed neural network infrastructure. Instead, this functionality is off-loaded to a model object, which can be an arbitrary - regression model such as a fully-connected or a convolutional network. - A range of different models are provided in the typhon.retrieval.qrnn.models - module. This QRNN class just implements high-level operation on the QRNN - output while training and prediction are delegated to the model object. For - details on the respective implementation refer to the documentation of the - corresponding model class. + regression network such as a fully-connected or a convolutional network. A + range of different models are provided in the typhon.retrieval.qrnn.models + module. 
The :class:`QRNN`` class just implements high-level operation on + the QRNN output while training and prediction are delegated to the model + object. For details on the respective implementation refer to the + documentation of the corresponding model class. .. note:: For the QRNN implementation :math:`x` is used to denote the input vector and :math:`y` to denote the output. While this is opposed - to typical inverse problem notation, it is in line with machine - learning notation and felt more natural for the implementation. - If this annoys you, I am sorry. But the other way round it would - have annoyed other people and in particular me. + to inverse problem notation typically used for retrievals, it is + in line with machine learning notation and felt more natural for + the implementation. If this annoys you, I am sorry. Attributes: - - quantiles (numpy.array): - The 1D-array containing the quantiles :math:`\tau \in [0, 1]` that the - network learns to predict. - + quantiles (numpy.array): The 1D-array containing the quantiles + :math:`\tau \in [0, 1]` that the network learns to predict. model: The model object that implements the actual neural network regressor. """ @@ -129,8 +133,8 @@ def __init__(self, within the range [0, 1]. model: A (possible trained) model instance or a tuple :code:`(d, w, act)` describing the architecture of a - fully-connected neural network with d hidden layers - with w neurons and act activation functions. + fully-connected neural network with :code:`d` hidden layers + with :code:`w` neurons and :code:`act` activation functions. ensemble_size: The size of the ensemble if an ensemble model should be used. """ From ba3bd5f8708ef8745977907a048a97fd5da5e4f0 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Thu, 19 Mar 2020 17:33:51 +0100 Subject: [PATCH 06/19] Added tests for QRNN. --- typhon/retrieval/qrnn/models/keras.py | 436 +++++++++++++++++++++-- typhon/retrieval/qrnn/models/pytorch.py | 272 ++++++++++---- typhon/retrieval/qrnn/qrnn.py | 41 ++- typhon/tests/retrieval/qrnn/test_qrnn.py | 68 ++++ 4 files changed, 713 insertions(+), 104 deletions(-) create mode 100644 typhon/tests/retrieval/qrnn/test_qrnn.py diff --git a/typhon/retrieval/qrnn/models/keras.py b/typhon/retrieval/qrnn/models/keras.py index 58e511e8..28b7aef7 100644 --- a/typhon/retrieval/qrnn/models/keras.py +++ b/typhon/retrieval/qrnn/models/keras.py @@ -1,16 +1,62 @@ import numpy as np +import keras from keras.models import Sequential -from keras.layers import Dense, Activation +from keras.layers import Dense, Activation, deserialize +from keras.optimizers import SGD import keras.backend as K import logging + +def save_model(f, model): + """ + Save keras model. + + Args: + f(:code:`str` or binary stream): Either a path or a binary stream + to store the data to. + model(:code:`keras.models.Models`): The Keras model to save + """ + keras.models.save_model(model, f) + +def load_model(f, quantiles): + """ + Load keras model. + + Args: + f(:code:`str` or binary stream): Either a path or a binary stream + to read the model from + quantiles(:code:`np.ndarray`): Array containing the quantiles + that the model predicts. + + Returns: + The loaded keras model. + """ + # + # This is a bit hacky but seems required to handle + # the custom model classes. 
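+    # keras only serializes the plain Sequential part of the model, so
+    # the custom FullyConnected class is rebuilt here from the
+    # deserialized layers and the quantiles supplied by the caller.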
+ # + def make_fully_connected(layers = None, + **kwargs): + layers = list(map(deserialize, layers)) + input_dimensions = layers[0].batch_input_shape[1] + return FullyConnected(input_dimensions, + quantiles, + (), + layers) + custom_objects = {"FullyConnected" : make_fully_connected, + "QuantileLoss" : QuantileLoss} + model = keras.models.load_model(f, custom_objects=custom_objects) + return model + ################################################################################ # Quantile loss ################################################################################ logger = logging.getLogger(__name__) -def skewed_absolute_error(y_true, y_pred, tau): +def skewed_absolute_error(y_true, + y_pred, + tau): """ The quantile loss function for a given quantile tau: @@ -67,42 +113,378 @@ def __init__(self): def train(self): pass +################################################################################ +# Keras data generators +################################################################################ + +class BatchedDataset: + """ + Keras data loader that batches a given dataset of numpy arryas. + """ + def __init__(self, + training_data, + batch_size): + """ + Create batched dataset. + + Args: + training_data: Tuple :code:`(x, y)` containing the input + and output data as arrays. + batch_size(:code:`int`): The batch size + """ + x, y = training_data + self.x = x + self.y = y + self.bs = batch_size + self.indices = np.random.permutation(x.shape[0]) + self.i = 0 + + def __iter__(self): + logger.info("iter...") + return self + + def __len__(self): + return self.x.shape[0] // self.bs + + def __next__(self): + inds = self.indices[np.arange(self.i * self.bs, + (self.i + 1) * self.bs) + % self.indices.size] + x_batch = np.copy(self.x[inds, :]) + y_batch = self.y[inds] + self.i = self.i + 1 + # Shuffle training set after each epoch. + if self.i % (self.x.shape[0] // self.bs) == 0: + self.indices = np.random.permutation(self.x.shape[0]) + + return (x_batch, y_batch) + +class TrainingGenerator: + """ + This Keras sample generator takes a generator for noise-free training data + and adds independent Gaussian noise to each of the components of the input. + + Attributes: + training_data: Data generator providing the data + sigma_noise: A vector containing the standard deviation of each + component. + """ + def __init__(self, + training_data, + sigma_noise = None): + """ + Args: + training_data: Data generator providing the original (noise-free) + training data. + sigma_noise: Vector the length of the input dimensions specifying + the standard deviation of the noise. + """ + self.training_data = training_data + self.sigma_noise = sigma_noise + + def __iter__(self): + logger.info("iter...") + return self + + def __len__(self): + return len(self.training_data) + + def __next__(self): + x_batch, y_batch = next(self.training_data) + if not self.sigma_noise is None: + x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise + return (x_batch, y_batch) + +class AdversarialTrainingGenerator: + """ + This Keras sample generator takes the noise-free training data + and adds independent Gaussian noise to each of the components + of the input. + + Attributes: + training_data: Training generator to use to generate the input + data + input_gradients: Keras function to compute the gradients of the + network + eps: The perturbation factor. 
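+
+    Example:
+
+        A sketch of how the generator is wired up inside
+        :code:`KerasModel.train` (the names are the ones used there):
+
+        >>> generator = TrainingGenerator(training_data, sigma_noise)
+        >>> generator = AdversarialTrainingGenerator(generator,
+        ...                                          input_gradients,
+        ...                                          delta_at)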
+ """ + def __init__(self, + training_data, + input_gradients, + eps): + """ + Args: + training_data: Training generator to use to generate the input + data + input_gradients: Keras function to compute the gradients of the + network + eps: The perturbation factor. + """ + self.training_data = training_data + self.input_gradients = input_gradients + self.eps = eps + + def __iter__(self): + logger.info("iter...") + return self + + def __len__(self): + return len(self.training_data) + + def __next__(self): + if self.i % 2 == 0: + x_batch, y_batch = next(self.training_data) + self.x_batch = x_batch + self.y_batch = y_batch + else: + x_batch = self.x_batch + y_batch = self.y_batch + grads = self.input_gradients([x_batch, y_batch, 1.0]) + x_batch += self.eps * np.sign(grads) + + self.i = self.i + 1 + return x_batch, y_batch + +class ValidationGenerator: + """ + This Keras sample generator is similar to the training generator + only that it returns the whole validation set and doesn't perform + any randomization. + + Attributes: + + x_val: The validation input, i.e. the brightness temperatures + measured by the satellite. + y_val: The validation output, i.e. the value of the retrieval + quantity. + x_mean: A vector containing the mean of each input component. + x_sigma: A vector containing the standard deviation of each + component. + """ + + def __init__(self, + validation_data, + sigma_noise): + self.validation_data = validation_data + self.sigma_noise = sigma_noise + + def __iter__(self): + return self + + def __next__(self): + x_val, y_val = next(self.validation_data) + if not self.sigma_noise is None: + x_val += np.random.randn(*self.x_val.shape) * self.sigma_noise + return (x_val, self.y_val) + +################################################################################ +# LRDecay +################################################################################ + +class LRDecay(keras.callbacks.Callback): + """ + The LRDecay class implements the Keras callback interface and reduces + the learning rate according to validation loss reduction. + + Attributes: + + lr_decay: The factor c > 1.0 by which the learning rate is + reduced. + lr_minimum: The training is stopped when this learning rate + is reached. + convergence_steps: The number of epochs without validation loss + reduction required to reduce the learning rate. + + """ + + def __init__(self, model, lr_decay, lr_minimum, convergence_steps): + self.model = model + self.lr_decay = lr_decay + self.lr_minimum = lr_minimum + self.convergence_steps = convergence_steps + self.steps = 0 + + def on_train_begin(self, logs={}): + self.losses = [] + self.steps = 0 + self.min_loss = 1e30 + + def on_epoch_end(self, epoch, logs={}): + loss = logs.get('val_loss') + if loss is None: + loss = logs.get("loss") + self.losses += [loss] + if not self.losses[-1] < self.min_loss: + self.steps = self.steps + 1 + else: + self.steps = 0 + if self.steps > self.convergence_steps: + lr = keras.backend.get_value(self.model.optimizer.lr) + keras.backend.set_value( + self.model.optimizer.lr, lr / self.lr_decay) + self.steps = 0 + logger.info("\n Reduced learning rate to " + str(lr)) + + if lr < self.lr_minimum: + self.model.stop_training = True + + self.min_loss = min(self.min_loss, self.losses[-1]) + +################################################################################ +# QRNN +################################################################################ + +class KerasModel: + r""" + Base class for Keras models. 
+ + This base class provides generic utility function for the training, saving + and evaluation of Keras models. + + Attributes: + input_dimensions (int): The input dimension of the neural network, i.e. + the dimension of the measurement vector. + quantiles (numpy.array): The 1D-array containing the quantiles + :math:`\tau \in [0, 1]` that the network learns to predict. + + depth (int): + The number layers in the network excluding the input layer. + + width (int): + The width of the hidden layers in the network. + + activation (str): + The name of the activation functions to use in the hidden layers + of the network. + + models (list of keras.models.Sequential): + The ensemble of Keras neural networks used for the quantile regression + neural network. + """ + + def __init__(self, + input_dimension, + quantiles): + """ + Create a QRNN model. + + Arguments: + input_dimension(int): The dimension of the measurement space, i.e. the number + of elements in a single measurement vector y + + quantiles(np.array): 1D-array containing the quantiles to estimate of + the posterior distribution. Given as fractions + within the range [0, 1]. + """ + self.input_dimension = input_dimension + self.quantiles = np.array(quantiles) + + + def train(self, + training_data, + validation_data = None, + batch_size = 256, + sigma_noise=None, + adversarial_training = False, + delta_at = 0.01, + initial_learning_rate = 1e-2, + momentum = 0.0, + convergence_epochs = 5, + learning_rate_decay = 2.0, + learning_rate_minimum = 1e-6, + maximum_epochs = 200, + training_split = 0.9, + gpu = False): -class FullyConnected(Sequential): + if type(training_data) == tuple: + if not type(training_data[0]) == np.ndarray: + raise ValueError("When training data is provided as tuple" + " (x, y) it must contain numpy arrays.") + training_data = BatchedDataset(training_data, + batch_size) + + if type(validation_data) is tuple: + validation_data = BatchedDataset(validation_data, + batch_size) + + loss = QuantileLoss(self.quantiles) + + # Compile model + self.custom_objects = {loss.__name__: loss} + optimizer = SGD(lr=initial_learning_rate) + self.compile(loss=loss, optimizer=optimizer) + + # + # Setup training generator + # + training_generator = TrainingGenerator(training_data, + sigma_noise) + if adversarial_training: + inputs = [self.input, + self.targets[0], + self.sample_weights[0]] + input_gradients = K.function( + inputs, K.gradients(self.total_loss, self.input)) + training_generator = AdversarialTrainingGenerator(training_generator, + input_gradients, + delta_at) + + if validation_data is None: + validation_generator = None + else: + validation_generator = ValidationGenerator(validation_data, + sigma_noise) + lr_callback = LRDecay(self, + learning_rate_decay, + learning_rate_minimum, + convergence_epochs) + self.fit_generator(training_generator, + steps_per_epoch=len(training_generator), + epochs=maximum_epochs, + validation_data=validation_generator, + validation_steps=1, + callbacks=[lr_callback]) + + +################################################################################ +# Fully-connected network +################################################################################ + +class FullyConnected(KerasModel, Sequential): """ - Convenience class to represent fully connected models with - given number of input and output features and depth and - width of hidden layers. + Keras implementation of fully-connected networks. 
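+
+    Example:
+
+        A minimal sketch (the quantile grid is illustrative):
+
+        >>> import numpy as np
+        >>> quantiles = np.linspace(0.05, 0.95, 10)
+        >>> model = FullyConnected(16, quantiles, (3, 128, "relu"))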
""" def __init__(self, input_dimension, quantiles, arch, - activation = "relu"): + layers = None): """ Create a fully-connected neural network. Args: - input_dimension(int): Number of input features - output_dimension(int): Number of output features - arch(tuple): Tuple (d, w) of d, the number of hidden - layers in the network, and w, the width of the net- - work. - activation(str or None): Activation function to insert between - hidden layers. The given string is passed to the - keras.layers.Activation class. If None no activation function - is used. + input_dimension(:code:`int`): Number of input features + quantiles(:code:`array`): The quantiles to predict given + as fractions within [0, 1]. + arch(tuple): Tuple :code:`(d, w, a)` containing :code:`d`, the + number of hidden layers in the network, :code:`w`, the width + of the network and :code:`a`, the type of activation functions + to be used as string. """ quantiles = np.array(quantiles) output_dimension = quantiles.size - if len(arch) == 0: - layers = [Dense(output_dimension, input_shape=(input_dimension))] - else: - d, w = arch - layers = [Dense(input_dimension, input_shape=(w,))] - for i in range(d - 1): - layers.append(Dense(w, input_shape=(w,))) - if not activation is None: - layers.append(Activation(activation)) - layers.append(Dense(output_dimension, input_shape=(w,))) - super().__init__(layers) + + if layers is None: + if len(arch) == 0: + layers = [Dense(output_dimension, input_shape=(input_dimension))] + else: + d, w, a = arch + layers = [Dense(w, input_shape=(input_dimension,))] + for i in range(d - 1): + layers.append(Dense(w, input_shape=(w,))) + if not a is None: + layers.append(Activation(a)) + layers.append(Dense(output_dimension, input_shape=(w,))) + + KerasModel.__init__(self, input_dimension, quantiles) + Sequential.__init__(self, layers) diff --git a/typhon/retrieval/qrnn/models/pytorch.py b/typhon/retrieval/qrnn/models/pytorch.py index f77835f8..a9349496 100644 --- a/typhon/retrieval/qrnn/models/pytorch.py +++ b/typhon/retrieval/qrnn/models/pytorch.py @@ -9,6 +9,8 @@ import numpy as np from torch import nn from torch import optim +from torch.optim.lr_scheduler import ReduceLROnPlateau +from torch.utils.data import Dataset from tqdm.auto import tqdm activations = {"elu" : nn.ELU, @@ -23,10 +25,85 @@ "softmin" : nn.Softmin} -def load(f): +def save_model(f, model): + """ + Save pytorch model. + + Args: + f(:code:`str` or binary stream): Either a path or a binary stream + to store the data to. + model(:code:`pytorch.nn.Moduel`): The pytorch model to save + """ + torch.save(model, f) + +def load_model(f, quantiles): + """ + Load pytorch model. + + Args: + f(:code:`str` or binary stream): Either a path or a binary stream + to read the model from + quantiles(:code:`np.ndarray`): Array containing the quantiles + that the model predicts. + + Returns: + The loaded pytorch model. + """ model = torch.load(f) return model +def handle_input(data, device = None): + """ + Handle input data. + + This function handles data supplied + + - as tuple of :code:`np.ndarray` + - a single :code:`np.ndarray` + - torch :code:`dataloader` + + If a numpy array is provided it is converted to a torch tensor + so that it can be fed into a pytorch model. 
+ """ + if type(data) == tuple: + x, y = data + x = torch.tensor(x, dtype=torch.float) + y = torch.tensor(y, dtype=torch.float) + if not device is None: + x = x.to(device) + y = y.to(device) + return x, y + if type(data) == np.ndarray: + x = torch.tensor(data, dtype=torch.float) + if not device is None: + x = x.to(device) + return x + else: + return data + +class BatchedDataset(Dataset): + """ + Batches an un-bactched dataset. + """ + def __init__(self, x, y, batch_size): + self.x = x + self.y = y + self.batch_size = batch_size + + def __len__(self): + # This is required because x and y are tensors and don't throw these + # errors themselves. + return self.x.shape[0] // self.batch_size + + def __getitem__(self, i): + if i >= len(self): + raise IndexError() + i_start = i * self.batch_size + i_end = (i + 1) * self.batch_size + x = self.x[i_start : i_end] + y = self.y[i_start : i_end] + return (x, y) + ################################################################################ # Quantile loss ################################################################################ @@ -130,12 +207,12 @@ def train(self, *args, **kwargs): """ Train the network. - This trains the network for the given number of epochs using the provided - training and validation data. + This trains the network for the given number of epochs using the + provided training and validation data. - If desired, the training can be augmented using adversarial training. In this - case the network is additionally trained with an adversarial batch of examples - in each step of the training. + If desired, the training can be augmented using adversarial training. + In this case the network is additionally trained with an adversarial + batch of examples in each step of the training. Arguments: training_data: pytorch dataloader providing the training data @@ -144,38 +221,93 @@ def train(self, *args, **kwargs): adversarial_training: whether or not to use adversarial training eps_adv: The scaling factor to use for adversarial training. 
""" - if len(args) < 2: - return nn.Sequential.train(self, *args) - - training_data, validation_data = args - n_epochs = kwargs.get("n_epochs", 1) - adversarial_training = kwargs.get("adversarial_training", False) - eps_adv = kwargs.get("eps_adv", 1e-6) - lr = kwargs.get("lr", 1e-2) - momentum = kwargs.get("momentum", 0.0) - gpu = kwargs.get("gpu", False) - - self.optimizer = optim.SGD(self.parameters(), - lr = lr, - momentum = momentum) - self.train() - + # Handle overload of train() method + if len(args) < 1 or (len(args) == 1 and type(args[0]) == bool): + return nn.Sequential.train(self, *args, **kwargs) + + # + # Parse training arguments + # + + training_data = args[0] + arguments = {"validation_data" : None, + "batch_size" : 256, + "sigma_noise" : None, + "adversarial_training" : False, + "delta_at" : 0.01, + "initial_learning_rate" : 1e-2, + "momentum" : 0.0, + "convergence_epochs" : 5, + "learning_rate_decay" : 2.0, + "learning_rate_minimum" : 1e-6, + "maximum_epochs" : 1, + "training_split" : 0.9, + "gpu" : False} + argument_names = arguments.keys() + for a, n in zip(args[1:], argument_names): + arguments[n] = a + for k in kwargs: + if k in arguments: + arguments[k] = kwargs[k] + else: + raise ValueError("Unknown argument to {}.".print(k)) + + validation_data = arguments["validation_data"] + batch_size = arguments["batch_size"] + sigma_noise = arguments["sigma_noise"] + adversarial_training = arguments["adversarial_training"] + delta_at = arguments["delta_at"] + initial_learning_rate = arguments["initial_learning_rate"] + convergence_epochs = arguments["convergence_epochs"] + learning_rate_decay = arguments["learning_rate_decay"] + learning_rate_minimum = arguments["learning_rate_minimum"] + maximum_epochs = arguments["maximum_epochs"] + training_split = arguments["training_split"] + gpu = arguments["gpu"] + momentum = arguments["momentum"] + + # + # Determine device to use + # if torch.cuda.is_available() and gpu: - dev = torch.device("cuda") + device = torch.device("cuda") else: - dev = torch.device("cpu") - self.to(dev) + device = torch.device("cpu") + self.to(device) - self.criterion.to(dev) + # + # Handle input data + # + try: + x, y = handle_input(training_data, device) + training_data = BatchedDataset(x, y, batch_size) + except: + pass - for i in range(n_epochs): + self.train() + self.optimizer = optim.SGD(self.parameters(), + lr = initial_learning_rate, + momentum = momentum) + self.criterion.to(device) + scheduler = ReduceLROnPlateau(self.optimizer, + factor=1.0 / learning_rate_decay, + patience=convergence_epochs, + min_lr=learning_rate_minimum) + training_errors = [] + validation_errors = [] + + # + # Training loop + # + + for i in range(maximum_epochs): err = 0.0 n = 0 iterator = tqdm(enumerate(training_data), total = len(training_data)) for j, (x, y) in iterator: - x = x.to(dev) - y = y.to(dev) + x = x.to(device) + y = y.to(device) shape = x.size() shape = (shape[0], 1) + shape[2:] @@ -192,7 +324,7 @@ def train(self, *args, **kwargs): if adversarial_training: self.optimizer.zero_grad() - x_adv = self._make_adversarial_samples(x, y, eps_adv) + x_adv = self._make_adversarial_samples(x, y, delta_at) y_pred = self(x) c = self.criterion(y_pred, y) c.backward() @@ -202,41 +334,42 @@ def train(self, *args, **kwargs): iterator.set_postfix({"Training errror" : err / n}) # Save training error - self.training_errors.append(err / n) + training_errors.append(err / n) val_err = 0.0 n = 0 + if not validation_data is None: + print(validation_data) + for x, y in validation_data: + x = 
x.to(device)
+                    y = y.to(device)
 
+                    shape = x.size()
+                    shape = (shape[0], 1) + shape[2:]
+                    y = y.reshape(shape)
 
-                y_pred = self(x)
-                c = self.criterion(y_pred, y)
+                    y_pred = self(x)
+                    c = self.criterion(y_pred, y)
 
-                val_err += c.item() * x.size()[0]
-                n += x.size()[0]
+                    val_err += c.item() * x.size()[0]
+                    n += x.size()[0]
+                validation_errors.append(val_err / n)
+                # Reduce the learning rate when the validation loss
+                # stops decreasing.
+                scheduler.step(val_err / n)
 
-            self.validation_errors.append(val_err / n)
+        self.training_errors += training_errors
+        self.validation_errors += validation_errors
         self.eval()
+        return {"training_errors" : self.training_errors,
+                "validation_errors" : self.validation_errors}
 
-    def predict(self, x):
-        """
-        Predict quantiles for given input.
-
-        Args:
-            x: 2D tensor containing the inputs for which to predict
-                the quantiles.
-
-        Returns:
-            tensor: 2D tensor containing the predicted quantiles along
-                the last dimension.
-        """
-        return self(x)
+    def predict(self, x, gpu=False):
+        """
+        Predict quantiles for given input.
+
+        Args:
+            x: 2D tensor or :code:`np.ndarray` containing the inputs for
+                which to predict the quantiles.
+            gpu: Whether to perform the prediction on the GPU, if one
+                is available.
+
+        Returns:
+            tensor: 2D tensor containing the predicted quantiles along
+                the last dimension.
+        """
+        if torch.cuda.is_available() and gpu:
+            device = torch.device("cuda")
+        else:
+            device = torch.device("cpu")
+        x = handle_input(x, device)
+        self.to(device)
+        return self(x).detach()
 
     def calibration(self,
                     data,
@@ -261,7 +394,8 @@ def calibration(self,
 
         n_intervals = self.quantiles.size // 2
         qs = self.quantiles
-        intervals = np.array([q_r - q_l for (q_l, q_r) in zip(qs, reversed(qs))])[:n_intervals]
+        intervals = np.array([q_r - q_l for (q_l, q_r)
+                              in zip(qs, reversed(qs))])[:n_intervals]
         counts = np.zeros(n_intervals)
 
         total = 0.0
@@ -316,11 +450,13 @@ def load(path):
         qrnn.load_state_dict(state["network_state"])
         qrnn.optimizer.load_state_dict(state["optimizer_state"])
         return qrnn
 
+################################################################################
+# Fully-connected network
+################################################################################
+
 class FullyConnected(PytorchModel, nn.Sequential):
     """
-    Convenience class to represent fully connected models with
-    given number of input and output features and depth and
-    width of hidden layers.
+    Pytorch implementation of a fully-connected QRNN model.
     """
     def __init__(self,
                  input_dimension,
                  quantiles,
                  arch):
 
         """
         Create a fully-connected neural network.
 
         Args:
-            input_dimension(int): Number of input features
-            quantiles(array): The quantiles to predict, given as fractions
-                within [0, 1].
-            arch(tuple): Tuple (d, w, act) of d, the number of hidden
-                layers in the network, w, the width of the network, and
-                act, the activation function to use (name or None).
+            input_dimension(:code:`int`): Number of input features
+            quantiles(:code:`array`): The quantiles to predict, given
+                as fractions within [0, 1].
+            arch(tuple): Tuple :code:`(d, w, a)` containing :code:`d`, the
+                number of hidden layers in the network, :code:`w`, the width
+                of the network, and :code:`a`, the name of the activation
+                function to use, given as string.
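+
+        Example:
+
+            A minimal sketch (the quantile grid is illustrative):
+
+            >>> import numpy as np
+            >>> quantiles = np.linspace(0.05, 0.95, 10)
+            >>> model = FullyConnected(16, quantiles, (3, 128, "relu"))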
""" PytorchModel.__init__(self, input_dimension, quantiles) output_dimension = quantiles.size @@ -354,7 +492,3 @@ def __init__(self, layers.append(act()) layers.append(nn.Linear(w, output_dimension)) nn.Sequential.__init__(self, *layers) - - def save(self, f): - torch.save(self, f) - diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 8f586957..9c235443 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -2,6 +2,7 @@ import logging import os import pickle +import importlib import numpy as np from scipy.interpolate import CubicSpline @@ -244,11 +245,35 @@ def cross_validation(self, logger.info(results) return (np.mean(results, axis=0), np.std(results, axis=0)) - def fit(self, *args, **kwargs): - self.model.fit(*args, **kwargs) - - def train(self, *args, **kwargs): - self.model.train(*args, **kwargs) + def train(self, + training_data, + validation_data = None, + batch_size = 256, + sigma_noise = None, + adversarial_training = False, + delta_at = 0.01, + initial_learning_rate = 1e-2, + momentum = 0.0, + convergence_epochs = 5, + learning_rate_decay = 2.0, + learning_rate_minimum = 1e-6, + maximum_epochs = 200, + training_split = 0.9, + gpu = True): + self.model.train(training_data, + validation_data, + batch_size, + sigma_noise, + adversarial_training, + delta_at, + initial_learning_rate, + momentum, + convergence_epochs, + learning_rate_decay, + learning_rate_minimum, + maximum_epochs, + training_split, + gpu) def predict(self, x): r""" @@ -531,7 +556,8 @@ def save(self, path): """ f = open(path, "wb") pickle.dump(self, f) - self.model.save(f) + backend = importlib.import_module(self.backend) + backend.save_model(self.model, f) f.close() @staticmethod @@ -549,11 +575,10 @@ def load(path): The loaded QRNN object. """ - import importlib with open(path, 'rb') as f: qrnn = pickle.load(f) backend = importlib.import_module(qrnn.backend) - model = backend.load(f) + model = backend.load_model(f, qrnn.quantiles) qrnn.model = model return qrnn diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py new file mode 100644 index 00000000..3dea2241 --- /dev/null +++ b/typhon/tests/retrieval/qrnn/test_qrnn.py @@ -0,0 +1,68 @@ +from typhon.retrieval.qrnn import QRNN, set_backend +import numpy as np +import os +import tempfile + +# +# Import available backends. +# + +backends = [] +try: + import typhon.retrieval.qrnn.models.keras + backends += ["keras"] +except: + pass + +try: + import typhon.retrieval.qrnn.models.pytorch + backends += ["pytorch"] +except: + pass + +class TestQrnn: + + def setup_method(self): + dir = os.path.dirname(os.path.realpath(__file__)) + path = os.path.join(dir, "test_data") + x_train = np.load(os.path.join(path, "x_train.npy")) + x_mean = np.mean(x_train, keepdims = True) + x_sigma = np.std(x_train, keepdims = True) + self.x_train = (x_train - x_mean) / x_sigma + self.y_train = np.load(os.path.join(path, "y_train.npy")) + + def test_qrnn(self): + """ + Test training of QRNNs using numpy arrays as input. + """ + for backend in backends: + set_backend(backend) + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + qrnn.train((self.x_train, self.y_train), + maximum_epochs = 1) + qrnn.predict(self.x_train) + + def save_qrnn(self): + """ + Test saving and loading of QRNNs. 
+ """ + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + f = tempfile.NamedTemporaryFile() + qrnn.save(f.name) + qrnn_loaded = QRNN.load(f.name) + + x_pred = qrnn.predict(self.x_train) + x_pred_loaded = qrnn.predict(self.x_train) + + if not type(x_pred) == np.ndarray: + x_pred = x_pred.detach() + + assert(np.allclose(x_pred, x_pred_loaded)) + + +__file__ = "./bla.py" +test = TestQrnn() +test.setup_method() +test.test_qrnn() From c28b81db05ab20e505bc838b9f0bc4d08b2f6ec8 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Fri, 20 Mar 2020 16:39:05 +0100 Subject: [PATCH 07/19] Added unet implementation. --- typhon/retrieval/qrnn/__init__.py | 2 +- .../retrieval/qrnn/models/pytorch/__init__.py | 10 + .../models/{pytorch.py => pytorch/common.py} | 62 +----- .../qrnn/models/pytorch/fully_connected.py | 46 ++++ typhon/retrieval/qrnn/models/pytorch/unet.py | 209 ++++++++++++++++++ typhon/retrieval/qrnn/qrnn.py | 43 +++- typhon/tests/retrieval/qrnn/test_qrnn.py | 24 +- 7 files changed, 336 insertions(+), 60 deletions(-) create mode 100644 typhon/retrieval/qrnn/models/pytorch/__init__.py rename typhon/retrieval/qrnn/models/{pytorch.py => pytorch/common.py} (88%) create mode 100644 typhon/retrieval/qrnn/models/pytorch/fully_connected.py create mode 100644 typhon/retrieval/qrnn/models/pytorch/unet.py diff --git a/typhon/retrieval/qrnn/__init__.py b/typhon/retrieval/qrnn/__init__.py index e37e9f8c..f69e66f9 100644 --- a/typhon/retrieval/qrnn/__init__.py +++ b/typhon/retrieval/qrnn/__init__.py @@ -4,4 +4,4 @@ The module provides a flexible implementation of QRNNs for remote sensing retrievals. """ -from typhon.retrieval.qrnn.qrnn import QRNN, set_backend +from typhon.retrieval.qrnn.qrnn import QRNN, set_backend, get_backend diff --git a/typhon/retrieval/qrnn/models/pytorch/__init__.py b/typhon/retrieval/qrnn/models/pytorch/__init__.py new file mode 100644 index 00000000..2a00f434 --- /dev/null +++ b/typhon/retrieval/qrnn/models/pytorch/__init__.py @@ -0,0 +1,10 @@ +""" +models +====== + +This module provides an implementation of quantile regression neural networks (QRNNs) +using the pytorch deep learning framework. +""" +from typhon.retrieval.qrnn.models.pytorch.common import BatchedDataset +from typhon.retrieval.qrnn.models.pytorch.fully_connected import FullyConnected +from typhon.retrieval.qrnn.models.pytorch.unet import UNet diff --git a/typhon/retrieval/qrnn/models/pytorch.py b/typhon/retrieval/qrnn/models/pytorch/common.py similarity index 88% rename from typhon/retrieval/qrnn/models/pytorch.py rename to typhon/retrieval/qrnn/models/pytorch/common.py index a9349496..e7ec42a8 100644 --- a/typhon/retrieval/qrnn/models/pytorch.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -1,9 +1,8 @@ """ -models -====== +models.pytorch.common +===================== -This module provides an implementation of quantile regression neural networks (QRNNs) -using the pytorch deep learning framework. +This module provides common functionality required to realize QRNNs in pytorch. """ import torch import numpy as np @@ -83,11 +82,14 @@ def handle_input(data, device = None): class BatchedDataset(Dataset): """ - Batches an un-bactched dataset. + Batches an un-batched dataset. 
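+
+    A minimal usage sketch (array names are hypothetical):
+
+    >>> data = BatchedDataset((x_train, y_train), batch_size=256)
+    >>> x_batch, y_batch = data[0]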
""" - def __init__(self, x, y, batch_size): - self.x = x - self.y = y + def __init__(self, + training_data, + batch_size): + x, y = training_data + self.x = torch.tensor(x, dtype=torch.float) + self.y = torch.tensor(y, dtype=torch.float) self.batch_size = batch_size def __len__(self): @@ -193,6 +195,7 @@ def __init__(self, self.criterion = QuantileLoss(self.quantiles) self.training_errors = [] self.validation_errors = [] + self.backend = "typhon.retrieval.qrnn.models.typhon" def _make_adversarial_samples(self, x, y, eps): self.model.zero_grad() @@ -449,46 +452,3 @@ def load(self, path): qrnn = QRNN(*[state[k] for k in keys]) qrnn.load_state_dict["network_state"] qrnn.optimizer.load_state_dict["optimizer_state"] - -################################################################################ -# Fully-connected network -################################################################################ - -class FullyConnected(PytorchModel, nn.Sequential): - """ - Pytorch implementation of a fully-connected QRNN model. - """ - def __init__(self, - input_dimension, - quantiles, - arch): - - """ - Create a fully-connected neural network. - - Args: - input_dimension(:code:`int`): Number of input features - quantiles(:code:`array`): The quantiles to predict given - as fractions within [0, 1]. - arch(tuple): Tuple :code:`(d, w, a)` containing :code:`d`, the - number of hidden layers in the network, :code:`w`, the width - of the network and :code:`a`, the type of activation functions - to be used as string. - """ - PytorchModel.__init__(self, input_dimension, quantiles) - output_dimension = quantiles.size - self.arch = arch - - if len(arch) == 0: - layers = [nn.Linear(input_dimension, output_dimension)] - else: - d, w, act = arch - if type(act) == str: - act = activations[act] - layers = [nn.Linear(input_dimension, w)] - for i in range(d - 1): - layers.append(nn.Linear(w, w)) - if not act is None: - layers.append(act()) - layers.append(nn.Linear(w, output_dimension)) - nn.Sequential.__init__(self, *layers) diff --git a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py new file mode 100644 index 00000000..32a78e70 --- /dev/null +++ b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py @@ -0,0 +1,46 @@ +from torch import nn +from torch import optim +from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel + +################################################################################ +# Fully-connected network +################################################################################ + +class FullyConnected(PytorchModel, nn.Sequential): + """ + Pytorch implementation of a fully-connected QRNN model. + """ + def __init__(self, + input_dimension, + quantiles, + arch): + + """ + Create a fully-connected neural network. + + Args: + input_dimension(:code:`int`): Number of input features + quantiles(:code:`array`): The quantiles to predict given + as fractions within [0, 1]. + arch(tuple): Tuple :code:`(d, w, a)` containing :code:`d`, the + number of hidden layers in the network, :code:`w`, the width + of the network and :code:`a`, the type of activation functions + to be used as string. 
+ """ + PytorchModel.__init__(self, input_dimension, quantiles) + output_dimension = quantiles.size + self.arch = arch + + if len(arch) == 0: + layers = [nn.Linear(input_dimension, output_dimension)] + else: + d, w, act = arch + if type(act) == str: + act = activations[act] + layers = [nn.Linear(input_dimension, w)] + for i in range(d - 1): + layers.append(nn.Linear(w, w)) + if not act is None: + layers.append(act()) + layers.append(nn.Linear(w, output_dimension)) + nn.Sequential.__init__(self, *layers) diff --git a/typhon/retrieval/qrnn/models/pytorch/unet.py b/typhon/retrieval/qrnn/models/pytorch/unet.py new file mode 100644 index 00000000..09bdf179 --- /dev/null +++ b/typhon/retrieval/qrnn/models/pytorch/unet.py @@ -0,0 +1,209 @@ +import torch +from torch import nn + +from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel + +class Layer(nn.Sequential): + """ + Basic building block of a UNet. Consists of a convolutional + layer followed by an activation layers and an optional batch + norm layer. + + Args: + features_in(:code:`int`): Number of features of input + features_out(:code:`int`): Raw number of output features of the + layer not including skip connections. + batch_norm(:code:`bool`): Whether or not to include a batch norm + layer. + kernel_size(:code:`int`): Kernel size to use for the conv. layer. + activation(:code:`activation`): Activation to use after conv. layer. + skip_connection(:code:`bool`): Whether to include skip connections, i.e. + to include input in layer output. + """ + def __init__(self, + features_in, + features_out, + batch_norm=True, + kernel_size=3, + activation=nn.ReLU, + skip_connection=False): + self._features_in = features_in + self._features_out = features_out + self.skip_connection = skip_connection + + if not activation is None: + modules = [nn.ConstantPad2d(1, 0.0), + nn.Conv2d(features_in, + features_out, + kernel_size), + nn.BatchNorm2d(features_out), + activation()] + else: + modules = [nn.ConstantPad2d(1, 0.0), + nn.Conv2d(features_in, + features_out, + kernel_size), + nn.BatchNorm2d(features_out)] + super().__init__(*modules) + + @property + def features_out(self): + if self.skip_connection: + return self._features_in + self._features_out + else: + return self._features_out + + def forward(self, x): + y = nn.Sequential.forward(self, x) + if self.skip_connection: + y = torch.cat([x, y], dim=1) + return y + +class Block(nn.Sequential): + """ + A block bundles a set of layers. + + Args: + features_in(:code:`int`): The number of input features of the block + features_out(:code:`int`): The number of output features of the block. + depth(:code:`int`): The number of layers of the block + activation(:code:`nn.Module`): Pytorch activation layer to + use. :code:`nn.ReLU` by default. + skip_connection(:code:`str`): Whether or not to insert skip + connections before all layers (:code:`"all"`) or just at + the end (:code:`"end"`). 
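+
+    A worked example of the feature bookkeeping (hypothetical values):
+    with 32 input features, 64 output features and
+    :code:`skip_connection="end"`, the input is concatenated to the
+    output, so
+
+    >>> block = Block(32, 64, depth=2, skip_connection="end")
+    >>> block.features_out
+    96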
+ """ + def __init__(self, + features_in, + features_out, + depth=2, + batch_norm=True, + activation=nn.ReLU, + kernel_size=3, + skip_connection=None): + + self._features_in = features_in + + if skip_connection == "all": + skip_connection_layer = True + self.skip_connection = False + elif skip_connection == "end": + skip_connection_layer = False + self.skip_connection = True + else: + skip_connection_layer = False + self.skip_connection = False + + layers = [] + nf = features_in + for d in range(depth): + layers.append(Layer(nf, + features_out, + activation=activation, + batch_norm=batch_norm, + kernel_size=kernel_size, + skip_connection=skip_connection_layer)) + nf = layers[-1].features_out + + self._features_out = layers[-1].features_out + super().__init__(*layers) + + @property + def features_out(self): + if self.skip_connection: + return self._features_in + self._features_out + else: + return self._features_out + + def forward(self, x): + y = nn.Sequential.forward(self, x) + if self.skip_connection: + y = torch.cat([x, y], dim=1) + return y + + +class DownSampler(nn.Sequential): + def __init__(self): + modules = [nn.MaxPool2d(2)] + super().__init__(*modules) + +class UpSampler(nn.Sequential): + def __init__(self, + features_in, + features_out): + modules = [nn.ConvTranspose2d(features_in, + features_out, + 3, + padding=1, + output_padding=1, + stride=2)] + super().__init__(*modules) + + + +class UNet(PytorchModel, nn.Module): + def __init__(self, + input_features, + quantiles, + n_features=32, + n_levels=4, + skip_connection=None): + + nn.Module.__init__(self) + PytorchModel.__init__(self, input_features, quantiles) + + # Down-sampling blocks + self.down_blocks = nn.ModuleList() + self.down_samplers = nn.ModuleList() + features_in = input_features + features_out = n_features + for i in range(n_levels - 1): + self.down_blocks.append(Block(features_in, + features_out, + skip_connection=skip_connection)) + self.down_samplers.append(DownSampler()) + features_in = self.down_blocks[-1].features_out + features_out = features_out * 2 + + self.center_block = Block(features_in, + features_out, + skip_connection=skip_connection) + + self.up_blocks = nn.ModuleList() + self.up_samplers = nn.ModuleList() + features_in = self.center_block.features_out + features_out = features_out // 2 + for i in range(n_levels - 1): + self.up_samplers.append(UpSampler(features_in, + features_out)) + features_in = features_out + self.down_blocks[(- i - 1)].features_out + self.up_blocks.append(Block(features_in, + features_out, + skip_connection=skip_connection)) + features_out = features_out // 2 + features_in = self.up_blocks[-1].features_out + + self.head = nn.Sequential(nn.Conv2d(features_in, features_in, 1), + nn.ReLU(), + nn.Conv2d(features_in, features_in, 1), + nn.ReLU(), + nn.Conv2d(features_in, quantiles.size, 1)) + + def forward(self, x): + + features = [] + for (b, s) in zip(self.down_blocks, self.down_samplers): + x = b(x) + features.append(x) + x = s(x) + + x = self.center_block(x) + + for (b, u, f) in zip(self.up_blocks, self.up_samplers, features[::-1]): + x = u(x) + x = torch.cat([x, f], 1) + x = b(x) + + self.features = features + + return self.head(x) diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 9c235443..2649820f 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -50,6 +50,34 @@ def set_backend(name): else: raise Exception("\"{}\" is not a supported backend.".format(name)) +def get_backend(name): + """ + Get module object 
corresponding to the short backend name. + + The currently available backend are "keras" and "pytorch". + + Args: + name(str): The name of the backend. + """ + if name == "keras": + try: + import typhon.retrieval.qrnn.models.keras as keras + backend = keras + except Exception as e: + raise Exception("The following error occurred while trying " + " to import keras: ", e) + elif name == "pytorch": + try: + import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch + except Exception as e: + raise Exception("The following error occurred while trying " + " to import pytorch: ", e) + else: + raise Exception("\"{}\" is not a supported backend.".format(name)) + return backend + + def create_model(input_dim, output_dim, arch): @@ -119,7 +147,7 @@ class QRNN: """ def __init__(self, input_dimension, - quantiles, + quantiles=None, model=(3, 128, "relu"), ensemble_size=1, **kwargs): @@ -147,6 +175,19 @@ def __init__(self, self.model = backend.FullyConnected(self.input_dimension, self.quantiles, model) + if quantiles is None: + raise ValueError("If model is given as architecture tuple, the" + " 'quantiles' kwarg must be provided.") + else: + if not quantiles is None: + if not quantiles == model.quantiles: + raise ValueError("Provided quantiles do not match those of " + "the provided model.") + + self.model = model + self.quantiles = model.quantiles + self.backend = model.backend + def cross_validation(self, x_train, diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py index 3dea2241..a9975c50 100644 --- a/typhon/tests/retrieval/qrnn/test_qrnn.py +++ b/typhon/tests/retrieval/qrnn/test_qrnn.py @@ -1,7 +1,8 @@ -from typhon.retrieval.qrnn import QRNN, set_backend +from typhon.retrieval.qrnn import QRNN, set_backend, get_backend import numpy as np import os import tempfile +import importlib # # Import available backends. @@ -43,6 +44,21 @@ def test_qrnn(self): maximum_epochs = 1) qrnn.predict(self.x_train) + def test_qrnn_datasets(self): + """ + Provide data as dataset object instead of numpy arrays. + """ + for backend in backends: + set_backend(backend) + backend = get_backend(backend) + data = backend.BatchedDataset((self.x_train, self.y_train), + 256) + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + qrnn.train(data, + maximum_epochs = 1) + + def save_qrnn(self): """ Test saving and loading of QRNNs. @@ -60,9 +76,3 @@ def save_qrnn(self): x_pred = x_pred.detach() assert(np.allclose(x_pred, x_pred_loaded)) - - -__file__ = "./bla.py" -test = TestQrnn() -test.setup_method() -test.test_qrnn() From af1bda9d3fafa1a3148594f6e44fdfcfe0274e5f Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 23 Mar 2020 15:57:27 +0100 Subject: [PATCH 08/19] Added classification method to QRNN class. --- .../retrieval/qrnn/models/pytorch/__init__.py | 4 ++- .../retrieval/qrnn/models/pytorch/common.py | 7 +++-- .../qrnn/models/pytorch/fully_connected.py | 3 ++- typhon/retrieval/qrnn/qrnn.py | 27 ++++++++++++++++++- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/typhon/retrieval/qrnn/models/pytorch/__init__.py b/typhon/retrieval/qrnn/models/pytorch/__init__.py index 2a00f434..430fd8bb 100644 --- a/typhon/retrieval/qrnn/models/pytorch/__init__.py +++ b/typhon/retrieval/qrnn/models/pytorch/__init__.py @@ -5,6 +5,8 @@ This module provides an implementation of quantile regression neural networks (QRNNs) using the pytorch deep learning framework. 
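+
+A minimal usage sketch (argument values are hypothetical):
+
+>>> import numpy as np
+>>> from typhon.retrieval.qrnn.models.pytorch import UNet
+>>> model = UNet(5, np.linspace(0.05, 0.95, 10), n_features=32, n_levels=4)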
""" -from typhon.retrieval.qrnn.models.pytorch.common import BatchedDataset +from typhon.retrieval.qrnn.models.pytorch.common import (BatchedDataset, + save_model, + load_model) from typhon.retrieval.qrnn.models.pytorch.fully_connected import FullyConnected from typhon.retrieval.qrnn.models.pytorch.unet import UNet diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index e7ec42a8..0999473c 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -195,7 +195,7 @@ def __init__(self, self.criterion = QuantileLoss(self.quantiles) self.training_errors = [] self.validation_errors = [] - self.backend = "typhon.retrieval.qrnn.models.typhon" + self.backend = "typhon.retrieval.qrnn.models.pytorch" def _make_adversarial_samples(self, x, y, eps): self.model.zero_grad() @@ -283,11 +283,10 @@ def train(self, *args, **kwargs): # try: x, y = handle_input(training_data, device) - training_data = BatchedDataset(x, y, batch_size) + training_data = BatchedDataset((x, y), batch_size) except: pass - self.train() self.optimizer = optim.SGD(self.parameters(), lr = initial_learning_rate, @@ -372,7 +371,7 @@ def predict(self, x, gpu=False): device = torch.device("cpu") x = handle_input(x, device) self.to(device) - return self(x).detach() + return self(x).detach().numpy() def calibration(self, data, diff --git a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py index 32a78e70..8a659e35 100644 --- a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py +++ b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py @@ -1,6 +1,7 @@ from torch import nn from torch import optim -from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel +from typhon.retrieval.qrnn.models.pytorch.common import (PytorchModel, + activations) ################################################################################ # Fully-connected network diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 2649820f..4cab3394 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -598,9 +598,34 @@ def save(self, path): f = open(path, "wb") pickle.dump(self, f) backend = importlib.import_module(self.backend) - backend.save_model(self.model, f) + backend.save_model(f, self.model) f.close() + def classify(self, x, threshold): + """ + Classify output based on posterior PDF and given numeric threshold. + + Args: + x: The input data as :code:`np.ndarray` or backend-specific + dataset object. + threshold: The numeric threshold to apply for classification. + """ + y = self.predict(x) + out_shape = y.shape[:1] + (1,) + y.shape[2:] + c = self.quantiles[0] * np.ones(out_shape) + + for i in range(self.quantiles.size - 1): + q_l = y[:, [i]] + q_r = y[:, [i+1]] + inds = np.logical_and(q_l < threshold, + q_r >= threshold) + c[inds] = self.quantiles[i] * (threshold - q_l[inds]) + c[inds] += self.quantiles[i + 1] * (q_r[inds] - threshold) + c[inds] /= (q_r[inds] - q_l[inds]) + + c[threshold > q_r] = self.quantiles[-1] + return 1.0 - c + @staticmethod def load(path): r""" From bffdbd2c3921b2e35fb51c86a1201abac4b9276c Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Tue, 2 Jun 2020 13:05:27 +0200 Subject: [PATCH 09/19] Make posterior estimation work on batches. 
--- .../retrieval/qrnn/models/pytorch/common.py | 7 +-- typhon/retrieval/qrnn/qrnn.py | 43 +++++++++++-------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index 0999473c..0dfbdfae 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -341,7 +341,6 @@ def train(self, *args, **kwargs): val_err = 0.0 n = 0 if not validation_data is None: - print(validation_data) for x, y in validation_data: x = x.to(device) y = y.to(device) @@ -355,7 +354,9 @@ def train(self, *args, **kwargs): val_err += c.item() * x.size()[0] n += x.size()[0] - validation_errors.append(val_err) + validation_errors.append(val_err / n) + iterator.set_postfix({"Validation errror" : val_err / n}) + iterator.update() self.training_errors += training_errors self.validation_errors += validation_errors @@ -410,7 +411,7 @@ def calibration(self, shape = (shape[0], 1) + shape[2:] y = y.reshape(shape) - y_pred = self.predict(x) + y_pred = self(x) y_pred = y_pred.cpu() y = y.cpu() diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 4cab3394..c9da8e8c 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -301,20 +301,20 @@ def train(self, maximum_epochs = 200, training_split = 0.9, gpu = True): - self.model.train(training_data, - validation_data, - batch_size, - sigma_noise, - adversarial_training, - delta_at, - initial_learning_rate, - momentum, - convergence_epochs, - learning_rate_decay, - learning_rate_minimum, - maximum_epochs, - training_split, - gpu) + return self.model.train(training_data, + validation_data, + batch_size, + sigma_noise, + adversarial_training, + delta_at, + initial_learning_rate, + momentum, + convergence_epochs, + learning_rate_decay, + learning_rate_minimum, + maximum_epochs, + training_split, + gpu) def predict(self, x): r""" @@ -365,10 +365,15 @@ def cdf(self, x): values of the posterior CDF :math: `F(x)` in `fs`. """ - y_pred = np.zeros(self.quantiles.size + 2) - y_pred[1:-1] = self.predict(x) - y_pred[0] = 2.0 * y_pred[1] - y_pred[2] - y_pred[-1] = 2.0 * y_pred[-2] - y_pred[-3] + if len(x.shape) > 1: + s = x.shape[:-1] + (self.quantiles.size + 2,) + else: + s = (1, self.quantiles.size + 2) + + y_pred = np.zeros(s) + y_pred[:, 1:-1] = self.predict(x) + y_pred[:, 0] = 2.0 * y_pred[:, 1] - y_pred[:, 2] + y_pred[:, -1] = 2.0 * y_pred[:, -2] - y_pred[:, -3] qs = np.zeros(self.quantiles.size + 2) qs[1:-1] = self.quantiles @@ -502,7 +507,7 @@ def posterior_mean(self, x): Array containing the posterior means for the provided inputs. """ y_pred, qs = self.cdf(x) - mus = y_pred[-1] - np.trapz(qs, x=y_pred) + mus = y_pred[:, -1] - np.trapz(qs, x=y_pred) return mus @staticmethod From 18015eb9bfc4c298aa6202b1c720e8d3e99b9ad8 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Tue, 2 Jun 2020 16:03:38 +0200 Subject: [PATCH 10/19] Load model to cpu. --- typhon/retrieval/qrnn/models/pytorch/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index 0dfbdfae..6a297f5b 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -447,7 +447,7 @@ def load(self, path): Arguments: path: Path of the file where the QRNN was stored. 
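+
+        The state is explicitly mapped to the CPU while loading, so
+        that models trained on a GPU can also be restored on machines
+        without one.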
""" - state = torch.load(path) + state = torch.load(path, map_location=torch.device("cpu")) keys = ["input_dimension", "quantiles", "depth", "width", "activation"] qrnn = QRNN(*[state[k] for k in keys]) qrnn.load_state_dict["network_state"] From 3e692f7919ed32381d4f094e5944a177d93e051a Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Thu, 25 Jun 2020 18:19:36 +0200 Subject: [PATCH 11/19] Removed tqdm status bar to make output cleaner. --- typhon/retrieval/qrnn/backends/pytorch.py | 12 +++++++ .../retrieval/qrnn/models/pytorch/common.py | 34 +++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/typhon/retrieval/qrnn/backends/pytorch.py b/typhon/retrieval/qrnn/backends/pytorch.py index da1d6846..f3dab8c9 100644 --- a/typhon/retrieval/qrnn/backends/pytorch.py +++ b/typhon/retrieval/qrnn/backends/pytorch.py @@ -198,11 +198,23 @@ def train(self, self.criterion.to(dev) + def clear_output(): + print("clreaing output") + try: + import IPython + from IPython.display import clear_output + clear_output(wait=True) + except: + print("failed loading Ipython") + pass + for i in range(n_epochs): + clear_output() err = 0.0 n = 0 iterator = tqdm(enumerate(training_data), total = len(training_data)) + iterator.set_description(f"Training epoch {i}/{n_epochs}, lr = {self.optimizer.lr}") for j, (x, y) in iterator: x = x.to(dev) y = y.to(dev) diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index 6a297f5b..c2b70929 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -10,7 +10,7 @@ from torch import optim from torch.optim.lr_scheduler import ReduceLROnPlateau from torch.utils.data import Dataset -from tqdm.auto import tqdm +from tqdm import tqdm activations = {"elu" : nn.ELU, "hardshrink" : nn.Hardshrink, @@ -299,6 +299,20 @@ def train(self, *args, **kwargs): training_errors = [] validation_errors = [] + # + # Function to clear output if we are in a notebook. 
+ # + + def clear_output(): + print("clreaing output") + try: + import IPython + from IPython.display import clear_output + clear_output(wait=True) + except: + print("failed loading Ipython") + pass + # # Training loop # @@ -306,8 +320,9 @@ def train(self, *args, **kwargs): for i in range(maximum_epochs): err = 0.0 n = 0 - iterator = tqdm(enumerate(training_data), total = len(training_data)) - for j, (x, y) in iterator: + #clear_output() + for j, (x, y) in enumerate(training_data): + x = x.to(device) y = y.to(device) @@ -332,8 +347,11 @@ def train(self, *args, **kwargs): c.backward() self.optimizer.step() + if j % 100: - iterator.set_postfix({"Training errror" : err / n}) + print("Epoch {} / {}: Batch {} / {}, Training error: {:.3f}" + .format(i, maximum_epochs, j, len(training_data), err / n), + end="\r") # Save training error training_errors.append(err / n) @@ -355,8 +373,12 @@ def train(self, *args, **kwargs): val_err += c.item() * x.size()[0] n += x.size()[0] validation_errors.append(val_err / n) - iterator.set_postfix({"Validation errror" : val_err / n}) - iterator.update() + print("Epoch {} / {}: Training error: {:.3f}, Validation error: {:.3f}, Learning rate: {:.5f}" + .format(i, maximum_epochs, training_errors[-1], validation_errors[-1], self.optimizer.defaults["lr"])) + scheduler.step(val_err / n) + else: + scheduler.step(err / n) + self.training_errors += training_errors self.validation_errors += validation_errors From 713a1675502d4590818a061651a9154ba7a8ac80 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 29 Jun 2020 11:53:22 +0200 Subject: [PATCH 12/19] Print correct learning rate. --- .../retrieval/qrnn/models/pytorch/common.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index c2b70929..bf6dca88 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -293,9 +293,9 @@ def train(self, *args, **kwargs): momentum = momentum) self.criterion.to(device) scheduler = ReduceLROnPlateau(self.optimizer, - factor=1.0 / learning_rate_decay, - patience=convergence_epochs, - min_lr=learning_rate_minimum) + factor=1.0 / learning_rate_decay, + patience=convergence_epochs, + min_lr=learning_rate_minimum) training_errors = [] validation_errors = [] @@ -356,12 +356,14 @@ def clear_output(): # Save training error training_errors.append(err / n) + lr = [group['lr'] for group in self.optimizer.param_groups][0] + val_err = 0.0 n = 0 if not validation_data is None: for x, y in validation_data: - x = x.to(device) - y = y.to(device) + x = x.to(device).detach() + y = y.to(device).detach() shape = x.size() shape = (shape[0], 1) + shape[2:] @@ -373,11 +375,15 @@ def clear_output(): val_err += c.item() * x.size()[0] n += x.size()[0] validation_errors.append(val_err / n) + + print("Epoch {} / {}: Training error: {:.3f}, Validation error: {:.3f}, Learning rate: {:.5f}" - .format(i, maximum_epochs, training_errors[-1], validation_errors[-1], self.optimizer.defaults["lr"])) + .format(i, maximum_epochs, training_errors[-1], validation_errors[-1], lr)) scheduler.step(val_err / n) else: scheduler.step(err / n) + print("Epoch {} / {}: Training error: {:.3f}, Learning rate: {:.5f}" + .format(i, maximum_epochs, training_errors[-1], lr)) self.training_errors += training_errors @@ -394,7 +400,7 @@ def predict(self, x, gpu=False): device = torch.device("cpu") x = handle_input(x, device) self.to(device) - return 
self(x).detach().numpy()
+        return self(x.detach()).detach().numpy()
 
     def calibration(self,
                     data,
@@ -427,8 +433,8 @@ def calibration(self,
 
         iterator = tqdm(data)
         for x, y in iterator:
-            x = x.to(dev)
-            y = y.to(dev)
+            x = x.to(dev).detach()
+            y = y.to(dev).detach()
             shape = x.size()
             shape = (shape[0], 1) + shape[2:]
             y = y.reshape(shape)

From 6defb7869595415a86deebe45a7f9a8f3be69579 Mon Sep 17 00:00:00 2001
From: Simon Pfreundschuh
Date: Sun, 27 Sep 2020 21:40:56 +0200
Subject: [PATCH 13/19] Refactoring, documentation and testing.

---
 doc/typhon.retrieval.qrnn.rst                 |  48 +-
 typhon/retrieval/qrnn/models/keras.py         | 179 +++---
 .../retrieval/qrnn/models/pytorch/__init__.py |  16 +-
 .../retrieval/qrnn/models/pytorch/common.py   | 195 ++++---
 .../qrnn/models/pytorch/fully_connected.py    |  10 +-
 typhon/retrieval/qrnn/models/pytorch/unet.py  | 137 ++---
 typhon/retrieval/qrnn/qrnn.py                 | 550 ++++++++----------
 typhon/tests/retrieval/qrnn/test_qrnn.py      |  69 ++-
 8 files changed, 589 insertions(+), 615 deletions(-)

diff --git a/doc/typhon.retrieval.qrnn.rst b/doc/typhon.retrieval.qrnn.rst
index 4915733b..1fa24dcd 100644
--- a/doc/typhon.retrieval.qrnn.rst
+++ b/doc/typhon.retrieval.qrnn.rst
@@ -19,22 +19,12 @@ The QRNN implementation consists of two-layers:
 The QRNN class
 --------------
 
-The :py:class:`~typhon.retrieval.qrnn.QRNN` class provides the high-level interface for QRNNs. If you want
-to train a fully-connected QRNN, this is all you need. The class itself
-implments generic functionality related to the evaluation of QRNNs and the post
-processing of results such as computing the PSD or the posterior mean. For the
-rest it acts as a wrapper around its model attribute, which encapsules all
-network- and DL-framework-specific code.
-
-API documentation
-^^^^^^^^^^^^^^^^^
-
-.. automodule:: typhon.retrieval.qrnn.qrnn
-.. currentmodule:: typhon.retrieval.qrnn.qrnn
-.. autosummary::
-    :toctree: generated
-
-    QRNN
+The :py:class:`~typhon.retrieval.qrnn.QRNN` class provides the high-level
+interface for QRNNs. This is all that is required to train a plain,
+fully-connected QRNN. The class itself implements generic functionality related
+to the evaluation of QRNNs and the post processing of results such as computing
+the PSD or the posterior mean. For the rest it acts as a wrapper around its
+model attribute, which encapsulates all network- and DL-framework-specific code.
 
 Backends
 --------
@@ -44,23 +34,35 @@ are supported as backends for neural network. The QRNN implementation will
 automatically use the one the is available on your system. If both are
 available you can choose a specific backend using the
 :py:meth:`~typhon.retrieval.qrnn.set_backend` function.
+Neural network models
+---------------------
+
+The :py:class:`typhon.retrieval.qrnn.QRNN` class is designed to work with any
+generic regression neural network model. The aim of this is to make the
+implementation sufficiently flexible to allow special network architectures or
+customization of the training process.
+
+This gives the user the flexibility to design custom NN models in pytorch
+or Keras and use them with the ``QRNN`` class. Some predefined architectures
+are defined in the :py:mod:`typhon.retrieval.qrnn.models` submodule.
 
 API documentation
-^^^^^^^^^^^^^^^^^
+-----------------
 
 .. automodule:: typhon.retrieval.qrnn.qrnn
 .. currentmodule:: typhon.retrieval.qrnn.qrnn
 .. autosummary::
     :toctree: generated
 
-    set_backend
-
-Neural network models
----------------------
+    QRNN
 
+.. automodule:: typhon.retrieval.qrnn.models.pytorch
+.. 
currentmodule:: typhon.retrieval.qrnn.models.pytorch +.. autosummary:: + :toctree: generated -Pytorch -^^^^^^^ + FullyConnected + UNet .. automodule:: typhon.retrieval.qrnn.models.pytorch .. currentmodule:: typhon.retrieval.qrnn.models.pytorch diff --git a/typhon/retrieval/qrnn/models/keras.py b/typhon/retrieval/qrnn/models/keras.py index 28b7aef7..f1b31970 100644 --- a/typhon/retrieval/qrnn/models/keras.py +++ b/typhon/retrieval/qrnn/models/keras.py @@ -1,3 +1,10 @@ +""" +typhon.retrieval.qrnn.models.keras +================================== + +This module provides Keras neural network models that can be used as backend +models with the :py:class:`typhon.retrieval.qrnn.QRNN` class. +""" import numpy as np import keras from keras.models import Sequential @@ -18,6 +25,7 @@ def save_model(f, model): """ keras.models.save_model(model, f) + def load_model(f, quantiles): """ Load keras model. @@ -35,28 +43,27 @@ def load_model(f, quantiles): # This is a bit hacky but seems required to handle # the custom model classes. # - def make_fully_connected(layers = None, - **kwargs): + def make_fully_connected(layers=None, **kwargs): layers = list(map(deserialize, layers)) input_dimensions = layers[0].batch_input_shape[1] - return FullyConnected(input_dimensions, - quantiles, - (), - layers) - custom_objects = {"FullyConnected" : make_fully_connected, - "QuantileLoss" : QuantileLoss} + return FullyConnected(input_dimensions, quantiles, (), layers) + + custom_objects = { + "FullyConnected": make_fully_connected, + "QuantileLoss": QuantileLoss, + } model = keras.models.load_model(f, custom_objects=custom_objects) return model + ################################################################################ # Quantile loss ################################################################################ logger = logging.getLogger(__name__) -def skewed_absolute_error(y_true, - y_pred, - tau): + +def skewed_absolute_error(y_true, y_pred, tau): """ The quantile loss function for a given quantile tau: @@ -73,12 +80,9 @@ def quantile_loss(y_true, y_pred, taus): The quantiles loss for a list of quantiles. Sums up the error contribution from the each of the quantile loss functions. """ - e = skewed_absolute_error( - K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0]) + e = skewed_absolute_error(K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0]) for i, tau in enumerate(taus[1:]): - e += skewed_absolute_error(K.flatten(y_true), - K.flatten(y_pred[:, i + 1]), - tau) + e += skewed_absolute_error(K.flatten(y_true), K.flatten(y_pred[:, i + 1]), tau) return e @@ -113,17 +117,18 @@ def __init__(self): def train(self): pass + ################################################################################ # Keras data generators ################################################################################ + class BatchedDataset: """ Keras data loader that batches a given dataset of numpy arryas. """ - def __init__(self, - training_data, - batch_size): + + def __init__(self, training_data, batch_size): """ Create batched dataset. 
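+
+        A minimal usage sketch (array names are hypothetical):
+
+        >>> data = BatchedDataset((x_train, y_train), 256)
+        >>> x_batch, y_batch = next(data)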
@@ -147,9 +152,9 @@ def __len__(self): return self.x.shape[0] // self.bs def __next__(self): - inds = self.indices[np.arange(self.i * self.bs, - (self.i + 1) * self.bs) - % self.indices.size] + inds = self.indices[ + np.arange(self.i * self.bs, (self.i + 1) * self.bs) % self.indices.size + ] x_batch = np.copy(self.x[inds, :]) y_batch = self.y[inds] self.i = self.i + 1 @@ -159,6 +164,7 @@ def __next__(self): return (x_batch, y_batch) + class TrainingGenerator: """ This Keras sample generator takes a generator for noise-free training data @@ -169,9 +175,8 @@ class TrainingGenerator: sigma_noise: A vector containing the standard deviation of each component. """ - def __init__(self, - training_data, - sigma_noise = None): + + def __init__(self, training_data, sigma_noise=None): """ Args: training_data: Data generator providing the original (noise-free) @@ -195,6 +200,7 @@ def __next__(self): x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise return (x_batch, y_batch) + class AdversarialTrainingGenerator: """ This Keras sample generator takes the noise-free training data @@ -208,10 +214,8 @@ class AdversarialTrainingGenerator: network eps: The perturbation factor. """ - def __init__(self, - training_data, - input_gradients, - eps): + + def __init__(self, training_data, input_gradients, eps): """ Args: training_data: Training generator to use to generate the input @@ -245,6 +249,7 @@ def __next__(self): self.i = self.i + 1 return x_batch, y_batch + class ValidationGenerator: """ This Keras sample generator is similar to the training generator @@ -262,9 +267,7 @@ class ValidationGenerator: component. """ - def __init__(self, - validation_data, - sigma_noise): + def __init__(self, validation_data, sigma_noise): self.validation_data = validation_data self.sigma_noise = sigma_noise @@ -277,10 +280,12 @@ def __next__(self): x_val += np.random.randn(*self.x_val.shape) * self.sigma_noise return (x_val, self.y_val) + ################################################################################ # LRDecay ################################################################################ + class LRDecay(keras.callbacks.Callback): """ The LRDecay class implements the Keras callback interface and reduces @@ -310,7 +315,7 @@ def on_train_begin(self, logs={}): self.min_loss = 1e30 def on_epoch_end(self, epoch, logs={}): - loss = logs.get('val_loss') + loss = logs.get("val_loss") if loss is None: loss = logs.get("loss") self.losses += [loss] @@ -320,8 +325,7 @@ def on_epoch_end(self, epoch, logs={}): self.steps = 0 if self.steps > self.convergence_steps: lr = keras.backend.get_value(self.model.optimizer.lr) - keras.backend.set_value( - self.model.optimizer.lr, lr / self.lr_decay) + keras.backend.set_value(self.model.optimizer.lr, lr / self.lr_decay) self.steps = 0 logger.info("\n Reduced learning rate to " + str(lr)) @@ -330,10 +334,12 @@ def on_epoch_end(self, epoch, logs={}): self.min_loss = min(self.min_loss, self.losses[-1]) + ################################################################################ # QRNN ################################################################################ + class KerasModel: r""" Base class for Keras models. @@ -362,9 +368,7 @@ class KerasModel: neural network. """ - def __init__(self, - input_dimension, - quantiles): + def __init__(self, input_dimension, quantiles): """ Create a QRNN model. 
@@ -379,33 +383,40 @@ def __init__(self, self.input_dimension = input_dimension self.quantiles = np.array(quantiles) - - def train(self, - training_data, - validation_data = None, - batch_size = 256, - sigma_noise=None, - adversarial_training = False, - delta_at = 0.01, - initial_learning_rate = 1e-2, - momentum = 0.0, - convergence_epochs = 5, - learning_rate_decay = 2.0, - learning_rate_minimum = 1e-6, - maximum_epochs = 200, - training_split = 0.9, - gpu = False): + def reset(self): + """ + Reinitialize the state of the model. + """ + self.reset_states() + + def train( + self, + training_data, + validation_data=None, + batch_size=256, + sigma_noise=None, + adversarial_training=False, + delta_at=0.01, + initial_learning_rate=1e-2, + momentum=0.0, + convergence_epochs=5, + learning_rate_decay=2.0, + learning_rate_minimum=1e-6, + maximum_epochs=200, + training_split=0.9, + gpu=False, + ): if type(training_data) == tuple: if not type(training_data[0]) == np.ndarray: - raise ValueError("When training data is provided as tuple" - " (x, y) it must contain numpy arrays.") - training_data = BatchedDataset(training_data, - batch_size) + raise ValueError( + "When training data is provided as tuple" + " (x, y) it must contain numpy arrays." + ) + training_data = BatchedDataset(training_data, batch_size) if type(validation_data) is tuple: - validation_data = BatchedDataset(validation_data, - batch_size) + validation_data = BatchedDataset(validation_data, batch_size) loss = QuantileLoss(self.quantiles) @@ -417,48 +428,44 @@ def train(self, # # Setup training generator # - training_generator = TrainingGenerator(training_data, - sigma_noise) + training_generator = TrainingGenerator(training_data, sigma_noise) if adversarial_training: - inputs = [self.input, - self.targets[0], - self.sample_weights[0]] + inputs = [self.input, self.targets[0], self.sample_weights[0]] input_gradients = K.function( - inputs, K.gradients(self.total_loss, self.input)) - training_generator = AdversarialTrainingGenerator(training_generator, - input_gradients, - delta_at) + inputs, K.gradients(self.total_loss, self.input) + ) + training_generator = AdversarialTrainingGenerator( + training_generator, input_gradients, delta_at + ) if validation_data is None: validation_generator = None else: - validation_generator = ValidationGenerator(validation_data, - sigma_noise) - lr_callback = LRDecay(self, - learning_rate_decay, - learning_rate_minimum, - convergence_epochs) - self.fit_generator(training_generator, - steps_per_epoch=len(training_generator), - epochs=maximum_epochs, - validation_data=validation_generator, - validation_steps=1, - callbacks=[lr_callback]) + validation_generator = ValidationGenerator(validation_data, sigma_noise) + lr_callback = LRDecay( + self, learning_rate_decay, learning_rate_minimum, convergence_epochs + ) + self.fit_generator( + training_generator, + steps_per_epoch=len(training_generator), + epochs=maximum_epochs, + validation_data=validation_generator, + validation_steps=1, + callbacks=[lr_callback], + ) ################################################################################ # Fully-connected network ################################################################################ + class FullyConnected(KerasModel, Sequential): """ Keras implementation of fully-connected networks. """ - def __init__(self, - input_dimension, - quantiles, - arch, - layers = None): + + def __init__(self, input_dimension, quantiles, arch, layers=None): """ Create a fully-connected neural network. 
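
The reformatted train() method above keeps the keyword interface that
is shared with the pytorch backend; a hypothetical call would look like

    model.train((x_train, y_train),
                validation_data=(x_val, y_val),
                batch_size=256,
                maximum_epochs=10)

where tuples of numpy arrays are wrapped into BatchedDataset objects
internally.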
diff --git a/typhon/retrieval/qrnn/models/pytorch/__init__.py b/typhon/retrieval/qrnn/models/pytorch/__init__.py index 430fd8bb..eb2830eb 100644 --- a/typhon/retrieval/qrnn/models/pytorch/__init__.py +++ b/typhon/retrieval/qrnn/models/pytorch/__init__.py @@ -1,12 +1,14 @@ """ -models -====== +typhon.retrieval.qrnn.models.pytorch +==================================== -This module provides an implementation of quantile regression neural networks (QRNNs) -using the pytorch deep learning framework. +This model provides Pytorch neural network models that can be used a backend +models for the :py:class:`typhon.retrieval.qrnn.QRNN` class. """ -from typhon.retrieval.qrnn.models.pytorch.common import (BatchedDataset, - save_model, - load_model) +from typhon.retrieval.qrnn.models.pytorch.common import ( + BatchedDataset, + save_model, + load_model, +) from typhon.retrieval.qrnn.models.pytorch.fully_connected import FullyConnected from typhon.retrieval.qrnn.models.pytorch.unet import UNet diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index bf6dca88..17300fa0 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -12,16 +12,18 @@ from torch.utils.data import Dataset from tqdm import tqdm -activations = {"elu" : nn.ELU, - "hardshrink" : nn.Hardshrink, - "hardtanh" : nn.Hardtanh, - "prelu" : nn.PReLU, - "relu" : nn.ReLU, - "selu" : nn.SELU, - "celu" : nn.CELU, - "sigmoid" : nn.Sigmoid, - "softplus" : nn.Softplus, - "softmin" : nn.Softmin} +activations = { + "elu": nn.ELU, + "hardshrink": nn.Hardshrink, + "hardtanh": nn.Hardtanh, + "prelu": nn.PReLU, + "relu": nn.ReLU, + "selu": nn.SELU, + "celu": nn.CELU, + "sigmoid": nn.Sigmoid, + "softplus": nn.Softplus, + "softmin": nn.Softmin, +} def save_model(f, model): @@ -35,6 +37,7 @@ def save_model(f, model): """ torch.save(model, f) + def load_model(f, quantiles): """ Load pytorch model. @@ -51,7 +54,8 @@ def load_model(f, quantiles): model = torch.load(f) return model -def handle_input(data, device = None): + +def handle_input(data, device=None): """ Handle input data. @@ -80,13 +84,13 @@ def handle_input(data, device = None): else: return data + class BatchedDataset(Dataset): """ Batches an un-batched dataset. """ - def __init__(self, - training_data, - batch_size): + + def __init__(self, training_data, batch_size): x, y = training_data self.x = torch.tensor(x, dtype=torch.float) self.y = torch.tensor(y, dtype=torch.float) @@ -102,14 +106,16 @@ def __getitem__(self, i): raise IndexError() i_start = i * self.batch_size i_end = (i + 1) * self.batch_size - x = self.x[i_start : i_end] - y = self.y[i_start : i_end] + x = self.x[i_start:i_end] + y = self.y[i_start:i_end] return (x, y) + ################################################################################ # Quantile loss ################################################################################ + class QuantileLoss: r""" The quantile loss function @@ -132,9 +138,8 @@ class QuantileLoss: corresponding to each quantiles. The loss for a batch of training samples is computed by taking the mean over all samples in the batch. """ - def __init__(self, - quantiles, - mask = -1.0): + + def __init__(self, quantiles, mask=-1.0): """ Create an instance of the quantile loss function with the given quantiles. 
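+
+        A worked check of the loss for a single quantile, tau = 0.9
+        (values are illustrative): for dy = y_pred - y_true,
+
+        * dy =  1.0  ->  (1 - 0.9) * 1.0  = 0.1
+        * dy = -1.0  ->  (-0.9) * (-1.0)  = 0.9
+
+        so underestimating the 0.9 quantile is penalized nine times
+        as strongly as overestimating it.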
@@ -163,18 +168,18 @@ def __call__(self, y_pred, y_true): """ dy = y_pred - y_true n = self.quantiles.size()[0] - qs = self.quantiles.reshape((n,) + (1, ) * max(len(dy.size()) - 2, 0)) - l = torch.where(dy >= 0.0, - (1.0 - qs) * dy, - (-qs) * dy) + qs = self.quantiles.reshape((n,) + (1,) * max(len(dy.size()) - 2, 0)) + l = torch.where(dy >= 0.0, (1.0 - qs) * dy, (-qs) * dy) if not self.mask is None: l = torch.where(y_true == self.mask, torch.zeros_like(l), l) return l.mean() + ################################################################################ # QRNN ################################################################################ + class PytorchModel: """ Quantile regression neural network (QRNN) @@ -182,9 +187,8 @@ class PytorchModel: This class implements QRNNs as a fully-connected network with a given number of layers. """ - def __init__(self, - input_dimension, - quantiles): + + def __init__(self, input_dimension, quantiles): """ Arguments: input_dimension(int): The number of input features. @@ -206,6 +210,17 @@ def _make_adversarial_samples(self, x, y, eps): x_adv = x.detach() + eps * torch.sign(x.grad.detach()) return x_adv + def reset(self): + """ + Reinitializes the weights of a model. + """ + + def reset_function(module): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + m.reset_parameters() + + self.apply(reset_function) + def train(self, *args, **kwargs): """ Train the network. @@ -233,19 +248,21 @@ def train(self, *args, **kwargs): # training_data = args[0] - arguments = {"validation_data" : None, - "batch_size" : 256, - "sigma_noise" : None, - "adversarial_training" : False, - "delta_at" : 0.01, - "initial_learning_rate" : 1e-2, - "momentum" : 0.0, - "convergence_epochs" : 5, - "learning_rate_decay" : 2.0, - "learning_rate_minimum" : 1e-6, - "maximum_epochs" : 1, - "training_split" : 0.9, - "gpu" : False} + arguments = { + "validation_data": None, + "batch_size": 256, + "sigma_noise": None, + "adversarial_training": False, + "delta_at": 0.01, + "initial_learning_rate": 1e-2, + "momentum": 0.0, + "convergence_epochs": 5, + "learning_rate_decay": 2.0, + "learning_rate_minimum": 1e-6, + "maximum_epochs": 1, + "training_split": 0.9, + "gpu": False, + } argument_names = arguments.keys() for a, n in zip(args[1:], argument_names): arguments[n] = a @@ -288,31 +305,19 @@ def train(self, *args, **kwargs): pass self.train() - self.optimizer = optim.SGD(self.parameters(), - lr = initial_learning_rate, - momentum = momentum) + self.optimizer = optim.SGD( + self.parameters(), lr=initial_learning_rate, momentum=momentum + ) self.criterion.to(device) - scheduler = ReduceLROnPlateau(self.optimizer, - factor=1.0 / learning_rate_decay, - patience=convergence_epochs, - min_lr=learning_rate_minimum) + scheduler = ReduceLROnPlateau( + self.optimizer, + factor=1.0 / learning_rate_decay, + patience=convergence_epochs, + min_lr=learning_rate_minimum, + ) training_errors = [] validation_errors = [] - # - # Function to clear output if we are in a notebook. 
- # - - def clear_output(): - print("clreaing output") - try: - import IPython - from IPython.display import clear_output - clear_output(wait=True) - except: - print("failed loading Ipython") - pass - # # Training loop # @@ -320,7 +325,6 @@ def clear_output(): for i in range(maximum_epochs): err = 0.0 n = 0 - #clear_output() for j, (x, y) in enumerate(training_data): x = x.to(device) @@ -347,20 +351,22 @@ def clear_output(): c.backward() self.optimizer.step() - if j % 100: - print("Epoch {} / {}: Batch {} / {}, Training error: {:.3f}" - .format(i, maximum_epochs, j, len(training_data), err / n), - end="\r") + print( + "Epoch {} / {}: Batch {} / {}, Training error: {:.3f}".format( + i, maximum_epochs, j, len(training_data), err / n + ), + end="\r", + ) # Save training error training_errors.append(err / n) - lr = [group['lr'] for group in self.optimizer.param_groups][0] + lr = [group["lr"] for group in self.optimizer.param_groups][0] val_err = 0.0 - n = 0 if not validation_data is None: + n = 0 for x, y in validation_data: x = x.to(device).detach() y = y.to(device).detach() @@ -376,21 +382,31 @@ def clear_output(): n += x.size()[0] validation_errors.append(val_err / n) - - print("Epoch {} / {}: Training error: {:.3f}, Validation error: {:.3f}, Learning rate: {:.5f}" - .format(i, maximum_epochs, training_errors[-1], validation_errors[-1], lr)) + print( + "Epoch {} / {}: Training error: {:.3f}, Validation error: {:.3f}, Learning rate: {:.5f}".format( + i, + maximum_epochs, + training_errors[-1], + validation_errors[-1], + lr, + ) + ) scheduler.step(val_err / n) else: scheduler.step(err / n) - print("Epoch {} / {}: Training error: {:.3f}, Learning rate: {:.5f}" - .format(i, maximum_epochs, training_errors[-1], lr)) - + print( + "Epoch {} / {}: Training error: {:.3f}, Learning rate: {:.5f}".format( + i, maximum_epochs, training_errors[-1], lr + ) + ) self.training_errors += training_errors self.validation_errors += validation_errors self.eval() - return {"training_errors" : self.training_errors, - "validation_errors" : self.validation_errors} + return { + "training_errors": self.training_errors, + "validation_errors": self.validation_errors, + } def predict(self, x, gpu=False): "" @@ -402,9 +418,7 @@ def predict(self, x, gpu=False): self.to(device) return self(x.detach()).detach().numpy() - def calibration(self, - data, - gpu=False): + def calibration(self, data, gpu=False): """ Computes the calibration of the predictions from the neural network. @@ -425,8 +439,9 @@ def calibration(self, n_intervals = self.quantiles.size // 2 qs = self.quantiles - intervals = np.array([q_r - q_l for (q_l, q_r) - in zip(qs, reversed(qs))])[:n_intervals] + intervals = np.array([q_r - q_l for (q_l, q_r) in zip(qs, reversed(qs))])[ + :n_intervals + ] counts = np.zeros(n_intervals) total = 0.0 @@ -458,14 +473,18 @@ def save(self, path): Arguments: The path in which to store the QRNN. 
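+
+        A hypothetical call:
+
+        >>> model.save("qrnn_state.pt")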
""" - torch.save({"input_dimension" : self.input_dimension, - "quantiles" : self.quantiles, - "width" : self.width, - "depth" : self.depth, - "activation" : self.activation, - "network_state" : self.state_dict(), - "optimizer_state" : self.optimizer.state_dict()}, - path) + torch.save( + { + "input_dimension": self.input_dimension, + "quantiles": self.quantiles, + "width": self.width, + "depth": self.depth, + "activation": self.activation, + "network_state": self.state_dict(), + "optimizer_state": self.optimizer.state_dict(), + }, + path, + ) @staticmethod def load(self, path): diff --git a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py index 8a659e35..47412ba9 100644 --- a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py +++ b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py @@ -1,20 +1,18 @@ from torch import nn from torch import optim -from typhon.retrieval.qrnn.models.pytorch.common import (PytorchModel, - activations) +from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel, activations ################################################################################ # Fully-connected network ################################################################################ + class FullyConnected(PytorchModel, nn.Sequential): """ Pytorch implementation of a fully-connected QRNN model. """ - def __init__(self, - input_dimension, - quantiles, - arch): + + def __init__(self, input_dimension, quantiles, arch): """ Create a fully-connected neural network. diff --git a/typhon/retrieval/qrnn/models/pytorch/unet.py b/typhon/retrieval/qrnn/models/pytorch/unet.py index 09bdf179..0a9d9403 100644 --- a/typhon/retrieval/qrnn/models/pytorch/unet.py +++ b/typhon/retrieval/qrnn/models/pytorch/unet.py @@ -3,6 +3,7 @@ from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel + class Layer(nn.Sequential): """ Basic building block of a UNet. Consists of a convolutional @@ -20,30 +21,33 @@ class Layer(nn.Sequential): skip_connection(:code:`bool`): Whether to include skip connections, i.e. to include input in layer output. """ - def __init__(self, - features_in, - features_out, - batch_norm=True, - kernel_size=3, - activation=nn.ReLU, - skip_connection=False): + + def __init__( + self, + features_in, + features_out, + batch_norm=True, + kernel_size=3, + activation=nn.ReLU, + skip_connection=False, + ): self._features_in = features_in self._features_out = features_out self.skip_connection = skip_connection if not activation is None: - modules = [nn.ConstantPad2d(1, 0.0), - nn.Conv2d(features_in, - features_out, - kernel_size), - nn.BatchNorm2d(features_out), - activation()] + modules = [ + nn.ConstantPad2d(1, 0.0), + nn.Conv2d(features_in, features_out, kernel_size), + nn.BatchNorm2d(features_out), + activation(), + ] else: - modules = [nn.ConstantPad2d(1, 0.0), - nn.Conv2d(features_in, - features_out, - kernel_size), - nn.BatchNorm2d(features_out)] + modules = [ + nn.ConstantPad2d(1, 0.0), + nn.Conv2d(features_in, features_out, kernel_size), + nn.BatchNorm2d(features_out), + ] super().__init__(*modules) @property @@ -59,6 +63,7 @@ def forward(self, x): y = torch.cat([x, y], dim=1) return y + class Block(nn.Sequential): """ A block bundles a set of layers. @@ -73,14 +78,17 @@ class Block(nn.Sequential): connections before all layers (:code:`"all"`) or just at the end (:code:`"end"`). 
""" - def __init__(self, - features_in, - features_out, - depth=2, - batch_norm=True, - activation=nn.ReLU, - kernel_size=3, - skip_connection=None): + + def __init__( + self, + features_in, + features_out, + depth=2, + batch_norm=True, + activation=nn.ReLU, + kernel_size=3, + skip_connection=None, + ): self._features_in = features_in @@ -97,12 +105,16 @@ def __init__(self, layers = [] nf = features_in for d in range(depth): - layers.append(Layer(nf, - features_out, - activation=activation, - batch_norm=batch_norm, - kernel_size=kernel_size, - skip_connection=skip_connection_layer)) + layers.append( + Layer( + nf, + features_out, + activation=activation, + batch_norm=batch_norm, + kernel_size=kernel_size, + skip_connection=skip_connection_layer, + ) + ) nf = layers[-1].features_out self._features_out = layers[-1].features_out @@ -127,27 +139,21 @@ def __init__(self): modules = [nn.MaxPool2d(2)] super().__init__(*modules) + class UpSampler(nn.Sequential): - def __init__(self, - features_in, - features_out): - modules = [nn.ConvTranspose2d(features_in, - features_out, - 3, - padding=1, - output_padding=1, - stride=2)] + def __init__(self, features_in, features_out): + modules = [ + nn.ConvTranspose2d( + features_in, features_out, 3, padding=1, output_padding=1, stride=2 + ) + ] super().__init__(*modules) - class UNet(PytorchModel, nn.Module): - def __init__(self, - input_features, - quantiles, - n_features=32, - n_levels=4, - skip_connection=None): + def __init__( + self, input_features, quantiles, n_features=32, n_levels=4, skip_connection=None + ): nn.Module.__init__(self) PytorchModel.__init__(self, input_features, quantiles) @@ -158,36 +164,37 @@ def __init__(self, features_in = input_features features_out = n_features for i in range(n_levels - 1): - self.down_blocks.append(Block(features_in, - features_out, - skip_connection=skip_connection)) + self.down_blocks.append( + Block(features_in, features_out, skip_connection=skip_connection) + ) self.down_samplers.append(DownSampler()) features_in = self.down_blocks[-1].features_out features_out = features_out * 2 - self.center_block = Block(features_in, - features_out, - skip_connection=skip_connection) + self.center_block = Block( + features_in, features_out, skip_connection=skip_connection + ) self.up_blocks = nn.ModuleList() self.up_samplers = nn.ModuleList() features_in = self.center_block.features_out features_out = features_out // 2 for i in range(n_levels - 1): - self.up_samplers.append(UpSampler(features_in, - features_out)) - features_in = features_out + self.down_blocks[(- i - 1)].features_out - self.up_blocks.append(Block(features_in, - features_out, - skip_connection=skip_connection)) + self.up_samplers.append(UpSampler(features_in, features_out)) + features_in = features_out + self.down_blocks[(-i - 1)].features_out + self.up_blocks.append( + Block(features_in, features_out, skip_connection=skip_connection) + ) features_out = features_out // 2 features_in = self.up_blocks[-1].features_out - self.head = nn.Sequential(nn.Conv2d(features_in, features_in, 1), - nn.ReLU(), - nn.Conv2d(features_in, features_in, 1), - nn.ReLU(), - nn.Conv2d(features_in, quantiles.size, 1)) + self.head = nn.Sequential( + nn.Conv2d(features_in, features_in, 1), + nn.ReLU(), + nn.Conv2d(features_in, features_in, 1), + nn.ReLU(), + nn.Conv2d(features_in, quantiles.size, 1), + ) def forward(self, x): diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index c9da8e8c..3f2ccaf0 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ 
b/typhon/retrieval/qrnn/qrnn.py @@ -1,8 +1,19 @@ +""" +typhon.retrieval.qrnn.qrnn +========================== + +This module provides the QRNN class, which implements the high-level +functionality of quantile regression neural networks, while the neural +network implementation is left to the model backends implemented in the +``typhon.retrieval.qrnn.models`` submodule. +""" import copy import logging import os import pickle import importlib +import scipy +from scipy.stats import norm import numpy as np from scipy.interpolate import CubicSpline @@ -13,15 +24,20 @@ try: import typhon.retrieval.qrnn.models.keras as keras + backend = keras except Exception as e: try: import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch except: - raise Exception("Couldn't import neither Keras nor Pytorch " - "one of them must be available to use the QRNN" - " module.") + raise Exception( + "Couldn't import neither Keras nor Pytorch " + "one of them must be available to use the QRNN" + " module." + ) + def set_backend(name): """ @@ -36,19 +52,24 @@ def set_backend(name): if name == "keras": try: import typhon.retrieval.qrnn.models.keras as keras + backend = keras except Exception as e: - raise Exception("The following error occurred while trying " - " to import keras: ", e) + raise Exception( + "The following error occurred while trying " " to import keras: ", e + ) elif name == "pytorch": try: import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch except Exception as e: - raise Exception("The following error occurred while trying " - " to import pytorch: ", e) + raise Exception( + "The following error occurred while trying " " to import pytorch: ", e + ) else: - raise Exception("\"{}\" is not a supported backend.".format(name)) + raise Exception('"{}" is not a supported backend.'.format(name)) + def get_backend(name): """ @@ -62,25 +83,27 @@ def get_backend(name): if name == "keras": try: import typhon.retrieval.qrnn.models.keras as keras + backend = keras except Exception as e: - raise Exception("The following error occurred while trying " - " to import keras: ", e) + raise Exception( + "The following error occurred while trying " " to import keras: ", e + ) elif name == "pytorch": try: import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch except Exception as e: - raise Exception("The following error occurred while trying " - " to import pytorch: ", e) + raise Exception( + "The following error occurred while trying " " to import pytorch: ", e + ) else: - raise Exception("\"{}\" is not a supported backend.".format(name)) + raise Exception('"{}" is not a supported backend.'.format(name)) return backend -def create_model(input_dim, - output_dim, - arch): +def create_model(input_dim, output_dim, arch): """ Creates a fully-connected neural network from a tuple describing its architecture. @@ -99,10 +122,12 @@ def create_model(input_dim, """ return backend.FullyConnected(input_dim, output_dim, arch) + ################################################################################ # QRNN class ################################################################################ + class QRNN: r""" Quantile Regression Neural Network (QRNN) @@ -124,197 +149,155 @@ class QRNN: has one output neuron for each quantile to estimate. The QRNN class provides a generic QRNN implementation in the sense that it - does not assume a fixed neural network infrastructure. 
Instead, this - functionality is off-loaded to a model object, which can be an arbitrary - regression network such as a fully-connected or a convolutional network. A - range of different models are provided in the typhon.retrieval.qrnn.models - module. The :class:`QRNN`` class just implements high-level operation on - the QRNN output while training and prediction are delegated to the model - object. For details on the respective implementation refer to the - documentation of the corresponding model class. - - .. note:: For the QRNN implementation :math:`x` is used to denote the input - vector and :math:`y` to denote the output. While this is opposed - to inverse problem notation typically used for retrievals, it is - in line with machine learning notation and felt more natural for - the implementation. If this annoys you, I am sorry. + does not assume a fixed neural network architecture or implementation. + Instead, this functionality is off-loaded to a model object, which can be + an arbitrary regression network such as a fully-connected or a + convolutional network. A range of different models are provided in the + typhon.retrieval.qrnn.models module. The :class:`QRNN`` class just + implements high-level operation on the QRNN output while training and + prediction are delegated to the model object. For details on the respective + implementation refer to the documentation of the corresponding model class. + + .. note:: + + For the QRNN implementation :math:`x` is used to denote the input + vector and :math:`y` to denote the output. While this is opposed + to inverse problem notation typically used for retrievals, it is + in line with machine learning notation and felt more natural for + the implementation. If this annoys you, I am sorry. Attributes: - quantiles (numpy.array): The 1D-array containing the quantiles - :math:`\tau \in [0, 1]` that the network learns to predict. - model: The model object that implements the actual neural network - regressor. + backend(``str``): + The name of the backend used for the neural network model. + quantiles (numpy.array): + The 1D-array containing the quantiles :math:`\tau \in [0, 1]` + that the network learns to predict. + model: + The neural network regression model used to predict the quantiles. """ - def __init__(self, - input_dimension, - quantiles=None, - model=(3, 128, "relu"), - ensemble_size=1, - **kwargs): + + def __init__( + self, + input_dimensions, + quantiles=None, + model=(3, 128, "relu"), + ensemble_size=1, + **kwargs + ): """ Create a QRNN model. Arguments: - input_dim(int): The dimension of the measurement space, i.e. the number - of elements in a single measurement vector y - quantiles(np.array): 1D-array containing the quantiles to estimate of - the posterior distribution. Given as fractions - within the range [0, 1]. - model: A (possible trained) model instance or a tuple - :code:`(d, w, act)` describing the architecture of a - fully-connected neural network with :code:`d` hidden layers - with :code:`w` neurons and :code:`act` activation functions. - ensemble_size: The size of the ensemble if an ensemble model - should be used. + input_dimensions(int): + The dimension of the measurement space, i.e. the + number of elements in a single measurement vector y + quantiles(np.array): + 1D-array containing the quantiles to estimate of + the posterior distribution. Given as fractions within the range + [0, 1]. 
+ model: + A (possibly trained) model instance or a tuple + ``(d, w, act)`` describing the architecture of a fully-connected + neural network with :code:`d` hidden layers with :code:`w` neurons + and :code:`act` activation functions. """ - self.input_dimension = input_dimension + self.input_dimensions = input_dimensions self.quantiles = np.array(quantiles) self.backend = backend.__name__ if type(model) == tuple: - self.model = backend.FullyConnected(self.input_dimension, - self.quantiles, - model) + self.model = backend.FullyConnected( + self.input_dimensions, self.quantiles, model + ) if quantiles is None: - raise ValueError("If model is given as architecture tuple, the" - " 'quantiles' kwarg must be provided.") + raise ValueError( + "If model is given as architecture tuple, the" + " 'quantiles' kwarg must be provided." + ) else: if not quantiles is None: if not quantiles == model.quantiles: - raise ValueError("Provided quantiles do not match those of " - "the provided model.") + raise ValueError( + "Provided quantiles do not match those of " + "the provided model." + ) self.model = model self.quantiles = model.quantiles self.backend = model.backend + def train( + self, + training_data, + validation_data=None, + batch_size=256, + sigma_noise=None, + adversarial_training=False, + delta_at=0.01, + initial_learning_rate=1e-2, + momentum=0.0, + convergence_epochs=5, + learning_rate_decay=2.0, + learning_rate_minimum=1e-6, + maximum_epochs=200, + training_split=0.9, + gpu=False, + ): + """ + Train model on given training data. - def cross_validation(self, - x_train, - y_train, - sigma_noise = None, - n_folds=5, - s=None, - **kwargs): - r""" - Perform n-fold cross validation. - - This function trains the network n times on different subsets of the - provided training data, always keeping a fraction of 1/n samples apart - for testing. Performance for each of the networks is evaluated and mean - and standard deviation for all folds are returned. This is to reduce - the influence of random fluctuations of the network performance during - hyperparameter tuning. - - Arguments: - - x_train(numpy.array): Array of shape :code:`(m, n)` containing the - m n-dimensional training inputs. - - y_train(numpy.array): Array of shape :code:`(m, 1)` containing the - m training outputs. - - sigma_noise(None, float, np.array): If not `None` this value is used - to multiply the Gaussian noise - that is added to each training - batch. If None no noise is - added. - - n_folds(int): Number of folds to perform for the cross correlation. - - s(callable, None): Performance metric for the fold. If not None, - this should be a function object taking as - arguments :code:`(y_test, y_pred)`, i.e. the - expected output for the given fold :code:`y_test` - and the predicted output :code:`y_pred`. The - returned value is taken as performance metric. + The training is performed on the provided training data and an + optionally-provided validation set. Training can use the following + augmentation methods: + - Gaussian noise added to input + - Adversarial training + The learning rate is decreased gradually when the validation or training + loss did not decrease for a given number of epochs. - **kwargs: Additional keyword arguments are passed on to the :code:`fit` - method that is called for each fold. + Args: + training_data: Tuple of numpy arrays of a dataset object to use to + train the model. + validation_data: Optional validation data in the same format as the + training data. 
+ batch_size: If training data is provided as arrays, this batch size + will be used to for the training. + sigma_noise: If training data is provided as arrays, training data + will be augmented by adding noise with the given standard + deviations to each input vector before it is presented to the + model. + adversarial_training(``bool``): Whether or not to perform + adversarial training using the fast gradient sign method. + delta_at: The scaling factor to apply for adversarial training. + initial_learning_rate(``float``): The learning rate with which the + training is started. + momentum(``float``): The momentum to use for training. + convergence_epochs(``int``): The number of epochs with + non-decreasing loss before the learning rate is decreased + learning_rate_decay(``float``): The factor by which the learning rate + is decreased. + learning_rate_minimum(``float``): The learning rate at which the + training is aborted. + maximum_epochs(``int``): For how many epochs to keep training. + training_split(``float``): If no validation data is provided, this + is the fraction of training data that is used for validation. + gpu(``bool``): Whether or not to try to run the training on the GPU. """ - - n = x_train.shape[0] - n_test = n // n_folds - inds = np.random.permutation(np.arange(0, n)) - - results = [] - - # Nomenclature is a bit difficult here: - # Each cross validation fold has its own training, - # vaildation and test set. The size of the test set - # is number of provided training samples divided by the - # number of fold. The rest is used a traning and internal - # validation set according to the chose training_split - # ratio. - - - for i in range(n_folds): - for m in self.models: - m.reset_states() - - # Indices to use for training including training data and internal - # validation set to monitor convergence. - inds_train = np.append(np.arange(0, i * n_test), - np.arange(min((i + 1) * n_test, n), n)) - inds_train = inds[inds_train] - # Indices used to evaluate performance of the model. - inds_test = np.arange(i * n_test, (i + 1) * n_test) - inds_test = inds[inds_test] - - x_test_fold = x_train[inds_test, :] - y_test_fold = y_train[inds_test] - - # Splitting training and validation set. - x_train_fold = x_train[inds_train, :] - y_train_fold = y_train[inds_train] - - self.fit(x_train_fold, y_train_fold, - sigma_noise, **kwargs) - - # Evaluation on this folds test set. 
- if s: - y_pred = self.predict(x_test_fold) - results += [s(y_pred, y_test_fold)] - else: - loss = self.models[0].evaluate( - (x_test_fold - self.x_mean) / self.x_sigma, - y_test_fold) - logger.info(loss) - results += [loss] - logger.info(results) - results = np.array(results) - logger.info(results) - return (np.mean(results, axis=0), np.std(results, axis=0)) - - def train(self, - training_data, - validation_data = None, - batch_size = 256, - sigma_noise = None, - adversarial_training = False, - delta_at = 0.01, - initial_learning_rate = 1e-2, - momentum = 0.0, - convergence_epochs = 5, - learning_rate_decay = 2.0, - learning_rate_minimum = 1e-6, - maximum_epochs = 200, - training_split = 0.9, - gpu = True): - return self.model.train(training_data, - validation_data, - batch_size, - sigma_noise, - adversarial_training, - delta_at, - initial_learning_rate, - momentum, - convergence_epochs, - learning_rate_decay, - learning_rate_minimum, - maximum_epochs, - training_split, - gpu) + return self.model.train( + training_data, + validation_data, + batch_size, + sigma_noise, + adversarial_training, + delta_at, + initial_learning_rate, + momentum, + convergence_epochs, + learning_rate_decay, + learning_rate_minimum, + maximum_epochs, + training_split, + gpu, + ) def predict(self, x): r""" @@ -343,16 +326,17 @@ def cdf(self, x): Propagates the inputs in `x` forward through the network and approximates the posterior CDF by a piecewise linear function. - The piecewise linear function is given by its values at - approximate quantiles $x_\tau$ for - :math: `\tau = \{0.0, \tau_1, \ldots, \tau_k, 1.0\}` where - :math: `\tau_k` are the quantiles to be estimated by the network. - The values for :math:`x_0.0` and :math:`x_1.0` are computed using + The piecewise linear function is given by its values at approximate + quantiles :math:`x_\tau`` for :math:`\tau = \{0.0, \tau_1, \ldots, + \tau_k, 1.0\}` where :math:`\tau_k` are the quantiles to be estimated + by the network. The values for :math:`x_{0.0}` and :math:`x_{1.0}` are + computed using .. math:: - x_0.0 = 2.0 x_{\tau_1} - x_{\tau_2} - x_1.0 = 2.0 x_{\tau_k} - x_{\tau_{k-1}} + x_{0.0} = 2.0 x_{\tau_1} - x_{\tau_2} + + x_{1.0} = 2.0 x_{\tau_k} - x_{\tau_{k-1}} Arguments: @@ -361,8 +345,8 @@ def cdf(self, x): Returns: - Tuple (xs, fs) containing the :math: `x`-values in `xs` and corresponding - values of the posterior CDF :math: `F(x)` in `fs`. + Tuple (xs, fs) containing the :math:`x`-values in `xs` and corresponding + values of the posterior CDF :math:`F(x)` in `fs`. """ if len(x.shape) > 1: @@ -388,86 +372,37 @@ def calibration(self, *args, **kwargs): """ return self.model.calibration(*args, *kwargs) - def pdf(self, x, use_splines = False): + def pdf(self, x): r""" Approximate the posterior probability density function (PDF) for given - inputs `x`. - - By default, the PDF is approximated by computing the derivative of the - piece-wise linear approximation of the CDF as computed by the :code:`cdf` - function. + inputs ``x``. - If :code:`use_splines` is set to :code:`True`, the PDF is computed from - a spline fit to the approximate CDF. + The PDF is approximated by computing the derivative of the piece-wise + linear approximation of the CDF as computed by the + :py:meth:`typhon.retrieval.qrnn.QRNN.cdf` function. Arguments: x(np.array): Array of shape `(n, m)` containing `n` inputs for which - to predict the conditional quantiles. - - use_splines(bool): Whether or not to use a spline fit to the CDF to - approximate the PDF. + to predict PDFs. 
 
         Returns:
 
-            Tuple (xs, fs) containing the :math: `x`-values in `xs` and corresponding
-            values of the approximate posterior PDF :math: `F(x)` in `fs`.
+            Tuple (x_pdf, y_pdf) of arrays of shape `(n, k)` containing the
+            x- and y-coordinates describing the PDF for the inputs in ``x``.
 
         """
+        x_cdf, y_cdf = self.cdf(x)
+        n, m = x_cdf.shape
+        x_pdf = np.zeros((n, m + 1))
 
-        y_pred = np.zeros(self.quantiles.size)
-        y_pred = self.predict(x).ravel()
-
-        y = np.zeros(y_pred.size + 1)
-        y[1:-1] = 0.5 * (y_pred[1:] + y_pred[:-1])
-        y[0] = 2 * y_pred[0] - y_pred[1]
-        y[-1] = 2 * y_pred[-1] - y_pred[-2]
-
-        if not use_splines:
-
-            p = np.zeros(y.size)
-            p[1:-1] = np.diff(self.quantiles) / np.diff(y_pred)
-        else:
-
-            y = np.zeros(y_pred.size + 2)
-            y[1:-1] = y_pred
-            y[0] = 3 * y_pred[0] - 2 * y_pred[1]
-            y[-1] = 3 * y_pred[-1] - 2 * y_pred[-2]
-            q = np.zeros(self.quantiles.size + 2)
-            q[1:-1] = np.array(self.quantiles)
-            q[0] = 0.0
-            q[-1] = 1.0
-
-            sr = CubicSpline(y, q, bc_type = "clamped")
-            y = np.linspace(y[0], y[-1], 101)
-            p = sr(y, nu = 1)
-
-        return y, p
-
-
-        y_pred = np.zeros(self.quantiles.size + 2)
-        y_pred[1:-1] = self.predict(x)
-        y_pred[0] = 2.0 * y_pred[1] - y_pred[2]
-        y_pred[-1] = 2.0 * y_pred[-2] - y_pred[-3]
+        # The PDF nodes are the end points and the mid-points of the CDF nodes.
+        x_pdf[:, 0] = x_cdf[:, 0]
+        x_pdf[:, -1] = x_cdf[:, -1]
+        x_pdf[:, 1:-1] = 0.5 * (x_cdf[:, 1:] + x_cdf[:, :-1])
 
-        if use_splines:
-            x_t = np.zeros(x.size + 2)
-            x_t[1:-1] = x
-            x_t[0] = 2 * x[0] - x[1]
-            x_t[-1] = 2 * x[-1] - x[-2]
-            y_t = np.zeros(y.size + 2)
-            y_t[1:-1] = y
-            y_t[-1] = 1.0
-
-        else:
-            logger.info(y)
-            x_new = np.zeros(x.size - 1)
-            x_new[2:-2] = 0.5 * (x[2:-3] + x[3:-2])
-            x_new[0:2] = x[0:2]
-            x_new[-2:] = x[-2:]
-            y_new = np.zeros(y.size - 1)
-            y_new[1:-1] = np.diff(y[1:-1]) / np.diff(x[1:-1])
-            return x_new, y_new
+        # The PDF is the slope of the piecewise-linear CDF.
+        y_pdf = np.zeros((n, m + 1))
+        y_pdf[:, 1:-1] = np.diff(y_cdf) / np.diff(x_cdf, axis=-1)
+        return x_pdf, y_pdf
 
     def sample_posterior(self, x, n=1):
         r"""
@@ -485,13 +420,16 @@ def sample_posterior(self, x, n=1):
 
         Returns:
 
-            Tuple (xs, fs) containing the :math: `x`-values in `xs` and corresponding
-            values of the posterior CDF :math: `F(x)` in `fs`.
+            Array of shape `(x.shape[0], n)` containing, for each input in
+            ``x``, `n` samples drawn from the estimated posterior.
         """
-        y_pred, qs = self.cdf(x)
-        p = np.random.rand(n)
-        y = np.interp(p, qs, y_pred)
-        return y
+        result = np.zeros((x.shape[0], n))
+        x_cdf, y_cdf = self.cdf(x)
+        for i in range(x_cdf.shape[0]):
+            # Inverse-CDF sampling: draw uniform samples and invert the
+            # piecewise-linear CDF of the corresponding input.
+            p = np.random.rand(n)
+            y = np.interp(p, y_cdf, x_cdf[i, :])
+            result[i, :] = y
+        return result
 
     def posterior_mean(self, x):
         r"""
         Computes the posterior mean by computing the first moment of the
@@ -555,24 +493,21 @@ def crps(y_pred, y_test, quantiles):
         qs[0, 0] = 0.0
         qs[0, -1] = 1.0
 
-        return np.trapz((qs - ind)**2.0, y_cdf)
+        return np.trapz((qs - ind) ** 2.0, y_cdf)
 
-    def evaluate_crps(self, x, y_test):
+    def evaluate_crps(self, x_test, y_test):
         r"""
         Predict quantiles and compute the Continuous Ranked Probability
         Score (CRPS).
 
-        This function evaluates the networks prediction on the
-        inputs in `x` and evaluates the CRPS of the predictions
-        against the materializations in `y_test`.
+        This function evaluates the network predictions on the test inputs
+        ``x_test`` and computes the CRPS with respect to the true values
+        ``y_test``.
 
         Arguments:
 
-            x(numpy.array): Array of shape `(n, m)` containing the `n`
-                            `m`-dimensional inputs for which to evaluate
-                            the CRPS.
+            x_test(numpy.array): Array of shape `(n, m)` containing the input
+                test data.
 
-            y_test(numpy.array): Array containing the `n` materializations of
-                                 the true conditional distribution.
+            y_test(numpy.array): Array of length `n` containing the output test
+                data.
Returns: @@ -580,31 +515,7 @@ def evaluate_crps(self, x, y_test): inputs in `x`. """ - return QRNN.crps(self.predict(x), y_test, self.quantiles) - - def save(self, path): - r""" - Store the QRNN model in a file. - - This stores the model to a file using pickle for all - attributes that support pickling. The Keras model - is handled separately, since it can not be pickled. - - .. note:: In addition to the model file with the given filename, - additional files suffixed with :code:`_model_i` will be - created for each neural network this model consists of. - - Arguments: - - path(str): The path including filename indicating where to - store the model. - - """ - f = open(path, "wb") - pickle.dump(self, f) - backend = importlib.import_module(self.backend) - backend.save_model(f, self.model) - f.close() + return QRNN.crps(self.predict(x_test), y_test, self.quantiles) def classify(self, x, threshold): """ @@ -621,12 +532,11 @@ def classify(self, x, threshold): for i in range(self.quantiles.size - 1): q_l = y[:, [i]] - q_r = y[:, [i+1]] - inds = np.logical_and(q_l < threshold, - q_r >= threshold) + q_r = y[:, [i + 1]] + inds = np.logical_and(q_l < threshold, q_r >= threshold) c[inds] = self.quantiles[i] * (threshold - q_l[inds]) c[inds] += self.quantiles[i + 1] * (q_r[inds] - threshold) - c[inds] /= (q_r[inds] - q_l[inds]) + c[inds] /= q_r[inds] - q_l[inds] c[threshold > q_r] = self.quantiles[-1] return 1.0 - c @@ -636,7 +546,8 @@ def load(path): r""" Load a model from a file. - This loads a model that has been stored using the `save` method. + This loads a model that has been stored using the + :py:meth:`typhon.retrieval.qrnn.QRNN.save` method. Arguments: @@ -646,25 +557,32 @@ def load(path): The loaded QRNN object. """ - with open(path, 'rb') as f: + with open(path, "rb") as f: qrnn = pickle.load(f) backend = importlib.import_module(qrnn.backend) model = backend.load_model(f, qrnn.quantiles) qrnn.model = model return qrnn - #try: - # from typhon.retrieval.qrnn.backends.keras import KerasQRNN, QuantileLoss - # globals()["QuantileLoss"] = QuantileLoss - # model = KerasQRNN.load(path) - # print(type(model)) - # qrnn = QRNN(model.input_dim, model.quantiles, model.models) - # qrnn.model = model - # globals().pop("QuantileLoss") - # return qrnn - - #except Exception as e: - # raise e + def save(self, path): + r""" + Store the QRNN model in a file. + + This stores the model to a file using pickle for all attributes that + support pickling. The Keras model is handled separately, since it can + not be pickled. + + Arguments: + + path(str): The path including filename indicating where to + store the model. + + """ + f = open(path, "wb") + pickle.dump(self, f) + backend = importlib.import_module(self.backend) + backend.save_model(f, self.model) + f.close() def __getstate__(self): dct = copy.copy(self.__dict__) diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py index a9975c50..29a6ae93 100644 --- a/typhon/tests/retrieval/qrnn/test_qrnn.py +++ b/typhon/tests/retrieval/qrnn/test_qrnn.py @@ -1,8 +1,14 @@ +""" +Tests for typhon.retrieval.qrnn module. + +Tests the QRNN implementation for all available backends. +""" from typhon.retrieval.qrnn import QRNN, set_backend, get_backend import numpy as np import os -import tempfile import importlib +import pytest +import tempfile # # Import available backends. 
@@ -22,7 +28,6 @@ pass class TestQrnn: - def setup_method(self): dir = os.path.dirname(os.path.realpath(__file__)) path = os.path.join(dir, "test_data") @@ -32,37 +37,53 @@ def setup_method(self): self.x_train = (x_train - x_mean) / x_sigma self.y_train = np.load(os.path.join(path, "y_train.npy")) - def test_qrnn(self): + @pytest.mark.parametrize("backend", backends) + def test_qrnn(self, backend): """ Test training of QRNNs using numpy arrays as input. """ - for backend in backends: - set_backend(backend) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) - qrnn.train((self.x_train, self.y_train), - maximum_epochs = 1) - qrnn.predict(self.x_train) - - def test_qrnn_datasets(self): + set_backend(backend) + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + qrnn.train((self.x_train, self.y_train), + maximum_epochs = 1) + + qrnn.predict(self.x_train) + + x, qs = qrnn.cdf(self.x_train[:2, :]) + assert qs[0] == 0.0 + assert qs[-1] == 1.0 + + x, y = qrnn.pdf(self.x_train[:2, :]) + assert x.shape == y.shape + + mu = qrnn.posterior_mean(self.x_train[:2, :]) + assert len(mu.shape) == 1 + + r = qrnn.sample_posterior(self.x_train[:2, :], n=2) + assert r.shape == (2, 2) + + @pytest.mark.parametrize("backend", backends) + def test_qrnn_datasets(self, backend): """ Provide data as dataset object instead of numpy arrays. """ - for backend in backends: - set_backend(backend) - backend = get_backend(backend) - data = backend.BatchedDataset((self.x_train, self.y_train), - 256) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) - qrnn.train(data, - maximum_epochs = 1) - - - def save_qrnn(self): + set_backend(backend) + backend = get_backend(backend) + data = backend.BatchedDataset((self.x_train, self.y_train), + 256) + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + qrnn.train(data, + maximum_epochs = 1) + + + @pytest.mark.parametrize("backend", backends) + def test_save_qrnn(self, backend): """ Test saving and loading of QRNNs. """ + set_backend(backend) qrnn = QRNN(self.x_train.shape[1], np.linspace(0.05, 0.95, 10)) f = tempfile.NamedTemporaryFile() From c47f822bbe9bdff0e2f4c1bb9d92b449a84b474b Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Sun, 27 Sep 2020 21:50:56 +0200 Subject: [PATCH 14/19] Refactoring, documentation and testing. --- typhon/retrieval/qrnn/qrnn.py | 216 ++++++++++++++--------- typhon/tests/retrieval/qrnn/test_qrnn.py | 2 + 2 files changed, 131 insertions(+), 87 deletions(-) diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 3f2ccaf0..ad89437f 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -24,20 +24,15 @@ try: import typhon.retrieval.qrnn.models.keras as keras - backend = keras except Exception as e: try: import typhon.retrieval.qrnn.models.pytorch as pytorch - backend = pytorch except: - raise Exception( - "Couldn't import neither Keras nor Pytorch " - "one of them must be available to use the QRNN" - " module." 
-        )
-
+        raise Exception("Couldn't import neither Keras nor Pytorch "
+                        "one of them must be available to use the QRNN"
+                        " module.")
 
 def set_backend(name):
     """
@@ -52,24 +47,19 @@ def set_backend(name):
     if name == "keras":
         try:
             import typhon.retrieval.qrnn.models.keras as keras
-
             backend = keras
         except Exception as e:
-            raise Exception(
-                "The following error occurred while trying " " to import keras: ", e
-            )
+            raise Exception("The following error occurred while trying "
+                            " to import keras: ", e)
     elif name == "pytorch":
         try:
             import typhon.retrieval.qrnn.models.pytorch as pytorch
-
             backend = pytorch
         except Exception as e:
-            raise Exception(
-                "The following error occurred while trying " " to import pytorch: ", e
-            )
+            raise Exception("The following error occurred while trying "
+                            " to import pytorch: ", e)
     else:
-        raise Exception('"{}" is not a supported backend.'.format(name))
-
+        raise Exception("\"{}\" is not a supported backend.".format(name))
 
 def get_backend(name):
     """
@@ -83,27 +73,64 @@ def get_backend(name):
     if name == "keras":
         try:
             import typhon.retrieval.qrnn.models.keras as keras
-
             backend = keras
         except Exception as e:
-            raise Exception(
-                "The following error occurred while trying " " to import keras: ", e
-            )
+            raise Exception("The following error occurred while trying "
+                            " to import keras: ", e)
     elif name == "pytorch":
         try:
             import typhon.retrieval.qrnn.models.pytorch as pytorch
-
             backend = pytorch
         except Exception as e:
-            raise Exception(
-                "The following error occurred while trying " " to import pytorch: ", e
-            )
+            raise Exception("The following error occurred while trying "
+                            " to import pytorch: ", e)
     else:
-        raise Exception('"{}" is not a supported backend.'.format(name))
+        raise Exception("\"{}\" is not a supported backend.".format(name))
     return backend
 
+def fit_gaussian_to_quantiles(y_pred, taus):
+    """
+    Fits Gaussian distributions to predicted quantiles.
+
+    Fits mean and standard deviation values to quantiles by minimizing
+    the mean squared distance of the predicted quantiles and those of
+    the corresponding Gaussian distribution.
+
+    Args:
+        y_pred (``np.array``): Array of shape `(n, m)` containing the `m`
+            predicted quantiles for n different inputs.
+        taus(``np.array``): Array of shape `(m,)` containing the quantile
+            fractions corresponding to the predictions in ``y_pred``.
+
+    Returns:
+        Tuple ``(mu, sigma)`` of vectors of size `n` containing the mean and
+        standard deviations of the Gaussian distributions corresponding to
+        the predictions in ``y_pred``.
+    """
+    x = norm.ppf(taus)
+
+    # Hessian of the squared-error objective with respect to (mu, sigma).
+    d2e_00 = x.size
+    d2e_01 = x.sum(axis=-1)
+    d2e_10 = x.sum(axis=-1)
+    d2e_11 = np.sum(x ** 2, axis=-1)
+
+    d2e_det_inv = 1.0 / (d2e_00 * d2e_11 - d2e_01 * d2e_10)
+    d2e_inv_00 = d2e_det_inv * d2e_11
+    d2e_inv_01 = -d2e_det_inv * d2e_01
+    d2e_inv_10 = -d2e_det_inv * d2e_10
+    d2e_inv_11 = d2e_det_inv * d2e_00
+
+    # Gradient of the objective evaluated at (mu, sigma) = (0, 1).
+    de_0 = -np.sum(y_pred - x, axis=-1)
+    de_1 = -np.sum(x * (y_pred - x), axis=-1)
+
+    # The objective is quadratic in (mu, sigma), so a single Newton step
+    # starting from (0, 1) yields the exact minimizer.
+    mu = -(d2e_inv_00 * de_0 + d2e_inv_01 * de_1)
+    sigma = 1.0 - (d2e_inv_10 * de_0 + d2e_inv_11 * de_1)
+
+    return mu, sigma
 
-def create_model(input_dim, output_dim, arch):
+def create_model(input_dim,
+                 output_dim,
+                 arch):
     """
     Creates a fully-connected neural network from a tuple describing its
     architecture.
@@ -127,7 +154,6 @@ def create_model(input_dim, output_dim, arch): # QRNN class ################################################################################ - class QRNN: r""" Quantile Regression Neural Network (QRNN) @@ -175,15 +201,12 @@ class QRNN: model: The neural network regression model used to predict the quantiles. """ - - def __init__( - self, - input_dimensions, - quantiles=None, - model=(3, 128, "relu"), - ensemble_size=1, - **kwargs - ): + def __init__(self, + input_dimensions, + quantiles=None, + model=(3, 128, "relu"), + ensemble_size=1, + **kwargs): """ Create a QRNN model. @@ -206,43 +229,37 @@ def __init__( self.backend = backend.__name__ if type(model) == tuple: - self.model = backend.FullyConnected( - self.input_dimensions, self.quantiles, model - ) + self.model = backend.FullyConnected(self.input_dimensions, + self.quantiles, + model) if quantiles is None: - raise ValueError( - "If model is given as architecture tuple, the" - " 'quantiles' kwarg must be provided." - ) + raise ValueError("If model is given as architecture tuple, the" + " 'quantiles' kwarg must be provided.") else: if not quantiles is None: if not quantiles == model.quantiles: - raise ValueError( - "Provided quantiles do not match those of " - "the provided model." - ) + raise ValueError("Provided quantiles do not match those of " + "the provided model.") self.model = model self.quantiles = model.quantiles self.backend = model.backend - def train( - self, - training_data, - validation_data=None, - batch_size=256, - sigma_noise=None, - adversarial_training=False, - delta_at=0.01, - initial_learning_rate=1e-2, - momentum=0.0, - convergence_epochs=5, - learning_rate_decay=2.0, - learning_rate_minimum=1e-6, - maximum_epochs=200, - training_split=0.9, - gpu=False, - ): + def train(self, + training_data, + validation_data=None, + batch_size=256, + sigma_noise=None, + adversarial_training=False, + delta_at=0.01, + initial_learning_rate=1e-2, + momentum=0.0, + convergence_epochs=5, + learning_rate_decay=2.0, + learning_rate_minimum=1e-6, + maximum_epochs=200, + training_split=0.9, + gpu = False): """ Train model on given training data. @@ -282,22 +299,20 @@ def train( is the fraction of training data that is used for validation. gpu(``bool``): Whether or not to try to run the training on the GPU. """ - return self.model.train( - training_data, - validation_data, - batch_size, - sigma_noise, - adversarial_training, - delta_at, - initial_learning_rate, - momentum, - convergence_epochs, - learning_rate_decay, - learning_rate_minimum, - maximum_epochs, - training_split, - gpu, - ) + return self.model.train(training_data, + validation_data, + batch_size, + sigma_noise, + adversarial_training, + delta_at, + initial_learning_rate, + momentum, + convergence_epochs, + learning_rate_decay, + learning_rate_minimum, + maximum_epochs, + training_split, + gpu) def predict(self, x): r""" @@ -431,6 +446,31 @@ def sample_posterior(self, x, n=1): result[i, :] = y return result + def sample_posterior_gaussian_fit(self, x, n=1): + r""" + Generates :code:`n` samples from the estimated posterior + distribution for the input vector :code:`x`. The sampling + is performed by the inverse CDF method using the estimated + CDF obtained from the :code:`cdf` member function. + + Arguments: + + x(np.array): Array of shape `(n, m)` containing `n` inputs for which + to predict the conditional quantiles. + + n(int): The number of samples to generate. 
+
+        Returns:
+
+            Array of shape `(x.shape[0], n)` containing, for each input in
+            ``x``, `n` samples drawn from the fitted Gaussian posterior.
+        """
+        result = np.zeros((x.shape[0], n))
+        y_pred = self.predict(x)
+        mu, sigma = fit_gaussian_to_quantiles(y_pred, self.quantiles)
+        x = np.random.normal(size=(y_pred.shape[0], n))
+        return mu + sigma * x
+
     def posterior_mean(self, x):
         r"""
         Computes the posterior mean by computing the first moment of the
@@ -493,7 +533,7 @@ def crps(y_pred, y_test, quantiles):
         qs[0, 0] = 0.0
         qs[0, -1] = 1.0
 
-        return np.trapz((qs - ind) ** 2.0, y_cdf)
+        return np.trapz((qs - ind)**2.0, y_cdf)
 
     def evaluate_crps(self, x_test, y_test):
         r"""
@@ -532,11 +572,12 @@ def classify(self, x, threshold):
 
         for i in range(self.quantiles.size - 1):
             q_l = y[:, [i]]
-            q_r = y[:, [i + 1]]
-            inds = np.logical_and(q_l < threshold, q_r >= threshold)
+            q_r = y[:, [i+1]]
+            inds = np.logical_and(q_l < threshold,
+                                  q_r >= threshold)
             c[inds] = self.quantiles[i] * (threshold - q_l[inds])
             c[inds] += self.quantiles[i + 1] * (q_r[inds] - threshold)
-            c[inds] /= q_r[inds] - q_l[inds]
+            c[inds] /= (q_r[inds] - q_l[inds])
         c[threshold > q_r] = self.quantiles[-1]
         return 1.0 - c
 
@@ -557,7 +598,7 @@ def load(path):
 
             The loaded QRNN object.
         """
-        with open(path, "rb") as f:
+        with open(path, 'rb') as f:
             qrnn = pickle.load(f)
         backend = importlib.import_module(qrnn.backend)
         model = backend.load_model(f, qrnn.quantiles)
@@ -584,6 +625,7 @@ def save(self, path):
         backend.save_model(f, self.model)
         f.close()
 
+
     def __getstate__(self):
         dct = copy.copy(self.__dict__)
         dct.pop("model")
diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py
index 29a6ae93..95d3023e 100644
--- a/typhon/tests/retrieval/qrnn/test_qrnn.py
+++ b/typhon/tests/retrieval/qrnn/test_qrnn.py
@@ -63,6 +63,8 @@ def test_qrnn(self, backend):
         r = qrnn.sample_posterior(self.x_train[:2, :], n=2)
         assert r.shape == (2, 2)
 
+        r = qrnn.sample_posterior_gaussian_fit(self.x_train[:2, :], n=2)
+
     @pytest.mark.parametrize("backend", backends)
     def test_qrnn_datasets(self, backend):
         """

From 8e95a45c940f28086b7a04dcffe95a0c0f45fea4 Mon Sep 17 00:00:00 2001
From: Simon Pfreundschuh
Date: Sun, 27 Sep 2020 22:44:13 +0200
Subject: [PATCH 15/19] Added method to sample from fitted Gaussian posterior.
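
This changes the broadcasting in sample_posterior_gaussian_fit so that each
row of the output is scaled by its own fitted moments. A minimal sketch of
the sampling idea behind the fix (assuming a trained `qrnn` object and an
input array `x`; `fit_gaussian_to_quantiles` is the module-level helper
introduced in the previous patch):

    import numpy as np

    y_pred = qrnn.predict(x)  # predicted quantiles, shape (n_inputs, n_quantiles)
    mu, sigma = fit_gaussian_to_quantiles(y_pred, qrnn.quantiles)
    # Reshape to column vectors so that each input's samples are scaled by
    # its own fitted mean and standard deviation.
    samples = mu.reshape(-1, 1) + sigma.reshape(-1, 1) * np.random.normal(
        size=(y_pred.shape[0], 10)
    )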
--- typhon/retrieval/qrnn/qrnn.py | 2 +- typhon/tests/retrieval/qrnn/test_qrnn.py | 35 +++++++++++------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index ad89437f..72307cae 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -469,7 +469,7 @@ def sample_posterior_gaussian_fit(self, x, n=1): y_pred = self.predict(x) mu, sigma = fit_gaussian_to_quantiles(y_pred, self.quantiles) x = np.random.normal(size=(y_pred.shape[0], n)) - return mu + sigma * x + return mu.reshape(-1, 1) + sigma.reshape(-1, 1) * x def posterior_mean(self, x): r""" diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py index 95d3023e..53b7ea03 100644 --- a/typhon/tests/retrieval/qrnn/test_qrnn.py +++ b/typhon/tests/retrieval/qrnn/test_qrnn.py @@ -17,23 +17,26 @@ backends = [] try: import typhon.retrieval.qrnn.models.keras + backends += ["keras"] except: pass try: import typhon.retrieval.qrnn.models.pytorch + backends += ["pytorch"] except: pass + class TestQrnn: def setup_method(self): dir = os.path.dirname(os.path.realpath(__file__)) path = os.path.join(dir, "test_data") x_train = np.load(os.path.join(path, "x_train.npy")) - x_mean = np.mean(x_train, keepdims = True) - x_sigma = np.std(x_train, keepdims = True) + x_mean = np.mean(x_train, keepdims=True) + x_sigma = np.std(x_train, keepdims=True) self.x_train = (x_train - x_mean) / x_sigma self.y_train = np.load(os.path.join(path, "y_train.npy")) @@ -43,10 +46,8 @@ def test_qrnn(self, backend): Test training of QRNNs using numpy arrays as input. """ set_backend(backend) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) - qrnn.train((self.x_train, self.y_train), - maximum_epochs = 1) + qrnn = QRNN(self.x_train.shape[1], np.linspace(0.05, 0.95, 10)) + qrnn.train((self.x_train, self.y_train), maximum_epochs=1) qrnn.predict(self.x_train) @@ -60,10 +61,11 @@ def test_qrnn(self, backend): mu = qrnn.posterior_mean(self.x_train[:2, :]) assert len(mu.shape) == 1 - r = qrnn.sample_posterior(self.x_train[:2, :], n=2) - assert r.shape == (2, 2) + r = qrnn.sample_posterior(self.x_train[:4, :], n=2) + assert r.shape == (4, 2) - r = qrnn.sample_posterior_gaussian_fit(self.x_train[:2, :], n=2) + r = qrnn.sample_posterior_gaussian_fit(self.x_train[:4, :], n=2) + assert r.shape == (4, 2) @pytest.mark.parametrize("backend", backends) def test_qrnn_datasets(self, backend): @@ -72,13 +74,9 @@ def test_qrnn_datasets(self, backend): """ set_backend(backend) backend = get_backend(backend) - data = backend.BatchedDataset((self.x_train, self.y_train), - 256) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) - qrnn.train(data, - maximum_epochs = 1) - + data = backend.BatchedDataset((self.x_train, self.y_train), 256) + qrnn = QRNN(self.x_train.shape[1], np.linspace(0.05, 0.95, 10)) + qrnn.train(data, maximum_epochs=1) @pytest.mark.parametrize("backend", backends) def test_save_qrnn(self, backend): @@ -86,8 +84,7 @@ def test_save_qrnn(self, backend): Test saving and loading of QRNNs. 
""" set_backend(backend) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) + qrnn = QRNN(self.x_train.shape[1], np.linspace(0.05, 0.95, 10)) f = tempfile.NamedTemporaryFile() qrnn.save(f.name) qrnn_loaded = QRNN.load(f.name) @@ -98,4 +95,4 @@ def test_save_qrnn(self, backend): if not type(x_pred) == np.ndarray: x_pred = x_pred.detach() - assert(np.allclose(x_pred, x_pred_loaded)) + assert np.allclose(x_pred, x_pred_loaded) From 0f471bf39175d545dbf27abaa3d91f6e3d25b949 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 28 Sep 2020 07:57:35 +0200 Subject: [PATCH 16/19] Refactoring of model submodule. --- typhon/retrieval/qrnn/models/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 typhon/retrieval/qrnn/models/__init__.py diff --git a/typhon/retrieval/qrnn/models/__init__.py b/typhon/retrieval/qrnn/models/__init__.py new file mode 100644 index 00000000..e69de29b From 98d23095e1bc85c697974a45f61dddf5d12fe5cb Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 28 Sep 2020 07:58:47 +0200 Subject: [PATCH 17/19] Refactored models submodule. --- typhon/retrieval/qrnn/models/keras.py | 16 ++--- .../qrnn/models/pytorch/fully_connected.py | 14 ++-- typhon/retrieval/qrnn/models/pytorch/unet.py | 68 ++++++++++++++----- 3 files changed, 69 insertions(+), 29 deletions(-) diff --git a/typhon/retrieval/qrnn/models/keras.py b/typhon/retrieval/qrnn/models/keras.py index f1b31970..e049809a 100644 --- a/typhon/retrieval/qrnn/models/keras.py +++ b/typhon/retrieval/qrnn/models/keras.py @@ -5,13 +5,13 @@ This module provides Keras neural network models that can be used as backend models with the :py:class:`typhon.retrieval.qrnn.QRNN` class. """ +import logging import numpy as np import keras from keras.models import Sequential from keras.layers import Dense, Activation, deserialize from keras.optimizers import SGD import keras.backend as K -import logging def save_model(f, model): @@ -60,7 +60,7 @@ def make_fully_connected(layers=None, **kwargs): # Quantile loss ################################################################################ -logger = logging.getLogger(__name__) +LOGGER = logging.getLogger(__name__) def skewed_absolute_error(y_true, y_pred, tau): @@ -145,7 +145,7 @@ def __init__(self, training_data, batch_size): self.i = 0 def __iter__(self): - logger.info("iter...") + LOGGER.info("iter...") return self def __len__(self): @@ -188,7 +188,7 @@ def __init__(self, training_data, sigma_noise=None): self.sigma_noise = sigma_noise def __iter__(self): - logger.info("iter...") + LOGGER.info("iter...") return self def __len__(self): @@ -229,7 +229,7 @@ def __init__(self, training_data, input_gradients, eps): self.eps = eps def __iter__(self): - logger.info("iter...") + LOGGER.info("iter...") return self def __len__(self): @@ -327,7 +327,7 @@ def on_epoch_end(self, epoch, logs={}): lr = keras.backend.get_value(self.model.optimizer.lr) keras.backend.set_value(self.model.optimizer.lr, lr / self.lr_decay) self.steps = 0 - logger.info("\n Reduced learning rate to " + str(lr)) + LOGGER.info("\n Reduced learning rate to " + str(lr)) if lr < self.lr_minimum: self.model.stop_training = True @@ -487,9 +487,9 @@ def __init__(self, input_dimension, quantiles, arch, layers=None): else: d, w, a = arch layers = [Dense(w, input_shape=(input_dimension,))] - for i in range(d - 1): + for _ in range(d - 1): layers.append(Dense(w, input_shape=(w,))) - if not a is None: + if a is not None: layers.append(Activation(a)) layers.append(Dense(output_dimension, 
input_shape=(w,))) diff --git a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py index 47412ba9..f84983ba 100644 --- a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py +++ b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py @@ -1,5 +1,11 @@ +""" +typhon.retrieval.qrnn.models.pytorch.fully_connected +==================================================== + +This module provides an implementation of a fully-connected feed forward +neural network in pytorch. +""" from torch import nn -from torch import optim from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel, activations ################################################################################ @@ -34,12 +40,12 @@ def __init__(self, input_dimension, quantiles, arch): layers = [nn.Linear(input_dimension, output_dimension)] else: d, w, act = arch - if type(act) == str: + if isinstance(act, str): act = activations[act] layers = [nn.Linear(input_dimension, w)] - for i in range(d - 1): + for _ in range(d - 1): layers.append(nn.Linear(w, w)) - if not act is None: + if act is not None: layers.append(act()) layers.append(nn.Linear(w, output_dimension)) nn.Sequential.__init__(self, *layers) diff --git a/typhon/retrieval/qrnn/models/pytorch/unet.py b/typhon/retrieval/qrnn/models/pytorch/unet.py index 0a9d9403..85b2d8b6 100644 --- a/typhon/retrieval/qrnn/models/pytorch/unet.py +++ b/typhon/retrieval/qrnn/models/pytorch/unet.py @@ -1,3 +1,14 @@ +""" +typhon.retrieval.qrnn.models.pytorch.unet +========================================= + +This module provides an implementation of the UNet [unet]_ +architecture. + +.. [unet] O. Ronneberger, P. Fischer and T. Brox, "U-net: Convolutional networks +for biomedical image segmentation", Proc. Int. Conf. Med. Image Comput. +Comput.-Assist. Intervent. (MICCAI), pp. 234-241, 2015. +""" import torch from torch import nn @@ -26,7 +37,6 @@ def __init__( self, features_in, features_out, - batch_norm=True, kernel_size=3, activation=nn.ReLU, skip_connection=False, @@ -35,7 +45,7 @@ def __init__( self._features_out = features_out self.skip_connection = skip_connection - if not activation is None: + if activation is not None: modules = [ nn.ConstantPad2d(1, 0.0), nn.Conv2d(features_in, features_out, kernel_size), @@ -52,12 +62,15 @@ def __init__( @property def features_out(self): + """ + The number outgoing channels of the layer. + """ if self.skip_connection: return self._features_in + self._features_out - else: - return self._features_out + return self._features_out def forward(self, x): + """ Forward input through layer. """ y = nn.Sequential.forward(self, x) if self.skip_connection: y = torch.cat([x, y], dim=1) @@ -67,16 +80,6 @@ def forward(self, x): class Block(nn.Sequential): """ A block bundles a set of layers. - - Args: - features_in(:code:`int`): The number of input features of the block - features_out(:code:`int`): The number of output features of the block. - depth(:code:`int`): The number of layers of the block - activation(:code:`nn.Module`): Pytorch activation layer to - use. :code:`nn.ReLU` by default. - skip_connection(:code:`str`): Whether or not to insert skip - connections before all layers (:code:`"all"`) or just at - the end (:code:`"end"`). """ def __init__( @@ -89,7 +92,17 @@ def __init__( kernel_size=3, skip_connection=None, ): - + """ + Args: + features_in(:code:`int`): The number of input features of the block + features_out(:code:`int`): The number of output features of the block. 
+ depth(:code:`int`): The number of layers of the block + activation(:code:`nn.Module`): Pytorch activation layer to + use. :code:`nn.ReLU` by default. + skip_connection(:code:`str`): Whether or not to insert skip + connections before all layers (:code:`"all"`) or just at + the end (:code:`"end"`). + """ self._features_in = features_in if skip_connection == "all": @@ -122,12 +135,16 @@ def __init__( @property def features_out(self): + """ + The number outgoing channels of the layer. + """ if self.skip_connection: return self._features_in + self._features_out else: return self._features_out def forward(self, x): + """ Forward input through layer. """ y = nn.Sequential.forward(self, x) if self.skip_connection: y = torch.cat([x, y], dim=1) @@ -135,12 +152,18 @@ def forward(self, x): class DownSampler(nn.Sequential): + """ + A downsampling block reduces the input resolution by applying max-pooling. + """ def __init__(self): modules = [nn.MaxPool2d(2)] super().__init__(*modules) class UpSampler(nn.Sequential): + """ + An upsampling block increases the input resolution by transposed convolution. + """ def __init__(self, features_in, features_out): modules = [ nn.ConvTranspose2d( @@ -151,10 +174,21 @@ def __init__(self, features_in, features_out): class UNet(PytorchModel, nn.Module): + """ + Pytorch implementation of the UNet architecture for image segmentation. + """ def __init__( self, input_features, quantiles, n_features=32, n_levels=4, skip_connection=None ): - + """ + Args: + input_features(``int``): The number of channels of the input image. + quantiles(``np.array``): Array containing the quantiles to predict. + n_features: The number of channels of the first convolution block. + n_level: The number of down-sampling steps. + skip_connection: Whether or not to include skip connections in + each block. + """ nn.Module.__init__(self) PytorchModel.__init__(self, input_features, quantiles) @@ -197,7 +231,7 @@ def __init__( ) def forward(self, x): - + """ Propagate input through layer. """ features = [] for (b, s) in zip(self.down_blocks, self.down_samplers): x = b(x) From 67e682daafcbe7b61c4bc797ce811ec4b7aabfed Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 28 Sep 2020 08:27:56 +0200 Subject: [PATCH 18/19] Reverted setup.py. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 525618ed..ff29fdb2 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,7 @@ "numexpr", "numpy>=1.13", "pandas", - "scikit-image>=0.15", + "scikit-image", "scikit-learn", "scipy>=0.15.1", "setuptools>=0.7.2", From 5404c468e77c4fc01b74d7faa05c75c046f8071c Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Wed, 30 Sep 2020 11:19:33 +0200 Subject: [PATCH 19/19] Fixed typos. --- doc/typhon.retrieval.qrnn.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/typhon.retrieval.qrnn.rst b/doc/typhon.retrieval.qrnn.rst index 1fa24dcd..917f4305 100644 --- a/doc/typhon.retrieval.qrnn.rst +++ b/doc/typhon.retrieval.qrnn.rst @@ -30,8 +30,8 @@ Backends -------- Currently both `keras `_ and `pytorch `_ -are supported as backends for neural network. The QRNN implementation will -automatically use the one the is available on your system. If both are available +are supported as backends for neural networks. The QRNN implementation will +automatically use the one that is available on your system. If both are available you can choose a specific backend using the :py:meth:`~typhon.retrieval.qrnn.set_backend` function. 
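+
+For example, to select a backend explicitly before constructing a model (a
+minimal sketch that assumes the pytorch backend is installed)::
+
+    import numpy as np
+    from typhon.retrieval.qrnn import QRNN, set_backend
+
+    set_backend("pytorch")
+    qrnn = QRNN(input_dimensions=16, quantiles=np.linspace(0.05, 0.95, 10))
+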
Neural network models @@ -64,8 +64,8 @@ API documentation FullyConnected UNet -.. automodule:: typhon.retrieval.qrnn.models.pytorch -.. currentmodule:: typhon.retrieval.qrnn.models.pytorch +.. automodule:: typhon.retrieval.qrnn.models.keras +.. currentmodule:: typhon.retrieval.qrnn.models.keras .. autosummary:: :toctree: generated
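+
+As a quick usage sketch (``x_train``, ``y_train`` and ``x_test`` are assumed
+to be numpy arrays), a QRNN can be trained and its probabilistic output
+queried as follows::
+
+    import numpy as np
+    from typhon.retrieval.qrnn import QRNN
+
+    qrnn = QRNN(x_train.shape[1], quantiles=np.linspace(0.05, 0.95, 10))
+    qrnn.train((x_train, y_train), maximum_epochs=10)
+
+    y_quantiles = qrnn.predict(x_test)           # predicted quantiles
+    x_cdf, y_cdf = qrnn.cdf(x_test[:1])          # piecewise-linear CDF
+    samples = qrnn.sample_posterior(x_test[:1], n=100)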