From 35611fe31419ea31530cc6c6e33ae0bc549e9373 Mon Sep 17 00:00:00 2001
From: Simon Pfreundschuh
Date: Wed, 4 Mar 2020 10:05:08 +0100
Subject: [PATCH 01/19] Reworking qrnn.

---
 typhon/retrieval/__init__.py               |   2 +-
 typhon/retrieval/qrnn/backends/__init__.py |   0
 typhon/retrieval/qrnn/backends/keras.py    | 801 +++++++++++++++++++++
 typhon/retrieval/qrnn/backends/pytorch.py  | 345 +++++++++
 typhon/retrieval/qrnn/qrnn.py              | 143 +---
 5 files changed, 1178 insertions(+), 113 deletions(-)
 create mode 100644 typhon/retrieval/qrnn/backends/__init__.py
 create mode 100644 typhon/retrieval/qrnn/backends/keras.py
 create mode 100644 typhon/retrieval/qrnn/backends/pytorch.py

diff --git a/typhon/retrieval/__init__.py b/typhon/retrieval/__init__.py
index 09fdccb5..ff43f188 100644
--- a/typhon/retrieval/__init__.py
+++ b/typhon/retrieval/__init__.py
@@ -5,4 +5,4 @@
 from .common import *  # noqa
 from .oem import *  # noqa
-from .spareice import *  # noqa
+#from .spareice import *  # noqa
diff --git a/typhon/retrieval/qrnn/backends/__init__.py b/typhon/retrieval/qrnn/backends/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/typhon/retrieval/qrnn/backends/keras.py b/typhon/retrieval/qrnn/backends/keras.py
new file mode 100644
index 00000000..df5fec2f
--- /dev/null
+++ b/typhon/retrieval/qrnn/backends/keras.py
@@ -0,0 +1,801 @@
+"""
+Keras backend for QRNNs
+=======================
+
+This module implements the Keras backend for QRNNs.
+"""
+import copy
+import logging
+import os
+import pickle
+
+import numpy as np
+from scipy.interpolate import CubicSpline
+
+# Keras Imports
+try:
+    import keras
+    from keras.models import Sequential, clone_model
+    from keras.layers import Dense, Activation, Dropout
+    from keras.optimizers import SGD
+except ImportError:
+    raise ImportError(
+        "Could not import the required Keras modules. The QRNN "
+        "implementation was developed for use with Keras version 2.0.9.")
+
+################################################################################
+# Loss Functions
+################################################################################
+
+import keras.backend as K
+
+logger = logging.getLogger(__name__)
+
+def skewed_absolute_error(y_true, y_pred, tau):
+    """
+    The quantile loss function for a given quantile tau:
+
+    L(y_true, y_pred) = (tau - I(y_pred < y_true)) * (y_pred - y_true)
+
+    Where I is the indicator function.
+    """
+    dy = y_pred - y_true
+    return K.mean((1.0 - tau) * K.relu(dy) + tau * K.relu(-dy), axis=-1)
+
+
+def quantile_loss(y_true, y_pred, taus):
+    """
+    The quantile loss for a list of quantiles. Sums up the error contributions
+    from each of the quantile loss functions.
+    """
+    e = skewed_absolute_error(
+        K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0])
+    for i, tau in enumerate(taus[1:]):
+        e += skewed_absolute_error(K.flatten(y_true),
+                                   K.flatten(y_pred[:, i + 1]),
+                                   tau)
+    return e
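As a sanity check, the pinball loss implemented above can be reproduced in plain NumPy, independently of Keras. The following sketch (an editorial example, not part of the patch) verifies that for tau = 0.5 the loss reduces to half the mean absolute error:

    import numpy as np

    def pinball_loss(y_true, y_pred, tau):
        """Quantile loss for a single quantile tau in [0, 1]."""
        dy = y_pred - y_true
        # (1 - tau) penalizes over-predictions, tau penalizes under-predictions.
        return np.mean((1.0 - tau) * np.maximum(dy, 0.0)
                       + tau * np.maximum(-dy, 0.0))

    y_true = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.5, 1.5, 3.5])
    assert np.isclose(pinball_loss(y_true, y_pred, 0.5),
                      0.5 * np.mean(np.abs(y_pred - y_true)))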
+
+
+class QuantileLoss:
+    """
+    Wrapper class for the quantile error loss function. A class is used here
+    to allow the implementation of a custom `__repr__` function, so that the
+    loss function object can be easily loaded using `keras.models.load_model`.
+
+    Attributes:
+
+        quantiles: List of quantiles that should be estimated with
+                   this loss function.
+
+    """
+
+    def __init__(self, quantiles):
+        self.__name__ = "QuantileLoss"
+        self.quantiles = quantiles
+
+    def __call__(self, y_true, y_pred):
+        return quantile_loss(y_true, y_pred, self.quantiles)
+
+    def __repr__(self):
+        return "QuantileLoss(" + repr(self.quantiles) + ")"
+
+################################################################################
+# Keras Interface Classes
+################################################################################
+
+class TrainingGenerator:
+    """
+    This Keras sample generator takes the noise-free training data
+    and adds independent Gaussian noise to each of the components
+    of the input.
+
+    Attributes:
+
+        x_train: The training input, i.e. the brightness temperatures
+                 measured by the satellite.
+        y_train: The training output, i.e. the value of the retrieval
+                 quantity.
+        x_mean: A vector containing the mean of each input component.
+        x_sigma: A vector containing the standard deviation of each
+                 component.
+        batch_size: The size of a training batch.
+    """
+
+    def __init__(self, x_train, x_mean, x_sigma, y_train, sigma_noise, batch_size):
+        self.bs = batch_size
+
+        self.x_train = x_train
+        self.x_mean = x_mean
+        self.x_sigma = x_sigma
+        self.y_train = y_train
+        self.sigma_noise = sigma_noise
+
+        self.indices = np.random.permutation(x_train.shape[0])
+        self.i = 0
+
+    def __iter__(self):
+        logger.info("iter...")
+        return self
+
+    def __next__(self):
+        inds = self.indices[np.arange(self.i * self.bs,
+                                      (self.i + 1) * self.bs)
+                            % self.indices.size]
+        x_batch = np.copy(self.x_train[inds, :])
+        if self.sigma_noise is not None:
+            x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise
+        x_batch = (x_batch - self.x_mean) / self.x_sigma
+        y_batch = self.y_train[inds]
+
+        self.i = self.i + 1
+
+        # Shuffle training set after each epoch.
+        if self.i % (self.x_train.shape[0] // self.bs) == 0:
+            self.indices = np.random.permutation(self.x_train.shape[0])
+
+        return (x_batch, y_batch)
+
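A sketch of drawing a single batch from this generator (an editorial example, not part of the patch; shapes and noise level are arbitrary):

    import numpy as np

    x = np.random.rand(1000, 5)          # hypothetical training inputs
    y = np.random.rand(1000, 1)          # hypothetical training targets
    gen = TrainingGenerator(x,
                            np.mean(x, axis=0, keepdims=True),
                            np.std(x, axis=0, keepdims=True),
                            y, sigma_noise=0.1, batch_size=128)
    x_batch, y_batch = next(gen)         # noisy, normalized batch of 128 samples
    assert x_batch.shape == (128, 5)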
+class AdversarialTrainingGenerator:
+    """
+    This Keras sample generator takes the noise-free training data
+    and adds independent Gaussian noise to each of the components
+    of the input. In addition, after a short warm-up phase, half of
+    each batch is replaced by adversarial examples computed with the
+    fast gradient sign method.
+
+    Attributes:
+
+        x_train: The training input, i.e. the brightness temperatures
+                 measured by the satellite.
+        y_train: The training output, i.e. the value of the retrieval
+                 quantity.
+        x_mean: A vector containing the mean of each input component.
+        x_sigma: A vector containing the standard deviation of each
+                 component.
+        batch_size: The size of a training batch.
+        input_gradients: Keras function computing the gradients of the
+                 loss with respect to the network input.
+        eps: The scaling factor for the adversarial perturbation.
+    """
+
+    def __init__(self,
+                 x_train,
+                 x_mean,
+                 x_sigma,
+                 y_train,
+                 sigma_noise,
+                 batch_size,
+                 input_gradients,
+                 eps):
+        self.bs = batch_size
+
+        self.x_train = x_train
+        self.x_mean = x_mean
+        self.x_sigma = x_sigma
+        self.y_train = y_train
+        self.sigma_noise = sigma_noise
+
+        self.indices = np.random.permutation(x_train.shape[0])
+        self.i = 0
+
+        self.input_gradients = input_gradients
+        self.eps = eps
+
+    def __iter__(self):
+        logger.info("iter...")
+        return self
+
+    def __next__(self):
+
+        if self.i == 0:
+            inds = np.random.randint(0, self.x_train.shape[0], self.bs)
+
+            x_batch = np.copy(self.x_train[inds, :])
+            if self.sigma_noise is not None:
+                x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise
+
+            x_batch = (x_batch - self.x_mean) / self.x_sigma
+            y_batch = self.y_train[inds]
+
+        else:
+
+            bs2 = self.bs // 2
+            inds = np.random.randint(0, self.x_train.shape[0], bs2)
+
+            x_batch = np.zeros((self.bs, self.x_train.shape[1]))
+            y_batch = np.zeros((self.bs, 1))
+
+            x_batch[:bs2, :] = np.copy(self.x_train[inds, :])
+            if self.sigma_noise is not None:
+                x_batch[:bs2, :] += np.random.randn(bs2, self.x_train.shape[1]) \
+                                    * self.sigma_noise
+            x_batch[:bs2, :] = (x_batch[:bs2, :] - self.x_mean) / self.x_sigma
+            y_batch[:bs2, :] = self.y_train[inds].reshape(-1, 1)
+            x_batch[bs2:, :] = x_batch[:bs2, :]
+            y_batch[bs2:, :] = y_batch[:bs2, :]
+
+            # Perturb the second half of the batch along the gradient of the
+            # loss once the network has seen a few batches.
+            if self.i > 10:
+                grads = self.input_gradients(
+                    [x_batch[:bs2, :], y_batch[:bs2, :], [1.0]])[0]
+                x_batch[bs2:, :] += self.eps * np.sign(grads)
+
+        self.i = self.i + 1
+        return (x_batch, y_batch)
+
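The adversarial half-batch above is a fast gradient sign method (FGSM) perturbation. Stripped of the batching logic, the update is simply the following (an editorial sketch, not part of the patch; `grad_loss_wrt_x` stands in for the gradient function compiled in `fit`):

    import numpy as np

    def fgsm_perturb(x, grad_loss_wrt_x, eps):
        # Move each input a distance eps in the direction that locally
        # increases the loss the most.
        return x + eps * np.sign(grad_loss_wrt_x(x))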
+
+# TODO: Make y-noise argument optional
+class ValidationGenerator:
+    """
+    This Keras sample generator is similar to the training generator,
+    except that it returns the whole validation set and doesn't perform
+    any randomization.
+
+    Attributes:
+
+        x_val: The validation input, i.e. the brightness temperatures
+               measured by the satellite.
+        y_val: The validation output, i.e. the value of the retrieval
+               quantity.
+        x_mean: A vector containing the mean of each input component.
+        x_sigma: A vector containing the standard deviation of each
+                 component.
+    """
+
+    def __init__(self, x_val, x_mean, x_sigma, y_val, sigma_noise):
+        self.x_val = x_val
+        self.x_mean = x_mean
+        self.x_sigma = x_sigma
+
+        self.y_val = y_val
+
+        self.sigma_noise = sigma_noise
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        x_val = np.copy(self.x_val)
+        if self.sigma_noise is not None:
+            x_val += np.random.randn(*self.x_val.shape) * self.sigma_noise
+        x_val = (x_val - self.x_mean) / self.x_sigma
+        return (x_val, self.y_val)
+
+
+class LRDecay(keras.callbacks.Callback):
+    """
+    The LRDecay class implements the Keras callback interface and reduces
+    the learning rate when the validation loss has stopped improving.
+
+    Attributes:
+
+        lr_decay: The factor c > 1.0 by which the learning rate is
+                  reduced.
+        lr_minimum: The training is stopped when this learning rate
+                    is reached.
+        convergence_steps: The number of epochs without validation loss
+                           reduction required to reduce the learning rate.
+
+    """
+
+    def __init__(self, model, lr_decay, lr_minimum, convergence_steps):
+        self.model = model
+        self.lr_decay = lr_decay
+        self.lr_minimum = lr_minimum
+        self.convergence_steps = convergence_steps
+        self.steps = 0
+
+    def on_train_begin(self, logs={}):
+        self.losses = []
+        self.steps = 0
+        self.min_loss = 1e30
+
+    def on_epoch_end(self, epoch, logs={}):
+        self.losses += [logs.get('val_loss')]
+        if not self.losses[-1] < self.min_loss:
+            self.steps = self.steps + 1
+        else:
+            self.steps = 0
+        if self.steps > self.convergence_steps:
+            lr = keras.backend.get_value(self.model.optimizer.lr)
+            lr_new = lr / self.lr_decay
+            keras.backend.set_value(self.model.optimizer.lr, lr_new)
+            self.steps = 0
+            logger.info("\n Reduced learning rate to " + str(lr_new))
+
+            if lr_new < self.lr_minimum:
+                self.model.stop_training = True
+
+        self.min_loss = min(self.min_loss, self.losses[-1])
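For reference, recent Keras versions ship a built-in callback with essentially this behavior. A roughly equivalent configuration (an editorial sketch, not part of the patch; note that ReduceLROnPlateau treats min_lr as a floor rather than a stopping criterion) would be:

    from keras.callbacks import ReduceLROnPlateau

    lr_callback = ReduceLROnPlateau(monitor="val_loss",
                                    factor=0.5,    # corresponds to lr_decay = 2.0
                                    patience=10,   # corresponds to convergence_steps
                                    min_lr=1e-6)   # corresponds to lr_minimum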
+
+################################################################################
+# QRNN
+################################################################################
+
+class KerasQRNN:
+    r"""
+    Quantile Regression Neural Network (QRNN)
+
+    This class implements quantile regression neural networks and can be used
+    to estimate quantiles of the posterior distribution of remote sensing
+    retrievals.
+
+    Internally, the QRNN uses a feed-forward neural network that is trained
+    to minimize the quantile loss function
+
+    .. math::
+            \mathcal{L}_\tau(y_\tau, y_\text{true}) =
+            \begin{cases} \tau |y_\tau - y_\text{true}| & \text{ if } y_\tau < y_\text{true} \\
+            (1 - \tau) |y_\tau - y_\text{true}| & \text{ otherwise, }\end{cases}
+
+    where :math:`y_\text{true}` is the true value of the retrieval quantity
+    and :math:`y_\tau` is the predicted quantile. The neural network
+    has one output neuron for each quantile to estimate.
+
+    For the training, this implementation provides custom data generators that
+    can be used to add Gaussian noise to the training data as well as adversarial
+    training using the fast gradient sign method.
+
+    This implementation also provides functionality to use an ensemble of networks
+    instead of just a single network to predict the quantiles.
+
+    .. note:: For the QRNN I am using :math:`x` to denote the input vector and
+              :math:`y` to denote the output. While this is opposed to typical
+              inverse problem notation, it is in line with machine learning
+              notation and felt more natural for the implementation. If this
+              annoys you, I am sorry. But the other way round it would have
+              annoyed other people and in particular me.
+
+    Attributes:
+
+        input_dim (int):
+            The input dimension of the neural network, i.e. the dimension of the
+            measurement vector.
+
+        quantiles (numpy.array):
+            The 1D-array containing the quantiles :math:`\tau \in [0, 1]` that the
+            network learns to predict.
+
+        depth (int):
+            The number of layers in the network excluding the input layer.
+
+        width (int):
+            The width of the hidden layers in the network.
+
+        activation (str):
+            The name of the activation functions to use in the hidden layers
+            of the network.
+
+        models (list of keras.models.Sequential):
+            The ensemble of Keras neural networks used for the quantile regression
+            neural network.
+    """
+
+    def __init__(self,
+                 input_dim,
+                 quantiles,
+                 model=(3, 128, "relu"),
+                 ensemble_size=1,
+                 **kwargs):
+        """
+        Create a QRNN model.
+
+        Arguments:
+
+            input_dim(int): The dimension of the measurement space, i.e. the
+                            number of elements in a single measurement vector y.
+
+            quantiles(np.array): 1D-array containing the quantiles to estimate of
+                                 the posterior distribution. Given as fractions
+                                 within the range [0, 1].
+
+            model(tuple or keras.models.Model): A tuple
+                        `(depth, width, activation)` describing the architecture
+                        of the fully-connected network to create, or a Keras
+                        model to use directly. Here depth is the number of hidden
+                        layers (default 3), width the number of neurons in each
+                        hidden layer (default 128) and activation the name of the
+                        activation function to use in the hidden layers (default
+                        "relu", for rectified linear unit; see the Keras
+                        documentation for available functions).
+
+            ensemble_size(int): The number of networks in the ensemble.
+
+            **kwargs: Additional keyword arguments are passed to the constructor
+                      calls of the `keras.layers.Dense` hidden layers, which can
+                      for example be used to add regularization. For more info
+                      consult the Keras documentation.
+        """
+        self.input_dim = input_dim
+        self.quantiles = np.array(quantiles)
+
+        if type(model) is tuple:
+            depth = model[0]
+            width = model[1]
+            activation = model[2]
+
+            model = Sequential()
+            if depth == 0:
+                model.add(Dense(input_dim=input_dim,
+                                units=len(quantiles),
+                                activation=None))
+            else:
+                model.add(Dense(input_dim=input_dim,
+                                units=width,
+                                activation=activation))
+                for i in range(depth - 2):
+                    model.add(Dense(units=width,
+                                    activation=activation,
+                                    **kwargs))
+                model.add(Dense(units=len(quantiles), activation=None))
+        elif type(model) is Model:
+            pass
+        else:
+            raise Exception("The provided model is neither a suitable "
+                            "architecture tuple nor a keras model.")
+
+        self.models = [clone_model(model) for i in range(ensemble_size)]
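A minimal construction sketch (an editorial example, not part of the patch; quantile values are arbitrary):

    import numpy as np

    quantiles = np.array([0.05, 0.25, 0.5, 0.75, 0.95])
    qrnn = KerasQRNN(input_dim=5, quantiles=quantiles,
                     model=(3, 128, "relu"), ensemble_size=2)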
+
+
+
+    def __fit_params__(self, kwargs):
+        at = kwargs.pop("adversarial_training", False)
+        dat = kwargs.pop("delta_at", 0.01)
+        batch_size = kwargs.pop("batch_size", 512)
+        convergence_epochs = kwargs.pop("convergence_epochs", 10)
+        initial_learning_rate = kwargs.pop('initial_learning_rate', 0.01)
+        learning_rate_decay = kwargs.pop('learning_rate_decay', 2.0)
+        learning_rate_minimum = kwargs.pop('learning_rate_minimum', 1e-6)
+        maximum_epochs = kwargs.pop("maximum_epochs", 200)
+        training_split = kwargs.pop("training_split", 0.9)
+        return at, dat, batch_size, convergence_epochs, initial_learning_rate, \
+            learning_rate_decay, learning_rate_minimum, maximum_epochs, \
+            training_split, kwargs
+
+    def cross_validation(self,
+                         x_train,
+                         y_train,
+                         sigma_noise=None,
+                         n_folds=5,
+                         s=None,
+                         **kwargs):
+        r"""
+        Perform n-fold cross validation.
+
+        This function trains the network n times on different subsets of the
+        provided training data, always keeping a fraction of 1/n samples apart
+        for testing. Performance for each of the networks is evaluated and mean
+        and standard deviation for all folds are returned. This is to reduce
+        the influence of random fluctuations of the network performance during
+        hyperparameter tuning.
+
+        Arguments:
+
+            x_train(numpy.array): Array of shape :code:`(m, n)` containing the
+                                  m n-dimensional training inputs.
+
+            y_train(numpy.array): Array of shape :code:`(m, 1)` containing the
+                                  m training outputs.
+
+            sigma_noise(None, float, np.array): If not `None`, this value is used
+                                                to scale the Gaussian noise
+                                                that is added to each training
+                                                batch. If `None`, no noise is
+                                                added.
+
+            n_folds(int): Number of folds to perform for the cross validation.
+
+            s(callable, None): Performance metric for the fold. If not None,
+                               this should be a function object taking as
+                               arguments :code:`(y_test, y_pred)`, i.e. the
+                               expected output for the given fold :code:`y_test`
+                               and the predicted output :code:`y_pred`. The
+                               returned value is taken as performance metric.
+
+            **kwargs: Additional keyword arguments are passed on to the :code:`fit`
+                      method that is called for each fold.
+        """
+
+        n = x_train.shape[0]
+        n_test = n // n_folds
+        inds = np.random.permutation(np.arange(0, n))
+
+        results = []
+
+        # Nomenclature is a bit difficult here:
+        # Each cross validation fold has its own training,
+        # validation and test set. The size of the test set
+        # is the number of provided training samples divided by the
+        # number of folds. The rest is used as training and internal
+        # validation set according to the chosen training_split
+        # ratio.
+
+        for i in range(n_folds):
+            for m in self.models:
+                m.reset_states()
+
+            # Indices to use for training, including the training data and the
+            # internal validation set used to monitor convergence.
+            inds_train = np.append(np.arange(0, i * n_test),
+                                   np.arange(min((i + 1) * n_test, n), n))
+            inds_train = inds[inds_train]
+            # Indices used to evaluate the performance of the model.
+            inds_test = np.arange(i * n_test, (i + 1) * n_test)
+            inds_test = inds[inds_test]
+
+            x_test_fold = x_train[inds_test, :]
+            y_test_fold = y_train[inds_test]
+
+            # Splitting training and validation set.
+            x_train_fold = x_train[inds_train, :]
+            y_train_fold = y_train[inds_train]
+
+            self.fit(x_train_fold, y_train_fold,
+                     sigma_noise, **kwargs)
+
+            # Evaluation on this fold's test set.
+            if s:
+                y_pred = self.predict(x_test_fold)
+                results += [s(y_pred, y_test_fold)]
+            else:
+                loss = self.models[0].evaluate(
+                    (x_test_fold - self.x_mean) / self.x_sigma,
+                    y_test_fold)
+                logger.info(loss)
+                results += [loss]
+        logger.info(results)
+        results = np.array(results)
+        logger.info(results)
+        return (np.mean(results, axis=0), np.std(results, axis=0))
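The fold bookkeeping above is compact; a standalone sketch of the same index arithmetic (an editorial example, not part of the patch; n and n_folds arbitrary):

    import numpy as np

    n, n_folds = 10, 5
    n_test = n // n_folds
    inds = np.random.permutation(n)
    for i in range(n_folds):
        # Test fold i, and everything outside it for training/validation.
        inds_test = inds[np.arange(i * n_test, (i + 1) * n_test)]
        inds_train = inds[np.append(np.arange(0, i * n_test),
                                    np.arange(min((i + 1) * n_test, n), n))]
        # Each sample appears in exactly one test fold.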
+
+    def fit(self,
+            x_train,
+            y_train,
+            sigma_noise=None,
+            x_val=None,
+            y_val=None,
+            **kwargs):
+        r"""
+        Train the QRNN on given training data.
+
+        The training uses an internal validation set to monitor training
+        progress. This can be either split at random from the training
+        data (see `training_split` argument) or provided explicitly
+        using the `x_val` and `y_val` parameters.
+
+        Training of the QRNN is performed using stochastic gradient descent
+        (SGD). The learning rate is adaptively reduced during training when
+        the loss on the internal validation set has not been reduced for a
+        certain number of epochs.
+
+        Two data augmentation techniques can be used to improve the
+        calibration of the QRNN's predictions. The first one adds Gaussian
+        noise to training batches before they are passed to the network.
+        The noise statistics are defined using the `sigma_noise` argument.
+        The second one is adversarial training. If adversarial training is
+        used, half of each training batch consists of adversarial samples
+        generated using the fast gradient sign method. The strength of the
+        perturbation is controlled using the `delta_at` parameter.
+
+        During one epoch, each training sample is passed once to the network.
+        Their order is randomized between epochs.
+
+        Arguments:
+
+            x_train(np.array): Array of shape `(n, m)` containing n training
+                               samples of dimension m.
+
+            y_train(np.array): Array of shape `(n, )` containing the training
+                               output corresponding to the training data in
+                               `x_train`.
+
+            sigma_noise(None, float, np.array): If not `None`, this value is used
+                                                to scale the Gaussian noise
+                                                that is added to each training
+                                                batch. If `None`, no noise is
+                                                added.
+
+            x_val(np.array): Array of shape :code:`(n', m)` containing n' validation
+                             inputs that will be used to monitor training loss. Must
+                             be provided in unison with :code:`y_val` or otherwise
+                             will be ignored.
+
+            y_val(np.array): Array of shape :code:`(n')` containing n' validation
+                             outputs corresponding to the inputs in :code:`x_val`.
+                             Must be provided in unison with :code:`x_val` or
+                             otherwise will be ignored.
+
+            adversarial_training(Bool): Whether or not to use adversarial training.
+                                        `False` by default.
+
+            delta_at(float): Perturbation factor for the fast gradient sign method
+                             determining the strength of the adversarial training
+                             perturbation. `0.01` by default.
+
+            batch_size(int): The batch size to use during training. Defaults to `512`.
+
+            convergence_epochs(int): The number of epochs without decrease in
+                                     validation loss before the learning rate
+                                     is reduced. Defaults to `10`.
+
+            initial_learning_rate(float): The initial value for the learning
+                                          rate. Defaults to `0.01`.
+
+            learning_rate_decay(float): The factor by which to reduce the
+                                        learning rate after no improvement
+                                        on the internal validation set was
+                                        observed for `convergence_epochs`
+                                        epochs. Defaults to `2.0`.
+
+            learning_rate_minimum(float): The minimum learning rate at which
+                                          the training is terminated. Defaults
+                                          to `1e-6`.
+
+            maximum_epochs(int): The maximum number of epochs to perform if
+                                 training does not terminate before. Defaults
+                                 to `200`.
+
+            training_split(float): The fraction `0 < ts < 1.0` of the samples
+                                   to use for training, with the remainder kept
+                                   as internal validation set. Defaults to `0.9`.
+
+        """
+        if not (x_train.shape[1] == self.input_dim):
+            raise Exception("Training input must have the same extent along "
+                            "dimension 1 as input_dim ("
+                            + str(self.input_dim) + ").")
+
+        if not (y_train.shape[1] == 1):
+            raise Exception("Currently only scalar retrieval targets are"
+                            " supported.")
+
+        x_mean = np.mean(x_train, axis=0, keepdims=True)
+        x_sigma = np.std(x_train, axis=0, keepdims=True)
+        self.x_mean = x_mean
+        self.x_sigma = x_sigma
+
+        # Handle parameters
+        # at:  adversarial training
+        # bs:  batch size
+        # ce:  convergence epochs
+        # ilr: initial learning rate
+        # lrd: learning rate decay
+        # lrm: learning rate minimum
+        # me:  maximum number of epochs
+        # ts:  split ratio of training set
+        at, dat, bs, ce, ilr, lrd, lrm, me, ts, kwargs = self.__fit_params__(
+            kwargs)
+
+        # Split training and validation set if x_val or y_val
+        # are not provided.
+        n = x_train.shape[0]
+        n_train = n
+        if x_val is None and y_val is None:
+            n_train = round(ts * n)
+            n_val = n - n_train
+            inds = np.random.permutation(n)
+            x_val = x_train[inds[n_train:], :]
+            y_val = y_train[inds[n_train:]]
+            x_train = x_train[inds[:n_train], :]
+            y_train = y_train[inds[:n_train]]
+        loss = QuantileLoss(self.quantiles)
+
+        self.custom_objects = {loss.__name__: loss}
+        for model in self.models:
+            optimizer = SGD(lr=ilr)
+            model.compile(loss=loss, optimizer=optimizer)
+
+            if at:
+                inputs = [model.input, model.targets[0],
+                          model.sample_weights[0]]
+                input_gradients = K.function(
+                    inputs, K.gradients(model.total_loss, model.input))
+                training_generator = AdversarialTrainingGenerator(x_train,
+                                                                  self.x_mean,
+                                                                  self.x_sigma,
+                                                                  y_train,
+                                                                  sigma_noise,
+                                                                  bs,
+                                                                  input_gradients,
+                                                                  dat)
+            else:
+                training_generator = TrainingGenerator(x_train, self.x_mean, self.x_sigma,
+                                                       y_train, sigma_noise, bs)
+            validation_generator = ValidationGenerator(x_val, self.x_mean, self.x_sigma,
+                                                       y_val, sigma_noise)
+            lr_callback = LRDecay(model, lrd, lrm, ce)
+            model.fit_generator(training_generator, steps_per_epoch=n_train // bs,
+                                epochs=me, validation_data=validation_generator,
+                                validation_steps=1, callbacks=[lr_callback])
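Continuing the construction sketch above, a complete training call on synthetic data (an editorial example, not part of the patch; all hyperparameters at their defaults except the ones shown):

    x_train = np.random.rand(1000, 5)
    y_train = np.random.rand(1000, 1)
    qrnn.fit(x_train, y_train, sigma_noise=0.1,
             batch_size=256, maximum_epochs=50)
    y_quantiles = qrnn.predict(x_train[:10])   # shape (10, 5): one column per quantile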
" \ + "Please make sure all files created during the"\ + " saving are in this folder.") + f.close() + return qrnn + + def __getstate__(self): + dct = copy.copy(self.__dict__) + dct.pop("models") + return dct + + def __setstate__(self, state): + self.__dict__ = state + self.models = None diff --git a/typhon/retrieval/qrnn/backends/pytorch.py b/typhon/retrieval/qrnn/backends/pytorch.py new file mode 100644 index 00000000..4fe5ce58 --- /dev/null +++ b/typhon/retrieval/qrnn/backends/pytorch.py @@ -0,0 +1,345 @@ +""" +models +====== + +This module provides an implementation of quantile regression neural networks (QRNNs) +using the pytorch deep learning framework. +""" +import torch +import numpy as np +from torch import nn +from torch import optim +from tqdm.auto import tqdm + +activations = {"elu" : nn.ELU, + "hardshrink" : nn.Hardshrink, + "hardtanh" : nn.Hardtanh, + "PReLU" : nn.PReLU, + "SELU" : nn.SELU, + "CELU" : nn.CELU, + "Sigmoid" : nn.Sigmoid, + "Softmin" : nn.Softmin} + +class QuantileLoss: + r""" + The quantile loss function + + This function object implements the quantile loss defined as + + + .. math:: + + \mathcal{L}(y_\text{pred}, y_\text{true}) = + \begin{cases} + \tau \cdot |y_\text{pred} - y_\text{true}| & , y_\text{pred} < y_\text{true} \\ + (1 - \tau) \cdot |y_\text{pred} - y_\text{true}| & , \text{otherwise} + \end{cases} + + + as a training criterion for the training of neural networks. The loss criterion + expects a vector :math:`\mathbf{y}_\tau` of predicted quantiles and the observed + value :math:`y`. The loss for a single training sample is computed by summing the losses + corresponding to each quantiles. The loss for a batch of training samples is + computed by taking the mean over all samples in the batch. + """ + def __init__(self, + quantiles, + mask = -1.0): + """ + Create an instance of the quantile loss function with the given quantiles. + + Arguments: + quantiles: Array or iterable containing the quantiles to be estimated. + """ + self.quantiles = torch.tensor(quantiles).float() + self.n_quantiles = len(quantiles) + self.mask = np.float32(mask) + + def to(self, device): + self.quantiles = self.quantiles.to(device) + + def __call__(self, y_pred, y_true): + """ + Compute the mean quantile loss for given inputs. + + Arguments: + y_pred: N-tensor containing the predicted quantiles along the last + dimension + y_true: (N-1)-tensor containing the true y values corresponding to + the predictions in y_pred + + Returns: + The mean quantile loss. + """ + dy = y_pred - y_true + n = self.quantiles.size()[0] + qs = self.quantiles.reshape((n,) + (1, ) * max(len(dy.size()) - 2, 0)) + l = torch.where(dy >= 0.0, + (1.0 - qs) * dy, + (-qs) * dy) + if not self.mask is None: + l = torch.where(y_true == self.mask, torch.zeros_like(l), l) + return l.mean() + +################################################################################ +# QRNN +################################################################################ + +class PytorchQRNN: + """ + Quantile regression neural network (QRNN) + + This class implements QRNNs as a fully-connected network with + a given number of layers. + """ + def __init__(self, + input_dimension, + quantiles, + model = (3, 128, "relu"), + depth = 3, + width = 128, + activation = nn.ReLU): + """ + Arguments: + input_dimension(int): The number of input features. + quantiles(array): Array of the quantiles to predict. + depth(int): Number of layers of the network. + width(int): The number of neurons in the inner layers of the network. 
+
+################################################################################
+# QRNN
+################################################################################
+
+class PytorchQRNN:
+    """
+    Quantile regression neural network (QRNN)
+
+    This class implements QRNNs as a fully-connected network with
+    a given number of layers.
+    """
+    def __init__(self,
+                 input_dimension,
+                 quantiles,
+                 model = (3, 128, "relu"),
+                 depth = 3,
+                 width = 128,
+                 activation = nn.ReLU):
+        """
+        Arguments:
+            input_dimension(int): The number of input features.
+            quantiles(array): Array of the quantiles to predict.
+            model: Tuple (depth, width, activation) describing the architecture
+                of the network to create, or an instance of torch.nn.Module
+                to use directly.
+            depth(int): Number of layers of the network.
+            width(int): The number of neurons in the inner layers of the network.
+            activation: The activation function to use for the inner layers of the network.
+        """
+        self.input_dimension = input_dimension
+        self.quantiles = np.array(quantiles)
+        self.depth = depth
+        self.width = width
+        self.activation = activation
+        self.criterion = QuantileLoss(self.quantiles)
+
+        n_quantiles = len(quantiles)
+
+        if type(model) is tuple:
+            depth = model[0]
+            width = model[1]
+            act = model[2]
+            if type(act) is str:
+                if act in activations:
+                    act = activations[act]
+                else:
+                    raise ValueError("{} is not one of the available "
+                                     "activations ({}) to use in a pytorch "
+                                     "network.".format(act,
+                                                       list(activations.keys())))
+
+            self.model = nn.Sequential()
+            self.model.add_module("fc_0", nn.Linear(input_dimension, width))
+            self.model.add_module("act_0", act())
+            for i in range(1, depth - 1):
+                self.model.add_module("fc_{}".format(i), nn.Linear(width, width))
+                self.model.add_module("act_{}".format(i), act())
+            self.model.add_module("fc_{}".format(depth - 1), nn.Linear(width, n_quantiles))
+
+            # Keep the stored hyperparameters in sync with the architecture
+            # that was actually built.
+            self.depth = depth
+            self.width = width
+            self.activation = act
+        elif isinstance(model, nn.Module):
+            self.model = model
+        else:
+            raise ValueError("Provided model must either be a valid architecture"
+                             " tuple or an instance of pytorch.nn.Module.")
+
+        self.training_errors = []
+        self.validation_errors = []
+
+    def _make_adversarial_samples(self, x, y, eps):
+        self.model.zero_grad()
+        x.requires_grad = True
+        y_pred = self.model(x)
+        c = self.criterion(y_pred, y)
+        c.backward()
+        x_adv = x.detach() + eps * torch.sign(x.grad.detach())
+        return x_adv
+
+    def train(self,
+              training_data,
+              validation_data,
+              n_epochs = 1,
+              adversarial_training = False,
+              eps_adv = 1e-6,
+              lr = 1e-2,
+              momentum = 0.0,
+              gpu = False):
+        """
+        Train the network.
+
+        This trains the network for the given number of epochs using the provided
+        training and validation data.
+
+        If desired, the training can be augmented using adversarial training. In this
+        case the network is additionally trained with an adversarial batch of examples
+        in each step of the training.
+
+        Arguments:
+            training_data: pytorch dataloader providing the training data
+            validation_data: pytorch dataloader providing the validation data
+            n_epochs: the number of epochs to train the network for
+            adversarial_training: whether or not to use adversarial training
+            eps_adv: The scaling factor to use for adversarial training.
+        """
+        self.optimizer = optim.SGD(self.model.parameters(),
+                                   lr = lr,
+                                   momentum = momentum)
+
+        self.model.train()
+
+        if torch.cuda.is_available() and gpu:
+            dev = torch.device("cuda")
+        else:
+            dev = torch.device("cpu")
+        self.model.to(dev)
+
+        self.criterion.to(dev)
+
+        for i in range(n_epochs):
+
+            err = 0.0
+            n = 0
+            iterator = tqdm(enumerate(training_data), total = len(training_data))
+            for j, (x, y) in iterator:
+                x = x.to(dev)
+                y = y.to(dev)
+
+                shape = x.size()
+                y = y.reshape((shape[0], 1) + shape[2:])
+
+                self.optimizer.zero_grad()
+                y_pred = self.model(x)
+                c = self.criterion(y_pred, y)
+                c.backward()
+                self.optimizer.step()
+
+                err += c.item() * x.size()[0]
+                n += x.size()[0]
+
+                if adversarial_training:
+                    self.optimizer.zero_grad()
+                    x_adv = self._make_adversarial_samples(x, y, eps_adv)
+                    y_pred = self.model(x_adv)
+                    c = self.criterion(y_pred, y)
+                    c.backward()
+                    self.optimizer.step()
+
+                if j % 100 == 0:
+                    iterator.set_postfix({"Training error" : err / n})
+
+            # Save training error
+            self.training_errors.append(err / n)
+
+            val_err = 0.0
+            n = 0
+
+            for x, y in validation_data:
+                x = x.to(dev)
+                y = y.to(dev)
+
+                shape = x.size()
+                y = y.reshape((shape[0], 1) + shape[2:])
+
+                y_pred = self.model(x)
+                c = self.criterion(y_pred, y)
+
+                val_err += c.item() * x.size()[0]
+                n += x.size()[0]
+
+            self.validation_errors.append(val_err / n)
+        self.model.eval()
+
+    def predict(self, x):
+        """
+        Predict quantiles for given input.
+
+        Args:
+            x: 2D tensor containing the inputs for which to
+               predict the quantiles.
+
+        Returns:
+            tensor: 2D tensor containing the predicted quantiles along
+                the last dimension.
+        """
+        return self.model(x)
+
+    def calibration(self,
+                    data,
+                    gpu=False):
+        """
+        Computes the calibration of the predictions from the neural network.
+
+        Arguments:
+            data: torch dataloader object providing the data for which to compute
+                the calibration.
+
+        Returns:
+            (intervals, frequencies): Tuple containing the confidence intervals and
+                corresponding observed frequencies.
+        """
+
+        if gpu and torch.cuda.is_available():
+            dev = torch.device("cuda")
+        else:
+            dev = torch.device("cpu")
+        self.model.to(dev)
+
+        n_intervals = self.quantiles.size // 2
+        qs = self.quantiles
+        intervals = np.array([q_r - q_l for (q_l, q_r) in zip(qs, reversed(qs))])[:n_intervals]
+        counts = np.zeros(n_intervals)
+
+        total = 0.0
+
+        iterator = tqdm(data)
+        for x, y in iterator:
+            x = x.to(dev)
+            y = y.to(dev)
+            shape = x.size()
+            y = y.reshape((shape[0], 1) + shape[2:])
+
+            y_pred = self.predict(x)
+            y_pred = y_pred.cpu()
+            y = y.cpu()
+
+            for i in range(n_intervals):
+                l = y_pred[:, [i]]
+                r = y_pred[:, [-(i + 1)]]
+                counts[i] += np.logical_and(y >= l, y < r).sum()
+
+            total += np.prod(y.size())
+
+        return intervals[::-1], (counts / total)[::-1]
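If the network is well calibrated, the observed frequencies returned by `calibration` match the nominal sizes of the symmetric quantile intervals. A usage sketch (an editorial example, not part of the patch; `test_loader` is a hypothetical torch dataloader):

    intervals, frequencies = qrnn.calibration(test_loader)
    for ci, freq in zip(intervals, frequencies):
        # e.g. the (0.05, 0.95) quantile pair spans a 0.9 interval, which
        # should contain roughly 90 % of the observed values.
        print("{:.2f} interval -> observed coverage {:.2f}".format(ci, freq))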
+
+    def save(self, path):
+        """
+        Save QRNN to file.
+
+        Arguments:
+            path: The path of the file in which to store the QRNN.
+        """
+        torch.save({"input_dimension" : self.input_dimension,
+                    "quantiles" : self.quantiles,
+                    "width" : self.width,
+                    "depth" : self.depth,
+                    "activation" : self.activation,
+                    "network_state" : self.model.state_dict(),
+                    "optimizer_state" : self.optimizer.state_dict()},
+                   path)
+
+    @staticmethod
+    def load(path):
+        """
+        Load QRNN from file.
+
+        Arguments:
+            path: Path of the file where the QRNN was stored.
+        """
+        state = torch.load(path)
+        qrnn = PytorchQRNN(state["input_dimension"],
+                           state["quantiles"],
+                           model=(state["depth"],
+                                  state["width"],
+                                  state["activation"]))
+        qrnn.model.load_state_dict(state["network_state"])
+        # The optimizer is only created in train(), so its state can only be
+        # restored once a training run has been set up.
+        return qrnn
diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py
index 956c375b..9b1c46d9 100644
--- a/typhon/retrieval/qrnn/qrnn.py
+++ b/typhon/retrieval/qrnn/qrnn.py
@@ -369,13 +369,10 @@ class QRNN:
             The ensemble of Keras neural networks used for the quantile regression
             neural network.
     """
-
     def __init__(self,
                  input_dim,
                  quantiles,
-                 depth=3,
-                 width=128,
-                 activation="relu",
+                 model=(3, 128, "relu"),
                  ensemble_size=1,
                  **kwargs):
         """
@@ -408,39 +405,31 @@ def __init__(self,
         """
         self.input_dim = input_dim
         self.quantiles = np.array(quantiles)
-        self.depth = depth
-        self.width = width
-        self.activation = activation
-
-        model = Sequential()
-        if depth == 0:
-            model.add(Dense(input_dim=input_dim,
-                            units=len(quantiles),
-                            activation=None))
-        else:
-            model.add(Dense(input_dim=input_dim,
-                            units=width,
-                            activation=activation))
-            for i in range(depth - 2):
-                model.add(Dense(units=width,
-                                activation=activation,
-                                **kwargs))
-            model.add(Dense(units=len(quantiles), activation=None))
-        self.models = [clone_model(model) for i in range(ensemble_size)]
-
-    def __fit_params__(self, kwargs):
-        at = kwargs.pop("adversarial_training", False)
-        dat = kwargs.pop("delta_at", 0.01)
-        batch_size = kwargs.pop("batch_size", 512)
-        convergence_epochs = kwargs.pop("convergence_epochs", 10)
-        initial_learning_rate = kwargs.pop('initial_learning_rate', 0.01)
-        learning_rate_decay = kwargs.pop('learning_rate_decay', 2.0)
-        learning_rate_minimum = kwargs.pop('learning_rate_minimum', 1e-6)
-        maximum_epochs = kwargs.pop("maximum_epochs", 200)
-        training_split = kwargs.pop("training_split", 0.9)
-        return at, dat, batch_size, convergence_epochs, initial_learning_rate, \
-            learning_rate_decay, learning_rate_minimum, maximum_epochs, \
-            training_split, kwargs
+
+        try:
+            from typhon.retrieval.qrnn.backends.keras import KerasQRNN
+            model = KerasQRNN(input_dim, quantiles, model, ensemble_size)
+        except:
+            from typhon.retrieval.qrnn.backends.pytorch import PytorchQRNN
+            model = PytorchQRNN(input_dim, quantiles, model)
+
+
+
+        #try:
+        #    from typhon.retrieval.qrnn.backends.keras import KerasQRNN
+        #    model = KerasQRNN(input_dim, quantiles, model, ensemble_size)
+        #except:
+        #    try:
+        #        from typhon.retrieval.qrnn.backends.pytorch import QRNNPytorch
+        #        model = QRNNPytorch(input_dim, quantiles, model)
+        #    except:
+        #        model = None
+        #if model is None:
+        #    raise Exception("Could not create model backend. Please make sure "
+        #                    "that the provided architecture string is either a "
+        #                    "valid architecture tuple, a keras model or a "
+        #                    "pytorch module.")
+        self.model = model
 
     def cross_validation(self,
                          x_train,
@@ -539,13 +528,7 @@ def cross_validation(self,
         logger.info(results)
         return (np.mean(results, axis=0), np.std(results, axis=0))
 
-    def fit(self,
-            x_train,
-            y_train,
-            sigma_noise=None,
-            x_val=None,
-            y_val=None,
-            **kwargs):
+    def fit(self, *args, **kwargs):
         r"""
         Train the QRNN on given training data.
 
@@ -629,73 +612,11 @@ def fit(self,
                                    to 0.9.
""" - if not (x_train.shape[1] == self.input_dim): - raise Exception("Training input must have the same extent along" / - "dimension 1 as input_dim (" + str(self.input_dim) / - + ")") + self.model.fit(*args, **kwargs) - if not (y_train.shape[1] == 1): - raise Exception("Currently only scalar retrieval targets are" / - " supported.") + def train(self, *args, **kwargs): + self.model.train(*args, **kwargs) - x_mean = np.mean(x_train, axis=0, keepdims=True) - x_sigma = np.std(x_train, axis=0, keepdims=True) - self.x_mean = x_mean - self.x_sigma = x_sigma - - # Handle parameters - # at: adversarial training - # bs: batch size - # ce: convergence epochs - # ilr: initial learning rate - # lrd: learning rate decay - # lrm: learning rate minimum - # me: maximum number of epochs - # ts: split ratio of training set - at, dat, bs, ce, ilr, lrd, lrm, me, ts, kwargs = self.__fit_params__( - kwargs) - - # Split training and validation set if x_val or y_val - # are not provided. - n = x_train.shape[0] - n_train = n - if x_val is None and y_val is None: - n_train = round(ts * n) - n_val = n - n_train - inds = np.random.permutation(n) - x_val = x_train[inds[n_train:], :] - y_val = y_train[inds[n_train:]] - x_train = x_train[inds[:n_train], :] - y_train = y_train[inds[:n_train]] - loss = QuantileLoss(self.quantiles) - - self.custom_objects = {loss.__name__: loss} - for model in self.models: - optimizer = SGD(lr=ilr) - model.compile(loss=loss, optimizer=optimizer) - - if at: - inputs = [model.input, model.targets[0], - model.sample_weights[0]] - input_gradients = K.function( - inputs, K.gradients(model.total_loss, model.input)) - training_generator = AdversarialTrainingGenerator(x_train, - self.x_mean, - self.x_sigma, - y_train, - sigma_noise, - bs, - input_gradients, - dat) - else: - training_generator = TrainingGenerator(x_train, self.x_mean, self.x_sigma, - y_train, sigma_noise, bs) - validation_generator = ValidationGenerator(x_val, self.x_mean, self.x_sigma, - y_val, sigma_noise) - lr_callback = LRDecay(model, lrd, lrm, ce) - model.fit_generator(training_generator, steps_per_epoch=n_train // bs, - epochs=me, validation_data=validation_generator, - validation_steps=1, callbacks=[lr_callback]) def predict(self, x): r""" @@ -715,9 +636,7 @@ def predict(self, x): quantiles of the network. """ - predictions = np.stack( - [m.predict((x - self.x_mean) / self.x_sigma) for m in self.models]) - return np.mean(predictions, axis=0) + return self.model.predict(x) def cdf(self, x): r""" From 4307c1d95c6cde6d772e90e2e100501a74e46af4 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Tue, 17 Mar 2020 09:45:42 +0100 Subject: [PATCH 02/19] Wokring implementation of pytorch fully-connected model. 

From 4307c1d95c6cde6d772e90e2e100501a74e46af4 Mon Sep 17 00:00:00 2001
From: Simon Pfreundschuh
Date: Tue, 17 Mar 2020 09:45:42 +0100
Subject: [PATCH 02/19] Working implementation of pytorch fully-connected
 model.

---
 setup.py                                  |   2 +-
 typhon/retrieval/__init__.py              |   2 +-
 typhon/retrieval/qrnn/__init__.py         |   2 +-
 typhon/retrieval/qrnn/backends/keras.py   |  43 +-
 typhon/retrieval/qrnn/backends/pytorch.py |  11 +-
 typhon/retrieval/qrnn/models/keras.py     | 108 ++++
 typhon/retrieval/qrnn/models/pytorch.py   | 360 +++++++++++++
 typhon/retrieval/qrnn/qrnn.py             | 608 +++++-----------------
 8 files changed, 618 insertions(+), 518 deletions(-)
 create mode 100644 typhon/retrieval/qrnn/models/keras.py
 create mode 100644 typhon/retrieval/qrnn/models/pytorch.py

diff --git a/setup.py b/setup.py
index ff29fdb2..525618ed 100644
--- a/setup.py
+++ b/setup.py
@@ -86,7 +86,7 @@
         "numexpr",
         "numpy>=1.13",
         "pandas",
-        "scikit-image",
+        "scikit-image>=0.15",
         "scikit-learn",
         "scipy>=0.15.1",
         "setuptools>=0.7.2",
diff --git a/typhon/retrieval/__init__.py b/typhon/retrieval/__init__.py
index ff43f188..09fdccb5 100644
--- a/typhon/retrieval/__init__.py
+++ b/typhon/retrieval/__init__.py
@@ -5,4 +5,4 @@
 from .common import *  # noqa
 from .oem import *  # noqa
-#from .spareice import *  # noqa
+from .spareice import *  # noqa
diff --git a/typhon/retrieval/qrnn/__init__.py b/typhon/retrieval/qrnn/__init__.py
index bb15d24e..ae8c7421 100644
--- a/typhon/retrieval/qrnn/__init__.py
+++ b/typhon/retrieval/qrnn/__init__.py
@@ -4,4 +4,4 @@
 The implementation has been developed specifically for remote sensing
 applications and provides a high level interface allowing for simple training
 and evaluation of the QRNNs."""
-from typhon.retrieval.qrnn.qrnn import QRNN
+from typhon.retrieval.qrnn.qrnn import QRNN, set_backend
diff --git a/typhon/retrieval/qrnn/backends/keras.py b/typhon/retrieval/qrnn/backends/keras.py
index df5fec2f..4a7830fc 100644
--- a/typhon/retrieval/qrnn/backends/keras.py
+++ b/typhon/retrieval/qrnn/backends/keras.py
@@ -15,7 +15,7 @@
 # Keras Imports
 try:
     import keras
-    from keras.models import Sequential, clone_model
+    from keras.models import Sequential, clone_model, Model
     from keras.layers import Dense, Activation, Dropout
     from keras.optimizers import SGD
 except ImportError:
@@ -425,16 +425,14 @@ def __init__(self,
                                 activation=activation,
                                 **kwargs))
                 model.add(Dense(units=len(quantiles), activation=None))
-        elif type(model) is Model:
-            pass
+        elif isinstance(model, Model):
+            self.models = [clone_model(model) for i in range(ensemble_size)]
+        elif isinstance(model, list) and isinstance(model[0], Model):
+            self.models = model
         else:
             raise Exception("The provided model is neither a suitable "
                             "architecture tuple nor a keras model.")
 
-        self.models = [clone_model(model) for i in range(ensemble_size)]
-
-
-
     def __fit_params__(self, kwargs):
         at = kwargs.pop("adversarial_training", False)
         dat = kwargs.pop("delta_at", 0.01)
@@ -776,20 +774,23 @@ def load(path):
         filename = os.path.basename(path)
         dirname = os.path.dirname(path)
 
-        f = open(path, "rb")
-        qrnn = pickle.load(f)
-        qrnn.models = []
-        for mf in qrnn.model_files:
-            mf = os.path.basename(mf)
-            try:
-                mp = os.path.join(dirname, os.path.basename(mf))
-                qrnn.models += [keras.models.load_model(mp, qrnn.custom_objects)]
-            except:
-                raise Exception("Error loading the neural network models. " \
" \ - "Please make sure all files created during the"\ - " saving are in this folder.") - f.close() - return qrnn + with open(path, "rb") as f: + + qrnn = pickle.load(f) + qrnn.models = [] + for mf in qrnn.model_files: + mf = os.path.basename(mf) + try: + mp = os.path.join(dirname, os.path.basename(mf)) + qrnn.models += [keras.models.load_model(mp, qrnn.custom_objects)] + except: + raise Exception("Error loading the neural network models. " \ + "Please make sure all files created during the"\ + " saving are in this folder.") + keras_qrnn = KerasQRNN(qrnn.input_dim, qrnn.quantiles, qrnn.models) + keras_qrnn.x_mean = qrnn.x_mean + keras_qrnn.x_sigma = qrnn.x_sigma + return keras_qrnn def __getstate__(self): dct = copy.copy(self.__dict__) diff --git a/typhon/retrieval/qrnn/backends/pytorch.py b/typhon/retrieval/qrnn/backends/pytorch.py index 4fe5ce58..da1d6846 100644 --- a/typhon/retrieval/qrnn/backends/pytorch.py +++ b/typhon/retrieval/qrnn/backends/pytorch.py @@ -14,11 +14,12 @@ activations = {"elu" : nn.ELU, "hardshrink" : nn.Hardshrink, "hardtanh" : nn.Hardtanh, - "PReLU" : nn.PReLU, - "SELU" : nn.SELU, - "CELU" : nn.CELU, - "Sigmoid" : nn.Sigmoid, - "Softmin" : nn.Softmin} + "prelu" : nn.PReLU, + "selu" : nn.SELU, + "celu" : nn.CELU, + "sigmoid" : nn.Sigmoid, + "softplus" : nn.Softplus + "softmin" : nn.Softmin} class QuantileLoss: r""" diff --git a/typhon/retrieval/qrnn/models/keras.py b/typhon/retrieval/qrnn/models/keras.py new file mode 100644 index 00000000..58e511e8 --- /dev/null +++ b/typhon/retrieval/qrnn/models/keras.py @@ -0,0 +1,108 @@ +import numpy as np +from keras.models import Sequential +from keras.layers import Dense, Activation +import keras.backend as K +import logging + +################################################################################ +# Quantile loss +################################################################################ + +logger = logging.getLogger(__name__) + +def skewed_absolute_error(y_true, y_pred, tau): + """ + The quantile loss function for a given quantile tau: + + L(y_true, y_pred) = (tau - I(y_pred < y_true)) * (y_pred - y_true) + + Where I is the indicator function. + """ + dy = y_pred - y_true + return K.mean((1.0 - tau) * K.relu(dy) + tau * K.relu(-dy), axis=-1) + + +def quantile_loss(y_true, y_pred, taus): + """ + The quantiles loss for a list of quantiles. Sums up the error contribution + from the each of the quantile loss functions. + """ + e = skewed_absolute_error( + K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0]) + for i, tau in enumerate(taus[1:]): + e += skewed_absolute_error(K.flatten(y_true), + K.flatten(y_pred[:, i + 1]), + tau) + return e + + +class QuantileLoss: + """ + Wrapper class for the quantile error loss function. A class is used here + to allow the implementation of a custom `__repr` function, so that the + loss function object can be easily loaded using `keras.model.load`. + + Attributes: + + quantiles: List of quantiles that should be estimated with + this loss function. 
+
+    """
+
+    def __init__(self, quantiles):
+        self.__name__ = "QuantileLoss"
+        self.quantiles = quantiles
+
+    def __call__(self, y_true, y_pred):
+        return quantile_loss(y_true, y_pred, self.quantiles)
+
+    def __repr__(self):
+        return "QuantileLoss(" + repr(self.quantiles) + ")"
+
+
+class KerasModel:
+    def __init__(self):
+        pass
+
+    def train(self):
+        pass
+
+
+class FullyConnected(Sequential):
+    """
+    Convenience class to represent fully connected models with
+    a given number of input and output features and depth and
+    width of the hidden layers.
+    """
+    def __init__(self,
+                 input_dimension,
+                 quantiles,
+                 arch,
+                 activation = "relu"):
+        """
+        Create a fully-connected neural network.
+
+        Args:
+            input_dimension(int): Number of input features
+            quantiles(np.array): The quantiles to predict; one output
+                neuron is created for each of them.
+            arch(tuple): Tuple (d, w) of d, the number of hidden
+                layers in the network, and w, the width of the net-
+                work.
+            activation(str or None): Activation function to insert between
+                hidden layers. The given string is passed to the
+                keras.layers.Activation class. If None no activation function
+                is used.
+        """
+        quantiles = np.array(quantiles)
+        output_dimension = quantiles.size
+        if len(arch) == 0:
+            layers = [Dense(output_dimension, input_shape=(input_dimension,))]
+        else:
+            d, w = arch
+            layers = [Dense(w, input_shape=(input_dimension,))]
+            if activation is not None:
+                layers.append(Activation(activation))
+            for i in range(d - 1):
+                layers.append(Dense(w, input_shape=(w,)))
+                if activation is not None:
+                    layers.append(Activation(activation))
+            layers.append(Dense(output_dimension, input_shape=(w,)))
+        super().__init__(layers)
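A construction sketch for this convenience class (an editorial example, not part of the patch; dimensions arbitrary):

    import numpy as np

    net = FullyConnected(input_dimension=5,
                         quantiles=np.array([0.05, 0.5, 0.95]),
                         arch=(3, 128),
                         activation="relu")
    net.summary()    # 3 hidden layers of width 128, 3 output neurons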
diff --git a/typhon/retrieval/qrnn/models/pytorch.py b/typhon/retrieval/qrnn/models/pytorch.py
new file mode 100644
index 00000000..f77835f8
--- /dev/null
+++ b/typhon/retrieval/qrnn/models/pytorch.py
@@ -0,0 +1,360 @@
+"""
+models
+======
+
+This module provides an implementation of quantile regression neural networks (QRNNs)
+using the pytorch deep learning framework.
+"""
+import torch
+import numpy as np
+from torch import nn
+from torch import optim
+from tqdm.auto import tqdm
+
+activations = {"elu" : nn.ELU,
+               "hardshrink" : nn.Hardshrink,
+               "hardtanh" : nn.Hardtanh,
+               "prelu" : nn.PReLU,
+               "relu" : nn.ReLU,
+               "selu" : nn.SELU,
+               "celu" : nn.CELU,
+               "sigmoid" : nn.Sigmoid,
+               "softplus" : nn.Softplus,
+               "softmin" : nn.Softmin}
+
+
+def load(f):
+    model = torch.load(f)
+    return model
+
+################################################################################
+# Quantile loss
+################################################################################
+
+class QuantileLoss:
+    r"""
+    The quantile loss function
+
+    This function object implements the quantile loss defined as
+
+    .. math::
+
+        \mathcal{L}(y_\text{pred}, y_\text{true}) =
+        \begin{cases}
+        \tau \cdot |y_\text{pred} - y_\text{true}| & , y_\text{pred} < y_\text{true} \\
+        (1 - \tau) \cdot |y_\text{pred} - y_\text{true}| & , \text{otherwise}
+        \end{cases}
+
+    as a training criterion for the training of neural networks. The loss criterion
+    expects a vector :math:`\mathbf{y}_\tau` of predicted quantiles and the observed
+    value :math:`y`. The loss for a single training sample is computed by averaging
+    the losses corresponding to each of the quantiles. The loss for a batch of
+    training samples is computed by taking the mean over all samples in the batch.
+    """
+    def __init__(self,
+                 quantiles,
+                 mask = -1.0):
+        """
+        Create an instance of the quantile loss function with the given quantiles.
+
+        Arguments:
+            quantiles: Array or iterable containing the quantiles to be estimated.
+            mask: Value in y_true that marks samples to be excluded from
+                  the loss.
+        """
+        self.quantiles = torch.tensor(quantiles).float()
+        self.n_quantiles = len(quantiles)
+        self.mask = np.float32(mask)
+
+    def to(self, device):
+        self.quantiles = self.quantiles.to(device)
+
+    def __call__(self, y_pred, y_true):
+        """
+        Compute the mean quantile loss for given inputs.
+
+        Arguments:
+            y_pred: N-tensor containing the predicted quantiles along the last
+                dimension
+            y_true: (N-1)-tensor containing the true y values corresponding to
+                the predictions in y_pred
+
+        Returns:
+            The mean quantile loss.
+        """
+        dy = y_pred - y_true
+        n = self.quantiles.size()[0]
+        qs = self.quantiles.reshape((n,) + (1, ) * max(len(dy.size()) - 2, 0))
+        l = torch.where(dy >= 0.0,
+                        (1.0 - qs) * dy,
+                        (-qs) * dy)
+        if self.mask is not None:
+            l = torch.where(y_true == self.mask, torch.zeros_like(l), l)
+        return l.mean()
+
+################################################################################
+# QRNN
+################################################################################
+
+class PytorchModel:
+    """
+    Base class for pytorch QRNN models.
+
+    Implements the training, prediction and calibration functionality that is
+    shared by all QRNNs implemented as pytorch modules.
+    """
+    def __init__(self,
+                 input_dimension,
+                 quantiles):
+        """
+        Arguments:
+            input_dimension(int): The number of input features.
+            quantiles(array): Array of the quantiles to predict.
+        """
+        self.input_dimension = input_dimension
+        self.quantiles = np.array(quantiles)
+        self.criterion = QuantileLoss(self.quantiles)
+        self.training_errors = []
+        self.validation_errors = []
+
+    def _make_adversarial_samples(self, x, y, eps):
+        self.model.zero_grad()
+        x.requires_grad = True
+        y_pred = self.model(x)
+        c = self.criterion(y_pred, y)
+        c.backward()
+        x_adv = x.detach() + eps * torch.sign(x.grad.detach())
+        return x_adv
+
+    def train(self, *args, **kwargs):
+        """
+        Train the network.
+
+        This trains the network for the given number of epochs using the provided
+        training and validation data.
+
+        If desired, the training can be augmented using adversarial training. In this
+        case the network is additionally trained with an adversarial batch of examples
+        in each step of the training.
+
+        Arguments:
+            training_data: pytorch dataloader providing the training data
+            validation_data: pytorch dataloader providing the validation data
+            n_epochs: the number of epochs to train the network for
+            adversarial_training: whether or not to use adversarial training
+            eps_adv: The scaling factor to use for adversarial training.
+        """
+        # nn.Module.train() takes at most one positional argument (the mode
+        # flag); dispatch to it unless this is a full training call.
+        if len(args) < 2:
+            return nn.Sequential.train(self, *args)
+
+        training_data, validation_data = args
+        n_epochs = kwargs.get("n_epochs", 1)
+        adversarial_training = kwargs.get("adversarial_training", False)
+        eps_adv = kwargs.get("eps_adv", 1e-6)
+        lr = kwargs.get("lr", 1e-2)
+        momentum = kwargs.get("momentum", 0.0)
+        gpu = kwargs.get("gpu", False)
+
+        self.optimizer = optim.SGD(self.parameters(),
+                                   lr = lr,
+                                   momentum = momentum)
+        self.train()
+
+        if torch.cuda.is_available() and gpu:
+            dev = torch.device("cuda")
+        else:
+            dev = torch.device("cpu")
+        self.to(dev)
+
+        self.criterion.to(dev)
+
+        for i in range(n_epochs):
+
+            err = 0.0
+            n = 0
+            iterator = tqdm(enumerate(training_data), total = len(training_data))
+            for j, (x, y) in iterator:
+                x = x.to(dev)
+                y = y.to(dev)
+
+                shape = x.size()
+                shape = (shape[0], 1) + shape[2:]
+                y = y.reshape(shape)
+
+                self.optimizer.zero_grad()
+                y_pred = self(x)
+                c = self.criterion(y_pred, y)
+                c.backward()
+                self.optimizer.step()
+
+                err += c.item() * x.size()[0]
+                n += x.size()[0]
+
+                if adversarial_training:
+                    self.optimizer.zero_grad()
+                    x_adv = self._make_adversarial_samples(x, y, eps_adv)
+                    y_pred = self(x_adv)
+                    c = self.criterion(y_pred, y)
+                    c.backward()
+                    self.optimizer.step()
+
+                if j % 100 == 0:
+                    iterator.set_postfix({"Training error" : err / n})
+
+            # Save training error
+            self.training_errors.append(err / n)
+
+            val_err = 0.0
+            n = 0
+
+            for x, y in validation_data:
+                x = x.to(dev)
+                y = y.to(dev)
+
+                shape = x.size()
+                shape = (shape[0], 1) + shape[2:]
+                y = y.reshape(shape)
+
+                y_pred = self(x)
+                c = self.criterion(y_pred, y)
+
+                val_err += c.item() * x.size()[0]
+                n += x.size()[0]
+
+            self.validation_errors.append(val_err / n)
+        self.eval()
+
+    def predict(self, x):
+        """
+        Predict quantiles for given input.
+
+        Args:
+            x: 2D tensor containing the inputs for which to
+               predict the quantiles.
+
+        Returns:
+            tensor: 2D tensor containing the predicted quantiles along
+                the last dimension.
+        """
+        return self(x)
+
+    def calibration(self,
+                    data,
+                    gpu=False):
+        """
+        Computes the calibration of the predictions from the neural network.
+
+        Arguments:
+            data: torch dataloader object providing the data for which to compute
+                the calibration.
+
+        Returns:
+            (intervals, frequencies): Tuple containing the confidence intervals and
+                corresponding observed frequencies.
+        """
+
+        if gpu and torch.cuda.is_available():
+            dev = torch.device("cuda")
+        else:
+            dev = torch.device("cpu")
+        self.to(dev)
+
+        n_intervals = self.quantiles.size // 2
+        qs = self.quantiles
+        intervals = np.array([q_r - q_l for (q_l, q_r) in zip(qs, reversed(qs))])[:n_intervals]
+        counts = np.zeros(n_intervals)
+
+        total = 0.0
+
+        iterator = tqdm(data)
+        for x, y in iterator:
+            x = x.to(dev)
+            y = y.to(dev)
+            shape = x.size()
+            shape = (shape[0], 1) + shape[2:]
+            y = y.reshape(shape)
+
+            y_pred = self.predict(x)
+            y_pred = y_pred.cpu()
+            y = y.cpu()
+
+            for i in range(n_intervals):
+                l = y_pred[:, [i]]
+                r = y_pred[:, [-(i + 1)]]
+                counts[i] += np.logical_and(y >= l, y < r).sum()
+
+            total += np.prod(y.size())
+        return intervals[::-1], (counts / total)[::-1]
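A sketch of driving this training loop with standard pytorch data loaders (an editorial example, not part of the patch; data is synthetic):

    import numpy as np
    import torch
    from torch.utils.data import DataLoader, TensorDataset

    x = torch.rand(1000, 5)
    y = torch.rand(1000)
    training_data = DataLoader(TensorDataset(x, y), batch_size=128, shuffle=True)
    validation_data = DataLoader(TensorDataset(x, y), batch_size=128)

    model = FullyConnected(5, np.array([0.05, 0.5, 0.95]), (3, 128, "relu"))
    model.train(training_data, validation_data, n_epochs=5, lr=1e-2)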
+
+    def save(self, path):
+        """
+        Save QRNN to file.
+
+        Arguments:
+            path: The path of the file in which to store the QRNN.
+        """
+        torch.save({"input_dimension" : self.input_dimension,
+                    "quantiles" : self.quantiles,
+                    "arch" : self.arch,
+                    "network_state" : self.state_dict(),
+                    "optimizer_state" : self.optimizer.state_dict()},
+                   path)
+
+    @staticmethod
+    def load(path):
+        """
+        Load QRNN from file.
+
+        Arguments:
+            path: Path of the file where the QRNN was stored.
+        """
+        state = torch.load(path)
+        qrnn = FullyConnected(state["input_dimension"],
+                              state["quantiles"],
+                              state["arch"])
+        qrnn.load_state_dict(state["network_state"])
+        # The optimizer is only created in train(), so its state can only be
+        # restored once a training run has been set up.
+        return qrnn
+
+class FullyConnected(PytorchModel, nn.Sequential):
+    """
+    Convenience class to represent fully connected models with
+    a given number of input and output features and depth and
+    width of the hidden layers.
+    """
+    def __init__(self,
+                 input_dimension,
+                 quantiles,
+                 arch):
+
+        """
+        Create a fully-connected neural network.
+
+        Args:
+            input_dimension(int): Number of input features
+            quantiles(np.array): The quantiles to predict; one output
+                neuron is created for each of them.
+            arch(tuple): Tuple (d, w, act) of d, the number of hidden
+                layers in the network, w, the width of the network, and
+                act, the activation function to use between the hidden
+                layers.
+        """
+        PytorchModel.__init__(self, input_dimension, quantiles)
+        output_dimension = self.quantiles.size
+        self.arch = arch
+
+        if len(arch) == 0:
+            layers = [nn.Linear(input_dimension, output_dimension)]
+        else:
+            d, w, act = arch
+            if type(act) == str:
+                act = activations[act]
+            layers = [nn.Linear(input_dimension, w)]
+            if act is not None:
+                layers.append(act())
+            for i in range(d - 1):
+                layers.append(nn.Linear(w, w))
+                if act is not None:
+                    layers.append(act())
+            layers.append(nn.Linear(w, output_dimension))
+        nn.Sequential.__init__(self, *layers)
+
+    def save(self, f):
+        torch.save(self, f)
+
diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py
index 9b1c46d9..cf45647b 100644
--- a/typhon/retrieval/qrnn/qrnn.py
+++ b/typhon/retrieval/qrnn/qrnn.py
@@ -6,371 +6,115 @@
 import numpy as np
 from scipy.interpolate import CubicSpline
 
-# Keras Imports
-try:
-    import keras
-    from keras.models import Sequential, clone_model
-    from keras.layers import Dense, Activation, Dropout
-    from keras.optimizers import SGD
-except ImportError:
-    raise ImportError(
-        "Could not import the required Keras modules. The QRNN "
-        "implementation was developed for use with Keras version 2.0.9.")
-
-################################################################################
-# Loss Functions
-################################################################################
-
-import keras.backend as K
-
-
-logger = logging.getLogger(__name__)
-
-
-def skewed_absolute_error(y_true, y_pred, tau):
-    """
-    The quantile loss function for a given quantile tau:
-
-    L(y_true, y_pred) = (tau - I(y_pred < y_true)) * (y_pred - y_true)
-
-    Where I is the indicator function.
-    """
-    dy = y_pred - y_true
-    return K.mean((1.0 - tau) * K.relu(dy) + tau * K.relu(-dy), axis=-1)
-
-
-def quantile_loss(y_true, y_pred, taus):
-    """
-    The quantiles loss for a list of quantiles. Sums up the error contribution
-    from the each of the quantile loss functions.
-    """
-    e = skewed_absolute_error(
-        K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0])
-    for i, tau in enumerate(taus[1:]):
-        e += skewed_absolute_error(K.flatten(y_true),
-                                   K.flatten(y_pred[:, i + 1]),
-                                   tau)
-    return e
-
-
-class QuantileLoss:
-    """
-    Wrapper class for the quantile error loss function.
A class is used here - to allow the implementation of a custom `__repr` function, so that the - loss function object can be easily loaded using `keras.model.load`. - - Attributes: - - quantiles: List of quantiles that should be estimated with - this loss function. - - """ - - def __init__(self, quantiles): - self.__name__ = "QuantileLoss" - self.quantiles = quantiles - - def __call__(self, y_true, y_pred): - return quantile_loss(y_true, y_pred, self.quantiles) - - def __repr__(self): - return "QuantileLoss(" + repr(self.quantiles) + ")" - ################################################################################ -# Keras Interface Classes +# Set the backend ################################################################################ - -class TrainingGenerator: - """ - This Keras sample generator takes the noise-free training data - and adds independent Gaussian noise to each of the components - of the input. - - Attributes: - - x_train: The training input, i.e. the brightness temperatures - measured by the satellite. - y_train: The training output, i.e. the value of the retrieval - quantity. - x_mean: A vector containing the mean of each input component. - x_sigma: A vector containing the standard deviation of each - component. - batch_size: The size of a training batch. - """ - - def __init__(self, x_train, x_mean, x_sigma, y_train, sigma_noise, batch_size): - self.bs = batch_size - - self.x_train = x_train - self.x_mean = x_mean - self.x_sigma = x_sigma - self.y_train = y_train - self.sigma_noise = sigma_noise - - self.indices = np.random.permutation(x_train.shape[0]) - self.i = 0 - - def __iter__(self): - logger.info("iter...") - return self - - def __next__(self): - inds = self.indices[np.arange(self.i * self.bs, - (self.i + 1) * self.bs) - % self.indices.size] - x_batch = np.copy(self.x_train[inds, :]) - if not self.sigma_noise is None: - x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise - x_batch = (x_batch - self.x_mean) / self.x_sigma - y_batch = self.y_train[inds] - - self.i = self.i + 1 - - # Shuffle training set after each epoch. - if self.i % (self.x_train.shape[0] // self.bs) == 0: - self.indices = np.random.permutation(self.x_train.shape[0]) - - return (x_batch, y_batch) - -# TODO: Make y-noise argument optional - - -class AdversarialTrainingGenerator: - """ - This Keras sample generator takes the noise-free training data - and adds independent Gaussian noise to each of the components - of the input. - - Attributes: - - x_train: The training input, i.e. the brightness temperatures - measured by the satellite. - y_train: The training output, i.e. the value of the retrieval - quantity. - x_mean: A vector containing the mean of each input component. - x_sigma: A vector containing the standard deviation of each - component. - batch_size: The size of a training batch. 
- """ - - def __init__(self, - x_train, - x_mean, - x_sigma, - y_train, - sigma_noise, - batch_size, - input_gradients, - eps): - self.bs = batch_size - - self.x_train = x_train - self.x_mean = x_mean - self.x_sigma = x_sigma - self.y_train = y_train - self.sigma_noise = sigma_noise - - self.indices = np.random.permutation(x_train.shape[0]) - self.i = 0 - - # compile gradient function - bs2 = self.bs // 2 - - self.input_gradients = input_gradients - self.eps = eps - - def __iter__(self): - logger.info("iter...") - return self - - def __next__(self): - - if self.i == 0: - inds = np.random.randint(0, self.x_train.shape[0], self.bs) - - x_batch = np.copy(self.x_train[inds, :]) - if (self.sigma_noise): - x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise - - x_batch = (x_batch - self.x_mean) / self.x_sigma - y_batch = self.y_train[inds] - - else: - - bs2 = self.bs // 2 - inds = np.random.randint(0, self.x_train.shape[0], bs2) - - x_batch = np.zeros((self.bs, self.x_train.shape[1])) - y_batch = np.zeros((self.bs, 1)) - - x_batch[:bs2, :] = np.copy(self.x_train[inds, :]) - if (self.sigma_noise): - x_batch[:bs2, :] += np.random.randn(bs2, self.x_train.shape[1]) \ - * self.sigma_noise - x_batch[:bs2, :] = (x_batch[:bs2, :] - self.x_mean) / self.x_sigma - y_batch[:bs2, :] = self.y_train[inds].reshape(-1, 1) - x_batch[bs2:, :] = x_batch[:bs2, :] - y_batch[bs2:, :] = y_batch[:bs2, :] - - if (self.i > 10): - grads = self.input_gradients( - [x_batch[:bs2, :], y_batch[:bs2, :], [1.0]])[0] - x_batch[bs2:, :] += self.eps * np.sign(grads) - - self.i = self.i + 1 - return (x_batch, y_batch) - - -# TODO: Make y-noise argument optional -class ValidationGenerator: - """ - This Keras sample generator is similar to the training generator - only that it returns the whole validation set and doesn't perform - any randomization. - - Attributes: - - x_val: The validation input, i.e. the brightness temperatures - measured by the satellite. - y_val: The validation output, i.e. the value of the retrieval - quantity. - x_mean: A vector containing the mean of each input component. - x_sigma: A vector containing the standard deviation of each - component. 
- """ - - def __init__(self, x_val, x_mean, x_sigma, y_val, sigma_noise): - self.x_val = x_val - self.x_mean = x_mean - self.x_sigma = x_sigma - - self.y_val = y_val - - self.sigma_noise = sigma_noise - - def __iter__(self): - return self - - def __next__(self): - x_val = np.copy(self.x_val) - if not self.sigma_noise is None: - x_val += np.random.randn(*self.x_val.shape) * self.sigma_noise - x_val = (x_val - self.x_mean) / self.x_sigma - return (x_val, self.y_val) - - -class LRDecay(keras.callbacks.Callback): +try: + import typhon.retrieval.qrnn.models.keras as keras + backend = keras +except Exception as e: + raise e + try: + import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch + except: + raise Exception("Couldn't import neither Keras nor Pytorch " + "one of them must be available to use the QRNN" + " module.") + +def set_backend(name): + global backend + if name == "keras": + try: + import typhon.retrieval.qrnn.models.keras as keras + backend = keras + except Exception as e: + raise Exception("The following error occurred while trying " + " to import keras: ", e) + elif name == "pytorch": + try: + import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch + except Exception as e: + raise Exception("The following error occurred while trying " + " to import pytorch: ", e) + else: + raise Exception("\"{}\" is not a supported backend.".format(name)) + +def create_model(input_dim, + output_dim, + arch): """ - The LRDecay class implements the Keras callback interface and reduces - the learning rate according to validation loss reduction. - - Attributes: - - lr_decay: The factor c > 1.0 by which the learning rate is - reduced. - lr_minimum: The training is stopped when this learning rate - is reached. - convergence_steps: The number of epochs without validation loss - reduction required to reduce the learning rate. - + Creates a fully-connected neural network from a tuple + describing its architecture. + + Args: + input_dim(int): Number of input features. + output_dim(int): Number of output features. + arch: Tuple (d, w, a) containing the depth, i.e. number of + hidden layers, and width of the hidden layers, i. e. + the number of neurons in them, and the name of the + activation function as string. + Return: + Depending on the available backends, a fully-connected + keras or pytorch model, with the requested number of hidden + layers and neurons in them. 
""" - - def __init__(self, model, lr_decay, lr_minimum, convergence_steps): - self.model = model - self.lr_decay = lr_decay - self.lr_minimum = lr_minimum - self.convergence_steps = convergence_steps - self.steps = 0 - - def on_train_begin(self, logs={}): - self.losses = [] - self.steps = 0 - self.min_loss = 1e30 - - def on_epoch_end(self, epoch, logs={}): - self.losses += [logs.get('val_loss')] - if not self.losses[-1] < self.min_loss: - self.steps = self.steps + 1 - else: - self.steps = 0 - if self.steps > self.convergence_steps: - lr = keras.backend.get_value(self.model.optimizer.lr) - keras.backend.set_value( - self.model.optimizer.lr, lr / self.lr_decay) - self.steps = 0 - logger.info("\n Reduced learning rate to " + str(lr)) - - if lr < self.lr_minimum: - self.model.stop_training = True - - self.min_loss = min(self.min_loss, self.losses[-1]) + return backend.FullyConnected(input_dim, output_dim, arch) ################################################################################ -# QRNN +# QRNN class ################################################################################ - class QRNN: r""" Quantile Regression Neural Network (QRNN) - This class implements quantile regression neural networks and can be used - to estimate quantiles of the posterior distribution of remote sensing - retrievals. + This class provides a high-level implementation of quantile regression + neural networks. They can be used to estimate quantiles of the posterior + distribution of remote sensing retrievals. - Internally, the QRNN uses a feed-forward neural network that is trained - to minimize the quantile loss function + The QRNN uses a generic neural network model, that is trained to minimize + the quantile loss function .. math:: \mathcal{L}_\tau(y_\tau, y_{true}) = \begin{cases} (1 - \tau)|y_\tau - y_{true}| & \text{ if } y_\tau < y_\text{true} \\ \tau |y_\tau - y_\text{true}| & \text{ otherwise, }\end{cases} - where :math:`x_\text{true}` is the expected value of the retrieval quantity + where :math:`x_\text{true}` is the true value of the retrieval quantity and and :math:`x_\tau` is the predicted quantile. The neural network has one output neuron for each quantile to estimate. - For the training, this implementation provides custom data generators that - can be used to add Gaussian noise to the training data as well as adversarial - training using the fast gradient sign method. - - This implementation also provides functionality to use an ensemble of networks - instead of just a single network to predict the quantiles. - - .. note:: For the QRNN I am using :math:`x` to denote the input vector and - :math:`y` to denote the output. While this is opposed to typical - inverse problem notation, it is inline with machine learning - notation and felt more natural for the implementation. If this - annoys you, I am sorry. But the other way round it would have - annoyed other people and in particular me. + The QRNN class provides a generic QRNN implementation in the sense that it + does not assumed a fixed neural network infrastructure. Instead, this + functionality is off-loaded to a model object, which can be an arbitrary + regression model such as a fully-connected or a convolutional network. + A range of different models are provided in the typhon.retrieval.qrnn.models + module. This QRNN class just implements high-level operation on the QRNN + output while training and prediction are delegated to the model object. 
For + details on the respective implementation refer to the documentation of the + corresponding model class. + + .. note:: For the QRNN implementation :math:`x` is used to denote the input + vector and :math:`y` to denote the output. While this is opposed + to typical inverse problem notation, it is in line with machine + learning notation and felt more natural for the implementation. + If this annoys you, I am sorry. But the other way round it would + have annoyed other people and in particular me. Attributes: - input_dim (int): - The input dimension of the neural network, i.e. the dimension of the - measurement vector. - quantiles (numpy.array): The 1D-array containing the quantiles :math:`\tau \in [0, 1]` that the network learns to predict. - depth (int): - The number layers in the network excluding the input layer. - - width (int): - The width of the hidden layers in the network. - - activation (str): - The name of the activation functions to use in the hidden layers - of the network. - - models (list of keras.models.Sequential): - The ensemble of Keras neural networks used for the quantile regression - neural network. + model: The model object that implements the actual neural network + regressor. """ def __init__(self, - input_dim, + input_dimension, quantiles, model=(3, 128, "relu"), ensemble_size=1, @@ -379,57 +123,26 @@ def __init__(self, Create a QRNN model. Arguments: - input_dim(int): The dimension of the measurement space, i.e. the number - of elements in a single measurement vector y - + of elements in a single measurement vector y quantiles(np.array): 1D-array containing the quantiles to estimate of - the posterior distribution. Given as fractions - within the range [0, 1]. - - depth(int): The number of hidden layers in the neural network to - use for the regression. Default is 3, i.e. three hidden - plus input and output layer. - - width(int): The number of neurons in each hidden layer. - - activation(str): The name of the activation functions to use. Default - is "relu", for rectified linear unit. See - `this `_ link for - available functions. - - **kwargs: Additional keyword arguments are passed to the constructor - call `keras.layers.Dense` of the hidden layers, which can - for example be used to add regularization. For more info consult - `Keras documentation. `_ + the posterior distribution. Given as fractions + within the range [0, 1]. + model: A (possible trained) model instance or a tuple + :code:`(d, w, act)` describing the architecture of a + fully-connected neural network with d hidden layers + with w neurons and act activation functions. + ensemble_size: The size of the ensemble if an ensemble model + should be used. """ - self.input_dim = input_dim + self.input_dimension = input_dimension self.quantiles = np.array(quantiles) + self.backend = backend.__name__ - try: - from typhon.retrieval.qrnn.backends.keras import KerasQRNN - model = KerasQRNN(input_dim, quantiles, model, ensemble_size) - except: - from typhon.retrieval.qrnn.backends.pytorch import PytorchQRNN - model = PytorchQRNN(input_dim, quantiles, model, ensemble_size) - - - - #try: - # from typhon.retrieval.qrnn.backends.keras import KerasQRNN - # model = KerasQRNN(input_dim, quantiles, model, ensemble_size) - #except: - # try: - # from typhon.retrieval.qrnn.backends.pytorch import QRNNPytorch - # model = QRNNPytorch(input_dim, quantiles, model) - # except: - # model = None - #if model is None: - # raise Exception("Could not create model backend. 
Please make sure " - # "that the provided architecture string is either a " - # "valid architecture tuple, a keras model order a " - # "pytorch module.") - self.model = model + if type(model) == tuple: + self.model = backend.FullyConnected(self.input_dimension, + self.quantiles, + model) def cross_validation(self, x_train, @@ -529,95 +242,11 @@ def cross_validation(self, return (np.mean(results, axis=0), np.std(results, axis=0)) def fit(self, *args, **kwargs): - r""" - Train the QRNN on given training data. - - The training uses an internal validation set to monitor training - progress. This can be either split at random from the training - data (see `training_fraction` argument) or provided explicitly - using the `x_val` and `y_val` parameters - - Training of the QRNN is performed using stochastic gradient descent - (SGD). The learning rate is adaptively reduced during training when - the loss on the internal validation set has not been reduced for a - certain number of epochs. - - Two data augmentation techniques can be used to improve the - calibration of the QRNNs predictions. The first one adds Gaussian - noise to training batches before they are passed to the network. - The noise statistics are defined using the `sigma_noise` argument. - The second one is adversarial training. If adversarial training is - used, half of each training batch consists of adversarial samples - generated using the fast gradient sign method. The strength of the - perturbation is controlled using the `delta_at` parameter. - - During one epoch, each training sample is passed once to the network. - Their order is randomzied between epochs. - - Arguments: - - x_train(np.array): Array of shape `(n, m)` containing n training - samples of dimension m. - - y_train(np.array): Array of shape `(n, )` containing the training - output corresponding to the training data in - `x_train`. - - sigma_noise(None, float, np.array): If not `None` this value is used - to multiply the Gaussian noise - that is added to each training - batch. If None no noise is - added. - x_val(np.array): Array of shape :code:`(n', m)` containing n' validation - inputs that will be used to monitor training loss. Must - be provided in unison with :code:`y_val` or otherwise - will be ignored. - - y_val(np.array): Array of shape :code:`(n')` containing n' validation - outputs corresponding to the inputs in :code:`x_val`. - Must be provided in unison with :code:`x_val` or - otherwise will be ignored. - - adversarial_training(Bool): Whether or not to use adversarial training. - `False` by default. - - delta_at(flaot): Perturbation factor for the fast gradient sign method - determining the strength of the adversarial training - perturbation. `0.01` by default. - - batch_size(float): The batch size to use during training. Defaults to `512`. - - convergence_epochs(int): The number of epochs without decrease in - validation loss before the learning rate - is reduced. Defaults to `10`. - - initial_learning_rate(float): The inital value for the learning - rate. - - learning_rate_decay(float): The factor by which to reduce the - learning rate after no improvement - on the internal validation set was - observed for `convergence_epochs` - epochs. Defaults to `2.0`. - - learning_rate_minimum(float): The minimum learning rate at which - the training is terminated. Defaults - to `1e-6`. - - maximum_epochs(int): The maximum number of epochs to perform if - training does not terminate before. 
- - training_split(float): The ratio `0 < ts < 1.0` of the samples in - to be used as internal validation set. Defaults - to 0.9. - - """ self.model.fit(*args, **kwargs) def train(self, *args, **kwargs): self.model.train(*args, **kwargs) - def predict(self, x): r""" Predict quantiles of the conditional distribution P(y|x). @@ -679,6 +308,12 @@ def cdf(self, x): return y_pred, qs + def calibration(self, *args, **kwargs): + """ + Compute calibration curve for the given dataset. + """ + return self.model.calibration(*args, *kwargs) + def pdf(self, x, use_splines = False): r""" Approximate the posterior probability density function (PDF) for given @@ -891,17 +526,9 @@ def save(self, path): store the model. """ - f = open(path, "wb") - filename = os.path.basename(path) - name = os.path.splitext(filename)[0] - dirname = os.path.dirname(path) - - self.model_files = [] - for i, m in enumerate(self.models): - self.model_files += [name + "_model_" + str(i)] - m.save(os.path.join(dirname, self.model_files[i])) pickle.dump(self, f) + self.model.save(f) f.close() @staticmethod @@ -919,27 +546,30 @@ def load(path): The loaded QRNN object. """ - filename = os.path.basename(path) - dirname = os.path.dirname(path) - - f = open(path, "rb") - qrnn = pickle.load(f) - qrnn.models = [] - for mf in qrnn.model_files: - mf = os.path.basename(mf) - try: - mp = os.path.join(dirname, os.path.basename(mf)) - qrnn.models += [keras.models.load_model(mp, qrnn.custom_objects)] - except: - raise Exception("Error loading the neural network models. " \ - "Please make sure all files created during the"\ - " saving are in this folder.") - f.close() + import importlib + with open(path, 'rb') as f: + qrnn = pickle.load(f) + backend = importlib.import_module(qrnn.backend) + model = backend.load(f) + qrnn.model = model return qrnn + #try: + # from typhon.retrieval.qrnn.backends.keras import KerasQRNN, QuantileLoss + # globals()["QuantileLoss"] = QuantileLoss + # model = KerasQRNN.load(path) + # print(type(model)) + # qrnn = QRNN(model.input_dim, model.quantiles, model.models) + # qrnn.model = model + # globals().pop("QuantileLoss") + # return qrnn + + #except Exception as e: + # raise e + def __getstate__(self): dct = copy.copy(self.__dict__) - dct.pop("models") + dct.pop("model") return dct def __setstate__(self, state): From 18c117f7fbb5a9df20620377ad8bc9cc8de9584a Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Tue, 17 Mar 2020 10:26:50 +0100 Subject: [PATCH 03/19] Don't raise exception when keras can't be imported. --- typhon/retrieval/qrnn/qrnn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index cf45647b..93a802c4 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -14,7 +14,6 @@ import typhon.retrieval.qrnn.models.keras as keras backend = keras except Exception as e: - raise e try: import typhon.retrieval.qrnn.models.pytorch as pytorch backend = pytorch From acb9a44fdb9d7f4ff311b94409348df0ef333eb2 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Wed, 18 Mar 2020 09:15:05 +0100 Subject: [PATCH 04/19] Restructured documentation for retrieval sub-module. 
--- doc/typhon.retrieval.bmci.rst | 11 +++++ doc/typhon.retrieval.mcmc.rst | 11 +++++ doc/typhon.retrieval.oem.rst | 15 +++++++ doc/typhon.retrieval.qrnn.rst | 71 +++++++++++++++++++++++++++++++ doc/typhon.retrieval.scores.rst | 14 ++++++ doc/typhon.retrieval.spareice.rst | 11 +++++ 6 files changed, 133 insertions(+) create mode 100644 doc/typhon.retrieval.bmci.rst create mode 100644 doc/typhon.retrieval.mcmc.rst create mode 100644 doc/typhon.retrieval.oem.rst create mode 100644 doc/typhon.retrieval.qrnn.rst create mode 100644 doc/typhon.retrieval.scores.rst create mode 100644 doc/typhon.retrieval.spareice.rst diff --git a/doc/typhon.retrieval.bmci.rst b/doc/typhon.retrieval.bmci.rst new file mode 100644 index 00000000..c26d48f6 --- /dev/null +++ b/doc/typhon.retrieval.bmci.rst @@ -0,0 +1,11 @@ +Bayesian Monte Carlo Integration (BMCI) +======================================= + +.. automodule:: typhon.retrieval.bmci + +.. currentmodule:: typhon.retrieval.bmci + +.. autosummary:: + :toctree: generated + + BMCI diff --git a/doc/typhon.retrieval.mcmc.rst b/doc/typhon.retrieval.mcmc.rst new file mode 100644 index 00000000..7e2e700d --- /dev/null +++ b/doc/typhon.retrieval.mcmc.rst @@ -0,0 +1,11 @@ +Markov-Chain Monte Carlo (MCMC) +=============================== + +.. automodule:: typhon.retrieval.mcmc + +.. currentmodule:: typhon.retrieval.mcmc + +.. autosummary:: + :toctree: generated + + MCMC diff --git a/doc/typhon.retrieval.oem.rst b/doc/typhon.retrieval.oem.rst new file mode 100644 index 00000000..17795de0 --- /dev/null +++ b/doc/typhon.retrieval.oem.rst @@ -0,0 +1,15 @@ +Optimal estimation method (OEM) +=============================== + +.. automodule:: typhon.retrieval.oem + +.. currentmodule:: typhon.retrieval.oem + +.. autosummary:: + :toctree: generated + + error_covariance_matrix + averaging_kernel_matrix + retrieval_gain_matrix + smoothing_error + retrieval_noise diff --git a/doc/typhon.retrieval.qrnn.rst b/doc/typhon.retrieval.qrnn.rst new file mode 100644 index 00000000..4915733b --- /dev/null +++ b/doc/typhon.retrieval.qrnn.rst @@ -0,0 +1,71 @@ +Quantile regression neural networks (QRNNs) +=========================================== + +An implementation of quantile regression neural networks (QRNNs) developed +specifically for remote sensing applications providing a flexible +interface for simple training and evaluation of QRNNs. + +Overview +-------- + +The QRNN implementation consists of two-layers: + + - A high-level interface provided by the :py:class:`~typhon.retrieval.qrnn.QRNN` + class + - Backend-specific implementations of different neural network architectures + to be used as models by the high-level implementation + + +The QRNN class +-------------- + +The :py:class:`~typhon.retrieval.qrnn.QRNN` class provides the high-level interface for QRNNs. If you want +to train a fully-connected QRNN, this is all you need. The class itself +implments generic functionality related to the evaluation of QRNNs and the post +processing of results such as computing the PSD or the posterior mean. For the +rest it acts as a wrapper around its model attribute, which encapsules all +network- and DL-framework-specific code. + +API documentation +^^^^^^^^^^^^^^^^^ + +.. automodule:: typhon.retrieval.qrnn.qrnn +.. currentmodule:: typhon.retrieval.qrnn.qrnn +.. autosummary:: + :toctree: generated + + QRNN + +Backends +-------- + +Currently both `keras `_ and `pytorch `_ +are supported as backends for neural network. 
The QRNN implementation will +automatically use the one the is available on your system. If both are available +you can choose a specific backend using the :py:meth:`~typhon.retrieval.qrnn.set_backend` function. + + +API documentation +^^^^^^^^^^^^^^^^^ + +.. automodule:: typhon.retrieval.qrnn.qrnn +.. currentmodule:: typhon.retrieval.qrnn.qrnn +.. autosummary:: + :toctree: generated + + set_backend + +Neural network models +--------------------- + + +Pytorch +^^^^^^^ + +.. automodule:: typhon.retrieval.qrnn.models.pytorch +.. currentmodule:: typhon.retrieval.qrnn.models.pytorch +.. autosummary:: + :toctree: generated + + FullyConnected + diff --git a/doc/typhon.retrieval.scores.rst b/doc/typhon.retrieval.scores.rst new file mode 100644 index 00000000..0cd8290d --- /dev/null +++ b/doc/typhon.retrieval.scores.rst @@ -0,0 +1,14 @@ +Retrieval scores +================ + +.. automodule:: typhon.retrieval.scores + +.. currentmodule:: typhon.retrieval.scores + +.. autosummary:: + :toctree: generated + + mape + bias + quantile_score + mean_quantile_score diff --git a/doc/typhon.retrieval.spareice.rst b/doc/typhon.retrieval.spareice.rst new file mode 100644 index 00000000..79420cb8 --- /dev/null +++ b/doc/typhon.retrieval.spareice.rst @@ -0,0 +1,11 @@ +SpareIce +======== + +.. automodule:: typhon.retrieval.spareice + +.. currentmodule:: typhon.retrieval.spareice + +.. autosummary:: + :toctree: generated + + SPAREICE From 8dc3c35513e3905dc3667290cb389979d18d932b Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Wed, 18 Mar 2020 09:15:19 +0100 Subject: [PATCH 05/19] Restructured documentation for retrieval sub-module. --- doc/typhon.retrieval.rst | 96 ++++++------------------------- typhon/retrieval/qrnn/__init__.py | 10 ++-- typhon/retrieval/qrnn/qrnn.py | 48 +++++++++------- 3 files changed, 50 insertions(+), 104 deletions(-) diff --git a/doc/typhon.retrieval.rst b/doc/typhon.retrieval.rst index 46944282..3c487560 100644 --- a/doc/typhon.retrieval.rst +++ b/doc/typhon.retrieval.rst @@ -1,87 +1,29 @@ -retrieval -========= +The retrieval submodule contains implementations of different retrieval methods +as well as functions for the assessment of their performance. -.. automodule:: typhon.retrieval -.. currentmodule:: typhon.retrieval +Retrieval methods +================= -.. autosummary:: - :toctree: generated +.. toctree:: + :maxdepth: 1 -retrieval.bmci -============== + typhon.retrieval.bmci + typhon.retrieval.mcmc + typhon.retrieval.qrnn + typhon.retrieval.oem -.. automodule:: typhon.retrieval.bmci - -.. currentmodule:: typhon.retrieval.bmci - -.. autosummary:: - :toctree: generated - - BMCI - -retrieval.mcmc -============== - -.. automodule:: typhon.retrieval.mcmc - -.. currentmodule:: typhon.retrieval.mcmc - -.. autosummary:: - :toctree: generated - - MCMC - -retrieval.oem -============= - -.. automodule:: typhon.retrieval.oem - -.. currentmodule:: typhon.retrieval.oem - -.. autosummary:: - :toctree: generated - - error_covariance_matrix - averaging_kernel_matrix - retrieval_gain_matrix - smoothing_error - retrieval_noise - -retrieval.qrnn -============== - -.. automodule:: typhon.retrieval.qrnn - -.. currentmodule:: typhon.retrieval.qrnn - -.. autosummary:: - :toctree: generated - - QRNN - -retrieval.scores -================ - -.. automodule:: typhon.retrieval.scores +Retrieval products +================== -.. currentmodule:: typhon.retrieval.scores +.. toctree:: + :maxdepth: 1 -.. 
autosummary:: - :toctree: generated + typhon.retrieval.spareice - mape - bias - quantile_score - mean_quantile_score -retrieval.spareice +Utility functions ================== +.. toctree:: + :maxdepth: 1 -.. automodule:: typhon.retrieval.spareice - -.. currentmodule:: typhon.retrieval.spareice - -.. autosummary:: - :toctree: generated - - SPAREICE + typhon.retrieval.scores diff --git a/typhon/retrieval/qrnn/__init__.py b/typhon/retrieval/qrnn/__init__.py index ae8c7421..e37e9f8c 100644 --- a/typhon/retrieval/qrnn/__init__.py +++ b/typhon/retrieval/qrnn/__init__.py @@ -1,7 +1,7 @@ r""" -An implementation of quantile regression neural networks based on the -`Keras ` deep learning package. -The implementation has been developed specifically for remote sensing -applications and provides a high level interface allowing for simple -training and evaluation of the QRNNs.""" +Quantile regression neural networks (QRNNs) + +The module provides a flexible implementation of QRNNs for remote sensing +retrievals. +""" from typhon.retrieval.qrnn.qrnn import QRNN, set_backend diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 93a802c4..8f586957 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -23,6 +23,14 @@ " module.") def set_backend(name): + """ + Set the neural network package to use as backend. + + The currently available backend are "keras" and "pytorch". + + Args: + name(str): The name of the backend. + """ global backend if name == "keras": try: @@ -52,7 +60,7 @@ def create_model(input_dim, input_dim(int): Number of input features. output_dim(int): Number of output features. arch: Tuple (d, w, a) containing the depth, i.e. number of - hidden layers, and width of the hidden layers, i. e. + hidden layers width of the hidden layers, i. e. the number of neurons in them, and the name of the activation function as string. Return: @@ -71,11 +79,11 @@ class QRNN: Quantile Regression Neural Network (QRNN) This class provides a high-level implementation of quantile regression - neural networks. They can be used to estimate quantiles of the posterior + neural networks. It can be used to estimate quantiles of the posterior distribution of remote sensing retrievals. - The QRNN uses a generic neural network model, that is trained to minimize - the quantile loss function + The :class:`QRNN`` class uses an arbitrary neural network model, that is + trained to minimize the quantile loss function .. math:: \mathcal{L}_\tau(y_\tau, y_{true}) = @@ -87,28 +95,24 @@ class QRNN: has one output neuron for each quantile to estimate. The QRNN class provides a generic QRNN implementation in the sense that it - does not assumed a fixed neural network infrastructure. Instead, this + does not assume a fixed neural network infrastructure. Instead, this functionality is off-loaded to a model object, which can be an arbitrary - regression model such as a fully-connected or a convolutional network. - A range of different models are provided in the typhon.retrieval.qrnn.models - module. This QRNN class just implements high-level operation on the QRNN - output while training and prediction are delegated to the model object. For - details on the respective implementation refer to the documentation of the - corresponding model class. + regression network such as a fully-connected or a convolutional network. A + range of different models are provided in the typhon.retrieval.qrnn.models + module. 
The :class:`QRNN`` class just implements high-level operation on + the QRNN output while training and prediction are delegated to the model + object. For details on the respective implementation refer to the + documentation of the corresponding model class. .. note:: For the QRNN implementation :math:`x` is used to denote the input vector and :math:`y` to denote the output. While this is opposed - to typical inverse problem notation, it is in line with machine - learning notation and felt more natural for the implementation. - If this annoys you, I am sorry. But the other way round it would - have annoyed other people and in particular me. + to inverse problem notation typically used for retrievals, it is + in line with machine learning notation and felt more natural for + the implementation. If this annoys you, I am sorry. Attributes: - - quantiles (numpy.array): - The 1D-array containing the quantiles :math:`\tau \in [0, 1]` that the - network learns to predict. - + quantiles (numpy.array): The 1D-array containing the quantiles + :math:`\tau \in [0, 1]` that the network learns to predict. model: The model object that implements the actual neural network regressor. """ @@ -129,8 +133,8 @@ def __init__(self, within the range [0, 1]. model: A (possible trained) model instance or a tuple :code:`(d, w, act)` describing the architecture of a - fully-connected neural network with d hidden layers - with w neurons and act activation functions. + fully-connected neural network with :code:`d` hidden layers + with :code:`w` neurons and :code:`act` activation functions. ensemble_size: The size of the ensemble if an ensemble model should be used. """ From ba3bd5f8708ef8745977907a048a97fd5da5e4f0 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Thu, 19 Mar 2020 17:33:51 +0100 Subject: [PATCH 06/19] Added tests for QRNN. --- typhon/retrieval/qrnn/models/keras.py | 436 +++++++++++++++++++++-- typhon/retrieval/qrnn/models/pytorch.py | 272 ++++++++++---- typhon/retrieval/qrnn/qrnn.py | 41 ++- typhon/tests/retrieval/qrnn/test_qrnn.py | 68 ++++ 4 files changed, 713 insertions(+), 104 deletions(-) create mode 100644 typhon/tests/retrieval/qrnn/test_qrnn.py diff --git a/typhon/retrieval/qrnn/models/keras.py b/typhon/retrieval/qrnn/models/keras.py index 58e511e8..28b7aef7 100644 --- a/typhon/retrieval/qrnn/models/keras.py +++ b/typhon/retrieval/qrnn/models/keras.py @@ -1,16 +1,62 @@ import numpy as np +import keras from keras.models import Sequential -from keras.layers import Dense, Activation +from keras.layers import Dense, Activation, deserialize +from keras.optimizers import SGD import keras.backend as K import logging + +def save_model(f, model): + """ + Save keras model. + + Args: + f(:code:`str` or binary stream): Either a path or a binary stream + to store the data to. + model(:code:`keras.models.Models`): The Keras model to save + """ + keras.models.save_model(model, f) + +def load_model(f, quantiles): + """ + Load keras model. + + Args: + f(:code:`str` or binary stream): Either a path or a binary stream + to read the model from + quantiles(:code:`np.ndarray`): Array containing the quantiles + that the model predicts. + + Returns: + The loaded keras model. + """ + # + # This is a bit hacky but seems required to handle + # the custom model classes. 
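+    # keras only serializes the plain Sequential part of the model, so
+    # the custom FullyConnected class is rebuilt here from the
+    # deserialized layers and the quantiles supplied by the caller.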
+ # + def make_fully_connected(layers = None, + **kwargs): + layers = list(map(deserialize, layers)) + input_dimensions = layers[0].batch_input_shape[1] + return FullyConnected(input_dimensions, + quantiles, + (), + layers) + custom_objects = {"FullyConnected" : make_fully_connected, + "QuantileLoss" : QuantileLoss} + model = keras.models.load_model(f, custom_objects=custom_objects) + return model + ################################################################################ # Quantile loss ################################################################################ logger = logging.getLogger(__name__) -def skewed_absolute_error(y_true, y_pred, tau): +def skewed_absolute_error(y_true, + y_pred, + tau): """ The quantile loss function for a given quantile tau: @@ -67,42 +113,378 @@ def __init__(self): def train(self): pass +################################################################################ +# Keras data generators +################################################################################ + +class BatchedDataset: + """ + Keras data loader that batches a given dataset of numpy arryas. + """ + def __init__(self, + training_data, + batch_size): + """ + Create batched dataset. + + Args: + training_data: Tuple :code:`(x, y)` containing the input + and output data as arrays. + batch_size(:code:`int`): The batch size + """ + x, y = training_data + self.x = x + self.y = y + self.bs = batch_size + self.indices = np.random.permutation(x.shape[0]) + self.i = 0 + + def __iter__(self): + logger.info("iter...") + return self + + def __len__(self): + return self.x.shape[0] // self.bs + + def __next__(self): + inds = self.indices[np.arange(self.i * self.bs, + (self.i + 1) * self.bs) + % self.indices.size] + x_batch = np.copy(self.x[inds, :]) + y_batch = self.y[inds] + self.i = self.i + 1 + # Shuffle training set after each epoch. + if self.i % (self.x.shape[0] // self.bs) == 0: + self.indices = np.random.permutation(self.x.shape[0]) + + return (x_batch, y_batch) + +class TrainingGenerator: + """ + This Keras sample generator takes a generator for noise-free training data + and adds independent Gaussian noise to each of the components of the input. + + Attributes: + training_data: Data generator providing the data + sigma_noise: A vector containing the standard deviation of each + component. + """ + def __init__(self, + training_data, + sigma_noise = None): + """ + Args: + training_data: Data generator providing the original (noise-free) + training data. + sigma_noise: Vector the length of the input dimensions specifying + the standard deviation of the noise. + """ + self.training_data = training_data + self.sigma_noise = sigma_noise + + def __iter__(self): + logger.info("iter...") + return self + + def __len__(self): + return len(self.training_data) + + def __next__(self): + x_batch, y_batch = next(self.training_data) + if not self.sigma_noise is None: + x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise + return (x_batch, y_batch) + +class AdversarialTrainingGenerator: + """ + This Keras sample generator takes the noise-free training data + and adds independent Gaussian noise to each of the components + of the input. + + Attributes: + training_data: Training generator to use to generate the input + data + input_gradients: Keras function to compute the gradients of the + network + eps: The perturbation factor. 
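+
+    Example:
+
+        A sketch of how the generator is wired up inside
+        :code:`KerasModel.train` (the names are the ones used there):
+
+        >>> generator = TrainingGenerator(training_data, sigma_noise)
+        >>> generator = AdversarialTrainingGenerator(generator,
+        ...                                          input_gradients,
+        ...                                          delta_at)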
+ """ + def __init__(self, + training_data, + input_gradients, + eps): + """ + Args: + training_data: Training generator to use to generate the input + data + input_gradients: Keras function to compute the gradients of the + network + eps: The perturbation factor. + """ + self.training_data = training_data + self.input_gradients = input_gradients + self.eps = eps + + def __iter__(self): + logger.info("iter...") + return self + + def __len__(self): + return len(self.training_data) + + def __next__(self): + if self.i % 2 == 0: + x_batch, y_batch = next(self.training_data) + self.x_batch = x_batch + self.y_batch = y_batch + else: + x_batch = self.x_batch + y_batch = self.y_batch + grads = self.input_gradients([x_batch, y_batch, 1.0]) + x_batch += self.eps * np.sign(grads) + + self.i = self.i + 1 + return x_batch, y_batch + +class ValidationGenerator: + """ + This Keras sample generator is similar to the training generator + only that it returns the whole validation set and doesn't perform + any randomization. + + Attributes: + + x_val: The validation input, i.e. the brightness temperatures + measured by the satellite. + y_val: The validation output, i.e. the value of the retrieval + quantity. + x_mean: A vector containing the mean of each input component. + x_sigma: A vector containing the standard deviation of each + component. + """ + + def __init__(self, + validation_data, + sigma_noise): + self.validation_data = validation_data + self.sigma_noise = sigma_noise + + def __iter__(self): + return self + + def __next__(self): + x_val, y_val = next(self.validation_data) + if not self.sigma_noise is None: + x_val += np.random.randn(*self.x_val.shape) * self.sigma_noise + return (x_val, self.y_val) + +################################################################################ +# LRDecay +################################################################################ + +class LRDecay(keras.callbacks.Callback): + """ + The LRDecay class implements the Keras callback interface and reduces + the learning rate according to validation loss reduction. + + Attributes: + + lr_decay: The factor c > 1.0 by which the learning rate is + reduced. + lr_minimum: The training is stopped when this learning rate + is reached. + convergence_steps: The number of epochs without validation loss + reduction required to reduce the learning rate. + + """ + + def __init__(self, model, lr_decay, lr_minimum, convergence_steps): + self.model = model + self.lr_decay = lr_decay + self.lr_minimum = lr_minimum + self.convergence_steps = convergence_steps + self.steps = 0 + + def on_train_begin(self, logs={}): + self.losses = [] + self.steps = 0 + self.min_loss = 1e30 + + def on_epoch_end(self, epoch, logs={}): + loss = logs.get('val_loss') + if loss is None: + loss = logs.get("loss") + self.losses += [loss] + if not self.losses[-1] < self.min_loss: + self.steps = self.steps + 1 + else: + self.steps = 0 + if self.steps > self.convergence_steps: + lr = keras.backend.get_value(self.model.optimizer.lr) + keras.backend.set_value( + self.model.optimizer.lr, lr / self.lr_decay) + self.steps = 0 + logger.info("\n Reduced learning rate to " + str(lr)) + + if lr < self.lr_minimum: + self.model.stop_training = True + + self.min_loss = min(self.min_loss, self.losses[-1]) + +################################################################################ +# QRNN +################################################################################ + +class KerasModel: + r""" + Base class for Keras models. 
+ + This base class provides generic utility function for the training, saving + and evaluation of Keras models. + + Attributes: + input_dimensions (int): The input dimension of the neural network, i.e. + the dimension of the measurement vector. + quantiles (numpy.array): The 1D-array containing the quantiles + :math:`\tau \in [0, 1]` that the network learns to predict. + + depth (int): + The number layers in the network excluding the input layer. + + width (int): + The width of the hidden layers in the network. + + activation (str): + The name of the activation functions to use in the hidden layers + of the network. + + models (list of keras.models.Sequential): + The ensemble of Keras neural networks used for the quantile regression + neural network. + """ + + def __init__(self, + input_dimension, + quantiles): + """ + Create a QRNN model. + + Arguments: + input_dimension(int): The dimension of the measurement space, i.e. the number + of elements in a single measurement vector y + + quantiles(np.array): 1D-array containing the quantiles to estimate of + the posterior distribution. Given as fractions + within the range [0, 1]. + """ + self.input_dimension = input_dimension + self.quantiles = np.array(quantiles) + + + def train(self, + training_data, + validation_data = None, + batch_size = 256, + sigma_noise=None, + adversarial_training = False, + delta_at = 0.01, + initial_learning_rate = 1e-2, + momentum = 0.0, + convergence_epochs = 5, + learning_rate_decay = 2.0, + learning_rate_minimum = 1e-6, + maximum_epochs = 200, + training_split = 0.9, + gpu = False): -class FullyConnected(Sequential): + if type(training_data) == tuple: + if not type(training_data[0]) == np.ndarray: + raise ValueError("When training data is provided as tuple" + " (x, y) it must contain numpy arrays.") + training_data = BatchedDataset(training_data, + batch_size) + + if type(validation_data) is tuple: + validation_data = BatchedDataset(validation_data, + batch_size) + + loss = QuantileLoss(self.quantiles) + + # Compile model + self.custom_objects = {loss.__name__: loss} + optimizer = SGD(lr=initial_learning_rate) + self.compile(loss=loss, optimizer=optimizer) + + # + # Setup training generator + # + training_generator = TrainingGenerator(training_data, + sigma_noise) + if adversarial_training: + inputs = [self.input, + self.targets[0], + self.sample_weights[0]] + input_gradients = K.function( + inputs, K.gradients(self.total_loss, self.input)) + training_generator = AdversarialTrainingGenerator(training_generator, + input_gradients, + delta_at) + + if validation_data is None: + validation_generator = None + else: + validation_generator = ValidationGenerator(validation_data, + sigma_noise) + lr_callback = LRDecay(self, + learning_rate_decay, + learning_rate_minimum, + convergence_epochs) + self.fit_generator(training_generator, + steps_per_epoch=len(training_generator), + epochs=maximum_epochs, + validation_data=validation_generator, + validation_steps=1, + callbacks=[lr_callback]) + + +################################################################################ +# Fully-connected network +################################################################################ + +class FullyConnected(KerasModel, Sequential): """ - Convenience class to represent fully connected models with - given number of input and output features and depth and - width of hidden layers. + Keras implementation of fully-connected networks. 
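+
+    Example:
+
+        A minimal sketch (the quantile grid is illustrative):
+
+        >>> import numpy as np
+        >>> quantiles = np.linspace(0.05, 0.95, 10)
+        >>> model = FullyConnected(16, quantiles, (3, 128, "relu"))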
""" def __init__(self, input_dimension, quantiles, arch, - activation = "relu"): + layers = None): """ Create a fully-connected neural network. Args: - input_dimension(int): Number of input features - output_dimension(int): Number of output features - arch(tuple): Tuple (d, w) of d, the number of hidden - layers in the network, and w, the width of the net- - work. - activation(str or None): Activation function to insert between - hidden layers. The given string is passed to the - keras.layers.Activation class. If None no activation function - is used. + input_dimension(:code:`int`): Number of input features + quantiles(:code:`array`): The quantiles to predict given + as fractions within [0, 1]. + arch(tuple): Tuple :code:`(d, w, a)` containing :code:`d`, the + number of hidden layers in the network, :code:`w`, the width + of the network and :code:`a`, the type of activation functions + to be used as string. """ quantiles = np.array(quantiles) output_dimension = quantiles.size - if len(arch) == 0: - layers = [Dense(output_dimension, input_shape=(input_dimension))] - else: - d, w = arch - layers = [Dense(input_dimension, input_shape=(w,))] - for i in range(d - 1): - layers.append(Dense(w, input_shape=(w,))) - if not activation is None: - layers.append(Activation(activation)) - layers.append(Dense(output_dimension, input_shape=(w,))) - super().__init__(layers) + + if layers is None: + if len(arch) == 0: + layers = [Dense(output_dimension, input_shape=(input_dimension))] + else: + d, w, a = arch + layers = [Dense(w, input_shape=(input_dimension,))] + for i in range(d - 1): + layers.append(Dense(w, input_shape=(w,))) + if not a is None: + layers.append(Activation(a)) + layers.append(Dense(output_dimension, input_shape=(w,))) + + KerasModel.__init__(self, input_dimension, quantiles) + Sequential.__init__(self, layers) diff --git a/typhon/retrieval/qrnn/models/pytorch.py b/typhon/retrieval/qrnn/models/pytorch.py index f77835f8..a9349496 100644 --- a/typhon/retrieval/qrnn/models/pytorch.py +++ b/typhon/retrieval/qrnn/models/pytorch.py @@ -9,6 +9,8 @@ import numpy as np from torch import nn from torch import optim +from torch.optim.lr_scheduler import ReduceLROnPlateau +from torch.utils.data import Dataset from tqdm.auto import tqdm activations = {"elu" : nn.ELU, @@ -23,10 +25,85 @@ "softmin" : nn.Softmin} -def load(f): +def save_model(f, model): + """ + Save pytorch model. + + Args: + f(:code:`str` or binary stream): Either a path or a binary stream + to store the data to. + model(:code:`pytorch.nn.Moduel`): The pytorch model to save + """ + torch.save(model, f) + +def load_model(f, quantiles): + """ + Load pytorch model. + + Args: + f(:code:`str` or binary stream): Either a path or a binary stream + to read the model from + quantiles(:code:`np.ndarray`): Array containing the quantiles + that the model predicts. + + Returns: + The loaded pytorch model. + """ model = torch.load(f) return model +def handle_input(data, device = None): + """ + Handle input data. + + This function handles data supplied + + - as tuple of :code:`np.ndarray` + - a single :code:`np.ndarray` + - torch :code:`dataloader` + + If a numpy array is provided it is converted to a torch tensor + so that it can be fed into a pytorch model. 
+ """ + if type(data) == tuple: + x, y = data + x = torch.tensor(x, dtype=torch.float) + y = torch.tensor(y, dtype=torch.float) + if not device is None: + x = x.to(device) + y = y.to(device) + return x, y + if type(data) == np.ndarray: + x = torch.tensor(data, dtype=torch.float) + if not device is None: + x = x.to(device) + return x + else: + return data + +class BatchedDataset(Dataset): + """ + Batches an un-bactched dataset. + """ + def __init__(self, x, y, batch_size): + self.x = x + self.y = y + self.batch_size = batch_size + + def __len__(self): + # This is required because x and y are tensors and don't throw these + # errors themselves. + return self.x.shape[0] // self.batch_size + + def __getitem__(self, i): + if i >= len(self): + raise IndexError() + i_start = i * self.batch_size + i_end = (i + 1) * self.batch_size + x = self.x[i_start : i_end] + y = self.y[i_start : i_end] + return (x, y) + ################################################################################ # Quantile loss ################################################################################ @@ -130,12 +207,12 @@ def train(self, *args, **kwargs): """ Train the network. - This trains the network for the given number of epochs using the provided - training and validation data. + This trains the network for the given number of epochs using the + provided training and validation data. - If desired, the training can be augmented using adversarial training. In this - case the network is additionally trained with an adversarial batch of examples - in each step of the training. + If desired, the training can be augmented using adversarial training. + In this case the network is additionally trained with an adversarial + batch of examples in each step of the training. Arguments: training_data: pytorch dataloader providing the training data @@ -144,38 +221,93 @@ def train(self, *args, **kwargs): adversarial_training: whether or not to use adversarial training eps_adv: The scaling factor to use for adversarial training. 
""" - if len(args) < 2: - return nn.Sequential.train(self, *args) - - training_data, validation_data = args - n_epochs = kwargs.get("n_epochs", 1) - adversarial_training = kwargs.get("adversarial_training", False) - eps_adv = kwargs.get("eps_adv", 1e-6) - lr = kwargs.get("lr", 1e-2) - momentum = kwargs.get("momentum", 0.0) - gpu = kwargs.get("gpu", False) - - self.optimizer = optim.SGD(self.parameters(), - lr = lr, - momentum = momentum) - self.train() - + # Handle overload of train() method + if len(args) < 1 or (len(args) == 1 and type(args[0]) == bool): + return nn.Sequential.train(self, *args, **kwargs) + + # + # Parse training arguments + # + + training_data = args[0] + arguments = {"validation_data" : None, + "batch_size" : 256, + "sigma_noise" : None, + "adversarial_training" : False, + "delta_at" : 0.01, + "initial_learning_rate" : 1e-2, + "momentum" : 0.0, + "convergence_epochs" : 5, + "learning_rate_decay" : 2.0, + "learning_rate_minimum" : 1e-6, + "maximum_epochs" : 1, + "training_split" : 0.9, + "gpu" : False} + argument_names = arguments.keys() + for a, n in zip(args[1:], argument_names): + arguments[n] = a + for k in kwargs: + if k in arguments: + arguments[k] = kwargs[k] + else: + raise ValueError("Unknown argument to {}.".print(k)) + + validation_data = arguments["validation_data"] + batch_size = arguments["batch_size"] + sigma_noise = arguments["sigma_noise"] + adversarial_training = arguments["adversarial_training"] + delta_at = arguments["delta_at"] + initial_learning_rate = arguments["initial_learning_rate"] + convergence_epochs = arguments["convergence_epochs"] + learning_rate_decay = arguments["learning_rate_decay"] + learning_rate_minimum = arguments["learning_rate_minimum"] + maximum_epochs = arguments["maximum_epochs"] + training_split = arguments["training_split"] + gpu = arguments["gpu"] + momentum = arguments["momentum"] + + # + # Determine device to use + # if torch.cuda.is_available() and gpu: - dev = torch.device("cuda") + device = torch.device("cuda") else: - dev = torch.device("cpu") - self.to(dev) + device = torch.device("cpu") + self.to(device) - self.criterion.to(dev) + # + # Handle input data + # + try: + x, y = handle_input(training_data, device) + training_data = BatchedDataset(x, y, batch_size) + except: + pass - for i in range(n_epochs): + self.train() + self.optimizer = optim.SGD(self.parameters(), + lr = initial_learning_rate, + momentum = momentum) + self.criterion.to(device) + scheduler = ReduceLROnPlateau(self.optimizer, + factor=1.0 / learning_rate_decay, + patience=convergence_epochs, + min_lr=learning_rate_minimum) + training_errors = [] + validation_errors = [] + + # + # Training loop + # + + for i in range(maximum_epochs): err = 0.0 n = 0 iterator = tqdm(enumerate(training_data), total = len(training_data)) for j, (x, y) in iterator: - x = x.to(dev) - y = y.to(dev) + x = x.to(device) + y = y.to(device) shape = x.size() shape = (shape[0], 1) + shape[2:] @@ -192,7 +324,7 @@ def train(self, *args, **kwargs): if adversarial_training: self.optimizer.zero_grad() - x_adv = self._make_adversarial_samples(x, y, eps_adv) + x_adv = self._make_adversarial_samples(x, y, delta_at) y_pred = self(x) c = self.criterion(y_pred, y) c.backward() @@ -202,41 +334,42 @@ def train(self, *args, **kwargs): iterator.set_postfix({"Training errror" : err / n}) # Save training error - self.training_errors.append(err / n) + training_errors.append(err / n) val_err = 0.0 n = 0 + if not validation_data is None: + print(validation_data) + for x, y in validation_data: + x = 
x.to(device)
+                    y = y.to(device)
 
+                    shape = x.size()
+                    shape = (shape[0], 1) + shape[2:]
+                    y = y.reshape(shape)
 
-                y_pred = self(x)
-                c = self.criterion(y_pred, y)
+                    y_pred = self(x)
+                    c = self.criterion(y_pred, y)
 
-                val_err += c.item() * x.size()[0]
-                n += x.size()[0]
+                    val_err += c.item() * x.size()[0]
+                    n += x.size()[0]
+                validation_errors.append(val_err / n)
+                # Reduce the learning rate when the validation loss
+                # stops decreasing.
+                scheduler.step(val_err / n)
 
-            self.validation_errors.append(val_err / n)
+        self.training_errors += training_errors
+        self.validation_errors += validation_errors
         self.eval()
+        return {"training_errors" : self.training_errors,
+                "validation_errors" : self.validation_errors}
 
-    def predict(self, x):
-        """
-        Predict quantiles for given input.
-
-        Args:
-            x: 2D tensor containing the inputs for which to predict
-                the quantiles.
-
-        Returns:
-            tensor: 2D tensor containing the predicted quantiles along
-                the last dimension.
-        """
-        return self(x)
+    def predict(self, x, gpu=False):
+        """
+        Predict quantiles for given input.
+
+        Args:
+            x: 2D tensor or :code:`np.ndarray` containing the inputs for
+                which to predict the quantiles.
+            gpu: Whether to perform the prediction on the GPU, if one
+                is available.
+
+        Returns:
+            tensor: 2D tensor containing the predicted quantiles along
+                the last dimension.
+        """
+        if torch.cuda.is_available() and gpu:
+            device = torch.device("cuda")
+        else:
+            device = torch.device("cpu")
+        x = handle_input(x, device)
+        self.to(device)
+        return self(x).detach()
 
     def calibration(self,
                     data,
@@ -261,7 +394,8 @@ def calibration(self,
 
         n_intervals = self.quantiles.size // 2
         qs = self.quantiles
-        intervals = np.array([q_r - q_l for (q_l, q_r) in zip(qs, reversed(qs))])[:n_intervals]
+        intervals = np.array([q_r - q_l for (q_l, q_r)
+                              in zip(qs, reversed(qs))])[:n_intervals]
         counts = np.zeros(n_intervals)
 
         total = 0.0
@@ -316,11 +450,13 @@ def load(path):
         qrnn.load_state_dict(state["network_state"])
         qrnn.optimizer.load_state_dict(state["optimizer_state"])
         return qrnn
 
+################################################################################
+# Fully-connected network
+################################################################################
+
 class FullyConnected(PytorchModel, nn.Sequential):
     """
-    Convenience class to represent fully connected models with
-    given number of input and output features and depth and
-    width of hidden layers.
+    Pytorch implementation of a fully-connected QRNN model.
     """
     def __init__(self,
                  input_dimension,
                  quantiles,
                  arch):
 
         """
         Create a fully-connected neural network.
 
         Args:
-            input_dimension(int): Number of input features
-            quantiles(array): The quantiles to predict, given as fractions
-                within [0, 1].
-            arch(tuple): Tuple (d, w, act) of d, the number of hidden
-                layers in the network, w, the width of the network, and
-                act, the activation function to use (name or None).
+            input_dimension(:code:`int`): Number of input features
+            quantiles(:code:`array`): The quantiles to predict, given
+                as fractions within [0, 1].
+            arch(tuple): Tuple :code:`(d, w, a)` containing :code:`d`, the
+                number of hidden layers in the network, :code:`w`, the width
+                of the network, and :code:`a`, the name of the activation
+                function to use, given as string.
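+
+        Example:
+
+            A minimal sketch (the quantile grid is illustrative):
+
+            >>> import numpy as np
+            >>> quantiles = np.linspace(0.05, 0.95, 10)
+            >>> model = FullyConnected(16, quantiles, (3, 128, "relu"))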
""" PytorchModel.__init__(self, input_dimension, quantiles) output_dimension = quantiles.size @@ -354,7 +492,3 @@ def __init__(self, layers.append(act()) layers.append(nn.Linear(w, output_dimension)) nn.Sequential.__init__(self, *layers) - - def save(self, f): - torch.save(self, f) - diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 8f586957..9c235443 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -2,6 +2,7 @@ import logging import os import pickle +import importlib import numpy as np from scipy.interpolate import CubicSpline @@ -244,11 +245,35 @@ def cross_validation(self, logger.info(results) return (np.mean(results, axis=0), np.std(results, axis=0)) - def fit(self, *args, **kwargs): - self.model.fit(*args, **kwargs) - - def train(self, *args, **kwargs): - self.model.train(*args, **kwargs) + def train(self, + training_data, + validation_data = None, + batch_size = 256, + sigma_noise = None, + adversarial_training = False, + delta_at = 0.01, + initial_learning_rate = 1e-2, + momentum = 0.0, + convergence_epochs = 5, + learning_rate_decay = 2.0, + learning_rate_minimum = 1e-6, + maximum_epochs = 200, + training_split = 0.9, + gpu = True): + self.model.train(training_data, + validation_data, + batch_size, + sigma_noise, + adversarial_training, + delta_at, + initial_learning_rate, + momentum, + convergence_epochs, + learning_rate_decay, + learning_rate_minimum, + maximum_epochs, + training_split, + gpu) def predict(self, x): r""" @@ -531,7 +556,8 @@ def save(self, path): """ f = open(path, "wb") pickle.dump(self, f) - self.model.save(f) + backend = importlib.import_module(self.backend) + backend.save_model(self.model, f) f.close() @staticmethod @@ -549,11 +575,10 @@ def load(path): The loaded QRNN object. """ - import importlib with open(path, 'rb') as f: qrnn = pickle.load(f) backend = importlib.import_module(qrnn.backend) - model = backend.load(f) + model = backend.load_model(f, qrnn.quantiles) qrnn.model = model return qrnn diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py new file mode 100644 index 00000000..3dea2241 --- /dev/null +++ b/typhon/tests/retrieval/qrnn/test_qrnn.py @@ -0,0 +1,68 @@ +from typhon.retrieval.qrnn import QRNN, set_backend +import numpy as np +import os +import tempfile + +# +# Import available backends. +# + +backends = [] +try: + import typhon.retrieval.qrnn.models.keras + backends += ["keras"] +except: + pass + +try: + import typhon.retrieval.qrnn.models.pytorch + backends += ["pytorch"] +except: + pass + +class TestQrnn: + + def setup_method(self): + dir = os.path.dirname(os.path.realpath(__file__)) + path = os.path.join(dir, "test_data") + x_train = np.load(os.path.join(path, "x_train.npy")) + x_mean = np.mean(x_train, keepdims = True) + x_sigma = np.std(x_train, keepdims = True) + self.x_train = (x_train - x_mean) / x_sigma + self.y_train = np.load(os.path.join(path, "y_train.npy")) + + def test_qrnn(self): + """ + Test training of QRNNs using numpy arrays as input. + """ + for backend in backends: + set_backend(backend) + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + qrnn.train((self.x_train, self.y_train), + maximum_epochs = 1) + qrnn.predict(self.x_train) + + def save_qrnn(self): + """ + Test saving and loading of QRNNs. 
+ """ + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + f = tempfile.NamedTemporaryFile() + qrnn.save(f.name) + qrnn_loaded = QRNN.load(f.name) + + x_pred = qrnn.predict(self.x_train) + x_pred_loaded = qrnn.predict(self.x_train) + + if not type(x_pred) == np.ndarray: + x_pred = x_pred.detach() + + assert(np.allclose(x_pred, x_pred_loaded)) + + +__file__ = "./bla.py" +test = TestQrnn() +test.setup_method() +test.test_qrnn() From c28b81db05ab20e505bc838b9f0bc4d08b2f6ec8 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Fri, 20 Mar 2020 16:39:05 +0100 Subject: [PATCH 07/19] Added unet implementation. --- typhon/retrieval/qrnn/__init__.py | 2 +- .../retrieval/qrnn/models/pytorch/__init__.py | 10 + .../models/{pytorch.py => pytorch/common.py} | 62 +----- .../qrnn/models/pytorch/fully_connected.py | 46 ++++ typhon/retrieval/qrnn/models/pytorch/unet.py | 209 ++++++++++++++++++ typhon/retrieval/qrnn/qrnn.py | 43 +++- typhon/tests/retrieval/qrnn/test_qrnn.py | 24 +- 7 files changed, 336 insertions(+), 60 deletions(-) create mode 100644 typhon/retrieval/qrnn/models/pytorch/__init__.py rename typhon/retrieval/qrnn/models/{pytorch.py => pytorch/common.py} (88%) create mode 100644 typhon/retrieval/qrnn/models/pytorch/fully_connected.py create mode 100644 typhon/retrieval/qrnn/models/pytorch/unet.py diff --git a/typhon/retrieval/qrnn/__init__.py b/typhon/retrieval/qrnn/__init__.py index e37e9f8c..f69e66f9 100644 --- a/typhon/retrieval/qrnn/__init__.py +++ b/typhon/retrieval/qrnn/__init__.py @@ -4,4 +4,4 @@ The module provides a flexible implementation of QRNNs for remote sensing retrievals. """ -from typhon.retrieval.qrnn.qrnn import QRNN, set_backend +from typhon.retrieval.qrnn.qrnn import QRNN, set_backend, get_backend diff --git a/typhon/retrieval/qrnn/models/pytorch/__init__.py b/typhon/retrieval/qrnn/models/pytorch/__init__.py new file mode 100644 index 00000000..2a00f434 --- /dev/null +++ b/typhon/retrieval/qrnn/models/pytorch/__init__.py @@ -0,0 +1,10 @@ +""" +models +====== + +This module provides an implementation of quantile regression neural networks (QRNNs) +using the pytorch deep learning framework. +""" +from typhon.retrieval.qrnn.models.pytorch.common import BatchedDataset +from typhon.retrieval.qrnn.models.pytorch.fully_connected import FullyConnected +from typhon.retrieval.qrnn.models.pytorch.unet import UNet diff --git a/typhon/retrieval/qrnn/models/pytorch.py b/typhon/retrieval/qrnn/models/pytorch/common.py similarity index 88% rename from typhon/retrieval/qrnn/models/pytorch.py rename to typhon/retrieval/qrnn/models/pytorch/common.py index a9349496..e7ec42a8 100644 --- a/typhon/retrieval/qrnn/models/pytorch.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -1,9 +1,8 @@ """ -models -====== +models.pytorch.common +===================== -This module provides an implementation of quantile regression neural networks (QRNNs) -using the pytorch deep learning framework. +This module provides common functionality required to realize QRNNs in pytorch. """ import torch import numpy as np @@ -83,11 +82,14 @@ def handle_input(data, device = None): class BatchedDataset(Dataset): """ - Batches an un-bactched dataset. + Batches an un-batched dataset. 
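+
+    A minimal usage sketch (array names are hypothetical):
+
+    >>> data = BatchedDataset((x_train, y_train), batch_size=256)
+    >>> x_batch, y_batch = data[0]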
""" - def __init__(self, x, y, batch_size): - self.x = x - self.y = y + def __init__(self, + training_data, + batch_size): + x, y = training_data + self.x = torch.tensor(x, dtype=torch.float) + self.y = torch.tensor(y, dtype=torch.float) self.batch_size = batch_size def __len__(self): @@ -193,6 +195,7 @@ def __init__(self, self.criterion = QuantileLoss(self.quantiles) self.training_errors = [] self.validation_errors = [] + self.backend = "typhon.retrieval.qrnn.models.typhon" def _make_adversarial_samples(self, x, y, eps): self.model.zero_grad() @@ -449,46 +452,3 @@ def load(self, path): qrnn = QRNN(*[state[k] for k in keys]) qrnn.load_state_dict["network_state"] qrnn.optimizer.load_state_dict["optimizer_state"] - -################################################################################ -# Fully-connected network -################################################################################ - -class FullyConnected(PytorchModel, nn.Sequential): - """ - Pytorch implementation of a fully-connected QRNN model. - """ - def __init__(self, - input_dimension, - quantiles, - arch): - - """ - Create a fully-connected neural network. - - Args: - input_dimension(:code:`int`): Number of input features - quantiles(:code:`array`): The quantiles to predict given - as fractions within [0, 1]. - arch(tuple): Tuple :code:`(d, w, a)` containing :code:`d`, the - number of hidden layers in the network, :code:`w`, the width - of the network and :code:`a`, the type of activation functions - to be used as string. - """ - PytorchModel.__init__(self, input_dimension, quantiles) - output_dimension = quantiles.size - self.arch = arch - - if len(arch) == 0: - layers = [nn.Linear(input_dimension, output_dimension)] - else: - d, w, act = arch - if type(act) == str: - act = activations[act] - layers = [nn.Linear(input_dimension, w)] - for i in range(d - 1): - layers.append(nn.Linear(w, w)) - if not act is None: - layers.append(act()) - layers.append(nn.Linear(w, output_dimension)) - nn.Sequential.__init__(self, *layers) diff --git a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py new file mode 100644 index 00000000..32a78e70 --- /dev/null +++ b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py @@ -0,0 +1,46 @@ +from torch import nn +from torch import optim +from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel + +################################################################################ +# Fully-connected network +################################################################################ + +class FullyConnected(PytorchModel, nn.Sequential): + """ + Pytorch implementation of a fully-connected QRNN model. + """ + def __init__(self, + input_dimension, + quantiles, + arch): + + """ + Create a fully-connected neural network. + + Args: + input_dimension(:code:`int`): Number of input features + quantiles(:code:`array`): The quantiles to predict given + as fractions within [0, 1]. + arch(tuple): Tuple :code:`(d, w, a)` containing :code:`d`, the + number of hidden layers in the network, :code:`w`, the width + of the network and :code:`a`, the type of activation functions + to be used as string. 
+ """ + PytorchModel.__init__(self, input_dimension, quantiles) + output_dimension = quantiles.size + self.arch = arch + + if len(arch) == 0: + layers = [nn.Linear(input_dimension, output_dimension)] + else: + d, w, act = arch + if type(act) == str: + act = activations[act] + layers = [nn.Linear(input_dimension, w)] + for i in range(d - 1): + layers.append(nn.Linear(w, w)) + if not act is None: + layers.append(act()) + layers.append(nn.Linear(w, output_dimension)) + nn.Sequential.__init__(self, *layers) diff --git a/typhon/retrieval/qrnn/models/pytorch/unet.py b/typhon/retrieval/qrnn/models/pytorch/unet.py new file mode 100644 index 00000000..09bdf179 --- /dev/null +++ b/typhon/retrieval/qrnn/models/pytorch/unet.py @@ -0,0 +1,209 @@ +import torch +from torch import nn + +from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel + +class Layer(nn.Sequential): + """ + Basic building block of a UNet. Consists of a convolutional + layer followed by an activation layers and an optional batch + norm layer. + + Args: + features_in(:code:`int`): Number of features of input + features_out(:code:`int`): Raw number of output features of the + layer not including skip connections. + batch_norm(:code:`bool`): Whether or not to include a batch norm + layer. + kernel_size(:code:`int`): Kernel size to use for the conv. layer. + activation(:code:`activation`): Activation to use after conv. layer. + skip_connection(:code:`bool`): Whether to include skip connections, i.e. + to include input in layer output. + """ + def __init__(self, + features_in, + features_out, + batch_norm=True, + kernel_size=3, + activation=nn.ReLU, + skip_connection=False): + self._features_in = features_in + self._features_out = features_out + self.skip_connection = skip_connection + + if not activation is None: + modules = [nn.ConstantPad2d(1, 0.0), + nn.Conv2d(features_in, + features_out, + kernel_size), + nn.BatchNorm2d(features_out), + activation()] + else: + modules = [nn.ConstantPad2d(1, 0.0), + nn.Conv2d(features_in, + features_out, + kernel_size), + nn.BatchNorm2d(features_out)] + super().__init__(*modules) + + @property + def features_out(self): + if self.skip_connection: + return self._features_in + self._features_out + else: + return self._features_out + + def forward(self, x): + y = nn.Sequential.forward(self, x) + if self.skip_connection: + y = torch.cat([x, y], dim=1) + return y + +class Block(nn.Sequential): + """ + A block bundles a set of layers. + + Args: + features_in(:code:`int`): The number of input features of the block + features_out(:code:`int`): The number of output features of the block. + depth(:code:`int`): The number of layers of the block + activation(:code:`nn.Module`): Pytorch activation layer to + use. :code:`nn.ReLU` by default. + skip_connection(:code:`str`): Whether or not to insert skip + connections before all layers (:code:`"all"`) or just at + the end (:code:`"end"`). 
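+
+    A worked example of the feature bookkeeping (hypothetical values):
+    with 32 input features, 64 output features and
+    :code:`skip_connection="end"`, the input is concatenated to the
+    output, so
+
+    >>> block = Block(32, 64, depth=2, skip_connection="end")
+    >>> block.features_out
+    96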
+ """ + def __init__(self, + features_in, + features_out, + depth=2, + batch_norm=True, + activation=nn.ReLU, + kernel_size=3, + skip_connection=None): + + self._features_in = features_in + + if skip_connection == "all": + skip_connection_layer = True + self.skip_connection = False + elif skip_connection == "end": + skip_connection_layer = False + self.skip_connection = True + else: + skip_connection_layer = False + self.skip_connection = False + + layers = [] + nf = features_in + for d in range(depth): + layers.append(Layer(nf, + features_out, + activation=activation, + batch_norm=batch_norm, + kernel_size=kernel_size, + skip_connection=skip_connection_layer)) + nf = layers[-1].features_out + + self._features_out = layers[-1].features_out + super().__init__(*layers) + + @property + def features_out(self): + if self.skip_connection: + return self._features_in + self._features_out + else: + return self._features_out + + def forward(self, x): + y = nn.Sequential.forward(self, x) + if self.skip_connection: + y = torch.cat([x, y], dim=1) + return y + + +class DownSampler(nn.Sequential): + def __init__(self): + modules = [nn.MaxPool2d(2)] + super().__init__(*modules) + +class UpSampler(nn.Sequential): + def __init__(self, + features_in, + features_out): + modules = [nn.ConvTranspose2d(features_in, + features_out, + 3, + padding=1, + output_padding=1, + stride=2)] + super().__init__(*modules) + + + +class UNet(PytorchModel, nn.Module): + def __init__(self, + input_features, + quantiles, + n_features=32, + n_levels=4, + skip_connection=None): + + nn.Module.__init__(self) + PytorchModel.__init__(self, input_features, quantiles) + + # Down-sampling blocks + self.down_blocks = nn.ModuleList() + self.down_samplers = nn.ModuleList() + features_in = input_features + features_out = n_features + for i in range(n_levels - 1): + self.down_blocks.append(Block(features_in, + features_out, + skip_connection=skip_connection)) + self.down_samplers.append(DownSampler()) + features_in = self.down_blocks[-1].features_out + features_out = features_out * 2 + + self.center_block = Block(features_in, + features_out, + skip_connection=skip_connection) + + self.up_blocks = nn.ModuleList() + self.up_samplers = nn.ModuleList() + features_in = self.center_block.features_out + features_out = features_out // 2 + for i in range(n_levels - 1): + self.up_samplers.append(UpSampler(features_in, + features_out)) + features_in = features_out + self.down_blocks[(- i - 1)].features_out + self.up_blocks.append(Block(features_in, + features_out, + skip_connection=skip_connection)) + features_out = features_out // 2 + features_in = self.up_blocks[-1].features_out + + self.head = nn.Sequential(nn.Conv2d(features_in, features_in, 1), + nn.ReLU(), + nn.Conv2d(features_in, features_in, 1), + nn.ReLU(), + nn.Conv2d(features_in, quantiles.size, 1)) + + def forward(self, x): + + features = [] + for (b, s) in zip(self.down_blocks, self.down_samplers): + x = b(x) + features.append(x) + x = s(x) + + x = self.center_block(x) + + for (b, u, f) in zip(self.up_blocks, self.up_samplers, features[::-1]): + x = u(x) + x = torch.cat([x, f], 1) + x = b(x) + + self.features = features + + return self.head(x) diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 9c235443..2649820f 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -50,6 +50,34 @@ def set_backend(name): else: raise Exception("\"{}\" is not a supported backend.".format(name)) +def get_backend(name): + """ + Get module object 
corresponding to the short backend name. + + The currently available backend are "keras" and "pytorch". + + Args: + name(str): The name of the backend. + """ + if name == "keras": + try: + import typhon.retrieval.qrnn.models.keras as keras + backend = keras + except Exception as e: + raise Exception("The following error occurred while trying " + " to import keras: ", e) + elif name == "pytorch": + try: + import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch + except Exception as e: + raise Exception("The following error occurred while trying " + " to import pytorch: ", e) + else: + raise Exception("\"{}\" is not a supported backend.".format(name)) + return backend + + def create_model(input_dim, output_dim, arch): @@ -119,7 +147,7 @@ class QRNN: """ def __init__(self, input_dimension, - quantiles, + quantiles=None, model=(3, 128, "relu"), ensemble_size=1, **kwargs): @@ -147,6 +175,19 @@ def __init__(self, self.model = backend.FullyConnected(self.input_dimension, self.quantiles, model) + if quantiles is None: + raise ValueError("If model is given as architecture tuple, the" + " 'quantiles' kwarg must be provided.") + else: + if not quantiles is None: + if not quantiles == model.quantiles: + raise ValueError("Provided quantiles do not match those of " + "the provided model.") + + self.model = model + self.quantiles = model.quantiles + self.backend = model.backend + def cross_validation(self, x_train, diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py index 3dea2241..a9975c50 100644 --- a/typhon/tests/retrieval/qrnn/test_qrnn.py +++ b/typhon/tests/retrieval/qrnn/test_qrnn.py @@ -1,7 +1,8 @@ -from typhon.retrieval.qrnn import QRNN, set_backend +from typhon.retrieval.qrnn import QRNN, set_backend, get_backend import numpy as np import os import tempfile +import importlib # # Import available backends. @@ -43,6 +44,21 @@ def test_qrnn(self): maximum_epochs = 1) qrnn.predict(self.x_train) + def test_qrnn_datasets(self): + """ + Provide data as dataset object instead of numpy arrays. + """ + for backend in backends: + set_backend(backend) + backend = get_backend(backend) + data = backend.BatchedDataset((self.x_train, self.y_train), + 256) + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + qrnn.train(data, + maximum_epochs = 1) + + def save_qrnn(self): """ Test saving and loading of QRNNs. @@ -60,9 +76,3 @@ def save_qrnn(self): x_pred = x_pred.detach() assert(np.allclose(x_pred, x_pred_loaded)) - - -__file__ = "./bla.py" -test = TestQrnn() -test.setup_method() -test.test_qrnn() From af1bda9d3fafa1a3148594f6e44fdfcfe0274e5f Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 23 Mar 2020 15:57:27 +0100 Subject: [PATCH 08/19] Added classification method to QRNN class. --- .../retrieval/qrnn/models/pytorch/__init__.py | 4 ++- .../retrieval/qrnn/models/pytorch/common.py | 7 +++-- .../qrnn/models/pytorch/fully_connected.py | 3 ++- typhon/retrieval/qrnn/qrnn.py | 27 ++++++++++++++++++- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/typhon/retrieval/qrnn/models/pytorch/__init__.py b/typhon/retrieval/qrnn/models/pytorch/__init__.py index 2a00f434..430fd8bb 100644 --- a/typhon/retrieval/qrnn/models/pytorch/__init__.py +++ b/typhon/retrieval/qrnn/models/pytorch/__init__.py @@ -5,6 +5,8 @@ This module provides an implementation of quantile regression neural networks (QRNNs) using the pytorch deep learning framework. 
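+
+A minimal usage sketch (argument values are hypothetical):
+
+>>> import numpy as np
+>>> from typhon.retrieval.qrnn.models.pytorch import UNet
+>>> model = UNet(5, np.linspace(0.05, 0.95, 10), n_features=32, n_levels=4)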
""" -from typhon.retrieval.qrnn.models.pytorch.common import BatchedDataset +from typhon.retrieval.qrnn.models.pytorch.common import (BatchedDataset, + save_model, + load_model) from typhon.retrieval.qrnn.models.pytorch.fully_connected import FullyConnected from typhon.retrieval.qrnn.models.pytorch.unet import UNet diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index e7ec42a8..0999473c 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -195,7 +195,7 @@ def __init__(self, self.criterion = QuantileLoss(self.quantiles) self.training_errors = [] self.validation_errors = [] - self.backend = "typhon.retrieval.qrnn.models.typhon" + self.backend = "typhon.retrieval.qrnn.models.pytorch" def _make_adversarial_samples(self, x, y, eps): self.model.zero_grad() @@ -283,11 +283,10 @@ def train(self, *args, **kwargs): # try: x, y = handle_input(training_data, device) - training_data = BatchedDataset(x, y, batch_size) + training_data = BatchedDataset((x, y), batch_size) except: pass - self.train() self.optimizer = optim.SGD(self.parameters(), lr = initial_learning_rate, @@ -372,7 +371,7 @@ def predict(self, x, gpu=False): device = torch.device("cpu") x = handle_input(x, device) self.to(device) - return self(x).detach() + return self(x).detach().numpy() def calibration(self, data, diff --git a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py index 32a78e70..8a659e35 100644 --- a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py +++ b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py @@ -1,6 +1,7 @@ from torch import nn from torch import optim -from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel +from typhon.retrieval.qrnn.models.pytorch.common import (PytorchModel, + activations) ################################################################################ # Fully-connected network diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 2649820f..4cab3394 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -598,9 +598,34 @@ def save(self, path): f = open(path, "wb") pickle.dump(self, f) backend = importlib.import_module(self.backend) - backend.save_model(self.model, f) + backend.save_model(f, self.model) f.close() + def classify(self, x, threshold): + """ + Classify output based on posterior PDF and given numeric threshold. + + Args: + x: The input data as :code:`np.ndarray` or backend-specific + dataset object. + threshold: The numeric threshold to apply for classification. + """ + y = self.predict(x) + out_shape = y.shape[:1] + (1,) + y.shape[2:] + c = self.quantiles[0] * np.ones(out_shape) + + for i in range(self.quantiles.size - 1): + q_l = y[:, [i]] + q_r = y[:, [i+1]] + inds = np.logical_and(q_l < threshold, + q_r >= threshold) + c[inds] = self.quantiles[i] * (threshold - q_l[inds]) + c[inds] += self.quantiles[i + 1] * (q_r[inds] - threshold) + c[inds] /= (q_r[inds] - q_l[inds]) + + c[threshold > q_r] = self.quantiles[-1] + return 1.0 - c + @staticmethod def load(path): r""" From bffdbd2c3921b2e35fb51c86a1201abac4b9276c Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Tue, 2 Jun 2020 13:05:27 +0200 Subject: [PATCH 09/19] Make posterior estimation work on batches. 
--- .../retrieval/qrnn/models/pytorch/common.py | 7 +-- typhon/retrieval/qrnn/qrnn.py | 43 +++++++++++-------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index 0999473c..0dfbdfae 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -341,7 +341,6 @@ def train(self, *args, **kwargs): val_err = 0.0 n = 0 if not validation_data is None: - print(validation_data) for x, y in validation_data: x = x.to(device) y = y.to(device) @@ -355,7 +354,9 @@ def train(self, *args, **kwargs): val_err += c.item() * x.size()[0] n += x.size()[0] - validation_errors.append(val_err) + validation_errors.append(val_err / n) + iterator.set_postfix({"Validation errror" : val_err / n}) + iterator.update() self.training_errors += training_errors self.validation_errors += validation_errors @@ -410,7 +411,7 @@ def calibration(self, shape = (shape[0], 1) + shape[2:] y = y.reshape(shape) - y_pred = self.predict(x) + y_pred = self(x) y_pred = y_pred.cpu() y = y.cpu() diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 4cab3394..c9da8e8c 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -301,20 +301,20 @@ def train(self, maximum_epochs = 200, training_split = 0.9, gpu = True): - self.model.train(training_data, - validation_data, - batch_size, - sigma_noise, - adversarial_training, - delta_at, - initial_learning_rate, - momentum, - convergence_epochs, - learning_rate_decay, - learning_rate_minimum, - maximum_epochs, - training_split, - gpu) + return self.model.train(training_data, + validation_data, + batch_size, + sigma_noise, + adversarial_training, + delta_at, + initial_learning_rate, + momentum, + convergence_epochs, + learning_rate_decay, + learning_rate_minimum, + maximum_epochs, + training_split, + gpu) def predict(self, x): r""" @@ -365,10 +365,15 @@ def cdf(self, x): values of the posterior CDF :math: `F(x)` in `fs`. """ - y_pred = np.zeros(self.quantiles.size + 2) - y_pred[1:-1] = self.predict(x) - y_pred[0] = 2.0 * y_pred[1] - y_pred[2] - y_pred[-1] = 2.0 * y_pred[-2] - y_pred[-3] + if len(x.shape) > 1: + s = x.shape[:-1] + (self.quantiles.size + 2,) + else: + s = (1, self.quantiles.size + 2) + + y_pred = np.zeros(s) + y_pred[:, 1:-1] = self.predict(x) + y_pred[:, 0] = 2.0 * y_pred[:, 1] - y_pred[:, 2] + y_pred[:, -1] = 2.0 * y_pred[:, -2] - y_pred[:, -3] qs = np.zeros(self.quantiles.size + 2) qs[1:-1] = self.quantiles @@ -502,7 +507,7 @@ def posterior_mean(self, x): Array containing the posterior means for the provided inputs. """ y_pred, qs = self.cdf(x) - mus = y_pred[-1] - np.trapz(qs, x=y_pred) + mus = y_pred[:, -1] - np.trapz(qs, x=y_pred) return mus @staticmethod From 18015eb9bfc4c298aa6202b1c720e8d3e99b9ad8 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Tue, 2 Jun 2020 16:03:38 +0200 Subject: [PATCH 10/19] Load model to cpu. --- typhon/retrieval/qrnn/models/pytorch/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index 0dfbdfae..6a297f5b 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -447,7 +447,7 @@ def load(self, path): Arguments: path: Path of the file where the QRNN was stored. 
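+
+        The state is explicitly mapped to the CPU while loading, so
+        that models trained on a GPU can also be restored on machines
+        without one.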
""" - state = torch.load(path) + state = torch.load(path, map_location=torch.device("cpu")) keys = ["input_dimension", "quantiles", "depth", "width", "activation"] qrnn = QRNN(*[state[k] for k in keys]) qrnn.load_state_dict["network_state"] From 3e692f7919ed32381d4f094e5944a177d93e051a Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Thu, 25 Jun 2020 18:19:36 +0200 Subject: [PATCH 11/19] Removed tqdm status bar to make output cleaner. --- typhon/retrieval/qrnn/backends/pytorch.py | 12 +++++++ .../retrieval/qrnn/models/pytorch/common.py | 34 +++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/typhon/retrieval/qrnn/backends/pytorch.py b/typhon/retrieval/qrnn/backends/pytorch.py index da1d6846..f3dab8c9 100644 --- a/typhon/retrieval/qrnn/backends/pytorch.py +++ b/typhon/retrieval/qrnn/backends/pytorch.py @@ -198,11 +198,23 @@ def train(self, self.criterion.to(dev) + def clear_output(): + print("clreaing output") + try: + import IPython + from IPython.display import clear_output + clear_output(wait=True) + except: + print("failed loading Ipython") + pass + for i in range(n_epochs): + clear_output() err = 0.0 n = 0 iterator = tqdm(enumerate(training_data), total = len(training_data)) + iterator.set_description(f"Training epoch {i}/{n_epochs}, lr = {self.optimizer.lr}") for j, (x, y) in iterator: x = x.to(dev) y = y.to(dev) diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index 6a297f5b..c2b70929 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -10,7 +10,7 @@ from torch import optim from torch.optim.lr_scheduler import ReduceLROnPlateau from torch.utils.data import Dataset -from tqdm.auto import tqdm +from tqdm import tqdm activations = {"elu" : nn.ELU, "hardshrink" : nn.Hardshrink, @@ -299,6 +299,20 @@ def train(self, *args, **kwargs): training_errors = [] validation_errors = [] + # + # Function to clear output if we are in a notebook. 
+ # + + def clear_output(): + print("clreaing output") + try: + import IPython + from IPython.display import clear_output + clear_output(wait=True) + except: + print("failed loading Ipython") + pass + # # Training loop # @@ -306,8 +320,9 @@ def train(self, *args, **kwargs): for i in range(maximum_epochs): err = 0.0 n = 0 - iterator = tqdm(enumerate(training_data), total = len(training_data)) - for j, (x, y) in iterator: + #clear_output() + for j, (x, y) in enumerate(training_data): + x = x.to(device) y = y.to(device) @@ -332,8 +347,11 @@ def train(self, *args, **kwargs): c.backward() self.optimizer.step() + if j % 100: - iterator.set_postfix({"Training errror" : err / n}) + print("Epoch {} / {}: Batch {} / {}, Training error: {:.3f}" + .format(i, maximum_epochs, j, len(training_data), err / n), + end="\r") # Save training error training_errors.append(err / n) @@ -355,8 +373,12 @@ def train(self, *args, **kwargs): val_err += c.item() * x.size()[0] n += x.size()[0] validation_errors.append(val_err / n) - iterator.set_postfix({"Validation errror" : val_err / n}) - iterator.update() + print("Epoch {} / {}: Training error: {:.3f}, Validation error: {:.3f}, Learning rate: {:.5f}" + .format(i, maximum_epochs, training_errors[-1], validation_errors[-1], self.optimizer.defaults["lr"])) + scheduler.step(val_err / n) + else: + scheduler.step(err / n) + self.training_errors += training_errors self.validation_errors += validation_errors From 713a1675502d4590818a061651a9154ba7a8ac80 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 29 Jun 2020 11:53:22 +0200 Subject: [PATCH 12/19] Print correct learning rate. --- .../retrieval/qrnn/models/pytorch/common.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index c2b70929..bf6dca88 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -293,9 +293,9 @@ def train(self, *args, **kwargs): momentum = momentum) self.criterion.to(device) scheduler = ReduceLROnPlateau(self.optimizer, - factor=1.0 / learning_rate_decay, - patience=convergence_epochs, - min_lr=learning_rate_minimum) + factor=1.0 / learning_rate_decay, + patience=convergence_epochs, + min_lr=learning_rate_minimum) training_errors = [] validation_errors = [] @@ -356,12 +356,14 @@ def clear_output(): # Save training error training_errors.append(err / n) + lr = [group['lr'] for group in self.optimizer.param_groups][0] + val_err = 0.0 n = 0 if not validation_data is None: for x, y in validation_data: - x = x.to(device) - y = y.to(device) + x = x.to(device).detach() + y = y.to(device).detach() shape = x.size() shape = (shape[0], 1) + shape[2:] @@ -373,11 +375,15 @@ def clear_output(): val_err += c.item() * x.size()[0] n += x.size()[0] validation_errors.append(val_err / n) + + print("Epoch {} / {}: Training error: {:.3f}, Validation error: {:.3f}, Learning rate: {:.5f}" - .format(i, maximum_epochs, training_errors[-1], validation_errors[-1], self.optimizer.defaults["lr"])) + .format(i, maximum_epochs, training_errors[-1], validation_errors[-1], lr)) scheduler.step(val_err / n) else: scheduler.step(err / n) + print("Epoch {} / {}: Training error: {:.3f}, Learning rate: {:.5f}" + .format(i, maximum_epochs, training_errors[-1], lr)) self.training_errors += training_errors @@ -394,7 +400,7 @@ def predict(self, x, gpu=False): device = torch.device("cpu") x = handle_input(x, device) self.to(device) - return 
self(x).detach().numpy()
+        return self(x.detach()).detach().numpy()
 
     def calibration(self,
                     data,
@@ -427,8 +433,8 @@ def calibration(self,
 
         iterator = tqdm(data)
         for x, y in iterator:
-            x = x.to(dev)
-            y = y.to(dev)
+            x = x.to(dev).detach()
+            y = y.to(dev).detach()
             shape = x.size()
             shape = (shape[0], 1) + shape[2:]
             y = y.reshape(shape)

From 6defb7869595415a86deebe45a7f9a8f3be69579 Mon Sep 17 00:00:00 2001
From: Simon Pfreundschuh
Date: Sun, 27 Sep 2020 21:40:56 +0200
Subject: [PATCH 13/19] Refactoring, documentation and testing.

---
 doc/typhon.retrieval.qrnn.rst                 |  48 +-
 typhon/retrieval/qrnn/models/keras.py         | 179 +++---
 .../retrieval/qrnn/models/pytorch/__init__.py |  16 +-
 .../retrieval/qrnn/models/pytorch/common.py   | 195 ++++---
 .../qrnn/models/pytorch/fully_connected.py    |  10 +-
 typhon/retrieval/qrnn/models/pytorch/unet.py  | 137 ++---
 typhon/retrieval/qrnn/qrnn.py                 | 550 ++++++++----------
 typhon/tests/retrieval/qrnn/test_qrnn.py      |  69 ++-
 8 files changed, 589 insertions(+), 615 deletions(-)

diff --git a/doc/typhon.retrieval.qrnn.rst b/doc/typhon.retrieval.qrnn.rst
index 4915733b..1fa24dcd 100644
--- a/doc/typhon.retrieval.qrnn.rst
+++ b/doc/typhon.retrieval.qrnn.rst
@@ -19,22 +19,12 @@ The QRNN implementation consists of two-layers:
 The QRNN class
 --------------
 
-The :py:class:`~typhon.retrieval.qrnn.QRNN` class provides the high-level interface for QRNNs. If you want
-to train a fully-connected QRNN, this is all you need. The class itself
-implments generic functionality related to the evaluation of QRNNs and the post
-processing of results such as computing the PSD or the posterior mean. For the
-rest it acts as a wrapper around its model attribute, which encapsules all
-network- and DL-framework-specific code.
-
-API documentation
-^^^^^^^^^^^^^^^^^
-
-.. automodule:: typhon.retrieval.qrnn.qrnn
-.. currentmodule:: typhon.retrieval.qrnn.qrnn
-.. autosummary::
-    :toctree: generated
-
-    QRNN
+The :py:class:`~typhon.retrieval.qrnn.QRNN` class provides the high-level
+interface for QRNNs. This is all that is required to train a plain,
+fully-connected QRNN. The class itself implements generic functionality related
+to the evaluation of QRNNs and the post processing of results such as computing
+the PSD or the posterior mean. For the rest it acts as a wrapper around its
+model attribute, which encapsulates all network- and DL-framework-specific code.
 
 Backends
 --------
@@ -44,23 +34,35 @@ are supported as backends for neural network. The QRNN implementation will
 automatically use the one the is available on your system. If both are
 available you can choose a specific backend using the
 :py:meth:`~typhon.retrieval.qrnn.set_backend` function.
+Neural network models
+---------------------
+
+The :py:class:`typhon.retrieval.qrnn.QRNN` class is designed to work with any
+generic regression neural network model. The aim of this is to make the
+implementation sufficiently flexible to allow special network architectures or
+customization of the training process.
+
+This gives the user the flexibility to design custom NN models in pytorch
+or Keras and use them with the ``QRNN`` class. Some predefined architectures
+are defined in the :py:mod:`typhon.retrieval.qrnn.models` submodule.
 
 API documentation
-^^^^^^^^^^^^^^^^^
+-----------------
 
 .. automodule:: typhon.retrieval.qrnn.qrnn
 .. currentmodule:: typhon.retrieval.qrnn.qrnn
 .. autosummary::
     :toctree: generated
 
-    set_backend
-
-Neural network models
----------------------
+    QRNN
 
+.. automodule:: typhon.retrieval.qrnn.models.pytorch
+.. 
currentmodule:: typhon.retrieval.qrnn.models.pytorch +.. autosummary:: + :toctree: generated -Pytorch -^^^^^^^ + FullyConnected + UNet .. automodule:: typhon.retrieval.qrnn.models.pytorch .. currentmodule:: typhon.retrieval.qrnn.models.pytorch diff --git a/typhon/retrieval/qrnn/models/keras.py b/typhon/retrieval/qrnn/models/keras.py index 28b7aef7..f1b31970 100644 --- a/typhon/retrieval/qrnn/models/keras.py +++ b/typhon/retrieval/qrnn/models/keras.py @@ -1,3 +1,10 @@ +""" +typhon.retrieval.qrnn.models.keras +================================== + +This module provides Keras neural network models that can be used as backend +models with the :py:class:`typhon.retrieval.qrnn.QRNN` class. +""" import numpy as np import keras from keras.models import Sequential @@ -18,6 +25,7 @@ def save_model(f, model): """ keras.models.save_model(model, f) + def load_model(f, quantiles): """ Load keras model. @@ -35,28 +43,27 @@ def load_model(f, quantiles): # This is a bit hacky but seems required to handle # the custom model classes. # - def make_fully_connected(layers = None, - **kwargs): + def make_fully_connected(layers=None, **kwargs): layers = list(map(deserialize, layers)) input_dimensions = layers[0].batch_input_shape[1] - return FullyConnected(input_dimensions, - quantiles, - (), - layers) - custom_objects = {"FullyConnected" : make_fully_connected, - "QuantileLoss" : QuantileLoss} + return FullyConnected(input_dimensions, quantiles, (), layers) + + custom_objects = { + "FullyConnected": make_fully_connected, + "QuantileLoss": QuantileLoss, + } model = keras.models.load_model(f, custom_objects=custom_objects) return model + ################################################################################ # Quantile loss ################################################################################ logger = logging.getLogger(__name__) -def skewed_absolute_error(y_true, - y_pred, - tau): + +def skewed_absolute_error(y_true, y_pred, tau): """ The quantile loss function for a given quantile tau: @@ -73,12 +80,9 @@ def quantile_loss(y_true, y_pred, taus): The quantiles loss for a list of quantiles. Sums up the error contribution from the each of the quantile loss functions. """ - e = skewed_absolute_error( - K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0]) + e = skewed_absolute_error(K.flatten(y_true), K.flatten(y_pred[:, 0]), taus[0]) for i, tau in enumerate(taus[1:]): - e += skewed_absolute_error(K.flatten(y_true), - K.flatten(y_pred[:, i + 1]), - tau) + e += skewed_absolute_error(K.flatten(y_true), K.flatten(y_pred[:, i + 1]), tau) return e @@ -113,17 +117,18 @@ def __init__(self): def train(self): pass + ################################################################################ # Keras data generators ################################################################################ + class BatchedDataset: """ Keras data loader that batches a given dataset of numpy arryas. """ - def __init__(self, - training_data, - batch_size): + + def __init__(self, training_data, batch_size): """ Create batched dataset. 
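+
+        A minimal usage sketch (array names are hypothetical):
+
+        >>> data = BatchedDataset((x_train, y_train), 256)
+        >>> x_batch, y_batch = next(data)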
@@ -147,9 +152,9 @@ def __len__(self): return self.x.shape[0] // self.bs def __next__(self): - inds = self.indices[np.arange(self.i * self.bs, - (self.i + 1) * self.bs) - % self.indices.size] + inds = self.indices[ + np.arange(self.i * self.bs, (self.i + 1) * self.bs) % self.indices.size + ] x_batch = np.copy(self.x[inds, :]) y_batch = self.y[inds] self.i = self.i + 1 @@ -159,6 +164,7 @@ def __next__(self): return (x_batch, y_batch) + class TrainingGenerator: """ This Keras sample generator takes a generator for noise-free training data @@ -169,9 +175,8 @@ class TrainingGenerator: sigma_noise: A vector containing the standard deviation of each component. """ - def __init__(self, - training_data, - sigma_noise = None): + + def __init__(self, training_data, sigma_noise=None): """ Args: training_data: Data generator providing the original (noise-free) @@ -195,6 +200,7 @@ def __next__(self): x_batch += np.random.randn(*x_batch.shape) * self.sigma_noise return (x_batch, y_batch) + class AdversarialTrainingGenerator: """ This Keras sample generator takes the noise-free training data @@ -208,10 +214,8 @@ class AdversarialTrainingGenerator: network eps: The perturbation factor. """ - def __init__(self, - training_data, - input_gradients, - eps): + + def __init__(self, training_data, input_gradients, eps): """ Args: training_data: Training generator to use to generate the input @@ -245,6 +249,7 @@ def __next__(self): self.i = self.i + 1 return x_batch, y_batch + class ValidationGenerator: """ This Keras sample generator is similar to the training generator @@ -262,9 +267,7 @@ class ValidationGenerator: component. """ - def __init__(self, - validation_data, - sigma_noise): + def __init__(self, validation_data, sigma_noise): self.validation_data = validation_data self.sigma_noise = sigma_noise @@ -277,10 +280,12 @@ def __next__(self): x_val += np.random.randn(*self.x_val.shape) * self.sigma_noise return (x_val, self.y_val) + ################################################################################ # LRDecay ################################################################################ + class LRDecay(keras.callbacks.Callback): """ The LRDecay class implements the Keras callback interface and reduces @@ -310,7 +315,7 @@ def on_train_begin(self, logs={}): self.min_loss = 1e30 def on_epoch_end(self, epoch, logs={}): - loss = logs.get('val_loss') + loss = logs.get("val_loss") if loss is None: loss = logs.get("loss") self.losses += [loss] @@ -320,8 +325,7 @@ def on_epoch_end(self, epoch, logs={}): self.steps = 0 if self.steps > self.convergence_steps: lr = keras.backend.get_value(self.model.optimizer.lr) - keras.backend.set_value( - self.model.optimizer.lr, lr / self.lr_decay) + keras.backend.set_value(self.model.optimizer.lr, lr / self.lr_decay) self.steps = 0 logger.info("\n Reduced learning rate to " + str(lr)) @@ -330,10 +334,12 @@ def on_epoch_end(self, epoch, logs={}): self.min_loss = min(self.min_loss, self.losses[-1]) + ################################################################################ # QRNN ################################################################################ + class KerasModel: r""" Base class for Keras models. @@ -362,9 +368,7 @@ class KerasModel: neural network. """ - def __init__(self, - input_dimension, - quantiles): + def __init__(self, input_dimension, quantiles): """ Create a QRNN model. 
@@ -379,33 +383,40 @@ def __init__(self, self.input_dimension = input_dimension self.quantiles = np.array(quantiles) - - def train(self, - training_data, - validation_data = None, - batch_size = 256, - sigma_noise=None, - adversarial_training = False, - delta_at = 0.01, - initial_learning_rate = 1e-2, - momentum = 0.0, - convergence_epochs = 5, - learning_rate_decay = 2.0, - learning_rate_minimum = 1e-6, - maximum_epochs = 200, - training_split = 0.9, - gpu = False): + def reset(self): + """ + Reinitialize the state of the model. + """ + self.reset_states() + + def train( + self, + training_data, + validation_data=None, + batch_size=256, + sigma_noise=None, + adversarial_training=False, + delta_at=0.01, + initial_learning_rate=1e-2, + momentum=0.0, + convergence_epochs=5, + learning_rate_decay=2.0, + learning_rate_minimum=1e-6, + maximum_epochs=200, + training_split=0.9, + gpu=False, + ): if type(training_data) == tuple: if not type(training_data[0]) == np.ndarray: - raise ValueError("When training data is provided as tuple" - " (x, y) it must contain numpy arrays.") - training_data = BatchedDataset(training_data, - batch_size) + raise ValueError( + "When training data is provided as tuple" + " (x, y) it must contain numpy arrays." + ) + training_data = BatchedDataset(training_data, batch_size) if type(validation_data) is tuple: - validation_data = BatchedDataset(validation_data, - batch_size) + validation_data = BatchedDataset(validation_data, batch_size) loss = QuantileLoss(self.quantiles) @@ -417,48 +428,44 @@ def train(self, # # Setup training generator # - training_generator = TrainingGenerator(training_data, - sigma_noise) + training_generator = TrainingGenerator(training_data, sigma_noise) if adversarial_training: - inputs = [self.input, - self.targets[0], - self.sample_weights[0]] + inputs = [self.input, self.targets[0], self.sample_weights[0]] input_gradients = K.function( - inputs, K.gradients(self.total_loss, self.input)) - training_generator = AdversarialTrainingGenerator(training_generator, - input_gradients, - delta_at) + inputs, K.gradients(self.total_loss, self.input) + ) + training_generator = AdversarialTrainingGenerator( + training_generator, input_gradients, delta_at + ) if validation_data is None: validation_generator = None else: - validation_generator = ValidationGenerator(validation_data, - sigma_noise) - lr_callback = LRDecay(self, - learning_rate_decay, - learning_rate_minimum, - convergence_epochs) - self.fit_generator(training_generator, - steps_per_epoch=len(training_generator), - epochs=maximum_epochs, - validation_data=validation_generator, - validation_steps=1, - callbacks=[lr_callback]) + validation_generator = ValidationGenerator(validation_data, sigma_noise) + lr_callback = LRDecay( + self, learning_rate_decay, learning_rate_minimum, convergence_epochs + ) + self.fit_generator( + training_generator, + steps_per_epoch=len(training_generator), + epochs=maximum_epochs, + validation_data=validation_generator, + validation_steps=1, + callbacks=[lr_callback], + ) ################################################################################ # Fully-connected network ################################################################################ + class FullyConnected(KerasModel, Sequential): """ Keras implementation of fully-connected networks. """ - def __init__(self, - input_dimension, - quantiles, - arch, - layers = None): + + def __init__(self, input_dimension, quantiles, arch, layers=None): """ Create a fully-connected neural network. 
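
The reformatted train() method above keeps the keyword interface that
is shared with the pytorch backend; a hypothetical call would look like

    model.train((x_train, y_train),
                validation_data=(x_val, y_val),
                batch_size=256,
                maximum_epochs=10)

where tuples of numpy arrays are wrapped into BatchedDataset objects
internally.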
diff --git a/typhon/retrieval/qrnn/models/pytorch/__init__.py b/typhon/retrieval/qrnn/models/pytorch/__init__.py index 430fd8bb..eb2830eb 100644 --- a/typhon/retrieval/qrnn/models/pytorch/__init__.py +++ b/typhon/retrieval/qrnn/models/pytorch/__init__.py @@ -1,12 +1,14 @@ """ -models -====== +typhon.retrieval.qrnn.models.pytorch +==================================== -This module provides an implementation of quantile regression neural networks (QRNNs) -using the pytorch deep learning framework. +This model provides Pytorch neural network models that can be used a backend +models for the :py:class:`typhon.retrieval.qrnn.QRNN` class. """ -from typhon.retrieval.qrnn.models.pytorch.common import (BatchedDataset, - save_model, - load_model) +from typhon.retrieval.qrnn.models.pytorch.common import ( + BatchedDataset, + save_model, + load_model, +) from typhon.retrieval.qrnn.models.pytorch.fully_connected import FullyConnected from typhon.retrieval.qrnn.models.pytorch.unet import UNet diff --git a/typhon/retrieval/qrnn/models/pytorch/common.py b/typhon/retrieval/qrnn/models/pytorch/common.py index bf6dca88..17300fa0 100644 --- a/typhon/retrieval/qrnn/models/pytorch/common.py +++ b/typhon/retrieval/qrnn/models/pytorch/common.py @@ -12,16 +12,18 @@ from torch.utils.data import Dataset from tqdm import tqdm -activations = {"elu" : nn.ELU, - "hardshrink" : nn.Hardshrink, - "hardtanh" : nn.Hardtanh, - "prelu" : nn.PReLU, - "relu" : nn.ReLU, - "selu" : nn.SELU, - "celu" : nn.CELU, - "sigmoid" : nn.Sigmoid, - "softplus" : nn.Softplus, - "softmin" : nn.Softmin} +activations = { + "elu": nn.ELU, + "hardshrink": nn.Hardshrink, + "hardtanh": nn.Hardtanh, + "prelu": nn.PReLU, + "relu": nn.ReLU, + "selu": nn.SELU, + "celu": nn.CELU, + "sigmoid": nn.Sigmoid, + "softplus": nn.Softplus, + "softmin": nn.Softmin, +} def save_model(f, model): @@ -35,6 +37,7 @@ def save_model(f, model): """ torch.save(model, f) + def load_model(f, quantiles): """ Load pytorch model. @@ -51,7 +54,8 @@ def load_model(f, quantiles): model = torch.load(f) return model -def handle_input(data, device = None): + +def handle_input(data, device=None): """ Handle input data. @@ -80,13 +84,13 @@ def handle_input(data, device = None): else: return data + class BatchedDataset(Dataset): """ Batches an un-batched dataset. """ - def __init__(self, - training_data, - batch_size): + + def __init__(self, training_data, batch_size): x, y = training_data self.x = torch.tensor(x, dtype=torch.float) self.y = torch.tensor(y, dtype=torch.float) @@ -102,14 +106,16 @@ def __getitem__(self, i): raise IndexError() i_start = i * self.batch_size i_end = (i + 1) * self.batch_size - x = self.x[i_start : i_end] - y = self.y[i_start : i_end] + x = self.x[i_start:i_end] + y = self.y[i_start:i_end] return (x, y) + ################################################################################ # Quantile loss ################################################################################ + class QuantileLoss: r""" The quantile loss function @@ -132,9 +138,8 @@ class QuantileLoss: corresponding to each quantiles. The loss for a batch of training samples is computed by taking the mean over all samples in the batch. """ - def __init__(self, - quantiles, - mask = -1.0): + + def __init__(self, quantiles, mask=-1.0): """ Create an instance of the quantile loss function with the given quantiles. 
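+
+        A worked check of the loss for a single quantile, tau = 0.9
+        (values are illustrative): for dy = y_pred - y_true,
+
+        * dy =  1.0  ->  (1 - 0.9) * 1.0  = 0.1
+        * dy = -1.0  ->  (-0.9) * (-1.0)  = 0.9
+
+        so underestimating the 0.9 quantile is penalized nine times
+        as strongly as overestimating it.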
@@ -163,18 +168,18 @@ def __call__(self, y_pred, y_true): """ dy = y_pred - y_true n = self.quantiles.size()[0] - qs = self.quantiles.reshape((n,) + (1, ) * max(len(dy.size()) - 2, 0)) - l = torch.where(dy >= 0.0, - (1.0 - qs) * dy, - (-qs) * dy) + qs = self.quantiles.reshape((n,) + (1,) * max(len(dy.size()) - 2, 0)) + l = torch.where(dy >= 0.0, (1.0 - qs) * dy, (-qs) * dy) if not self.mask is None: l = torch.where(y_true == self.mask, torch.zeros_like(l), l) return l.mean() + ################################################################################ # QRNN ################################################################################ + class PytorchModel: """ Quantile regression neural network (QRNN) @@ -182,9 +187,8 @@ class PytorchModel: This class implements QRNNs as a fully-connected network with a given number of layers. """ - def __init__(self, - input_dimension, - quantiles): + + def __init__(self, input_dimension, quantiles): """ Arguments: input_dimension(int): The number of input features. @@ -206,6 +210,17 @@ def _make_adversarial_samples(self, x, y, eps): x_adv = x.detach() + eps * torch.sign(x.grad.detach()) return x_adv + def reset(self): + """ + Reinitializes the weights of a model. + """ + + def reset_function(module): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + m.reset_parameters() + + self.apply(reset_function) + def train(self, *args, **kwargs): """ Train the network. @@ -233,19 +248,21 @@ def train(self, *args, **kwargs): # training_data = args[0] - arguments = {"validation_data" : None, - "batch_size" : 256, - "sigma_noise" : None, - "adversarial_training" : False, - "delta_at" : 0.01, - "initial_learning_rate" : 1e-2, - "momentum" : 0.0, - "convergence_epochs" : 5, - "learning_rate_decay" : 2.0, - "learning_rate_minimum" : 1e-6, - "maximum_epochs" : 1, - "training_split" : 0.9, - "gpu" : False} + arguments = { + "validation_data": None, + "batch_size": 256, + "sigma_noise": None, + "adversarial_training": False, + "delta_at": 0.01, + "initial_learning_rate": 1e-2, + "momentum": 0.0, + "convergence_epochs": 5, + "learning_rate_decay": 2.0, + "learning_rate_minimum": 1e-6, + "maximum_epochs": 1, + "training_split": 0.9, + "gpu": False, + } argument_names = arguments.keys() for a, n in zip(args[1:], argument_names): arguments[n] = a @@ -288,31 +305,19 @@ def train(self, *args, **kwargs): pass self.train() - self.optimizer = optim.SGD(self.parameters(), - lr = initial_learning_rate, - momentum = momentum) + self.optimizer = optim.SGD( + self.parameters(), lr=initial_learning_rate, momentum=momentum + ) self.criterion.to(device) - scheduler = ReduceLROnPlateau(self.optimizer, - factor=1.0 / learning_rate_decay, - patience=convergence_epochs, - min_lr=learning_rate_minimum) + scheduler = ReduceLROnPlateau( + self.optimizer, + factor=1.0 / learning_rate_decay, + patience=convergence_epochs, + min_lr=learning_rate_minimum, + ) training_errors = [] validation_errors = [] - # - # Function to clear output if we are in a notebook. 
- # - - def clear_output(): - print("clreaing output") - try: - import IPython - from IPython.display import clear_output - clear_output(wait=True) - except: - print("failed loading Ipython") - pass - # # Training loop # @@ -320,7 +325,6 @@ def clear_output(): for i in range(maximum_epochs): err = 0.0 n = 0 - #clear_output() for j, (x, y) in enumerate(training_data): x = x.to(device) @@ -347,20 +351,22 @@ def clear_output(): c.backward() self.optimizer.step() - if j % 100: - print("Epoch {} / {}: Batch {} / {}, Training error: {:.3f}" - .format(i, maximum_epochs, j, len(training_data), err / n), - end="\r") + print( + "Epoch {} / {}: Batch {} / {}, Training error: {:.3f}".format( + i, maximum_epochs, j, len(training_data), err / n + ), + end="\r", + ) # Save training error training_errors.append(err / n) - lr = [group['lr'] for group in self.optimizer.param_groups][0] + lr = [group["lr"] for group in self.optimizer.param_groups][0] val_err = 0.0 - n = 0 if not validation_data is None: + n = 0 for x, y in validation_data: x = x.to(device).detach() y = y.to(device).detach() @@ -376,21 +382,31 @@ def clear_output(): n += x.size()[0] validation_errors.append(val_err / n) - - print("Epoch {} / {}: Training error: {:.3f}, Validation error: {:.3f}, Learning rate: {:.5f}" - .format(i, maximum_epochs, training_errors[-1], validation_errors[-1], lr)) + print( + "Epoch {} / {}: Training error: {:.3f}, Validation error: {:.3f}, Learning rate: {:.5f}".format( + i, + maximum_epochs, + training_errors[-1], + validation_errors[-1], + lr, + ) + ) scheduler.step(val_err / n) else: scheduler.step(err / n) - print("Epoch {} / {}: Training error: {:.3f}, Learning rate: {:.5f}" - .format(i, maximum_epochs, training_errors[-1], lr)) - + print( + "Epoch {} / {}: Training error: {:.3f}, Learning rate: {:.5f}".format( + i, maximum_epochs, training_errors[-1], lr + ) + ) self.training_errors += training_errors self.validation_errors += validation_errors self.eval() - return {"training_errors" : self.training_errors, - "validation_errors" : self.validation_errors} + return { + "training_errors": self.training_errors, + "validation_errors": self.validation_errors, + } def predict(self, x, gpu=False): "" @@ -402,9 +418,7 @@ def predict(self, x, gpu=False): self.to(device) return self(x.detach()).detach().numpy() - def calibration(self, - data, - gpu=False): + def calibration(self, data, gpu=False): """ Computes the calibration of the predictions from the neural network. @@ -425,8 +439,9 @@ def calibration(self, n_intervals = self.quantiles.size // 2 qs = self.quantiles - intervals = np.array([q_r - q_l for (q_l, q_r) - in zip(qs, reversed(qs))])[:n_intervals] + intervals = np.array([q_r - q_l for (q_l, q_r) in zip(qs, reversed(qs))])[ + :n_intervals + ] counts = np.zeros(n_intervals) total = 0.0 @@ -458,14 +473,18 @@ def save(self, path): Arguments: The path in which to store the QRNN. 
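+
+        A hypothetical call:
+
+        >>> model.save("qrnn_state.pt")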
""" - torch.save({"input_dimension" : self.input_dimension, - "quantiles" : self.quantiles, - "width" : self.width, - "depth" : self.depth, - "activation" : self.activation, - "network_state" : self.state_dict(), - "optimizer_state" : self.optimizer.state_dict()}, - path) + torch.save( + { + "input_dimension": self.input_dimension, + "quantiles": self.quantiles, + "width": self.width, + "depth": self.depth, + "activation": self.activation, + "network_state": self.state_dict(), + "optimizer_state": self.optimizer.state_dict(), + }, + path, + ) @staticmethod def load(self, path): diff --git a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py index 8a659e35..47412ba9 100644 --- a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py +++ b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py @@ -1,20 +1,18 @@ from torch import nn from torch import optim -from typhon.retrieval.qrnn.models.pytorch.common import (PytorchModel, - activations) +from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel, activations ################################################################################ # Fully-connected network ################################################################################ + class FullyConnected(PytorchModel, nn.Sequential): """ Pytorch implementation of a fully-connected QRNN model. """ - def __init__(self, - input_dimension, - quantiles, - arch): + + def __init__(self, input_dimension, quantiles, arch): """ Create a fully-connected neural network. diff --git a/typhon/retrieval/qrnn/models/pytorch/unet.py b/typhon/retrieval/qrnn/models/pytorch/unet.py index 09bdf179..0a9d9403 100644 --- a/typhon/retrieval/qrnn/models/pytorch/unet.py +++ b/typhon/retrieval/qrnn/models/pytorch/unet.py @@ -3,6 +3,7 @@ from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel + class Layer(nn.Sequential): """ Basic building block of a UNet. Consists of a convolutional @@ -20,30 +21,33 @@ class Layer(nn.Sequential): skip_connection(:code:`bool`): Whether to include skip connections, i.e. to include input in layer output. """ - def __init__(self, - features_in, - features_out, - batch_norm=True, - kernel_size=3, - activation=nn.ReLU, - skip_connection=False): + + def __init__( + self, + features_in, + features_out, + batch_norm=True, + kernel_size=3, + activation=nn.ReLU, + skip_connection=False, + ): self._features_in = features_in self._features_out = features_out self.skip_connection = skip_connection if not activation is None: - modules = [nn.ConstantPad2d(1, 0.0), - nn.Conv2d(features_in, - features_out, - kernel_size), - nn.BatchNorm2d(features_out), - activation()] + modules = [ + nn.ConstantPad2d(1, 0.0), + nn.Conv2d(features_in, features_out, kernel_size), + nn.BatchNorm2d(features_out), + activation(), + ] else: - modules = [nn.ConstantPad2d(1, 0.0), - nn.Conv2d(features_in, - features_out, - kernel_size), - nn.BatchNorm2d(features_out)] + modules = [ + nn.ConstantPad2d(1, 0.0), + nn.Conv2d(features_in, features_out, kernel_size), + nn.BatchNorm2d(features_out), + ] super().__init__(*modules) @property @@ -59,6 +63,7 @@ def forward(self, x): y = torch.cat([x, y], dim=1) return y + class Block(nn.Sequential): """ A block bundles a set of layers. @@ -73,14 +78,17 @@ class Block(nn.Sequential): connections before all layers (:code:`"all"`) or just at the end (:code:`"end"`). 
""" - def __init__(self, - features_in, - features_out, - depth=2, - batch_norm=True, - activation=nn.ReLU, - kernel_size=3, - skip_connection=None): + + def __init__( + self, + features_in, + features_out, + depth=2, + batch_norm=True, + activation=nn.ReLU, + kernel_size=3, + skip_connection=None, + ): self._features_in = features_in @@ -97,12 +105,16 @@ def __init__(self, layers = [] nf = features_in for d in range(depth): - layers.append(Layer(nf, - features_out, - activation=activation, - batch_norm=batch_norm, - kernel_size=kernel_size, - skip_connection=skip_connection_layer)) + layers.append( + Layer( + nf, + features_out, + activation=activation, + batch_norm=batch_norm, + kernel_size=kernel_size, + skip_connection=skip_connection_layer, + ) + ) nf = layers[-1].features_out self._features_out = layers[-1].features_out @@ -127,27 +139,21 @@ def __init__(self): modules = [nn.MaxPool2d(2)] super().__init__(*modules) + class UpSampler(nn.Sequential): - def __init__(self, - features_in, - features_out): - modules = [nn.ConvTranspose2d(features_in, - features_out, - 3, - padding=1, - output_padding=1, - stride=2)] + def __init__(self, features_in, features_out): + modules = [ + nn.ConvTranspose2d( + features_in, features_out, 3, padding=1, output_padding=1, stride=2 + ) + ] super().__init__(*modules) - class UNet(PytorchModel, nn.Module): - def __init__(self, - input_features, - quantiles, - n_features=32, - n_levels=4, - skip_connection=None): + def __init__( + self, input_features, quantiles, n_features=32, n_levels=4, skip_connection=None + ): nn.Module.__init__(self) PytorchModel.__init__(self, input_features, quantiles) @@ -158,36 +164,37 @@ def __init__(self, features_in = input_features features_out = n_features for i in range(n_levels - 1): - self.down_blocks.append(Block(features_in, - features_out, - skip_connection=skip_connection)) + self.down_blocks.append( + Block(features_in, features_out, skip_connection=skip_connection) + ) self.down_samplers.append(DownSampler()) features_in = self.down_blocks[-1].features_out features_out = features_out * 2 - self.center_block = Block(features_in, - features_out, - skip_connection=skip_connection) + self.center_block = Block( + features_in, features_out, skip_connection=skip_connection + ) self.up_blocks = nn.ModuleList() self.up_samplers = nn.ModuleList() features_in = self.center_block.features_out features_out = features_out // 2 for i in range(n_levels - 1): - self.up_samplers.append(UpSampler(features_in, - features_out)) - features_in = features_out + self.down_blocks[(- i - 1)].features_out - self.up_blocks.append(Block(features_in, - features_out, - skip_connection=skip_connection)) + self.up_samplers.append(UpSampler(features_in, features_out)) + features_in = features_out + self.down_blocks[(-i - 1)].features_out + self.up_blocks.append( + Block(features_in, features_out, skip_connection=skip_connection) + ) features_out = features_out // 2 features_in = self.up_blocks[-1].features_out - self.head = nn.Sequential(nn.Conv2d(features_in, features_in, 1), - nn.ReLU(), - nn.Conv2d(features_in, features_in, 1), - nn.ReLU(), - nn.Conv2d(features_in, quantiles.size, 1)) + self.head = nn.Sequential( + nn.Conv2d(features_in, features_in, 1), + nn.ReLU(), + nn.Conv2d(features_in, features_in, 1), + nn.ReLU(), + nn.Conv2d(features_in, quantiles.size, 1), + ) def forward(self, x): diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index c9da8e8c..3f2ccaf0 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ 
b/typhon/retrieval/qrnn/qrnn.py @@ -1,8 +1,19 @@ +""" +typhon.retrieval.qrnn.qrnn +========================== + +This module provides the QRNN class, which implements the high-level +functionality of quantile regression neural networks, while the neural +network implementation is left to the model backends implemented in the +``typhon.retrieval.qrnn.models`` submodule. +""" import copy import logging import os import pickle import importlib +import scipy +from scipy.stats import norm import numpy as np from scipy.interpolate import CubicSpline @@ -13,15 +24,20 @@ try: import typhon.retrieval.qrnn.models.keras as keras + backend = keras except Exception as e: try: import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch except: - raise Exception("Couldn't import neither Keras nor Pytorch " - "one of them must be available to use the QRNN" - " module.") + raise Exception( + "Couldn't import neither Keras nor Pytorch " + "one of them must be available to use the QRNN" + " module." + ) + def set_backend(name): """ @@ -36,19 +52,24 @@ def set_backend(name): if name == "keras": try: import typhon.retrieval.qrnn.models.keras as keras + backend = keras except Exception as e: - raise Exception("The following error occurred while trying " - " to import keras: ", e) + raise Exception( + "The following error occurred while trying " " to import keras: ", e + ) elif name == "pytorch": try: import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch except Exception as e: - raise Exception("The following error occurred while trying " - " to import pytorch: ", e) + raise Exception( + "The following error occurred while trying " " to import pytorch: ", e + ) else: - raise Exception("\"{}\" is not a supported backend.".format(name)) + raise Exception('"{}" is not a supported backend.'.format(name)) + def get_backend(name): """ @@ -62,25 +83,27 @@ def get_backend(name): if name == "keras": try: import typhon.retrieval.qrnn.models.keras as keras + backend = keras except Exception as e: - raise Exception("The following error occurred while trying " - " to import keras: ", e) + raise Exception( + "The following error occurred while trying " " to import keras: ", e + ) elif name == "pytorch": try: import typhon.retrieval.qrnn.models.pytorch as pytorch + backend = pytorch except Exception as e: - raise Exception("The following error occurred while trying " - " to import pytorch: ", e) + raise Exception( + "The following error occurred while trying " " to import pytorch: ", e + ) else: - raise Exception("\"{}\" is not a supported backend.".format(name)) + raise Exception('"{}" is not a supported backend.'.format(name)) return backend -def create_model(input_dim, - output_dim, - arch): +def create_model(input_dim, output_dim, arch): """ Creates a fully-connected neural network from a tuple describing its architecture. @@ -99,10 +122,12 @@ def create_model(input_dim, """ return backend.FullyConnected(input_dim, output_dim, arch) + ################################################################################ # QRNN class ################################################################################ + class QRNN: r""" Quantile Regression Neural Network (QRNN) @@ -124,197 +149,155 @@ class QRNN: has one output neuron for each quantile to estimate. The QRNN class provides a generic QRNN implementation in the sense that it - does not assume a fixed neural network infrastructure. 
Instead, this - functionality is off-loaded to a model object, which can be an arbitrary - regression network such as a fully-connected or a convolutional network. A - range of different models are provided in the typhon.retrieval.qrnn.models - module. The :class:`QRNN`` class just implements high-level operation on - the QRNN output while training and prediction are delegated to the model - object. For details on the respective implementation refer to the - documentation of the corresponding model class. - - .. note:: For the QRNN implementation :math:`x` is used to denote the input - vector and :math:`y` to denote the output. While this is opposed - to inverse problem notation typically used for retrievals, it is - in line with machine learning notation and felt more natural for - the implementation. If this annoys you, I am sorry. + does not assume a fixed neural network architecture or implementation. + Instead, this functionality is off-loaded to a model object, which can be + an arbitrary regression network such as a fully-connected or a + convolutional network. A range of different models are provided in the + typhon.retrieval.qrnn.models module. The :class:`QRNN`` class just + implements high-level operation on the QRNN output while training and + prediction are delegated to the model object. For details on the respective + implementation refer to the documentation of the corresponding model class. + + .. note:: + + For the QRNN implementation :math:`x` is used to denote the input + vector and :math:`y` to denote the output. While this is opposed + to inverse problem notation typically used for retrievals, it is + in line with machine learning notation and felt more natural for + the implementation. If this annoys you, I am sorry. Attributes: - quantiles (numpy.array): The 1D-array containing the quantiles - :math:`\tau \in [0, 1]` that the network learns to predict. - model: The model object that implements the actual neural network - regressor. + backend(``str``): + The name of the backend used for the neural network model. + quantiles (numpy.array): + The 1D-array containing the quantiles :math:`\tau \in [0, 1]` + that the network learns to predict. + model: + The neural network regression model used to predict the quantiles. """ - def __init__(self, - input_dimension, - quantiles=None, - model=(3, 128, "relu"), - ensemble_size=1, - **kwargs): + + def __init__( + self, + input_dimensions, + quantiles=None, + model=(3, 128, "relu"), + ensemble_size=1, + **kwargs + ): """ Create a QRNN model. Arguments: - input_dim(int): The dimension of the measurement space, i.e. the number - of elements in a single measurement vector y - quantiles(np.array): 1D-array containing the quantiles to estimate of - the posterior distribution. Given as fractions - within the range [0, 1]. - model: A (possible trained) model instance or a tuple - :code:`(d, w, act)` describing the architecture of a - fully-connected neural network with :code:`d` hidden layers - with :code:`w` neurons and :code:`act` activation functions. - ensemble_size: The size of the ensemble if an ensemble model - should be used. + input_dimensions(int): + The dimension of the measurement space, i.e. the + number of elements in a single measurement vector y + quantiles(np.array): + 1D-array containing the quantiles to estimate of + the posterior distribution. Given as fractions within the range + [0, 1]. 
+ model: + A (possibly trained) model instance or a tuple + ``(d, w, act)`` describing the architecture of a fully-connected + neural network with :code:`d` hidden layers with :code:`w` neurons + and :code:`act` activation functions. """ - self.input_dimension = input_dimension + self.input_dimensions = input_dimensions self.quantiles = np.array(quantiles) self.backend = backend.__name__ if type(model) == tuple: - self.model = backend.FullyConnected(self.input_dimension, - self.quantiles, - model) + self.model = backend.FullyConnected( + self.input_dimensions, self.quantiles, model + ) if quantiles is None: - raise ValueError("If model is given as architecture tuple, the" - " 'quantiles' kwarg must be provided.") + raise ValueError( + "If model is given as architecture tuple, the" + " 'quantiles' kwarg must be provided." + ) else: if not quantiles is None: if not quantiles == model.quantiles: - raise ValueError("Provided quantiles do not match those of " - "the provided model.") + raise ValueError( + "Provided quantiles do not match those of " + "the provided model." + ) self.model = model self.quantiles = model.quantiles self.backend = model.backend + def train( + self, + training_data, + validation_data=None, + batch_size=256, + sigma_noise=None, + adversarial_training=False, + delta_at=0.01, + initial_learning_rate=1e-2, + momentum=0.0, + convergence_epochs=5, + learning_rate_decay=2.0, + learning_rate_minimum=1e-6, + maximum_epochs=200, + training_split=0.9, + gpu=False, + ): + """ + Train model on given training data. - def cross_validation(self, - x_train, - y_train, - sigma_noise = None, - n_folds=5, - s=None, - **kwargs): - r""" - Perform n-fold cross validation. - - This function trains the network n times on different subsets of the - provided training data, always keeping a fraction of 1/n samples apart - for testing. Performance for each of the networks is evaluated and mean - and standard deviation for all folds are returned. This is to reduce - the influence of random fluctuations of the network performance during - hyperparameter tuning. - - Arguments: - - x_train(numpy.array): Array of shape :code:`(m, n)` containing the - m n-dimensional training inputs. - - y_train(numpy.array): Array of shape :code:`(m, 1)` containing the - m training outputs. - - sigma_noise(None, float, np.array): If not `None` this value is used - to multiply the Gaussian noise - that is added to each training - batch. If None no noise is - added. - - n_folds(int): Number of folds to perform for the cross correlation. - - s(callable, None): Performance metric for the fold. If not None, - this should be a function object taking as - arguments :code:`(y_test, y_pred)`, i.e. the - expected output for the given fold :code:`y_test` - and the predicted output :code:`y_pred`. The - returned value is taken as performance metric. + The training is performed on the provided training data and an + optionally-provided validation set. Training can use the following + augmentation methods: + - Gaussian noise added to input + - Adversarial training + The learning rate is decreased gradually when the validation or training + loss did not decrease for a given number of epochs. - **kwargs: Additional keyword arguments are passed on to the :code:`fit` - method that is called for each fold. + Args: + training_data: Tuple of numpy arrays of a dataset object to use to + train the model. + validation_data: Optional validation data in the same format as the + training data. 
+ batch_size: If training data is provided as arrays, this batch size + will be used to for the training. + sigma_noise: If training data is provided as arrays, training data + will be augmented by adding noise with the given standard + deviations to each input vector before it is presented to the + model. + adversarial_training(``bool``): Whether or not to perform + adversarial training using the fast gradient sign method. + delta_at: The scaling factor to apply for adversarial training. + initial_learning_rate(``float``): The learning rate with which the + training is started. + momentum(``float``): The momentum to use for training. + convergence_epochs(``int``): The number of epochs with + non-decreasing loss before the learning rate is decreased + learning_rate_decay(``float``): The factor by which the learning rate + is decreased. + learning_rate_minimum(``float``): The learning rate at which the + training is aborted. + maximum_epochs(``int``): For how many epochs to keep training. + training_split(``float``): If no validation data is provided, this + is the fraction of training data that is used for validation. + gpu(``bool``): Whether or not to try to run the training on the GPU. """ - - n = x_train.shape[0] - n_test = n // n_folds - inds = np.random.permutation(np.arange(0, n)) - - results = [] - - # Nomenclature is a bit difficult here: - # Each cross validation fold has its own training, - # vaildation and test set. The size of the test set - # is number of provided training samples divided by the - # number of fold. The rest is used a traning and internal - # validation set according to the chose training_split - # ratio. - - - for i in range(n_folds): - for m in self.models: - m.reset_states() - - # Indices to use for training including training data and internal - # validation set to monitor convergence. - inds_train = np.append(np.arange(0, i * n_test), - np.arange(min((i + 1) * n_test, n), n)) - inds_train = inds[inds_train] - # Indices used to evaluate performance of the model. - inds_test = np.arange(i * n_test, (i + 1) * n_test) - inds_test = inds[inds_test] - - x_test_fold = x_train[inds_test, :] - y_test_fold = y_train[inds_test] - - # Splitting training and validation set. - x_train_fold = x_train[inds_train, :] - y_train_fold = y_train[inds_train] - - self.fit(x_train_fold, y_train_fold, - sigma_noise, **kwargs) - - # Evaluation on this folds test set. 
- if s: - y_pred = self.predict(x_test_fold) - results += [s(y_pred, y_test_fold)] - else: - loss = self.models[0].evaluate( - (x_test_fold - self.x_mean) / self.x_sigma, - y_test_fold) - logger.info(loss) - results += [loss] - logger.info(results) - results = np.array(results) - logger.info(results) - return (np.mean(results, axis=0), np.std(results, axis=0)) - - def train(self, - training_data, - validation_data = None, - batch_size = 256, - sigma_noise = None, - adversarial_training = False, - delta_at = 0.01, - initial_learning_rate = 1e-2, - momentum = 0.0, - convergence_epochs = 5, - learning_rate_decay = 2.0, - learning_rate_minimum = 1e-6, - maximum_epochs = 200, - training_split = 0.9, - gpu = True): - return self.model.train(training_data, - validation_data, - batch_size, - sigma_noise, - adversarial_training, - delta_at, - initial_learning_rate, - momentum, - convergence_epochs, - learning_rate_decay, - learning_rate_minimum, - maximum_epochs, - training_split, - gpu) + return self.model.train( + training_data, + validation_data, + batch_size, + sigma_noise, + adversarial_training, + delta_at, + initial_learning_rate, + momentum, + convergence_epochs, + learning_rate_decay, + learning_rate_minimum, + maximum_epochs, + training_split, + gpu, + ) def predict(self, x): r""" @@ -343,16 +326,17 @@ def cdf(self, x): Propagates the inputs in `x` forward through the network and approximates the posterior CDF by a piecewise linear function. - The piecewise linear function is given by its values at - approximate quantiles $x_\tau$ for - :math: `\tau = \{0.0, \tau_1, \ldots, \tau_k, 1.0\}` where - :math: `\tau_k` are the quantiles to be estimated by the network. - The values for :math:`x_0.0` and :math:`x_1.0` are computed using + The piecewise linear function is given by its values at approximate + quantiles :math:`x_\tau`` for :math:`\tau = \{0.0, \tau_1, \ldots, + \tau_k, 1.0\}` where :math:`\tau_k` are the quantiles to be estimated + by the network. The values for :math:`x_{0.0}` and :math:`x_{1.0}` are + computed using .. math:: - x_0.0 = 2.0 x_{\tau_1} - x_{\tau_2} - x_1.0 = 2.0 x_{\tau_k} - x_{\tau_{k-1}} + x_{0.0} = 2.0 x_{\tau_1} - x_{\tau_2} + + x_{1.0} = 2.0 x_{\tau_k} - x_{\tau_{k-1}} Arguments: @@ -361,8 +345,8 @@ def cdf(self, x): Returns: - Tuple (xs, fs) containing the :math: `x`-values in `xs` and corresponding - values of the posterior CDF :math: `F(x)` in `fs`. + Tuple (xs, fs) containing the :math:`x`-values in `xs` and corresponding + values of the posterior CDF :math:`F(x)` in `fs`. """ if len(x.shape) > 1: @@ -388,86 +372,37 @@ def calibration(self, *args, **kwargs): """ return self.model.calibration(*args, *kwargs) - def pdf(self, x, use_splines = False): + def pdf(self, x): r""" Approximate the posterior probability density function (PDF) for given - inputs `x`. - - By default, the PDF is approximated by computing the derivative of the - piece-wise linear approximation of the CDF as computed by the :code:`cdf` - function. + inputs ``x``. - If :code:`use_splines` is set to :code:`True`, the PDF is computed from - a spline fit to the approximate CDF. + The PDF is approximated by computing the derivative of the piece-wise + linear approximation of the CDF as computed by the + :py:meth:`typhon.retrieval.qrnn.QRNN.cdf` function. Arguments: x(np.array): Array of shape `(n, m)` containing `n` inputs for which - to predict the conditional quantiles. - - use_splines(bool): Whether or not to use a spline fit to the CDF to - approximate the PDF. + to predict PDFs. 
 
         Returns:
 
-            Tuple (xs, fs) containing the :math: `x`-values in `xs` and corresponding
-            values of the approximate posterior PDF :math: `F(x)` in `fs`.
+            Tuple (x_pdf, y_pdf) of arrays of shape `(n, k)` containing the
+            x- and y-coordinates describing the PDF for the inputs in ``x``.
 
         """
+        x_cdf, y_cdf = self.cdf(x)
+        n, m = x_cdf.shape
+        x_pdf = np.zeros((n, m + 1))
 
-        y_pred = np.zeros(self.quantiles.size)
-        y_pred = self.predict(x).ravel()
-
-        y = np.zeros(y_pred.size + 1)
-        y[1:-1] = 0.5 * (y_pred[1:] + y_pred[:-1])
-        y[0] = 2 * y_pred[0] - y_pred[1]
-        y[-1] = 2 * y_pred[-1] - y_pred[-2]
-
-        if not use_splines:
-
-            p = np.zeros(y.size)
-            p[1:-1] = np.diff(self.quantiles) / np.diff(y_pred)
-        else:
-
-            y = np.zeros(y_pred.size + 2)
-            y[1:-1] = y_pred
-            y[0] = 3 * y_pred[0] - 2 * y_pred[1]
-            y[-1] = 3 * y_pred[-1] - 2 * y_pred[-2]
-            q = np.zeros(self.quantiles.size + 2)
-            q[1:-1] = np.array(self.quantiles)
-            q[0] = 0.0
-            q[-1] = 1.0
-
-            sr = CubicSpline(y, q, bc_type = "clamped")
-            y = np.linspace(y[0], y[-1], 101)
-            p = sr(y, nu = 1)
-
-        return y, p
-
-
-        y_pred = np.zeros(self.quantiles.size + 2)
-        y_pred[1:-1] = self.predict(x)
-        y_pred[0] = 2.0 * y_pred[1] - y_pred[2]
-        y_pred[-1] = 2.0 * y_pred[-2] - y_pred[-3]
+        # The PDF nodes are the end points and the mid-points of the CDF nodes.
+        x_pdf[:, 0] = x_cdf[:, 0]
+        x_pdf[:, -1] = x_cdf[:, -1]
+        x_pdf[:, 1:-1] = 0.5 * (x_cdf[:, 1:] + x_cdf[:, :-1])
 
-        if use_splines:
-            x_t = np.zeros(x.size + 2)
-            x_t[1:-1] = x
-            x_t[0] = 2 * x[0] - x[1]
-            x_t[-1] = 2 * x[-1] - x[-2]
-            y_t = np.zeros(y.size + 2)
-            y_t[1:-1] = y
-            y_t[-1] = 1.0
-
-        else:
-            logger.info(y)
-            x_new = np.zeros(x.size - 1)
-            x_new[2:-2] = 0.5 * (x[2:-3] + x[3:-2])
-            x_new[0:2] = x[0:2]
-            x_new[-2:] = x[-2:]
-            y_new = np.zeros(y.size - 1)
-            y_new[1:-1] = np.diff(y[1:-1]) / np.diff(x[1:-1])
-            return x_new, y_new
+        # The PDF is the slope of the piecewise-linear CDF.
+        y_pdf = np.zeros((n, m + 1))
+        y_pdf[:, 1:-1] = np.diff(y_cdf) / np.diff(x_cdf, axis=-1)
+        return x_pdf, y_pdf
 
     def sample_posterior(self, x, n=1):
         r"""
@@ -485,13 +420,16 @@ def sample_posterior(self, x, n=1):
 
         Returns:
 
-            Tuple (xs, fs) containing the :math: `x`-values in `xs` and corresponding
-            values of the posterior CDF :math: `F(x)` in `fs`.
+            Array of shape `(x.shape[0], n)` containing, for each input in
+            ``x``, `n` samples drawn from the estimated posterior.
         """
-        y_pred, qs = self.cdf(x)
-        p = np.random.rand(n)
-        y = np.interp(p, qs, y_pred)
-        return y
+        result = np.zeros((x.shape[0], n))
+        x_cdf, y_cdf = self.cdf(x)
+        for i in range(x_cdf.shape[0]):
+            # Inverse-CDF sampling: draw uniform samples and invert the
+            # piecewise-linear CDF of the corresponding input.
+            p = np.random.rand(n)
+            y = np.interp(p, y_cdf, x_cdf[i, :])
+            result[i, :] = y
+        return result
 
     def posterior_mean(self, x):
         r"""
         Computes the posterior mean by computing the first moment of the
@@ -555,24 +493,21 @@ def crps(y_pred, y_test, quantiles):
         qs[0, 0] = 0.0
         qs[0, -1] = 1.0
 
-        return np.trapz((qs - ind)**2.0, y_cdf)
+        return np.trapz((qs - ind) ** 2.0, y_cdf)
 
-    def evaluate_crps(self, x, y_test):
+    def evaluate_crps(self, x_test, y_test):
         r"""
         Predict quantiles and compute the Continuous Ranked Probability
         Score (CRPS).
 
-        This function evaluates the networks prediction on the
-        inputs in `x` and evaluates the CRPS of the predictions
-        against the materializations in `y_test`.
+        This function evaluates the network predictions on the test inputs
+        ``x_test`` and computes the CRPS with respect to the true values
+        ``y_test``.
 
         Arguments:
 
-            x(numpy.array): Array of shape `(n, m)` containing the `n`
-                            `m`-dimensional inputs for which to evaluate
-                            the CRPS.
+            x_test(numpy.array): Array of shape `(n, m)` containing the input
+                test data.
 
-            y_test(numpy.array): Array containing the `n` materializations of
-                                 the true conditional distribution.
+            y_test(numpy.array): Array of length `n` containing the output test
+                data.
Returns: @@ -580,31 +515,7 @@ def evaluate_crps(self, x, y_test): inputs in `x`. """ - return QRNN.crps(self.predict(x), y_test, self.quantiles) - - def save(self, path): - r""" - Store the QRNN model in a file. - - This stores the model to a file using pickle for all - attributes that support pickling. The Keras model - is handled separately, since it can not be pickled. - - .. note:: In addition to the model file with the given filename, - additional files suffixed with :code:`_model_i` will be - created for each neural network this model consists of. - - Arguments: - - path(str): The path including filename indicating where to - store the model. - - """ - f = open(path, "wb") - pickle.dump(self, f) - backend = importlib.import_module(self.backend) - backend.save_model(f, self.model) - f.close() + return QRNN.crps(self.predict(x_test), y_test, self.quantiles) def classify(self, x, threshold): """ @@ -621,12 +532,11 @@ def classify(self, x, threshold): for i in range(self.quantiles.size - 1): q_l = y[:, [i]] - q_r = y[:, [i+1]] - inds = np.logical_and(q_l < threshold, - q_r >= threshold) + q_r = y[:, [i + 1]] + inds = np.logical_and(q_l < threshold, q_r >= threshold) c[inds] = self.quantiles[i] * (threshold - q_l[inds]) c[inds] += self.quantiles[i + 1] * (q_r[inds] - threshold) - c[inds] /= (q_r[inds] - q_l[inds]) + c[inds] /= q_r[inds] - q_l[inds] c[threshold > q_r] = self.quantiles[-1] return 1.0 - c @@ -636,7 +546,8 @@ def load(path): r""" Load a model from a file. - This loads a model that has been stored using the `save` method. + This loads a model that has been stored using the + :py:meth:`typhon.retrieval.qrnn.QRNN.save` method. Arguments: @@ -646,25 +557,32 @@ def load(path): The loaded QRNN object. """ - with open(path, 'rb') as f: + with open(path, "rb") as f: qrnn = pickle.load(f) backend = importlib.import_module(qrnn.backend) model = backend.load_model(f, qrnn.quantiles) qrnn.model = model return qrnn - #try: - # from typhon.retrieval.qrnn.backends.keras import KerasQRNN, QuantileLoss - # globals()["QuantileLoss"] = QuantileLoss - # model = KerasQRNN.load(path) - # print(type(model)) - # qrnn = QRNN(model.input_dim, model.quantiles, model.models) - # qrnn.model = model - # globals().pop("QuantileLoss") - # return qrnn - - #except Exception as e: - # raise e + def save(self, path): + r""" + Store the QRNN model in a file. + + This stores the model to a file using pickle for all attributes that + support pickling. The Keras model is handled separately, since it can + not be pickled. + + Arguments: + + path(str): The path including filename indicating where to + store the model. + + """ + f = open(path, "wb") + pickle.dump(self, f) + backend = importlib.import_module(self.backend) + backend.save_model(f, self.model) + f.close() def __getstate__(self): dct = copy.copy(self.__dict__) diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py index a9975c50..29a6ae93 100644 --- a/typhon/tests/retrieval/qrnn/test_qrnn.py +++ b/typhon/tests/retrieval/qrnn/test_qrnn.py @@ -1,8 +1,14 @@ +""" +Tests for typhon.retrieval.qrnn module. + +Tests the QRNN implementation for all available backends. +""" from typhon.retrieval.qrnn import QRNN, set_backend, get_backend import numpy as np import os -import tempfile import importlib +import pytest +import tempfile # # Import available backends. 
@@ -22,7 +28,6 @@ pass class TestQrnn: - def setup_method(self): dir = os.path.dirname(os.path.realpath(__file__)) path = os.path.join(dir, "test_data") @@ -32,37 +37,53 @@ def setup_method(self): self.x_train = (x_train - x_mean) / x_sigma self.y_train = np.load(os.path.join(path, "y_train.npy")) - def test_qrnn(self): + @pytest.mark.parametrize("backend", backends) + def test_qrnn(self, backend): """ Test training of QRNNs using numpy arrays as input. """ - for backend in backends: - set_backend(backend) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) - qrnn.train((self.x_train, self.y_train), - maximum_epochs = 1) - qrnn.predict(self.x_train) - - def test_qrnn_datasets(self): + set_backend(backend) + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + qrnn.train((self.x_train, self.y_train), + maximum_epochs = 1) + + qrnn.predict(self.x_train) + + x, qs = qrnn.cdf(self.x_train[:2, :]) + assert qs[0] == 0.0 + assert qs[-1] == 1.0 + + x, y = qrnn.pdf(self.x_train[:2, :]) + assert x.shape == y.shape + + mu = qrnn.posterior_mean(self.x_train[:2, :]) + assert len(mu.shape) == 1 + + r = qrnn.sample_posterior(self.x_train[:2, :], n=2) + assert r.shape == (2, 2) + + @pytest.mark.parametrize("backend", backends) + def test_qrnn_datasets(self, backend): """ Provide data as dataset object instead of numpy arrays. """ - for backend in backends: - set_backend(backend) - backend = get_backend(backend) - data = backend.BatchedDataset((self.x_train, self.y_train), - 256) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) - qrnn.train(data, - maximum_epochs = 1) - - - def save_qrnn(self): + set_backend(backend) + backend = get_backend(backend) + data = backend.BatchedDataset((self.x_train, self.y_train), + 256) + qrnn = QRNN(self.x_train.shape[1], + np.linspace(0.05, 0.95, 10)) + qrnn.train(data, + maximum_epochs = 1) + + + @pytest.mark.parametrize("backend", backends) + def test_save_qrnn(self, backend): """ Test saving and loading of QRNNs. """ + set_backend(backend) qrnn = QRNN(self.x_train.shape[1], np.linspace(0.05, 0.95, 10)) f = tempfile.NamedTemporaryFile() From c47f822bbe9bdff0e2f4c1bb9d92b449a84b474b Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Sun, 27 Sep 2020 21:50:56 +0200 Subject: [PATCH 14/19] Refactoring, documentation and testing. --- typhon/retrieval/qrnn/qrnn.py | 216 ++++++++++++++--------- typhon/tests/retrieval/qrnn/test_qrnn.py | 2 + 2 files changed, 131 insertions(+), 87 deletions(-) diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index 3f2ccaf0..ad89437f 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -24,20 +24,15 @@ try: import typhon.retrieval.qrnn.models.keras as keras - backend = keras except Exception as e: try: import typhon.retrieval.qrnn.models.pytorch as pytorch - backend = pytorch except: - raise Exception( - "Couldn't import neither Keras nor Pytorch " - "one of them must be available to use the QRNN" - " module." 
-        )
-
+        raise Exception("Couldn't import neither Keras nor Pytorch "
+                        "one of them must be available to use the QRNN"
+                        " module.")
 
 def set_backend(name):
     """
@@ -52,24 +47,19 @@ def set_backend(name):
     if name == "keras":
         try:
             import typhon.retrieval.qrnn.models.keras as keras
-
             backend = keras
         except Exception as e:
-            raise Exception(
-                "The following error occurred while trying " " to import keras: ", e
-            )
+            raise Exception("The following error occurred while trying "
+                            " to import keras: ", e)
     elif name == "pytorch":
         try:
             import typhon.retrieval.qrnn.models.pytorch as pytorch
-
             backend = pytorch
         except Exception as e:
-            raise Exception(
-                "The following error occurred while trying " " to import pytorch: ", e
-            )
+            raise Exception("The following error occurred while trying "
+                            " to import pytorch: ", e)
     else:
-        raise Exception('"{}" is not a supported backend.'.format(name))
-
+        raise Exception("\"{}\" is not a supported backend.".format(name))
 
 def get_backend(name):
     """
@@ -83,27 +73,64 @@ def get_backend(name):
     if name == "keras":
         try:
             import typhon.retrieval.qrnn.models.keras as keras
-
             backend = keras
         except Exception as e:
-            raise Exception(
-                "The following error occurred while trying " " to import keras: ", e
-            )
+            raise Exception("The following error occurred while trying "
+                            " to import keras: ", e)
     elif name == "pytorch":
         try:
             import typhon.retrieval.qrnn.models.pytorch as pytorch
-
             backend = pytorch
         except Exception as e:
-            raise Exception(
-                "The following error occurred while trying " " to import pytorch: ", e
-            )
+            raise Exception("The following error occurred while trying "
+                            " to import pytorch: ", e)
     else:
-        raise Exception('"{}" is not a supported backend.'.format(name))
+        raise Exception("\"{}\" is not a supported backend.".format(name))
     return backend
 
+def fit_gaussian_to_quantiles(y_pred, taus):
+    """
+    Fits Gaussian distributions to predicted quantiles.
+
+    Fits mean and standard deviation values to quantiles by minimizing
+    the mean squared distance of the predicted quantiles and those of
+    the corresponding Gaussian distribution.
+
+    Args:
+        y_pred (``np.array``): Array of shape `(n, m)` containing the `m`
+            predicted quantiles for n different inputs.
+        taus(``np.array``): Array of shape `(m,)` containing the quantile
+            fractions corresponding to the predictions in ``y_pred``.
+
+    Returns:
+        Tuple ``(mu, sigma)`` of vectors of size `n` containing the mean and
+        standard deviations of the Gaussian distributions corresponding to
+        the predictions in ``y_pred``.
+    """
+    x = norm.ppf(taus)
+
+    # Hessian of the squared-error objective with respect to (mu, sigma).
+    d2e_00 = x.size
+    d2e_01 = x.sum(axis=-1)
+    d2e_10 = x.sum(axis=-1)
+    d2e_11 = np.sum(x ** 2, axis=-1)
+
+    d2e_det_inv = 1.0 / (d2e_00 * d2e_11 - d2e_01 * d2e_10)
+    d2e_inv_00 = d2e_det_inv * d2e_11
+    d2e_inv_01 = -d2e_det_inv * d2e_01
+    d2e_inv_10 = -d2e_det_inv * d2e_10
+    d2e_inv_11 = d2e_det_inv * d2e_00
+
+    # Gradient of the objective evaluated at (mu, sigma) = (0, 1).
+    de_0 = -np.sum(y_pred - x, axis=-1)
+    de_1 = -np.sum(x * (y_pred - x), axis=-1)
+
+    # The objective is quadratic in (mu, sigma), so a single Newton step
+    # starting from (0, 1) yields the exact minimizer.
+    mu = -(d2e_inv_00 * de_0 + d2e_inv_01 * de_1)
+    sigma = 1.0 - (d2e_inv_10 * de_0 + d2e_inv_11 * de_1)
+
+    return mu, sigma
 
-def create_model(input_dim, output_dim, arch):
+def create_model(input_dim,
+                 output_dim,
+                 arch):
     """
     Creates a fully-connected neural network from a tuple describing its
     architecture.
@@ -127,7 +154,6 @@ def create_model(input_dim, output_dim, arch): # QRNN class ################################################################################ - class QRNN: r""" Quantile Regression Neural Network (QRNN) @@ -175,15 +201,12 @@ class QRNN: model: The neural network regression model used to predict the quantiles. """ - - def __init__( - self, - input_dimensions, - quantiles=None, - model=(3, 128, "relu"), - ensemble_size=1, - **kwargs - ): + def __init__(self, + input_dimensions, + quantiles=None, + model=(3, 128, "relu"), + ensemble_size=1, + **kwargs): """ Create a QRNN model. @@ -206,43 +229,37 @@ def __init__( self.backend = backend.__name__ if type(model) == tuple: - self.model = backend.FullyConnected( - self.input_dimensions, self.quantiles, model - ) + self.model = backend.FullyConnected(self.input_dimensions, + self.quantiles, + model) if quantiles is None: - raise ValueError( - "If model is given as architecture tuple, the" - " 'quantiles' kwarg must be provided." - ) + raise ValueError("If model is given as architecture tuple, the" + " 'quantiles' kwarg must be provided.") else: if not quantiles is None: if not quantiles == model.quantiles: - raise ValueError( - "Provided quantiles do not match those of " - "the provided model." - ) + raise ValueError("Provided quantiles do not match those of " + "the provided model.") self.model = model self.quantiles = model.quantiles self.backend = model.backend - def train( - self, - training_data, - validation_data=None, - batch_size=256, - sigma_noise=None, - adversarial_training=False, - delta_at=0.01, - initial_learning_rate=1e-2, - momentum=0.0, - convergence_epochs=5, - learning_rate_decay=2.0, - learning_rate_minimum=1e-6, - maximum_epochs=200, - training_split=0.9, - gpu=False, - ): + def train(self, + training_data, + validation_data=None, + batch_size=256, + sigma_noise=None, + adversarial_training=False, + delta_at=0.01, + initial_learning_rate=1e-2, + momentum=0.0, + convergence_epochs=5, + learning_rate_decay=2.0, + learning_rate_minimum=1e-6, + maximum_epochs=200, + training_split=0.9, + gpu = False): """ Train model on given training data. @@ -282,22 +299,20 @@ def train( is the fraction of training data that is used for validation. gpu(``bool``): Whether or not to try to run the training on the GPU. """ - return self.model.train( - training_data, - validation_data, - batch_size, - sigma_noise, - adversarial_training, - delta_at, - initial_learning_rate, - momentum, - convergence_epochs, - learning_rate_decay, - learning_rate_minimum, - maximum_epochs, - training_split, - gpu, - ) + return self.model.train(training_data, + validation_data, + batch_size, + sigma_noise, + adversarial_training, + delta_at, + initial_learning_rate, + momentum, + convergence_epochs, + learning_rate_decay, + learning_rate_minimum, + maximum_epochs, + training_split, + gpu) def predict(self, x): r""" @@ -431,6 +446,31 @@ def sample_posterior(self, x, n=1): result[i, :] = y return result + def sample_posterior_gaussian_fit(self, x, n=1): + r""" + Generates :code:`n` samples from the estimated posterior + distribution for the input vector :code:`x`. The sampling + is performed by the inverse CDF method using the estimated + CDF obtained from the :code:`cdf` member function. + + Arguments: + + x(np.array): Array of shape `(n, m)` containing `n` inputs for which + to predict the conditional quantiles. + + n(int): The number of samples to generate. 
+
+        Returns:
+
+            Array of shape `(x.shape[0], n)` containing, for each input in
+            ``x``, `n` samples drawn from the fitted Gaussian posterior.
+        """
+        result = np.zeros((x.shape[0], n))
+        y_pred = self.predict(x)
+        mu, sigma = fit_gaussian_to_quantiles(y_pred, self.quantiles)
+        x = np.random.normal(size=(y_pred.shape[0], n))
+        return mu + sigma * x
+
     def posterior_mean(self, x):
         r"""
         Computes the posterior mean by computing the first moment of the
@@ -493,7 +533,7 @@ def crps(y_pred, y_test, quantiles):
         qs[0, 0] = 0.0
         qs[0, -1] = 1.0
 
-        return np.trapz((qs - ind) ** 2.0, y_cdf)
+        return np.trapz((qs - ind)**2.0, y_cdf)
 
     def evaluate_crps(self, x_test, y_test):
         r"""
@@ -532,11 +572,12 @@ def classify(self, x, threshold):
 
         for i in range(self.quantiles.size - 1):
             q_l = y[:, [i]]
-            q_r = y[:, [i + 1]]
-            inds = np.logical_and(q_l < threshold, q_r >= threshold)
+            q_r = y[:, [i+1]]
+            inds = np.logical_and(q_l < threshold,
+                                  q_r >= threshold)
             c[inds] = self.quantiles[i] * (threshold - q_l[inds])
             c[inds] += self.quantiles[i + 1] * (q_r[inds] - threshold)
-            c[inds] /= q_r[inds] - q_l[inds]
+            c[inds] /= (q_r[inds] - q_l[inds])
         c[threshold > q_r] = self.quantiles[-1]
         return 1.0 - c
 
@@ -557,7 +598,7 @@ def load(path):
 
             The loaded QRNN object.
         """
-        with open(path, "rb") as f:
+        with open(path, 'rb') as f:
             qrnn = pickle.load(f)
         backend = importlib.import_module(qrnn.backend)
         model = backend.load_model(f, qrnn.quantiles)
@@ -584,6 +625,7 @@ def save(self, path):
         backend.save_model(f, self.model)
         f.close()
 
+
     def __getstate__(self):
         dct = copy.copy(self.__dict__)
         dct.pop("model")
diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py
index 29a6ae93..95d3023e 100644
--- a/typhon/tests/retrieval/qrnn/test_qrnn.py
+++ b/typhon/tests/retrieval/qrnn/test_qrnn.py
@@ -63,6 +63,8 @@ def test_qrnn(self, backend):
         r = qrnn.sample_posterior(self.x_train[:2, :], n=2)
         assert r.shape == (2, 2)
 
+        r = qrnn.sample_posterior_gaussian_fit(self.x_train[:2, :], n=2)
+
     @pytest.mark.parametrize("backend", backends)
     def test_qrnn_datasets(self, backend):
         """

From 8e95a45c940f28086b7a04dcffe95a0c0f45fea4 Mon Sep 17 00:00:00 2001
From: Simon Pfreundschuh
Date: Sun, 27 Sep 2020 22:44:13 +0200
Subject: [PATCH 15/19] Added method to sample from fitted Gaussian posterior.
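
This changes the broadcasting in sample_posterior_gaussian_fit so that each
row of the output is scaled by its own fitted moments. A minimal sketch of
the sampling idea behind the fix (assuming a trained `qrnn` object and an
input array `x`; `fit_gaussian_to_quantiles` is the module-level helper
introduced in the previous patch):

    import numpy as np

    y_pred = qrnn.predict(x)  # predicted quantiles, shape (n_inputs, n_quantiles)
    mu, sigma = fit_gaussian_to_quantiles(y_pred, qrnn.quantiles)
    # Reshape to column vectors so that each input's samples are scaled by
    # its own fitted mean and standard deviation.
    samples = mu.reshape(-1, 1) + sigma.reshape(-1, 1) * np.random.normal(
        size=(y_pred.shape[0], 10)
    )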
--- typhon/retrieval/qrnn/qrnn.py | 2 +- typhon/tests/retrieval/qrnn/test_qrnn.py | 35 +++++++++++------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/typhon/retrieval/qrnn/qrnn.py b/typhon/retrieval/qrnn/qrnn.py index ad89437f..72307cae 100644 --- a/typhon/retrieval/qrnn/qrnn.py +++ b/typhon/retrieval/qrnn/qrnn.py @@ -469,7 +469,7 @@ def sample_posterior_gaussian_fit(self, x, n=1): y_pred = self.predict(x) mu, sigma = fit_gaussian_to_quantiles(y_pred, self.quantiles) x = np.random.normal(size=(y_pred.shape[0], n)) - return mu + sigma * x + return mu.reshape(-1, 1) + sigma.reshape(-1, 1) * x def posterior_mean(self, x): r""" diff --git a/typhon/tests/retrieval/qrnn/test_qrnn.py b/typhon/tests/retrieval/qrnn/test_qrnn.py index 95d3023e..53b7ea03 100644 --- a/typhon/tests/retrieval/qrnn/test_qrnn.py +++ b/typhon/tests/retrieval/qrnn/test_qrnn.py @@ -17,23 +17,26 @@ backends = [] try: import typhon.retrieval.qrnn.models.keras + backends += ["keras"] except: pass try: import typhon.retrieval.qrnn.models.pytorch + backends += ["pytorch"] except: pass + class TestQrnn: def setup_method(self): dir = os.path.dirname(os.path.realpath(__file__)) path = os.path.join(dir, "test_data") x_train = np.load(os.path.join(path, "x_train.npy")) - x_mean = np.mean(x_train, keepdims = True) - x_sigma = np.std(x_train, keepdims = True) + x_mean = np.mean(x_train, keepdims=True) + x_sigma = np.std(x_train, keepdims=True) self.x_train = (x_train - x_mean) / x_sigma self.y_train = np.load(os.path.join(path, "y_train.npy")) @@ -43,10 +46,8 @@ def test_qrnn(self, backend): Test training of QRNNs using numpy arrays as input. """ set_backend(backend) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) - qrnn.train((self.x_train, self.y_train), - maximum_epochs = 1) + qrnn = QRNN(self.x_train.shape[1], np.linspace(0.05, 0.95, 10)) + qrnn.train((self.x_train, self.y_train), maximum_epochs=1) qrnn.predict(self.x_train) @@ -60,10 +61,11 @@ def test_qrnn(self, backend): mu = qrnn.posterior_mean(self.x_train[:2, :]) assert len(mu.shape) == 1 - r = qrnn.sample_posterior(self.x_train[:2, :], n=2) - assert r.shape == (2, 2) + r = qrnn.sample_posterior(self.x_train[:4, :], n=2) + assert r.shape == (4, 2) - r = qrnn.sample_posterior_gaussian_fit(self.x_train[:2, :], n=2) + r = qrnn.sample_posterior_gaussian_fit(self.x_train[:4, :], n=2) + assert r.shape == (4, 2) @pytest.mark.parametrize("backend", backends) def test_qrnn_datasets(self, backend): @@ -72,13 +74,9 @@ def test_qrnn_datasets(self, backend): """ set_backend(backend) backend = get_backend(backend) - data = backend.BatchedDataset((self.x_train, self.y_train), - 256) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) - qrnn.train(data, - maximum_epochs = 1) - + data = backend.BatchedDataset((self.x_train, self.y_train), 256) + qrnn = QRNN(self.x_train.shape[1], np.linspace(0.05, 0.95, 10)) + qrnn.train(data, maximum_epochs=1) @pytest.mark.parametrize("backend", backends) def test_save_qrnn(self, backend): @@ -86,8 +84,7 @@ def test_save_qrnn(self, backend): Test saving and loading of QRNNs. 
""" set_backend(backend) - qrnn = QRNN(self.x_train.shape[1], - np.linspace(0.05, 0.95, 10)) + qrnn = QRNN(self.x_train.shape[1], np.linspace(0.05, 0.95, 10)) f = tempfile.NamedTemporaryFile() qrnn.save(f.name) qrnn_loaded = QRNN.load(f.name) @@ -98,4 +95,4 @@ def test_save_qrnn(self, backend): if not type(x_pred) == np.ndarray: x_pred = x_pred.detach() - assert(np.allclose(x_pred, x_pred_loaded)) + assert np.allclose(x_pred, x_pred_loaded) From 0f471bf39175d545dbf27abaa3d91f6e3d25b949 Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 28 Sep 2020 07:57:35 +0200 Subject: [PATCH 16/19] Refactoring of model submodule. --- typhon/retrieval/qrnn/models/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 typhon/retrieval/qrnn/models/__init__.py diff --git a/typhon/retrieval/qrnn/models/__init__.py b/typhon/retrieval/qrnn/models/__init__.py new file mode 100644 index 00000000..e69de29b From 98d23095e1bc85c697974a45f61dddf5d12fe5cb Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 28 Sep 2020 07:58:47 +0200 Subject: [PATCH 17/19] Refactored models submodule. --- typhon/retrieval/qrnn/models/keras.py | 16 ++--- .../qrnn/models/pytorch/fully_connected.py | 14 ++-- typhon/retrieval/qrnn/models/pytorch/unet.py | 68 ++++++++++++++----- 3 files changed, 69 insertions(+), 29 deletions(-) diff --git a/typhon/retrieval/qrnn/models/keras.py b/typhon/retrieval/qrnn/models/keras.py index f1b31970..e049809a 100644 --- a/typhon/retrieval/qrnn/models/keras.py +++ b/typhon/retrieval/qrnn/models/keras.py @@ -5,13 +5,13 @@ This module provides Keras neural network models that can be used as backend models with the :py:class:`typhon.retrieval.qrnn.QRNN` class. """ +import logging import numpy as np import keras from keras.models import Sequential from keras.layers import Dense, Activation, deserialize from keras.optimizers import SGD import keras.backend as K -import logging def save_model(f, model): @@ -60,7 +60,7 @@ def make_fully_connected(layers=None, **kwargs): # Quantile loss ################################################################################ -logger = logging.getLogger(__name__) +LOGGER = logging.getLogger(__name__) def skewed_absolute_error(y_true, y_pred, tau): @@ -145,7 +145,7 @@ def __init__(self, training_data, batch_size): self.i = 0 def __iter__(self): - logger.info("iter...") + LOGGER.info("iter...") return self def __len__(self): @@ -188,7 +188,7 @@ def __init__(self, training_data, sigma_noise=None): self.sigma_noise = sigma_noise def __iter__(self): - logger.info("iter...") + LOGGER.info("iter...") return self def __len__(self): @@ -229,7 +229,7 @@ def __init__(self, training_data, input_gradients, eps): self.eps = eps def __iter__(self): - logger.info("iter...") + LOGGER.info("iter...") return self def __len__(self): @@ -327,7 +327,7 @@ def on_epoch_end(self, epoch, logs={}): lr = keras.backend.get_value(self.model.optimizer.lr) keras.backend.set_value(self.model.optimizer.lr, lr / self.lr_decay) self.steps = 0 - logger.info("\n Reduced learning rate to " + str(lr)) + LOGGER.info("\n Reduced learning rate to " + str(lr)) if lr < self.lr_minimum: self.model.stop_training = True @@ -487,9 +487,9 @@ def __init__(self, input_dimension, quantiles, arch, layers=None): else: d, w, a = arch layers = [Dense(w, input_shape=(input_dimension,))] - for i in range(d - 1): + for _ in range(d - 1): layers.append(Dense(w, input_shape=(w,))) - if not a is None: + if a is not None: layers.append(Activation(a)) layers.append(Dense(output_dimension, 
input_shape=(w,))) diff --git a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py index 47412ba9..f84983ba 100644 --- a/typhon/retrieval/qrnn/models/pytorch/fully_connected.py +++ b/typhon/retrieval/qrnn/models/pytorch/fully_connected.py @@ -1,5 +1,11 @@ +""" +typhon.retrieval.qrnn.models.pytorch.fully_connected +==================================================== + +This module provides an implementation of a fully-connected feed forward +neural network in pytorch. +""" from torch import nn -from torch import optim from typhon.retrieval.qrnn.models.pytorch.common import PytorchModel, activations ################################################################################ @@ -34,12 +40,12 @@ def __init__(self, input_dimension, quantiles, arch): layers = [nn.Linear(input_dimension, output_dimension)] else: d, w, act = arch - if type(act) == str: + if isinstance(act, str): act = activations[act] layers = [nn.Linear(input_dimension, w)] - for i in range(d - 1): + for _ in range(d - 1): layers.append(nn.Linear(w, w)) - if not act is None: + if act is not None: layers.append(act()) layers.append(nn.Linear(w, output_dimension)) nn.Sequential.__init__(self, *layers) diff --git a/typhon/retrieval/qrnn/models/pytorch/unet.py b/typhon/retrieval/qrnn/models/pytorch/unet.py index 0a9d9403..85b2d8b6 100644 --- a/typhon/retrieval/qrnn/models/pytorch/unet.py +++ b/typhon/retrieval/qrnn/models/pytorch/unet.py @@ -1,3 +1,14 @@ +""" +typhon.retrieval.qrnn.models.pytorch.unet +========================================= + +This module provides an implementation of the UNet [unet]_ +architecture. + +.. [unet] O. Ronneberger, P. Fischer and T. Brox, "U-net: Convolutional networks +for biomedical image segmentation", Proc. Int. Conf. Med. Image Comput. +Comput.-Assist. Intervent. (MICCAI), pp. 234-241, 2015. +""" import torch from torch import nn @@ -26,7 +37,6 @@ def __init__( self, features_in, features_out, - batch_norm=True, kernel_size=3, activation=nn.ReLU, skip_connection=False, @@ -35,7 +45,7 @@ def __init__( self._features_out = features_out self.skip_connection = skip_connection - if not activation is None: + if activation is not None: modules = [ nn.ConstantPad2d(1, 0.0), nn.Conv2d(features_in, features_out, kernel_size), @@ -52,12 +62,15 @@ def __init__( @property def features_out(self): + """ + The number outgoing channels of the layer. + """ if self.skip_connection: return self._features_in + self._features_out - else: - return self._features_out + return self._features_out def forward(self, x): + """ Forward input through layer. """ y = nn.Sequential.forward(self, x) if self.skip_connection: y = torch.cat([x, y], dim=1) @@ -67,16 +80,6 @@ def forward(self, x): class Block(nn.Sequential): """ A block bundles a set of layers. - - Args: - features_in(:code:`int`): The number of input features of the block - features_out(:code:`int`): The number of output features of the block. - depth(:code:`int`): The number of layers of the block - activation(:code:`nn.Module`): Pytorch activation layer to - use. :code:`nn.ReLU` by default. - skip_connection(:code:`str`): Whether or not to insert skip - connections before all layers (:code:`"all"`) or just at - the end (:code:`"end"`). """ def __init__( @@ -89,7 +92,17 @@ def __init__( kernel_size=3, skip_connection=None, ): - + """ + Args: + features_in(:code:`int`): The number of input features of the block + features_out(:code:`int`): The number of output features of the block. 
+ depth(:code:`int`): The number of layers of the block + activation(:code:`nn.Module`): Pytorch activation layer to + use. :code:`nn.ReLU` by default. + skip_connection(:code:`str`): Whether or not to insert skip + connections before all layers (:code:`"all"`) or just at + the end (:code:`"end"`). + """ self._features_in = features_in if skip_connection == "all": @@ -122,12 +135,16 @@ def __init__( @property def features_out(self): + """ + The number outgoing channels of the layer. + """ if self.skip_connection: return self._features_in + self._features_out else: return self._features_out def forward(self, x): + """ Forward input through layer. """ y = nn.Sequential.forward(self, x) if self.skip_connection: y = torch.cat([x, y], dim=1) @@ -135,12 +152,18 @@ def forward(self, x): class DownSampler(nn.Sequential): + """ + A downsampling block reduces the input resolution by applying max-pooling. + """ def __init__(self): modules = [nn.MaxPool2d(2)] super().__init__(*modules) class UpSampler(nn.Sequential): + """ + An upsampling block increases the input resolution by transposed convolution. + """ def __init__(self, features_in, features_out): modules = [ nn.ConvTranspose2d( @@ -151,10 +174,21 @@ def __init__(self, features_in, features_out): class UNet(PytorchModel, nn.Module): + """ + Pytorch implementation of the UNet architecture for image segmentation. + """ def __init__( self, input_features, quantiles, n_features=32, n_levels=4, skip_connection=None ): - + """ + Args: + input_features(``int``): The number of channels of the input image. + quantiles(``np.array``): Array containing the quantiles to predict. + n_features: The number of channels of the first convolution block. + n_level: The number of down-sampling steps. + skip_connection: Whether or not to include skip connections in + each block. + """ nn.Module.__init__(self) PytorchModel.__init__(self, input_features, quantiles) @@ -197,7 +231,7 @@ def __init__( ) def forward(self, x): - + """ Propagate input through layer. """ features = [] for (b, s) in zip(self.down_blocks, self.down_samplers): x = b(x) From 67e682daafcbe7b61c4bc797ce811ec4b7aabfed Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Mon, 28 Sep 2020 08:27:56 +0200 Subject: [PATCH 18/19] Reverted setup.py. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 525618ed..ff29fdb2 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,7 @@ "numexpr", "numpy>=1.13", "pandas", - "scikit-image>=0.15", + "scikit-image", "scikit-learn", "scipy>=0.15.1", "setuptools>=0.7.2", From 5404c468e77c4fc01b74d7faa05c75c046f8071c Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Wed, 30 Sep 2020 11:19:33 +0200 Subject: [PATCH 19/19] Fixed typos. --- doc/typhon.retrieval.qrnn.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/typhon.retrieval.qrnn.rst b/doc/typhon.retrieval.qrnn.rst index 1fa24dcd..917f4305 100644 --- a/doc/typhon.retrieval.qrnn.rst +++ b/doc/typhon.retrieval.qrnn.rst @@ -30,8 +30,8 @@ Backends -------- Currently both `keras `_ and `pytorch `_ -are supported as backends for neural network. The QRNN implementation will -automatically use the one the is available on your system. If both are available +are supported as backends for neural networks. The QRNN implementation will +automatically use the one that is available on your system. If both are available you can choose a specific backend using the :py:meth:`~typhon.retrieval.qrnn.set_backend` function. 
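+
+For example, to select a backend explicitly before constructing a model (a
+minimal sketch that assumes the pytorch backend is installed)::
+
+    import numpy as np
+    from typhon.retrieval.qrnn import QRNN, set_backend
+
+    set_backend("pytorch")
+    qrnn = QRNN(input_dimensions=16, quantiles=np.linspace(0.05, 0.95, 10))
+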
Neural network models @@ -64,8 +64,8 @@ API documentation FullyConnected UNet -.. automodule:: typhon.retrieval.qrnn.models.pytorch -.. currentmodule:: typhon.retrieval.qrnn.models.pytorch +.. automodule:: typhon.retrieval.qrnn.models.keras +.. currentmodule:: typhon.retrieval.qrnn.models.keras .. autosummary:: :toctree: generated
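+
+As a quick usage sketch (``x_train``, ``y_train`` and ``x_test`` are assumed
+to be numpy arrays), a QRNN can be trained and its probabilistic output
+queried as follows::
+
+    import numpy as np
+    from typhon.retrieval.qrnn import QRNN
+
+    qrnn = QRNN(x_train.shape[1], quantiles=np.linspace(0.05, 0.95, 10))
+    qrnn.train((x_train, y_train), maximum_epochs=10)
+
+    y_quantiles = qrnn.predict(x_test)           # predicted quantiles
+    x_cdf, y_cdf = qrnn.cdf(x_test[:1])          # piecewise-linear CDF
+    samples = qrnn.sample_posterior(x_test[:1], n=100)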