diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml index f13d2a13..f63df22f 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/continuous-integration-workflow.yml @@ -23,9 +23,26 @@ jobs: environment-file: environment.yml python-version: ${{ matrix.python-version }} - - name: Run test suite. + - name: Run test suite on non-Windows. + if: runner.os != 'Windows' + shell: bash -l {0} + run: | + + pytest --cov=econsa --cov-report=xml + + # We want to make sure proper formatting of the notebooks. + flake8-nb; [ $? -eq 0 ] || exit 1 + black-nb .; [ $? -eq 0 ] || exit 1 + + # We want to pull the submodules. + # This can be removed once temfpy is live on conda. + git submodule update --init --recursive + + - name: Run test suite on Windows. + if: runner.os == 'Windows' shell: bash -l {0} run: | + python -m rpy2.situation pytest --cov=econsa --cov-report=xml # We want to make sure proper formatting of the notebooks. diff --git a/docs/source/conf.py b/docs/source/conf.py index 6c8fac72..d0872a43 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -58,7 +58,7 @@ def setup(app): "sphinx.ext.extlinks", "sphinx.ext.intersphinx", "sphinx.ext.viewcode", - "numpydoc", + "sphinx.ext.napoleon", "nbsphinx", "sphinx.ext.mathjax", "sphinxcontrib.bibtex", @@ -93,9 +93,18 @@ def setup(app): # The name of the Pygments (syntax highlighting) style to use. pygments_style = None -# Configuration for numpydoc -numpydoc_xref_param_type = True -numpydoc_xref_ignore = {"type", "optional", "default"} +# Napoleon settings +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = False +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = False +napoleon_use_admonition_for_notes = False +napoleon_use_admonition_for_references = False +napoleon_use_ivar = True +napoleon_use_param = True +napoleon_use_rtype = True # -- Options for HTML output ------------------------------------------------- diff --git a/docs/source/methods/index.rst b/docs/source/methods/index.rst index 21819e3d..1eaf337a 100644 --- a/docs/source/methods/index.rst +++ b/docs/source/methods/index.rst @@ -9,3 +9,4 @@ Here we explain and document in detail, the methods we implement in the ``econsa qualitative-analysis quantitative-analysis + sampling diff --git a/docs/source/methods/sampling.rst b/docs/source/methods/sampling.rst new file mode 100644 index 00000000..a4304c7a --- /dev/null +++ b/docs/source/methods/sampling.rst @@ -0,0 +1,5 @@ +Sampling methods +================ + +.. automodule:: econsa.sampling + :members: diff --git a/docs/source/tutorials/conditional_sampling.ipynb b/docs/source/tutorials/conditional_sampling.ipynb new file mode 100644 index 00000000..3afd6afe --- /dev/null +++ b/docs/source/tutorials/conditional_sampling.ipynb @@ -0,0 +1,89 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conditional sampling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can rely on `chaospy`'s features to construct flexible multivariate distributions for input paramters using copulas. This allows us to sample from the **joint distribution** of input paramters as is required for uncertainty propagation. However, we now also want to be able to conduct a quantitative sensitivity analysis using, for example, Sobol indicies (**@timmens**) an Shapley values (**@lindamaok899**). This remains a straightforward task where the multivariate distribution is known to be Gaussian as analytical expressions exist ([Wikipedia](https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Conditional_distributions)).\n", + "\n", + "We need the capablity to construct these **conditional distributions** in a more flexible way to, for example, compute the Sobol indices and Shapley values for the EQO model where the marginal distribution of the input parameters is uniform instead of normal.\n", + "\n", + "Again, we will pursue a copula approach and use a Gaussian copula to generate the samples." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## $1^{st}$ Step: Flexible sampler for conditional Gaussian distribution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There appears to be no flexible Python code out there as part of a sound library that simply samples from the conditional normal distribution (please confirm). We do have some work here already done by Janos and Tim, please reach out to them and see what can be useful for us. \n", + "\n", + "However, there is an implementation available to construct the conditional mean and variance in $R$ as part of the [condMVNorm](https://cran.r-project.org/web/packages/condMVNorm/condMVNorm.pdf) package. Please construct an analogous Python implementation.\n", + "\n", + "Be sure to validate your implementation against the original implementation. You can probably use the function in Python as well by using [rpy2](https://rpy2.github.io/). Simply set up a loop that specifies numerous multivariate normal distributions, specifies a request for a conditional distribution, and then compares the output between the two implementations.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## $2^{nd}$ Step: Gaussian copula with flexible marginals" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We need to extend our conditional sampler to allow for more flexible marginal distributions. This boils down to transforming the quantile from the marginal distribution into the corresponding value of the Gaussian. We use the special case that is equivalent to a simple multivariate normal distribution for testing." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## $3^{rd}$ Step: Mapping correlation matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We need to map the correlation between the orginal variables to the implied correlation for the Gaussian copula. Here, Section 4 in this [paper](https://doi.org/10.1016/j.cpc.2011.12.020) is a good starting point." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/econsa/sampling.py b/econsa/sampling.py index 6b2228cd..02b8a2a9 100644 --- a/econsa/sampling.py +++ b/econsa/sampling.py @@ -1,9 +1,99 @@ """Capabilities for sampling of random input parameters. -This module contains all we need to sample random input parameters. +This module contains functions to sample random input parameters. """ +import numpy as np -def get_sample(*args, **kwargs): - return +def cond_mvn( + mean, sigma, dependent_ind, given_ind=None, given_value=None, check_sigma=True, +): + r"""Conditional mean and variance function. + + This function provides the conditional mean and variance-covariance matrix of [Y given X], + where Z = (X,Y) is the fully-joint multivariate normal distribution with mean equal to + ``mean`` and covariance matrix ``sigma``. + + This is a translation of the main function of R package condMVNorm_. + + .. _condMVNorm: https://cran.r-project.org/package=condMVNorm + + + Parameters + ---------- + mean : array_like + The mean vector of the multivariate normal distribution. + + sigma : array_like + Symmetric and positive-definite covariance matrix of + the multivariate normal distribution. + + dependent_ind : array_like + The indices of dependent variables. + + given_ind : array_like, optional + The indices of independent variables (default value is ``None``). + If not specified return unconditional values. + + given_value : array_like, optional + The conditioning values (default value is ``None``). Should be the same length as + ``given_ind``, otherwise throw an error. + + check_sigma : bool, optional + Check that ``sigma`` is symmetric, and no eigenvalue is zero (default value is + ``True``). + + Returns + ------- + cond_mean : array_like + The conditional mean of dependent variables. + + cond_cov : arrray_like + The conditional covariance matrix of dependent variables. + + Examples + -------- + >>> mean = np.array([1, 1, 1]) + >>> sigma = np.array([[4.0677098, -0.9620331, 0.9897267], + ... [-0.9620331, 2.2775449, 0.7475968], + ... [0.9897267, 0.7475968, 0.7336631]]) + >>> dependent_ind = [0, ] + >>> given_ind = [1, 2] + >>> given_value = [1, -1] + >>> cond_mean, cond_cov = cond_mvn(mean, sigma, dependent_ind, given_ind, given_value) + >>> np.testing.assert_almost_equal(cond_mean, -4.347531, decimal=6) + >>> np.testing.assert_almost_equal(cond_cov, 0.170718, decimal=6) + """ + mean_np = np.array(mean) + sigma_np = np.array(sigma) + given_ind_np = np.array(given_ind, ndmin=1) + given_value_np = np.array(given_value, ndmin=1) + + # Check `sigma` is symmetric and positive-definite: + if check_sigma: + if not np.allclose(sigma_np, sigma_np.T): + raise ValueError("sigma is not a symmetric matrix") + elif np.all(np.linalg.eigvals(sigma_np) > 0) == 0: + raise ValueError("sigma is not positive-definite") + + # When `given_ind` is None, return mean and variances of dependent values: + if given_ind is None: + cond_mean = np.array(mean_np[dependent_ind]) + cond_cov = np.array(sigma_np[dependent_ind, :][:, dependent_ind]) + return cond_mean, cond_cov + + # Make sure that `given_value` aligns with `given_len`. This includes the case that + # `given_value` is empty. + if len(given_value_np) != len(given_ind_np): + raise ValueError("lengths of given_value and given_ind must be the same") + + b = sigma_np[dependent_ind, :][:, dependent_ind] + c = sigma_np[dependent_ind, :][:, given_ind] + d = sigma_np[given_ind, :][:, given_ind] + c_dinv = c @ np.linalg.inv(d) + + cond_mean = mean_np[dependent_ind] + c_dinv @ (given_value - mean_np[given_ind]) + cond_cov = b - c_dinv @ (c.T) + + return cond_mean, cond_cov diff --git a/econsa/tests/test_sampling.py b/econsa/tests/test_sampling.py index bca59b89..16775a53 100644 --- a/econsa/tests/test_sampling.py +++ b/econsa/tests/test_sampling.py @@ -1,9 +1,103 @@ """Tests for the sampling. -This module contains all tests for th sampling setup. +This module contains all tests for the sampling setup. """ +import numpy as np +import pytest +from numpy.random import default_rng +from econsa.sampling import cond_mvn +from econsa.tests.wrapper_r import r_cond_mvn -def test_sampling(): - pass +rng = default_rng() + + +def get_strategies(name): + n = rng.integers(low=4, high=21) + mean = rng.integers(low=-2, high=2, size=n) + dependent_n = rng.integers(low=1, high=n - 2) + dependent = rng.choice(range(0, n), replace=False, size=dependent_n) + + if name == "cond_mvn": + sigma = rng.standard_normal(size=(n, n)) + sigma = sigma @ sigma.T + given_ind = [x for x in range(0, n) if x not in dependent] + given_value = rng.integers(low=-2, high=2, size=len(given_ind)) + elif name == "cond_mvn_exception_given": + sigma = rng.standard_normal(size=(n, n)) + sigma = sigma @ sigma.T + given_ind = ( + [x for x in range(0, n) if x not in dependent] if n % 3 == 0 else None + ) + given_value = ( + rng.integers(low=-2, high=2, size=n - dependent_n + 1) + if n % 2 == 0 + else None + ) + elif name == "test_cond_mvn_exception_sigma": + sigma = ( + rng.standard_normal(size=(n, n)) if n % 3 == 0 else np.diagflat([-1] * n) + ) + sigma = sigma @ sigma.T if n % 2 == 0 else sigma + given_ind = [x for x in range(0, n) if x not in dependent] + given_value = rng.integers(low=-2, high=2, size=len(given_ind)) + else: + raise NotImplementedError + + strategy = (mean, sigma, dependent, given_ind, given_value) + + return strategy + + +def test_cond_mvn(): + """Test cond_mvn against the original from R package cond_mvnorm. + """ + request = get_strategies("cond_mvn") + + r_cond_mean, r_cond_cov = r_cond_mvn(*request) + cond_mean, cond_cov = cond_mvn(*request) + + np.testing.assert_allclose(cond_mean, r_cond_mean) + np.testing.assert_allclose(cond_cov, r_cond_cov) + + +def test_cond_mvn_exception_given(): + """Test cond_mvn raises exceptions when invalid `given_ind` or `given_value` is passed. + """ + mean, sigma, dependent_ind, given_ind, given_value = get_strategies( + "cond_mvn_exception_given", + ) + + n = sigma.shape[0] + if n % 3 != 0: + # Valid case: only `given_ind` is empty or both `given_ind` and `given_value` are empty + cond_mvn(mean, sigma, dependent_ind, given_ind, given_value) + else: + # `given_value` is empty or does not align with `given_ind` + with pytest.raises(ValueError) as e: + cond_mvn(mean, sigma, dependent_ind, given_ind, given_value) + assert "lengths of given_value and given_ind must be the same" in str(e.value) + + +def test_cond_mvn_exception_sigma(): + """Test cond_mvn raises exceptions when invalid `sigma` is passed. + """ + mean, sigma, dependent_ind, given_ind, given_value = get_strategies( + "test_cond_mvn_exception_sigma", + ) + + n = sigma.shape[0] + + if n % 3 != 0 and n % 2 != 0: + # `sigma` is negative definite matrix + with pytest.raises(ValueError) as e: + cond_mvn(mean, sigma, dependent_ind, given_ind, given_value) + assert "sigma is not positive-definite" in str(e.value) + elif n % 2 != 0: + # `sigma` is not symmetric + with pytest.raises(ValueError) as e: + cond_mvn(mean, sigma, dependent_ind, given_ind, given_value) + assert "sigma is not a symmetric matrix" in str(e.value) + else: + cond_mvn(mean, sigma, dependent_ind, given_ind, given_value) diff --git a/econsa/tests/wrapper_r.py b/econsa/tests/wrapper_r.py new file mode 100644 index 00000000..9b2b4f14 --- /dev/null +++ b/econsa/tests/wrapper_r.py @@ -0,0 +1,30 @@ +"""Wrapping R. + +This module contains all functionality related to the use of functions from R for testing purposes. + +""" +import numpy as np +import rpy2.robjects.packages as rpackages +from rpy2 import robjects +from rpy2.robjects import numpy2ri + +r_package_cond_mvnorm = rpackages.importr("condMVNorm") + + +def r_cond_mvn(mean, sigma, dependent_ind, given_ind, given_value): + numpy2ri.activate() + r_mean = robjects.FloatVector(mean) + n = sigma.shape[0] + r_sigma = robjects.r.matrix(sigma, n, n) + r_dependent_ind = robjects.IntVector([x + 1 for x in dependent_ind]) + r_given_ind = robjects.IntVector([x + 1 for x in given_ind]) + r_given_value = robjects.IntVector(given_value) + + args = (r_mean, r_sigma, r_dependent_ind, r_given_ind, r_given_value) + r_cond_mean, r_cond_cov = r_package_cond_mvnorm.condMVN(*args) + + r_cond_mean, r_cond_cov = np.array(r_cond_mean), np.array(r_cond_cov) + + numpy2ri.deactivate() + + return r_cond_mean, r_cond_cov diff --git a/environment.yml b/environment.yml index 38590cf1..7561a3c6 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - python=3.7 - sphinx - pandas>=0.25 - - numpy=1.16 + - numpy>=1.17 - pip - scipy - jupyter @@ -27,7 +27,9 @@ dependencies: - nbsphinx - sphinx_rtd_theme - sphinxcontrib-bibtex - + - r-base + - r-r.utils + - r-condmvnorm - pip: - salib==1.3.8 - isort @@ -38,3 +40,4 @@ dependencies: - flake8-nb - numpydoc - pre-commit + - rpy2 diff --git a/readthedocs.yml b/readthedocs.yml index cb25a0a8..d2313193 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -9,5 +9,6 @@ python: conda: environment: configurations/rtd_environment.yml + formats: - pdf