Skip to content
This repository has been archived by the owner on Jun 10, 2022. It is now read-only.

conditional sampling #33

Merged
merged 48 commits into from
Jul 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
2e14b12
added next steps
peisenha Jun 19, 2020
fc2cab4
add condMVN function
loikein Jun 23, 2020
e45da3f
add condMVN test
loikein Jun 23, 2020
e138eee
setup R for CI
loikein Jun 23, 2020
a99f286
use newer version of R
loikein Jun 23, 2020
ad27a1e
install r packages
loikein Jun 23, 2020
f4cfdf1
use integrated r for rpy2; install r package on the fly
loikein Jun 23, 2020
0048494
fix RPY2_CFFI_MODE
loikein Jun 23, 2020
bfba26e
install r & use env variable
loikein Jun 23, 2020
e0679ba
fix range is zero error
loikein Jun 23, 2020
04b7ec6
typo
loikein Jun 23, 2020
4d4f948
debug CI: try rpy2.situation
loikein Jun 23, 2020
63ef441
use r 3.6.3
loikein Jun 23, 2020
83154d2
skip importing rpy2 for autodoc
loikein Jun 24, 2020
19b92e1
add rtd_environment
loikein Jun 24, 2020
3654117
change readthedocs.yml
loikein Jun 24, 2020
d99b9e7
test condMVN exceptions
loikein Jun 24, 2020
2fe8f4c
trigger diff
loikein Jun 24, 2020
b43827a
add test bad sigma
loikein Jun 24, 2020
989625c
use assert in instead of equal
loikein Jun 24, 2020
d5d4328
add test non-p-d sigma
loikein Jun 24, 2020
9607a2b
fix lenth error
loikein Jun 24, 2020
425398d
rewrite test_condMVN_exception
loikein Jun 24, 2020
0a53a2a
skip B101 for test files
loikein Jun 24, 2020
4fa2f35
delete .bandit
loikein Jun 24, 2020
abff465
delete test codes
loikein Jun 25, 2020
bd15a6a
adjust sampling input validation
loikein Jun 25, 2020
a07e08e
trigger diff
loikein Jun 25, 2020
b353ef2
adjust test_condMVN_exception frequency
loikein Jun 25, 2020
f28453c
moved r-base to conda
peisenha Jul 1, 2020
94b5def
no camel case
loikein Jul 1, 2020
09cbf86
move some strategies out of if
loikein Jul 1, 2020
bea4875
upgrade numpy
loikein Jul 1, 2020
eee93ec
move test cases
loikein Jul 1, 2020
205326b
fix documentation types
loikein Jul 2, 2020
195edc1
add some math
loikein Jul 2, 2020
b6eef7e
Merge pull request #37 from OpenSourceEconomics/peisenha_review
loikein Jul 2, 2020
6f0ecda
refactoring R wrapper
peisenha Jul 3, 2020
109ff09
further cleanup
peisenha Jul 3, 2020
3404720
fix pre-commit
loikein Jul 3, 2020
9dbacef
Merge pull request #39 from OpenSourceEconomics/peisenha_finalize
loikein Jul 3, 2020
3ba4880
test rhome on windows
loikein Jul 4, 2020
10edee3
add rhome to windows
loikein Jul 4, 2020
23efdbc
refresh env
loikein Jul 4, 2020
24caa26
set & setx
loikein Jul 4, 2020
25f7bfe
try https://bitbucket.org/rpy2/rpy2/issues/487/
loikein Jul 4, 2020
323a647
rollback
loikein Jul 4, 2020
e75d8f6
Merge branch 'master' into conditional-sampler
loikein Jul 4, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion .github/workflows/continuous-integration-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,26 @@ jobs:
environment-file: environment.yml
python-version: ${{ matrix.python-version }}

- name: Run test suite.
- name: Run test suite on non-Windows.
if: runner.os != 'Windows'
shell: bash -l {0}
run: |

pytest --cov=econsa --cov-report=xml

# We want to make sure proper formatting of the notebooks.
flake8-nb; [ $? -eq 0 ] || exit 1
black-nb .; [ $? -eq 0 ] || exit 1

# We want to pull the submodules.
# This can be removed once temfpy is live on conda.
git submodule update --init --recursive

- name: Run test suite on Windows.
if: runner.os == 'Windows'
shell: bash -l {0}
run: |
python -m rpy2.situation
pytest --cov=econsa --cov-report=xml

# We want to make sure proper formatting of the notebooks.
Expand Down
17 changes: 13 additions & 4 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def setup(app):
"sphinx.ext.extlinks",
"sphinx.ext.intersphinx",
"sphinx.ext.viewcode",
"numpydoc",
"sphinx.ext.napoleon",
"nbsphinx",
"sphinx.ext.mathjax",
"sphinxcontrib.bibtex",
Expand Down Expand Up @@ -93,9 +93,18 @@ def setup(app):
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None

# Configuration for numpydoc
numpydoc_xref_param_type = True
numpydoc_xref_ignore = {"type", "optional", "default"}
# Napoleon settings
napoleon_google_docstring = True
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = False
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = True
napoleon_use_admonition_for_examples = False
napoleon_use_admonition_for_notes = False
napoleon_use_admonition_for_references = False
napoleon_use_ivar = True
napoleon_use_param = True
napoleon_use_rtype = True

# -- Options for HTML output -------------------------------------------------

Expand Down
1 change: 1 addition & 0 deletions docs/source/methods/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ Here we explain and document in detail, the methods we implement in the ``econsa

qualitative-analysis
quantitative-analysis
sampling
5 changes: 5 additions & 0 deletions docs/source/methods/sampling.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Sampling methods
================

.. automodule:: econsa.sampling
:members:
89 changes: 89 additions & 0 deletions docs/source/tutorials/conditional_sampling.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Conditional sampling"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can rely on `chaospy`'s features to construct flexible multivariate distributions for input paramters using copulas. This allows us to sample from the **joint distribution** of input paramters as is required for uncertainty propagation. However, we now also want to be able to conduct a quantitative sensitivity analysis using, for example, Sobol indicies (**@timmens**) an Shapley values (**@lindamaok899**). This remains a straightforward task where the multivariate distribution is known to be Gaussian as analytical expressions exist ([Wikipedia](https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Conditional_distributions)).\n",
"\n",
"We need the capablity to construct these **conditional distributions** in a more flexible way to, for example, compute the Sobol indices and Shapley values for the EQO model where the marginal distribution of the input parameters is uniform instead of normal.\n",
"\n",
"Again, we will pursue a copula approach and use a Gaussian copula to generate the samples."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## $1^{st}$ Step: Flexible sampler for conditional Gaussian distribution"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There appears to be no flexible Python code out there as part of a sound library that simply samples from the conditional normal distribution (please confirm). We do have some work here already done by Janos and Tim, please reach out to them and see what can be useful for us. \n",
"\n",
"However, there is an implementation available to construct the conditional mean and variance in $R$ as part of the [condMVNorm](https://cran.r-project.org/web/packages/condMVNorm/condMVNorm.pdf) package. Please construct an analogous Python implementation.\n",
"\n",
"Be sure to validate your implementation against the original implementation. You can probably use the function in Python as well by using [rpy2](https://rpy2.github.io/). Simply set up a loop that specifies numerous multivariate normal distributions, specifies a request for a conditional distribution, and then compares the output between the two implementations.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## $2^{nd}$ Step: Gaussian copula with flexible marginals"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We need to extend our conditional sampler to allow for more flexible marginal distributions. This boils down to transforming the quantile from the marginal distribution into the corresponding value of the Gaussian. We use the special case that is equivalent to a simple multivariate normal distribution for testing."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## $3^{rd}$ Step: Mapping correlation matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We need to map the correlation between the orginal variables to the implied correlation for the Gaussian copula. Here, Section 4 in this [paper](https://doi.org/10.1016/j.cpc.2011.12.020) is a good starting point."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
96 changes: 93 additions & 3 deletions econsa/sampling.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,99 @@
"""Capabilities for sampling of random input parameters.

This module contains all we need to sample random input parameters.
This module contains functions to sample random input parameters.

"""
import numpy as np


def get_sample(*args, **kwargs):
return
def cond_mvn(
mean, sigma, dependent_ind, given_ind=None, given_value=None, check_sigma=True,
):
r"""Conditional mean and variance function.

This function provides the conditional mean and variance-covariance matrix of [Y given X],
where Z = (X,Y) is the fully-joint multivariate normal distribution with mean equal to
``mean`` and covariance matrix ``sigma``.

This is a translation of the main function of R package condMVNorm_.

.. _condMVNorm: https://cran.r-project.org/package=condMVNorm


Parameters
----------
mean : array_like
The mean vector of the multivariate normal distribution.

sigma : array_like
Symmetric and positive-definite covariance matrix of
the multivariate normal distribution.

dependent_ind : array_like
The indices of dependent variables.

given_ind : array_like, optional
The indices of independent variables (default value is ``None``).
If not specified return unconditional values.

given_value : array_like, optional
The conditioning values (default value is ``None``). Should be the same length as
``given_ind``, otherwise throw an error.

check_sigma : bool, optional
Check that ``sigma`` is symmetric, and no eigenvalue is zero (default value is
``True``).

Returns
-------
cond_mean : array_like
The conditional mean of dependent variables.

cond_cov : arrray_like
The conditional covariance matrix of dependent variables.

Examples
--------
>>> mean = np.array([1, 1, 1])
>>> sigma = np.array([[4.0677098, -0.9620331, 0.9897267],
... [-0.9620331, 2.2775449, 0.7475968],
... [0.9897267, 0.7475968, 0.7336631]])
>>> dependent_ind = [0, ]
>>> given_ind = [1, 2]
>>> given_value = [1, -1]
>>> cond_mean, cond_cov = cond_mvn(mean, sigma, dependent_ind, given_ind, given_value)
>>> np.testing.assert_almost_equal(cond_mean, -4.347531, decimal=6)
>>> np.testing.assert_almost_equal(cond_cov, 0.170718, decimal=6)
"""
mean_np = np.array(mean)
sigma_np = np.array(sigma)
given_ind_np = np.array(given_ind, ndmin=1)
given_value_np = np.array(given_value, ndmin=1)

# Check `sigma` is symmetric and positive-definite:
if check_sigma:
if not np.allclose(sigma_np, sigma_np.T):
raise ValueError("sigma is not a symmetric matrix")
elif np.all(np.linalg.eigvals(sigma_np) > 0) == 0:
raise ValueError("sigma is not positive-definite")

# When `given_ind` is None, return mean and variances of dependent values:
if given_ind is None:
cond_mean = np.array(mean_np[dependent_ind])
cond_cov = np.array(sigma_np[dependent_ind, :][:, dependent_ind])
return cond_mean, cond_cov

# Make sure that `given_value` aligns with `given_len`. This includes the case that
# `given_value` is empty.
if len(given_value_np) != len(given_ind_np):
raise ValueError("lengths of given_value and given_ind must be the same")

b = sigma_np[dependent_ind, :][:, dependent_ind]
c = sigma_np[dependent_ind, :][:, given_ind]
d = sigma_np[given_ind, :][:, given_ind]
c_dinv = c @ np.linalg.inv(d)

cond_mean = mean_np[dependent_ind] + c_dinv @ (given_value - mean_np[given_ind])
cond_cov = b - c_dinv @ (c.T)

return cond_mean, cond_cov
100 changes: 97 additions & 3 deletions econsa/tests/test_sampling.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,103 @@
"""Tests for the sampling.

This module contains all tests for th sampling setup.
This module contains all tests for the sampling setup.

"""
import numpy as np
import pytest
from numpy.random import default_rng

from econsa.sampling import cond_mvn
from econsa.tests.wrapper_r import r_cond_mvn

def test_sampling():
pass
rng = default_rng()


def get_strategies(name):
n = rng.integers(low=4, high=21)
mean = rng.integers(low=-2, high=2, size=n)
dependent_n = rng.integers(low=1, high=n - 2)
dependent = rng.choice(range(0, n), replace=False, size=dependent_n)

if name == "cond_mvn":
sigma = rng.standard_normal(size=(n, n))
sigma = sigma @ sigma.T
given_ind = [x for x in range(0, n) if x not in dependent]
given_value = rng.integers(low=-2, high=2, size=len(given_ind))
elif name == "cond_mvn_exception_given":
sigma = rng.standard_normal(size=(n, n))
sigma = sigma @ sigma.T
given_ind = (
[x for x in range(0, n) if x not in dependent] if n % 3 == 0 else None
)
given_value = (
rng.integers(low=-2, high=2, size=n - dependent_n + 1)
if n % 2 == 0
else None
)
elif name == "test_cond_mvn_exception_sigma":
sigma = (
rng.standard_normal(size=(n, n)) if n % 3 == 0 else np.diagflat([-1] * n)
)
sigma = sigma @ sigma.T if n % 2 == 0 else sigma
given_ind = [x for x in range(0, n) if x not in dependent]
given_value = rng.integers(low=-2, high=2, size=len(given_ind))
else:
raise NotImplementedError

strategy = (mean, sigma, dependent, given_ind, given_value)

return strategy


def test_cond_mvn():
"""Test cond_mvn against the original from R package cond_mvnorm.
"""
request = get_strategies("cond_mvn")

r_cond_mean, r_cond_cov = r_cond_mvn(*request)
cond_mean, cond_cov = cond_mvn(*request)

np.testing.assert_allclose(cond_mean, r_cond_mean)
np.testing.assert_allclose(cond_cov, r_cond_cov)


def test_cond_mvn_exception_given():
"""Test cond_mvn raises exceptions when invalid `given_ind` or `given_value` is passed.
"""
mean, sigma, dependent_ind, given_ind, given_value = get_strategies(
"cond_mvn_exception_given",
)

n = sigma.shape[0]
if n % 3 != 0:
# Valid case: only `given_ind` is empty or both `given_ind` and `given_value` are empty
cond_mvn(mean, sigma, dependent_ind, given_ind, given_value)
else:
# `given_value` is empty or does not align with `given_ind`
with pytest.raises(ValueError) as e:
cond_mvn(mean, sigma, dependent_ind, given_ind, given_value)
assert "lengths of given_value and given_ind must be the same" in str(e.value)


def test_cond_mvn_exception_sigma():
"""Test cond_mvn raises exceptions when invalid `sigma` is passed.
"""
mean, sigma, dependent_ind, given_ind, given_value = get_strategies(
"test_cond_mvn_exception_sigma",
)

n = sigma.shape[0]

if n % 3 != 0 and n % 2 != 0:
# `sigma` is negative definite matrix
with pytest.raises(ValueError) as e:
cond_mvn(mean, sigma, dependent_ind, given_ind, given_value)
assert "sigma is not positive-definite" in str(e.value)
elif n % 2 != 0:
# `sigma` is not symmetric
with pytest.raises(ValueError) as e:
cond_mvn(mean, sigma, dependent_ind, given_ind, given_value)
assert "sigma is not a symmetric matrix" in str(e.value)
else:
cond_mvn(mean, sigma, dependent_ind, given_ind, given_value)
Loading