Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Adding support for adaptation of a dense mass matrix based on sample covariances #3596

Merged
merged 12 commits into from
Dec 4, 2019
Merged
161 changes: 152 additions & 9 deletions pymc3/step_methods/hmc/quadpotential.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@


__all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull',
'QuadPotentialFullInv', 'QuadPotentialDiagAdapt', 'isquadpotential']
'QuadPotentialFullInv', 'QuadPotentialDiagAdapt',
'QuadPotentialFullAdapt', 'isquadpotential']


def quad_potential(C, is_cov):
Expand Down Expand Up @@ -416,7 +417,7 @@ def velocity_energy(self, x, v_out):
class QuadPotentialFull(QuadPotential):
    """Basic QuadPotential object for Hamiltonian calculations."""

    def __init__(self, cov, dtype=None):
        """Compute the lower cholesky decomposition of the potential.

        Parameters
        ----------
        cov : ndarray, ndim=2
            The covariance matrix of the momentum distribution.
        dtype : np.dtype, optional
            Precision of the internal arrays. Defaults to
            ``theano.config.floatX``.
        """
        if dtype is None:
            dtype = theano.config.floatX
        self.dtype = dtype
        # Private copy: callers mutating their matrix must not silently
        # change the potential.
        self._cov = np.array(cov, dtype=self.dtype, copy=True)
        self._chol = scipy.linalg.cholesky(self._cov, lower=True)
        self._n = len(self._cov)

    def velocity(self, x, out=None):
        """Compute the current velocity at a position in parameter space."""
        return np.dot(self._cov, x, out=out)

    def random(self):
        """Draw random value from QuadPotential."""
        vals = np.random.normal(size=self._n).astype(self.dtype)
        # If cov = L L^T, solving L^T y = z for standard-normal z yields
        # y ~ N(0, cov^{-1}), the correct momentum distribution.
        return scipy.linalg.solve_triangular(self._chol.T, vals,
                                             overwrite_b=True)

    def energy(self, x, velocity=None):
        """Compute kinetic energy at a position in parameter space."""
        if velocity is None:
            velocity = self.velocity(x)
        return 0.5 * np.dot(x, velocity)

    def velocity_energy(self, x, v_out):
        """Compute velocity and return kinetic energy at a position in parameter space."""
        self.velocity(x, out=v_out)
        return self.energy(x, v_out)

    __call__ = random


class QuadPotentialFullAdapt(QuadPotentialFull):
    """Adapt a dense mass matrix using the sample covariances.

    If the parameter ``doubling`` is true, the adaptation window is doubled
    every time it is passed. This can lead to better convergence of the mass
    matrix estimation.
    """

    def __init__(
        self,
        n,
        initial_mean,
        initial_cov=None,
        initial_weight=0,
        adaptation_window=101,
        doubling=True,
        dtype=None,
    ):
        """Set up dense mass-matrix adaptation.

        Parameters
        ----------
        n : int
            Dimension of the parameter space.
        initial_mean : ndarray, ndim=1
            Starting estimate of the posterior mean.
        initial_cov : ndarray, ndim=2, optional
            Starting mass matrix; identity if not given.
        initial_weight : int
            Effective number of samples the initial estimate counts for.
        adaptation_window : int
            Number of tuning steps between foreground/background swaps.
        doubling : bool
            Whether to double the adaptation window after each swap.
        dtype : np.dtype, optional
            Precision of the internal arrays.
        """
        if initial_cov is not None and initial_cov.ndim != 2:
            raise ValueError("Initial covariance must be two-dimensional.")
        if initial_mean.ndim != 1:
            raise ValueError("Initial mean must be one-dimensional.")
        if initial_cov is not None and initial_cov.shape != (n, n):
            raise ValueError(
                "Wrong shape for initial_cov: expected %s got %s"
                % (n, initial_cov.shape)
            )
        if len(initial_mean) != n:
            raise ValueError(
                "Wrong shape for initial_mean: expected %s got %s"
                % (n, len(initial_mean))
            )

        if dtype is None:
            dtype = theano.config.floatX

        if initial_cov is None:
            initial_cov = np.eye(n, dtype=dtype)
            initial_weight = 1

        self.dtype = dtype
        self._n = n
        self._cov = np.array(initial_cov, dtype=self.dtype, copy=True)
        self._cov_theano = theano.shared(self._cov)
        self._chol = scipy.linalg.cholesky(self._cov, lower=True)
        # Deferred Cholesky failure; reported by raise_ok so sampling
        # machinery can surface it at the right time.
        self._chol_error = None
        # Two overlapping estimation windows (cf. Stan's windowed
        # adaptation): foreground drives the current mass matrix,
        # background is promoted at the end of each window.
        self._foreground_cov = _WeightedCovariance(
            self._n, initial_mean, initial_cov, initial_weight, self.dtype
        )
        self._background_cov = _WeightedCovariance(self._n, dtype=self.dtype)
        self._n_samples = 0

        self._doubling = doubling
        self._adaptation_window = int(adaptation_window)
        self._previous_update = 0

    def _update_from_weightvar(self, weightvar):
        # Refresh self._cov in place, then recompute its factorization.
        weightvar.current_covariance(out=self._cov)
        try:
            self._chol = scipy.linalg.cholesky(self._cov, lower=True)
        except scipy.linalg.LinAlgError as error:
            self._chol_error = error
        self._cov_theano.set_value(self._cov)

    def update(self, sample, grad, tune):
        """Incorporate one draw into the adaptation (tuning phase only)."""
        if not tune:
            return

        self._foreground_cov.add_sample(sample, weight=1)
        self._background_cov.add_sample(sample, weight=1)
        self._update_from_weightvar(self._foreground_cov)

        # Steps since previous update
        delta = self._n_samples - self._previous_update
        if delta >= self._adaptation_window:
            # Promote the background estimator and start a fresh one.
            self._foreground_cov = self._background_cov
            self._background_cov = _WeightedCovariance(
                self._n, dtype=self.dtype
            )

            self._previous_update = self._n_samples
            if self._doubling:
                self._adaptation_window *= 2

        self._n_samples += 1

    def raise_ok(self, vmap):
        """Raise if the last covariance update failed to factorize."""
        if self._chol_error is not None:
            raise ValueError("{0}".format(self._chol_error))


class _WeightedCovariance:
"""Online algorithm for computing mean and covariance."""
eigenfoo marked this conversation as resolved.
Show resolved Hide resolved

def __init__(
self,
nelem,
initial_mean=None,
initial_covariance=None,
initial_weight=0,
dtype="d",
):
self._dtype = dtype
self.n_samples = float(initial_weight)
if initial_mean is None:
self.mean = np.zeros(nelem, dtype="d")
else:
self.mean = np.array(initial_mean, dtype="d", copy=True)
if initial_covariance is None:
self.raw_cov = np.eye(nelem, dtype="d")
else:
self.raw_cov = np.array(initial_covariance, dtype="d", copy=True)

self.raw_cov[:] *= self.n_samples

if self.raw_cov.shape != (nelem, nelem):
raise ValueError("Invalid shape for initial covariance.")
if self.mean.shape != (nelem,):
raise ValueError("Invalid shape for initial mean.")

def add_sample(self, x, weight):
x = np.asarray(x)
self.n_samples += 1
old_diff = x - self.mean
self.mean[:] += old_diff / self.n_samples
new_diff = x - self.mean
self.raw_cov[:] += weight * new_diff[:, None] * old_diff[None, :]

def current_covariance(self, out=None):
if self.n_samples == 0:
raise ValueError("Can not compute covariance without samples.")
if out is not None:
return np.divide(self.raw_cov, self.n_samples - 1, out=out)
else:
return (self.raw_cov / (self.n_samples - 1)).astype(self._dtype)

def current_mean(self):
return np.array(self.mean, dtype=self._dtype)


try:
import sksparse.cholmod as cholmod
chol_available = True
Expand Down
61 changes: 61 additions & 0 deletions pymc3/tests/test_quadpotential.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,64 @@ def energy(self, x, velocity=None):
step = pymc3.NUTS(potential=pot)
pymc3.sample(10, init=None, step=step, chains=1)
assert called


def test_weighted_covariance(ndim=10, seed=5432):
    np.random.seed(seed)

    # Build a random SPD covariance from a lower-triangular factor with a
    # strictly positive diagonal.
    chol = np.random.randn(ndim, ndim)
    chol[np.triu_indices_from(chol, 1)] = 0.0
    diag = np.diag_indices_from(chol)
    chol[diag] = np.exp(chol[diag])
    cov = np.dot(chol, chol.T)
    mean = np.random.randn(ndim)

    draws = np.random.multivariate_normal(mean, cov, size=100)
    expected_mean = np.mean(draws, axis=0)
    expected_cov = np.cov(draws, rowvar=0)

    # The online estimator fed one draw at a time must reproduce the
    # batch mean and covariance.
    online = quadpotential._WeightedCovariance(ndim)
    for draw in draws:
        online.add_sample(draw, 1)
    assert np.allclose(online.current_mean(), expected_mean)
    assert np.allclose(online.current_covariance(), expected_cov)

    # Make sure that the weighted estimate also works: seed the estimator
    # with the first ten draws, then stream in the rest.
    warm = quadpotential._WeightedCovariance(
        ndim,
        np.mean(draws[:10], axis=0),
        np.cov(draws[:10], rowvar=0, bias=True),
        10,
    )
    for draw in draws[10:]:
        warm.add_sample(draw, 1)
    assert np.allclose(warm.current_mean(), expected_mean)
    assert np.allclose(warm.current_covariance(), expected_cov)


def test_full_adapt_sample_p(seed=4566):
    # ref: https://github.com/stan-dev/stan/pull/2672
    np.random.seed(seed)
    mass = np.array([[3.0, -2.0], [-2.0, 4.0]])
    mass_inv = np.linalg.inv(mass)

    # Element-wise variance of the sample covariance of N(0, mass) draws;
    # follows from the Wishart distribution.
    var = np.array(
        [
            [2 * mass[0, 0], mass[1, 0] * mass[1, 0] + mass[1, 1] * mass[0, 0]],
            [mass[0, 1] * mass[0, 1] + mass[1, 1] * mass[0, 0], 2 * mass[1, 1]],
        ]
    )

    n_samples = 1000
    pot = quadpotential.QuadPotentialFullAdapt(2, np.zeros(2), mass_inv, 1)
    draws = [pot.random() for _ in range(n_samples)]
    sample_cov = np.cov(draws, rowvar=0)

    # Covariance matrix within 5 sigma of expected value
    assert np.all(np.abs(mass - sample_cov) < 5 * np.sqrt(var / n_samples))