Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pymc3/step_methods/hmc/base_hmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
if isinstance(scaling, dict):
scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars)

n = scaling.shape[0]
n = model.dict_to_array(model.test_point).shape[0]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This indeed looks like a bug!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you submit this separately?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a better way to get at the number of dimensions? This seems a bit like a hack to me...

Copy link
Member

@twiecki twiecki Feb 3, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, I don't think there is though. Perhaps we should push this into Model as a property ndim.

self.step_size = step_scale / (n ** 0.25)
self.potential = quad_potential(scaling, is_cov, as_cov=False)

Expand Down
155 changes: 153 additions & 2 deletions pymc3/step_methods/hmc/quadpotential.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
from numpy import dot
from numpy.random import normal
import scipy.linalg
from scipy import linalg
from theano.tensor import slinalg
import theano.tensor as tt
import theano
from scipy.sparse import issparse
import itertools

from pymc3.theanof import floatX

import numpy as np

__all__ = ['quad_potential', 'ElemWiseQuadPotential', 'QuadPotential',
'QuadPotential_Inv', 'isquadpotential']
'QuadPotential_Inv', 'isquadpotential', 'QuadPotentialSample']


def quad_potential(C, is_cov, as_cov):
Expand All @@ -18,7 +22,8 @@ def quad_potential(C, is_cov, as_cov):
----------
C : arraylike, 0 <= ndim <= 2
scaling matrix for the potential
vector treated as diagonal matrix
vector treated as diagonal matrix.
If C is already a potential, it is returned unchanged.
is_cov : Boolean
whether C is provided as a covariance matrix or hessian
as_cov : Boolean
Expand All @@ -29,6 +34,8 @@ def quad_potential(C, is_cov, as_cov):
-------
q : Quadpotential
"""
if isquadpotential(C):
return C
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also a good feature regardless of the stuff below (for the separate PR).


if issparse(C):
if not chol_available:
Expand Down Expand Up @@ -135,6 +142,150 @@ def energy(self, x):

__call__ = random


def _solve_lanczos(V, alphas, betas, estimator=None):
e = np.zeros(len(alphas))
e[0] = 1
T = [alphas, betas]
# TODO Is there a way to avoid computing all eigenvectors?
eig, eigv = linalg.eig_banded(T, lower=True)
if estimator is None:
if np.any(eig < 0):
raise ValueError("Matrix is not positive definite")
eig = 1 / np.sqrt(eig)
else:
eig = estimator(eig)
f_T = eigv @ (eig * np.linalg.solve(eigv, e))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we currently still support python 2.7.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ups, that happens when you develop in the notebook...

return np.dot(np.transpose(V), f_T)


def _sample_mvn_lanczos(matprot, z, epsilon, maxiter=None,
verbose=False, estimator=None):
"""Sample from a multivariate normal using matrix-free lanczos.

Given a matrix :math:`A`, sample from :math:`N(0, A^{-1})`, using
only matrix vector products :math:`Ax`. This is done by approximating
:math:`A^{-1/2}z` for a :math:`z \sim N(0, I)` using a Lanczos process
as described in [1].

Parameters
----------
matprot : function(x) -> Ax
A function that computes the matrix-vector product :math:`Ax`.
z : ndarray
A standard normal distributed vector.
epsilon : float
Stop iteration, if the relative change of the estimate is below
`epsilon`.
maxiter : int, optional
Maximum number of iterations.
estimator : function, optional
A function that maps the eigenvalues of the krylov subspace to
what is supposed to be estimated. By default this is
`lambda eig: 1 / np.sqrt(eig)`. If you want to sample from
:math:`N(0, A)` instead of from :math:`N(0, A^{-1/2})`, you can
set this to `lambda eig: np.sqrt(eig)`.

References
----------
[1] Chow, E., and Y. Saad. “Preconditioned Krylov Subspace Methods for
Sampling Multivariate Gaussian Distributions.” SIAM Journal on
Scientific Computing 36, no. 2 (January 1, 2014): A588–608.
doi:10.1137/130920587.
"""
n = len(z)
z_norm = np.linalg.norm(z)
z = z / z_norm
q = z.copy()
beta = 1

alphas = []
betas = []
V = []

for n in itertools.count():
if maxiter is not None and n > maxiter:
raise ArithmeticError("Maximum number of iterations reached.")
v = q / beta
if n > 0:
q = matprot(v) - beta * V[-1]
else:
q = matprot(v)

alpha = v @ q
q = q - alpha * v
beta = np.linalg.norm(q)

alphas.append(alpha)
betas.append(beta)
V.append(v)

y_new = _solve_lanczos(V, alphas, betas, estimator)

if beta == 0:
return z_norm * y_new

if n > 0:
error = np.linalg.norm(y_new - y_old) / np.linalg.norm(y_new)
if np.isnan(error):
raise ArithmeticError("Current error is nan.")
if verbose and n % 100 == 0:
print(error)
if error < epsilon:
return z_norm * y_new
y_old = y_new


class QuadPotentialSample:
def __init__(self, samples, lam=0.01, epsilon=1e-8, maxiter=None):
"""Use posterior samples to estimate the covariance.

Parameters
----------
samples : ndarray
An array of posterior samples with shape `(s, n)`, where
`s` is the number of samples and `n` the number of parameters.
lam : float
Regularization parameter. Use :math:`(1 - \lambda)\Sigma +
\lambda D` as covariance, where `\Sigma` is the sample covariance
and D is the diagonal matrix containing the sample variances.
epsilon : float, optional
Error tolerance used in the convergence criterion of the
lanczos process.
maxiter : int, optional
The maximum number of iterations in the lanczos process.
"""
self.k, self.n = samples.shape
self.lam = lam
self.samples = samples.astype(np.float64).copy()
self.samples[:] -= self.samples.mean(axis=0)
self.stds = self.samples.std(axis=0)
self.vars = self.stds ** 2
self.epsilon = epsilon
self.maxiter = maxiter

def velocity(self, x):
samples = theano.shared(floatX(np.copy(self.samples, 'C')))
D = theano.shared(floatX(self.vars))
Ax = tt.dot(samples, x)
Ax = tt.dot(samples.T, Ax) / (self.k - 1)
return floatX(1 - self.lam) * Ax + floatX(self.lam) * D * x

def matvec(self, x):
Ax = self.samples.T.dot(self.samples.dot(x)) / (self.k - 1)
return (1 - self.lam) * Ax + self.lam * x * self.vars

def energy(self, x):
return floatX(0.5) * x.dot(self.velocity(x))

def random(self):
b = normal(size=self.n)
# Use the diagonal matrix as preconditioner
matvec = lambda x: self.matvec(x / self.stds) / self.stds
sample = _sample_mvn_lanczos(matvec, b, self.epsilon, self.maxiter)
return floatX(sample / self.stds)


try:
import sksparse.cholmod as cholmod
chol_available = True
Expand Down
67 changes: 50 additions & 17 deletions pymc3/tests/test_quadpotential.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import scipy.sparse
import theano.tensor as tt
import theano
import numpy.testing as npt

from pymc3.step_methods.hmc import quadpotential

Expand Down Expand Up @@ -64,23 +65,35 @@ def test_equal_diag():
x_ = np.random.randn(5)
x = tt.vector()
x.tag.test_value = x_

samples = np.random.randn(100000, 5) * np.sqrt(diag)

pots = [
quadpotential.quad_potential(diag, False, False),
quadpotential.quad_potential(1. / diag, True, False),
quadpotential.quad_potential(np.diag(diag), False, False),
quadpotential.quad_potential(np.diag(1. / diag), True, False),
quadpotential.quad_potential(diag, True, False),
quadpotential.quad_potential(1. / diag, False, False),
quadpotential.quad_potential(np.diag(diag), True, False),
quadpotential.quad_potential(np.diag(1. / diag), False, False),
]
if quadpotential.chol_available:
diag_ = scipy.sparse.csc_matrix(np.diag(1. / diag))
diag_ = scipy.sparse.csc_matrix(np.diag(diag))
pots.append(quadpotential.quad_potential(diag_, True, False))

v = np.diag(1. / diag).dot(x_)
e = x_.dot(np.diag(1. / diag).dot(x_)) / 2
for pot in pots:
pots.append(quadpotential.QuadPotentialSample(samples, 0))

v = np.diag(diag).dot(x_)
e = x_.dot(np.diag(diag).dot(x_)) / 2
for pot in pots[:-1]:
v_function = theano.function([x], pot.velocity(x))
e_function = theano.function([x], pot.energy(x))
npt.assert_allclose(v_function(x_), v)
npt.assert_allclose(e_function(x_), e)

# QuadPotentialSample is less accurate
for pot in pots[-1:]:
v_function = theano.function([x], pot.velocity(x))
e_function = theano.function([x], pot.energy(x))
assert np.allclose(v_function(x_), v)
assert np.allclose(e_function(x_), e)
npt.assert_allclose(v_function(x_), v, rtol=0.2)
npt.assert_allclose(e_function(x_), e, rtol=0.2)


def test_equal_dense():
Expand All @@ -94,35 +107,51 @@ def test_equal_dense():
x_ = np.random.randn(5)
x = tt.vector()
x.tag.test_value = x_

samples = np.random.multivariate_normal(np.zeros_like(x_), cov, 1000)

pots = [
quadpotential.quad_potential(cov, False, False),
quadpotential.quad_potential(inv, True, False),
quadpotential.quad_potential(cov, True, False),
quadpotential.quad_potential(inv, False, False),
]
if quadpotential.chol_available:
pots.append(quadpotential.quad_potential(cov, False, False))
pots.append(quadpotential.quad_potential(cov, True, False))

pots.append(quadpotential.QuadPotentialSample(samples, 0))

v = np.linalg.solve(cov, x_)
v = np.dot(cov, x_)
e = 0.5 * x_.dot(v)
for pot in pots:
for pot in pots[:-1]:
v_function = theano.function([x], pot.velocity(x))
e_function = theano.function([x], pot.energy(x))
assert np.allclose(v_function(x_), v)
assert np.allclose(e_function(x_), e)
npt.assert_allclose(v_function(x_), v)
npt.assert_allclose(e_function(x_), e)

for pot in pots[:-1]:
v_function = theano.function([x], pot.velocity(x))
e_function = theano.function([x], pot.energy(x))
npt.assert_allclose(v_function(x_), v, rtol=0.5)
npt.assert_allclose(e_function(x_), e, rtol=0.5)


def test_random_diag():
d = np.arange(10) + 1
np.random.seed(42)

samples = np.random.randn(1000, 10) * np.sqrt(d)

pots = [
quadpotential.quad_potential(d, True, False),
quadpotential.quad_potential(1./d, False, False),
quadpotential.quad_potential(np.diag(d), True, False),
quadpotential.quad_potential(np.diag(1./d), False, False),
quadpotential.QuadPotentialSample(samples, 0),
]
if quadpotential.chol_available:
d_ = scipy.sparse.csc_matrix(np.diag(d))
pot = quadpotential.quad_potential(d, True, False)
pots.append(pot)

for pot in pots:
vals = np.array([pot.random() for _ in range(1000)])
assert np.allclose(vals.std(0), np.sqrt(1./d), atol=0.1)
Expand All @@ -137,13 +166,17 @@ def test_random_dense():
inv = np.linalg.inv(cov)
assert np.allclose(inv.dot(cov), np.eye(5))

samples = np.random.multivariate_normal(np.zeros(5), cov, 1000)

pots = [
quadpotential.QuadPotential(cov),
quadpotential.QuadPotential_Inv(inv),
quadpotential.QuadPotentialSample(samples, 0),
]
if quadpotential.chol_available:
pot = quadpotential.QuadPotential_Sparse(scipy.sparse.csc_matrix(cov))
pots.append(pot)

for pot in pots:
cov_ = np.cov(np.array([pot.random() for _ in range(1000)]).T)
assert np.allclose(cov_, inv, atol=0.1)