CAR random variables #4596

Merged Jul 27, 2021 · 31 commits

Commits
6ceccd0
add CAR random variable and change CAR distribution
aerubanov Mar 31, 2021
8e1a751
fix formating
aerubanov Mar 31, 2021
52e8608
fix ndims_params
aerubanov Mar 31, 2021
6eceea7
fix formating
aerubanov Apr 1, 2021
bc08f93
add make_node
aerubanov Apr 2, 2021
887ef7e
move convertion to at in make_node
aerubanov Apr 5, 2021
0eb57a0
add size support in rng_fn
aerubanov Apr 5, 2021
c7a4b80
change make node
aerubanov Apr 6, 2021
a485d1b
fix typo
aerubanov Apr 6, 2021
7807b44
Add CAR RandomVariable Op and refactor CAR distribution
aerubanov Mar 31, 2021
71c8d72
add 3d test for CAR logpt
aerubanov Apr 9, 2021
999b0aa
add test for CARRV rng_fn()
aerubanov Apr 13, 2021
88ce616
change test_car_rng_fn
aerubanov Apr 19, 2021
251057e
change seed setting and rebase
aerubanov May 9, 2021
9ae5c49
remove numpy usage from dist method
aerubanov May 11, 2021
f7d91a8
move symmetry check to make_node
aerubanov May 11, 2021
e2a551d
remove sparce argument
aerubanov May 12, 2021
8106990
fix logp
aerubanov May 12, 2021
6b1f143
add tests for sparse W
aerubanov May 14, 2021
26ee24d
add _infer_shape()
aerubanov Jun 15, 2021
885446b
walkaround for sparse .sum()
aerubanov Jun 16, 2021
b09faf3
move test_car_rng_fn()
aerubanov Jun 24, 2021
da22cb7
Update pymc3/distributions/multivariate.py
aerubanov Jul 12, 2021
9572625
Update pymc3/tests/test_distributions.py
aerubanov Jul 12, 2021
10f7fb1
add floatx conversion
aerubanov Jul 12, 2021
c52047e
remove hack for .sum()
aerubanov Jul 12, 2021
6668ad2
fix formating
aerubanov Jul 13, 2021
0ccd54d
add tolerance setting based on float type
aerubanov Jul 13, 2021
35c4a43
add W symmetry check
aerubanov Jul 14, 2021
5ddd1d5
add aesara assert
aerubanov Jul 14, 2021
c89dad2
add test for ndim check
aerubanov Jul 26, 2021
Files changed
155 changes: 98 additions & 57 deletions pymc3/distributions/multivariate.py
@@ -26,6 +26,7 @@

from aesara.graph.basic import Apply
from aesara.graph.op import Op
from aesara.sparse.basic import sp_sum
from aesara.tensor import gammaln, sigmoid
from aesara.tensor.nlinalg import det, eigh, matrix_inverse, trace
from aesara.tensor.random.basic import MultinomialRV, dirichlet, multivariate_normal
@@ -397,7 +398,6 @@ def __new__(cls, name, *args, **kwargs):

@classmethod
def dist(cls, a, **kwargs):

a = at.as_tensor_variable(a)
# mean = a / at.sum(a)
# mode = at.switch(at.all(a > 1), (a - 1) / at.sum(a - 1), np.nan)
@@ -491,7 +491,6 @@ class Multinomial(Discrete):

@classmethod
def dist(cls, n, p, *args, **kwargs):

p = p / at.sum(p, axis=-1, keepdims=True)
n = at.as_tensor_variable(n)
p = at.as_tensor_variable(p)
@@ -1925,6 +1924,73 @@ def _distr_parameters_for_repr(self):
return ["mu"]


class CARRV(RandomVariable):
name = "car"
ndim_supp = 1
ndims_params = [1, 2, 0, 0]
dtype = "floatX"
_print_name = ("CAR", "\\operatorname{CAR}")

def make_node(self, rng, size, dtype, mu, W, alpha, tau):
mu = at.as_tensor_variable(floatX(mu))

W = aesara.sparse.as_sparse_or_tensor_variable(floatX(W))
if not W.ndim == 2:
raise ValueError("W must be a symmetric adjacency matrix.")

tau = at.as_tensor_variable(floatX(tau))
alpha = at.as_tensor_variable(floatX(alpha))

return super().make_node(rng, size, dtype, mu, W, alpha, tau)

def _infer_shape(self, size, dist_params, param_shapes=None):
shape = tuple(size) + tuple(dist_params[0].shape)
return shape

@classmethod
def rng_fn(cls, rng: np.random.RandomState, mu, W, alpha, tau, size):
"""
Implementation of algorithm from paper
Havard Rue, 2001. "Fast sampling of Gaussian Markov random fields,"
Journal of the Royal Statistical Society Series B, Royal Statistical Society,
vol. 63(2), pages 325-338. DOI: 10.1111/1467-9868.00288
"""
if not scipy.sparse.issparse(W):
W = scipy.sparse.csr_matrix(W)
s = np.asarray(W.sum(axis=0))[0]
D = scipy.sparse.diags(s)
tau = scipy.sparse.csr_matrix(tau)
alpha = scipy.sparse.csr_matrix(alpha)

Q = tau.multiply(D - alpha.multiply(W))

perm_array = scipy.sparse.csgraph.reverse_cuthill_mckee(Q, symmetric_mode=True)
inv_perm = np.argsort(perm_array)

Q = Q[perm_array, :][:, perm_array]

Qb = Q.diagonal()
u = 1
while np.count_nonzero(Q.diagonal(u)) > 0:
Qb = np.vstack((np.pad(Q.diagonal(u), (u, 0), constant_values=(0, 0)), Qb))
u += 1

L = scipy.linalg.cholesky_banded(Qb, lower=False)

size = tuple(size or ())
if size:
mu = np.broadcast_to(mu, size + mu.shape)
z = rng.normal(size=mu.shape)
samples = np.empty(z.shape)
for idx in np.ndindex(mu.shape[:-1]):
samples[idx] = scipy.linalg.cho_solve_banded((L, False), z[idx]) + mu[idx][perm_array]
samples = samples[..., inv_perm]
return samples


car = CARRV()


class CAR(Continuous):
r"""
Likelihood for a conditional autoregression. This is a special case of the
@@ -1966,45 +2032,13 @@ class CAR(Continuous):
"Generalized Hierarchical Multivariate CAR Models for Areal Data"
Biometrics, Vol. 61, No. 4 (Dec., 2005), pp. 950-961
"""
rv_op = car

def __init__(self, mu, W, alpha, tau, sparse=False, *args, **kwargs):
super().__init__(*args, **kwargs)

D = W.sum(axis=0)
d, _ = W.shape

self.d = d
self.median = self.mode = self.mean = self.mu = at.as_tensor_variable(mu)
self.sparse = sparse

if not W.ndim == 2 or not np.allclose(W, W.T):
raise ValueError("W must be a symmetric adjacency matrix.")

if sparse:
W_sparse = scipy.sparse.csr_matrix(W)
self.W = aesara.sparse.as_sparse_variable(W_sparse)
else:
self.W = at.as_tensor_variable(W)

# eigenvalues of D^−1/2 * W * D^−1/2
Dinv_sqrt = np.diag(1 / np.sqrt(D))
DWD = np.matmul(np.matmul(Dinv_sqrt, W), Dinv_sqrt)
self.lam = scipy.linalg.eigvalsh(DWD)
self.D = at.as_tensor_variable(D)

tau = at.as_tensor_variable(tau)
if tau.ndim > 0:
self.tau = tau[:, None]
else:
self.tau = tau

alpha = at.as_tensor_variable(alpha)
if alpha.ndim > 0:
self.alpha = alpha[:, None]
else:
self.alpha = alpha
@classmethod
def dist(cls, mu, W, alpha, tau, *args, **kwargs):
return super().dist([mu, W, alpha, tau], **kwargs)

def logp(self, value):
def logp(value, mu, W, alpha, tau):
"""
Calculate log-probability of a CAR-distributed vector
at specified value. This log probability function differs from
@@ -2021,30 +2055,37 @@ def logp(self, value):
TensorVariable
"""

sparse = isinstance(W, aesara.sparse.SparseVariable)

if sparse:
D = sp_sum(W, axis=0)
Dinv_sqrt = at.diag(1 / at.sqrt(D))
DWD = at.dot(aesara.sparse.dot(Dinv_sqrt, W), Dinv_sqrt)
else:
D = W.sum(axis=0)
Dinv_sqrt = at.diag(1 / at.sqrt(D))
DWD = at.dot(at.dot(Dinv_sqrt, W), Dinv_sqrt)
lam = at.slinalg.eigvalsh(DWD, at.eye(DWD.shape[0]))

d, _ = W.shape

if value.ndim == 1:
value = value[None, :]

logtau = self.d * at.log(self.tau).sum()
logdet = at.log(1 - self.alpha.T * self.lam[:, None]).sum()
delta = value - self.mu
logtau = d * at.log(tau).sum()
logdet = at.log(1 - alpha.T * lam[:, None]).sum()
delta = value - mu

if self.sparse:
Wdelta = aesara.sparse.dot(delta, self.W)
if sparse:
Wdelta = aesara.sparse.dot(delta, W)
else:
Wdelta = at.dot(delta, self.W)
Wdelta = at.dot(delta, W)

tau_dot_delta = self.D[None, :] * delta - self.alpha * Wdelta
logquad = (self.tau * delta * tau_dot_delta).sum(axis=-1)
tau_dot_delta = D[None, :] * delta - alpha * Wdelta
logquad = (tau * delta * tau_dot_delta).sum(axis=-1)
return bound(
0.5 * (logtau + logdet - logquad),
self.alpha >= -1,
self.alpha <= 1,
self.tau > 0,
broadcast_conditions=False,
at.all(alpha <= 1),
at.all(alpha >= -1),
tau > 0,
)

def random(self, point=None, size=None):
raise NotImplementedError("Sampling from a CAR distribution is not supported.")

def _distr_parameters_for_repr(self):
return ["mu", "W", "alpha", "tau"]
11 changes: 6 additions & 5 deletions pymc3/tests/conftest.py
@@ -18,11 +18,12 @@

import pymc3 as pm

# @pytest.fixture(scope="function", autouse=True)
# def aesara_config():
# config = aesara.config.change_flags(compute_test_value="raise")
# with config:
# yield

@pytest.fixture(scope="function", autouse=True)
def aesara_config():
config = aesara.config.change_flags(on_opt_error="raise")
with config:
yield


@pytest.fixture(scope="function", autouse=True)
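
The replacement fixture flips Aesara's `on_opt_error` flag so that a failing graph rewrite raises instead of being skipped with a warning, which surfaces problems in the new sparse code paths as hard test failures. The same flag can be applied outside the fixture (a sketch):

```python
import aesara

# Inside this context a failing graph optimization raises immediately
# instead of being skipped with a warning.
with aesara.config.change_flags(on_opt_error="raise"):
    pass  # build and compile Aesara functions here
```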
27 changes: 19 additions & 8 deletions pymc3/tests/test_distributions.py
@@ -3032,41 +3032,52 @@ def test_ordered_probit_probs():
assert isinstance(x, TensorVariable)


@pytest.mark.xfail(reason="Distribution not refactored yet")
@pytest.mark.parametrize("size", [(1,), (4,)], ids=str)
def test_car_logp(size):
@pytest.mark.parametrize(
"sparse, size",
[(False, ()), (False, (1,)), (False, (4,)), (False, (4, 4, 4)), (True, ()), (True, (4,))],
ids=str,
)
def test_car_logp(sparse, size):
"""
Tests the log probability function for the CAR distribution by checking
against Scipy's multivariate normal logpdf, up to an additive constant.
The formula used by the CAR logp implementation omits several additive terms.
"""
np.random.seed(1)

xs = np.random.randn(*size)

# d x d adjacency matrix for a square (d=4) of rook-adjacent sites
W = np.array(
[[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
)

tau = 2.0
tau = 2
alpha = 0.5
mu = np.zeros(4)

xs = np.random.randn(*(size + mu.shape))

# Compute CAR covariance matrix and resulting MVN logp
D = W.sum(axis=0)
prec = tau * (np.diag(D) - alpha * W)
cov = np.linalg.inv(prec)
scipy_logp = scipy.stats.multivariate_normal.logpdf(xs, mu, cov)

car_logp = logp(CAR.dist(mu, W, alpha, tau, size=size), xs).eval()
W = aesara.tensor.as_tensor_variable(W)
if sparse:
W = aesara.sparse.csr_from_dense(W)

car_dist = CAR.dist(mu, W, alpha, tau, size=size)
car_logp = logp(car_dist, xs).eval()

# Check to make sure that the CAR and MVN log PDFs are equivalent
# up to an additive constant which is independent of the CAR parameters
delta_logp = scipy_logp - car_logp

# Check to make sure all the delta values are identical.
assert np.allclose(delta_logp - delta_logp[0], 0.0)
tol = 1e-08
if aesara.config.floatX == "float32":
tol = 1e-5
assert np.allclose(delta_logp - delta_logp[0], 0.0, atol=tol)


class TestBugfixes:
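
The additive constant that `test_car_logp` tolerates follows from the determinant identity behind the refactored `logp`: with Q = tau * (D - alpha * W) and lam the eigenvalues of D^(-1/2) W D^(-1/2), log|Q| = d*log(tau) + sum(log(diag(D))) + sum(log(1 - alpha*lam)). The implementation keeps only the tau- and alpha-dependent pieces; sum(log(diag(D))) and the -d/2 * log(2*pi) normalizer are constant in the CAR parameters. A standalone numpy check of the identity (a sketch, not part of the diff):

```python
import numpy as np

W = np.array(
    [[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
)
tau, alpha = 2.0, 0.5
d = W.shape[0]

D = W.sum(axis=0)
Q = tau * (np.diag(D) - alpha * W)  # CAR precision matrix

# Eigenvalues of D^(-1/2) W D^(-1/2), as computed in logp
Dinv_sqrt = np.diag(1 / np.sqrt(D))
lam = np.linalg.eigvalsh(Dinv_sqrt @ W @ Dinv_sqrt)

lhs = np.linalg.slogdet(Q)[1]
rhs = d * np.log(tau) + np.log(D).sum() + np.log(1 - alpha * lam).sum()
assert np.isclose(lhs, rhs)  # log|Q| splits into parameter terms + a constant
```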
40 changes: 40 additions & 0 deletions pymc3/tests/test_distributions_random.py
@@ -2353,3 +2353,43 @@ def test_with_cov_rv(self, sample_shape, dist_shape, mu_shape):
prior = pm.sample_prior_predictive(samples=sample_shape)

assert prior["mv"].shape == to_tuple(sample_shape) + dist_shape


@pytest.mark.parametrize("sparse", [True, False])
def test_car_rng_fn(sparse):
delta = 0.05 # limit for KS p-value
n_fails = 20 # Allows the KS fails a certain number of times
size = (100,)

W = np.array(
[[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
)

tau = 2
alpha = 0.5
mu = np.array([1, 1, 1, 1])

D = W.sum(axis=0)
prec = tau * (np.diag(D) - alpha * W)
cov = np.linalg.inv(prec)
W = aesara.tensor.as_tensor_variable(W)
if sparse:
W = aesara.sparse.csr_from_dense(W)

with pm.Model(rng_seeder=1):
car = pm.CAR("car", mu, W, alpha, tau, size=size)
mn = pm.MvNormal("mn", mu, cov, size=size)
check = pm.sample_prior_predictive(n_fails)

p, f = delta, n_fails
while p <= delta and f > 0:
car_smp, mn_smp = check["car"][f - 1, :, :], check["mn"][f - 1, :, :]
p = min(
st.ks_2samp(
np.atleast_1d(car_smp[..., idx]).flatten(),
np.atleast_1d(mn_smp[..., idx]).flatten(),
)[1]
for idx in range(car_smp.shape[-1])
)
f -= 1
assert p > delta
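
The `rng_fn` exercised by this test factorizes the precision matrix in LAPACK upper-banded storage, after a reverse Cuthill-McKee reordering that keeps the bandwidth small. A standalone sketch of just the banded-storage step, skipping the reordering (it only matters for large sparse graphs):

```python
import numpy as np
import scipy.linalg
import scipy.sparse

# Same 4-site precision matrix as above: Q = tau * (D - alpha * W)
W = scipy.sparse.csr_matrix(
    [[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
)
D = scipy.sparse.diags(np.asarray(W.sum(axis=0))[0])
Q = 2.0 * (D - 0.5 * W)

# Stack each nonzero superdiagonal above the main diagonal, left-padded
# with zeros -- the upper-banded layout that cholesky_banded expects.
Qb = Q.diagonal()
u = 1
while np.count_nonzero(Q.diagonal(u)) > 0:
    Qb = np.vstack((np.pad(Q.diagonal(u), (u, 0), constant_values=0), Qb))
    u += 1

L_band = scipy.linalg.cholesky_banded(Qb, lower=False)

# Cross-check: the last row of the banded factor is the main diagonal
# of the dense upper-triangular Cholesky factor.
U_dense = scipy.linalg.cholesky(Q.toarray(), lower=False)
assert np.allclose(L_band[-1], np.diag(U_dense))
```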