Skip to content

Commit

Permalink
add test for CARRV rng_fn()
Browse files Browse the repository at this point in the history
  • Loading branch information
aerubanov committed Apr 13, 2021
1 parent 20befe6 commit d0bc5a5
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 7 deletions.
16 changes: 10 additions & 6 deletions pymc3/distributions/multivariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1978,25 +1978,29 @@ def rng_fn(cls, rng: np.random.RandomState, mu, W, alpha, tau, size):
tau = scipy.sparse.csr_matrix(tau)
alpha = scipy.sparse.csr_matrix(alpha)

perm_array = scipy.sparse.csgraph.reverse_cuthill_mckee(W, symmetric_mode=True)
W = W[perm_array, :]
W = W[:, perm_array]

Q = tau.multiply(D - alpha.multiply(W))

perm_array = scipy.sparse.csgraph.reverse_cuthill_mckee(Q, symmetric_mode=True)
inv_perm = np.argsort(perm_array)

Q = Q[perm_array, :][:, perm_array]

Qb = Q.diagonal()
u = 1
while np.count_nonzero(Q.diagonal(u)) > 0:
Qb = np.vstack((np.pad(Q.diagonal(u), (u, 0), constant_values=(0, 0)), Qb))
u += 1

L = scipy.linalg.cholesky_banded(Qb, lower=False)

size = tuple(size or ())
if size:
mu = np.broadcast_to(mu, size + mu.shape)
z = rng.normal(loc=mu)
z = rng.normal(size=mu.shape)
samples = np.empty(z.shape)
for idx in np.ndindex(mu.shape[:-1]):
samples[idx] = scipy.linalg.cho_solve_banded((L, False), z[idx])
samples[idx] = scipy.linalg.cho_solve_banded((L, False), z[idx]) + mu[idx][perm_array]
samples = samples[..., inv_perm]
return samples


Expand Down
33 changes: 32 additions & 1 deletion pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2781,7 +2781,7 @@ def test_car_logp(size):
scipy_logp = scipy.stats.multivariate_normal.logpdf(xs, mu, cov)

car_dist = CAR.dist(mu, W, alpha, tau, size=size)
#xs = np.broadcast_to(xs, size + mu.shape)
# xs = np.broadcast_to(xs, size + mu.shape)
car_logp = logpt(car_dist, xs).eval()

# Check to make sure that the CAR and MVN log PDFs are equivalent
Expand All @@ -2792,6 +2792,37 @@ def test_car_logp(size):
assert np.allclose(delta_logp - delta_logp[0], 0.0)


@pytest.mark.parametrize("size", [(100,), (100, 2)], ids=str)
def test_car_rng_fn(size):
    """Sampling-based check that ``CAR.rng_fn`` agrees with the equivalent MvNormal.

    The CAR precision matrix is ``tau * (D - alpha * W)``; its inverse is the
    covariance of the corresponding multivariate normal. Prior-predictive draws
    from both distributions are compared with a two-sample KS test. Because the
    check is stochastic, it is retried up to ``n_fails`` times before failing.

    NOTE(review): applying ``ks_2samp`` to the flattened samples only compares
    the pooled marginal distributions — it cannot detect differences in the
    cross-component correlation structure. A per-component KS test, or a
    spatial statistic such as Moran's I, would be a stronger check; confirm
    whether that is needed here.
    """
    delta = 0.05  # significance threshold for the KS p-value
    n_fails = 10  # allow the stochastic KS test to fail this many times

    # Symmetric 4-node adjacency matrix: two pairs of mutually adjacent nodes.
    W = np.array(
        [[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
    )

    tau = 2
    alpha = 0.5
    mu = np.array([1, 1, 1, 1])

    D = W.sum(axis=0)  # node degrees
    # CAR precision; its inverse is the covariance of the reference MvNormal.
    prec = tau * (np.diag(D) - alpha * W)
    cov = np.linalg.inv(prec)

    # Start with p == delta so the loop body runs at least once.
    p, f = delta, n_fails
    while p <= delta and f > 0:
        with Model():
            car = pm.CAR("car", mu, W, alpha, tau, size=size)
            mn = pm.MvNormal("mn", mu, cov, size=size)
            check = pm.sample_prior_predictive(100)
        car_smp, mn_smp = check["car"], check["mn"]
        _, p = scipy.stats.ks_2samp(
            np.atleast_1d(car_smp).flatten(), np.atleast_1d(mn_smp).flatten()
        )
        f -= 1
    assert p > delta


class TestBugfixes:
@pytest.mark.parametrize(
"dist_cls,kwargs", [(MvNormal, dict(mu=0)), (MvStudentT, dict(mu=0, nu=2))]
Expand Down

0 comments on commit d0bc5a5

Please sign in to comment.