diff --git a/numpyro/diagnostics.py b/numpyro/diagnostics.py index cf3d04a9e..a3eae2af0 100644 --- a/numpyro/diagnostics.py +++ b/numpyro/diagnostics.py @@ -96,12 +96,13 @@ def _fft_next_fast_len(target): target += 1 -def autocorrelation(x, axis=0): +def autocorrelation(x, axis=0, bias=True): """ Computes the autocorrelation of samples at dimension ``axis``. :param numpy.ndarray x: the input array. :param int axis: the dimension to calculate autocorrelation. + :param bool bias: whether to use a biased estimator. :return: autocorrelation of ``x``. :rtype: numpy.ndarray """ @@ -127,25 +128,32 @@ def autocorrelation(x, axis=0): # truncate and normalize the result, then transpose back to original shape autocorr = autocorr[..., :N] - autocorr = autocorr / np.arange(N, 0.0, -1) + + # the unbiased estimator is known to have "wild" tails, due to few samples at longer lags. + # see Geyer (1992) and Priestley (1981) for a discussion. also note that it is only strictly + # unbiased when the mean is known, whereas we estimate it from samples here. + if not bias: + autocorr = autocorr / np.arange(N, 0.0, -1) + with np.errstate(invalid="ignore", divide="ignore"): autocorr = autocorr / autocorr[..., :1] return np.swapaxes(autocorr, axis, -1) -def autocovariance(x, axis=0): +def autocovariance(x, axis=0, bias=True): """ Computes the autocovariance of samples at dimension ``axis``. :param numpy.ndarray x: the input array. :param int axis: the dimension to calculate autocovariance. + :param bool bias: whether to use a biased estimator. :return: autocovariance of ``x``. :rtype: numpy.ndarray """ - return autocorrelation(x, axis) * x.var(axis=axis, keepdims=True) + return autocorrelation(x, axis, bias) * x.var(axis=axis, keepdims=True) -def effective_sample_size(x): +def effective_sample_size(x, bias=True): """ Computes effective sample size of input ``x``, where the first dimension of ``x`` is chain dimension and the second dimension of ``x`` is draw dimension. 
@@ -158,6 +166,7 @@ def effective_sample_size(x): Stan Development Team :param numpy.ndarray x: the input array. + :param bool bias: whether to use a biased estimator of the autocovariance. :return: effective sample size of ``x``. :rtype: numpy.ndarray """ @@ -166,7 +175,7 @@ def effective_sample_size(x): assert x.shape[1] >= 2 # find autocovariance for each chain at lag k - gamma_k_c = autocovariance(x, axis=1) + gamma_k_c = autocovariance(x, axis=1, bias=bias) # find autocorrelation at lag k (from Stan reference) var_within, var_estimator = _compute_chain_variance_stats(x) diff --git a/test/test_diagnostics.py b/test/test_diagnostics.py index d0a32df87..e423deab4 100644 --- a/test/test_diagnostics.py +++ b/test/test_diagnostics.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np -from numpy.testing import assert_allclose +from numpy.testing import assert_, assert_allclose import pytest from scipy.fftpack import next_fast_len @@ -60,14 +60,26 @@ def test_hpdi(): def test_autocorrelation(): x = np.arange(10.0) - actual = autocorrelation(x) + actual = autocorrelation(x, bias=False) expected = np.array([1, 0.78, 0.52, 0.21, -0.13, -0.52, -0.94, -1.4, -1.91, -2.45]) assert_allclose(actual, expected, atol=0.01) + actual = autocorrelation(x, bias=True) + expected = expected * np.arange(len(x), 0.0, -1) / len(x) + assert_allclose(actual, expected, atol=0.01) + + # the unbiased estimator has variance O(1) at large lags + x = np.random.normal(size=20000) + ac = autocorrelation(x, bias=False) + assert_(np.any(np.abs(ac[-100:]) > 0.1)) + + ac = autocorrelation(x, bias=True) + assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01) + def test_autocovariance(): x = np.arange(10.0) - actual = autocovariance(x) + actual = autocovariance(x, bias=False) expected = np.array( [8.25, 6.42, 4.25, 1.75, -1.08, -4.25, -7.75, -11.58, -15.75, -20.25] ) @@ -93,4 +105,4 @@ def test_split_gelman_rubin_agree_with_gelman_rubin(): def test_effective_sample_size(): x = 
np.arange(1000.0).reshape(100, 10) - assert_allclose(effective_sample_size(x), 52.64, atol=0.01) + assert_allclose(effective_sample_size(x, bias=False), 52.64, atol=0.01)