Skip to content

Commit

Permalink
use biased autocorrelation estimate by default (resolves #1785) (#1877)
Browse files Browse the repository at this point in the history
* use biased autocorrelation estimate by default (resolves #1785)

* increase tolerance in unbiased autocorrelation test

* review comments, fixed autocovariance test.
  • Loading branch information
jonny-so authored Oct 6, 2024
1 parent 0791b1f commit e462292
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 10 deletions.
21 changes: 15 additions & 6 deletions numpyro/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,13 @@ def _fft_next_fast_len(target):
target += 1


def autocorrelation(x, axis=0):
def autocorrelation(x, axis=0, bias=True):
"""
Computes the autocorrelation of samples at dimension ``axis``.
:param numpy.ndarray x: the input array.
:param int axis: the dimension to calculate autocorrelation.
:param bias: whether to use a biased estimator.
:return: autocorrelation of ``x``.
:rtype: numpy.ndarray
"""
Expand All @@ -127,25 +128,32 @@ def autocorrelation(x, axis=0):

# truncate and normalize the result, then transpose back to original shape
autocorr = autocorr[..., :N]
autocorr = autocorr / np.arange(N, 0.0, -1)

    # the unbiased estimator is known to have "wild" tails, due to few samples at longer lags.
    # see Geyer (1992) and Priestley (1981) for a discussion. also note that it is only strictly
    # unbiased when the mean is known, whereas here it is estimated from the samples.
if not bias:
autocorr = autocorr / np.arange(N, 0.0, -1)

with np.errstate(invalid="ignore", divide="ignore"):
autocorr = autocorr / autocorr[..., :1]
return np.swapaxes(autocorr, axis, -1)


def autocovariance(x, axis=0, bias=True):
    """
    Computes the autocovariance of samples at dimension ``axis``.

    :param numpy.ndarray x: the input array.
    :param int axis: the dimension to calculate autocovariance.
    :param bool bias: whether to use a biased estimator (the biased form has
        much lower variance at long lags; see the discussion in
        ``autocorrelation``).
    :return: autocovariance of ``x``.
    :rtype: numpy.ndarray
    """
    # Scale the (possibly biased) autocorrelation by the sample variance
    # along the requested axis; keepdims preserves broadcasting shape.
    return autocorrelation(x, axis, bias) * x.var(axis=axis, keepdims=True)


def effective_sample_size(x):
def effective_sample_size(x, bias=True):
"""
Computes effective sample size of input ``x``, where the first dimension of
``x`` is chain dimension and the second dimension of ``x`` is draw dimension.
Expand All @@ -158,6 +166,7 @@ def effective_sample_size(x):
Stan Development Team
:param numpy.ndarray x: the input array.
:param bias: whether to use a biased estimator of the autocovariance.
:return: effective sample size of ``x``.
:rtype: numpy.ndarray
"""
Expand All @@ -166,7 +175,7 @@ def effective_sample_size(x):
assert x.shape[1] >= 2

# find autocovariance for each chain at lag k
gamma_k_c = autocovariance(x, axis=1)
gamma_k_c = autocovariance(x, axis=1, bias=bias)

# find autocorrelation at lag k (from Stan reference)
var_within, var_estimator = _compute_chain_variance_stats(x)
Expand Down
20 changes: 16 additions & 4 deletions test/test_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_, assert_allclose
import pytest
from scipy.fftpack import next_fast_len

Expand Down Expand Up @@ -60,14 +60,26 @@ def test_hpdi():

def test_autocorrelation():
    # Deterministic check against precomputed values for the unbiased estimator.
    x = np.arange(10.0)
    actual = autocorrelation(x, bias=False)
    expected = np.array([1, 0.78, 0.52, 0.21, -0.13, -0.52, -0.94, -1.4, -1.91, -2.45])
    assert_allclose(actual, expected, atol=0.01)

    # The biased estimator differs from the unbiased one by a factor of
    # (N - k) / N at lag k.
    actual = autocorrelation(x, bias=True)
    expected = expected * np.arange(len(x), 0.0, -1) / len(x)
    assert_allclose(actual, expected, atol=0.01)

    # the unbiased estimator has variance O(1) at large lags
    # NOTE(review): unseeded randomness — with size=20000 the assertions below
    # hold with overwhelming probability, but consider seeding for full
    # reproducibility.
    x = np.random.normal(size=20000)
    ac = autocorrelation(x, bias=False)
    assert_(np.any(np.abs(ac[-100:]) > 0.1))

    # the biased estimator shrinks long-lag estimates toward zero
    ac = autocorrelation(x, bias=True)
    assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01)


def test_autocovariance():
x = np.arange(10.0)
actual = autocovariance(x)
actual = autocovariance(x, bias=False)
expected = np.array(
[8.25, 6.42, 4.25, 1.75, -1.08, -4.25, -7.75, -11.58, -15.75, -20.25]
)
Expand All @@ -93,4 +105,4 @@ def test_split_gelman_rubin_agree_with_gelman_rubin():

def test_effective_sample_size():
    # 100 chains x 10 draws of a deterministic ramp; expected value is a
    # precomputed regression constant for the unbiased estimator.
    x = np.arange(1000.0).reshape(100, 10)
    assert_allclose(effective_sample_size(x, bias=False), 52.64, atol=0.01)

0 comments on commit e462292

Please sign in to comment.