Add DEMetropolisZ stepper #3784

Merged · Feb 5, 2020 · 16 commits
2 changes: 2 additions & 0 deletions RELEASE-NOTES.md
@@ -5,6 +5,8 @@
### New features
- use [fastprogress](https://github.com/fastai/fastprogress) instead of tqdm [#3693](https://github.com/pymc-devs/pymc3/pull/3693)
- `DEMetropolis` can now tune both `lambda` and `scaling` parameters, but by default neither of them are tuned. See [#3743](https://github.com/pymc-devs/pymc3/pull/3743) for more info.
- `DEMetropolisZ`, an improved variant of `DEMetropolis`, brings better parallelization and higher efficiency with fewer chains, at the cost of slower initial convergence. This implementation is experimental. See [#3784](https://github.com/pymc-devs/pymc3/pull/3784) for more info, and the usage sketch below.
- Notebooks that give insight into `DEMetropolis`, `DEMetropolisZ` and the `DifferentialEquation` interface are now located in the [Tutorials/Deep Dive]() section.
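
A minimal usage sketch of the new stepper (the toy model and tuning lengths are illustrative assumptions, not recommendations):

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal('x', mu=0, sigma=1, shape=10)
    # DEMetropolisZ needs a long enough tuning phase to build up its history;
    # by default 90% of the tuning history is dropped when tuning ends.
    trace = pm.sample(
        tune=5000, draws=2000, cores=1, chains=4,
        step=pm.DEMetropolisZ(tune='lambda'),
    )
```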

### Maintenance
- Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6.
2,066 changes: 2,066 additions & 0 deletions docs/source/notebooks/DEMetropolisZ_EfficiencyComparison.ipynb

Large diffs are not rendered by default.

1,795 changes: 1,795 additions & 0 deletions docs/source/notebooks/DEMetropolisZ_tune_drop_fraction.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion docs/source/notebooks/table_of_contents_examples.js
@@ -56,6 +56,5 @@ Gallery.contents = {
"GLM-hierarchical-advi-minibatch": "Variational Inference",
"ODE_with_manual_gradients": "Inference in ODE models",
"ODE_API_introduction": "Inference in ODE models",
"ODE_API_shapes_and_benchmarking": "Inference in ODE models",
"probabilistic_matrix_factorization": "Case Studies"
}
3 changes: 3 additions & 0 deletions docs/source/notebooks/table_of_contents_tutorials.js
@@ -10,6 +10,9 @@ Gallery.contents = {
"Diagnosing_biased_Inference_with_Divergences": "Deep dives",
"Advanced_usage_of_Theano_in_PyMC3.rst": "Deep dives",
"getting_started": "Deep dives",
"ODE_API_shapes_and_benchmarking": "Deep dives",
"DEMetropolisZ_EfficiencyComparison": "Deep dives",
"DEMetropolisZ_tune_drop_fraction": "Deep dives",
"PyMC3_tips_and_heuristic": "How-To",
"blackbox_external_likelihood": "How-To",
"profiling": "How-To",
2 changes: 2 additions & 0 deletions pymc3/sampling.py
@@ -875,6 +875,8 @@ def _iter_sample(

try:
step.tune = bool(tune)
if hasattr(step, 'reset_tuning'):
step.reset_tuning()
for i in range(draws):
stats = None

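Why this hook exists: with `cores=1` and multiple chains, `pm.sample` re-uses the same step method instance for every chain, so tuned state and history would otherwise leak from one chain into the next. The `hasattr` check keeps the hook duck-typed. A minimal sketch of a step method that opts in (this class and its attributes are hypothetical, purely for illustration):

```python
class MyTunableStep:
    """Hypothetical step method showing the duck-typed reset hook."""

    def __init__(self):
        self.scaling = 0.001
        # remember the pre-tuning state so each chain can start fresh
        self._untuned_settings = dict(scaling=self.scaling)

    def reset_tuning(self):
        # _iter_sample calls this before each chain begins
        for attr, value in self._untuned_settings.items():
            setattr(self, attr, value)
```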
2 changes: 1 addition & 1 deletion pymc3/step_methods/__init__.py
@@ -17,7 +17,7 @@
from .hmc import HamiltonianMC, NUTS

from .metropolis import Metropolis
from .metropolis import DEMetropolis
from .metropolis import DEMetropolis, DEMetropolisZ
from .metropolis import BinaryMetropolis
from .metropolis import BinaryGibbsMetropolis
from .metropolis import CategoricalGibbsMetropolis
5 changes: 5 additions & 0 deletions pymc3/step_methods/compound.py
@@ -67,6 +67,11 @@ def stop_tuning(self):
for method in self.methods:
method.stop_tuning()

def reset_tuning(self):
for method in self.methods:
if hasattr(method, 'reset_tuning'):
method.reset_tuning()

@property
def vars_shape_dtype(self):
dtype_shapes = {}
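The `CompoundStep` counterpart above forwards the reset only to sub-methods that define it. A hedged sketch of the effect when samplers are mixed (toy model; in practice `pm.sample` builds the compound step automatically):

```python
import pymc3 as pm
from pymc3.step_methods.compound import CompoundStep

with pm.Model():
    x = pm.Normal('x', mu=0, sigma=1)
    y = pm.Normal('y', mu=0, sigma=1)
    compound = CompoundStep([
        pm.DEMetropolisZ(vars=[x]),   # defines reset_tuning()
        pm.Metropolis(vars=[y]),      # does not; skipped by hasattr()
    ])
    compound.reset_tuning()           # only reaches DEMetropolisZ
```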
173 changes: 172 additions & 1 deletion pymc3/step_methods/metropolis.py
@@ -16,13 +16,14 @@
import numpy.random as nr
import theano
import scipy.linalg
import warnings

from ..distributions import draw_values
from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
import pymc3 as pm
from pymc3.theanof import floatX

__all__ = ['Metropolis', 'DEMetropolis', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']

@@ -630,6 +631,176 @@ def competence(var, has_grad):
return Competence.COMPATIBLE


class DEMetropolisZ(ArrayStepShared):
"""
Adaptive Differential Evolution Metropolis sampling step that uses the past to inform jumps.

Parameters
----------
lamb : float
Lambda parameter of the DE proposal mechanism. Defaults to 2.38 / sqrt(2 * ndim)
vars : list
List of variables for sampler
S : standard deviation or covariance matrix
Some measure of variance to parameterize proposal distribution
proposal_dist : function
Function that returns zero-mean deviates when parameterized with
S (and n). Defaults to Uniform(-S,+S).
scaling : scalar or array
Initial scale factor for epsilon. Defaults to 0.001
tune : str
Which hyperparameter to tune. Defaults to 'lambda', but can also be 'scaling' or None.
tune_interval : int
The frequency of tuning. Defaults to 100 iterations.
tune_drop_fraction : float
Fraction of tuning steps that will be removed from the sampler's history when tuning ends.
Defaults to 0.9, keeping the last 10% of tuning steps for good mixing while dropping 90% of
potentially unconverged tuning positions.
model : PyMC Model
Optional model for sampling step. Defaults to None (taken from context).
mode : string or `Mode` instance.
compilation mode passed to Theano functions

References
----------
.. [terBraak2008] Cajo C.F. ter Braak & Jasper A. Vrugt (2008).
Differential Evolution Markov Chain with snooker updater and fewer chains.
Statistics and Computing
`link <https://doi.org/10.1007/s11222-008-9104-9>`__
"""
name = 'DEMetropolisZ'

default_blocked = True
generates_stats = True
stats_dtypes = [{
'accept': np.float64,
'accepted': np.bool,
'tune': np.bool,
'scaling': np.float64,
'lambda': np.float64,
}]

def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
tune='lambda', tune_interval=100, tune_drop_fraction: float = 0.9, model=None, mode=None, **kwargs):

warnings.warn(
'The DEMetropolisZ implementation in PyMC3 is very young. You should be extra critical about its results.'
' See Pull Request #3784 for more information.'
)

model = pm.modelcontext(model)

if vars is None:
vars = model.cont_vars
vars = pm.inputvars(vars)

if S is None:
S = np.ones(model.ndim)

if proposal_dist is not None:
self.proposal_dist = proposal_dist(S)
else:
self.proposal_dist = UniformProposal(S)

self.scaling = np.atleast_1d(scaling).astype('d')
if lamb is None:
lamb = 2.38 / np.sqrt(2 * model.ndim)
self.lamb = float(lamb)
if tune not in {None, 'scaling', 'lambda'}:
raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
self.tune = True
self.tune_target = tune
self.tune_interval = tune_interval
self.tune_drop_fraction = tune_drop_fraction
self.steps_until_tune = tune_interval
self.accepted = 0

# cache local history for the Z-proposals
self._history = []
# remember initial settings before tuning so they can be reset
self._untuned_settings = dict(
scaling=self.scaling,
lamb=self.lamb,
steps_until_tune=tune_interval,
accepted=self.accepted
)

self.mode = mode

shared = pm.make_shared_replacements(vars, model)
self.delta_logp = delta_logp(model.logpt, vars, shared)
super().__init__(vars, shared)

def reset_tuning(self):
# history can't be reset via the _untuned_settings dict because it's a list
self._history = []
for attr, initial_value in self._untuned_settings.items():
setattr(self, attr, initial_value)
return

def astep(self, q0):
# same tuning scheme as DEMetropolis
if not self.steps_until_tune and self.tune:
if self.tune_target == 'scaling':
self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
elif self.tune_target == 'lambda':
self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
# Reset counter
self.steps_until_tune = self.tune_interval
self.accepted = 0

epsilon = self.proposal_dist() * self.scaling

it = len(self._history)
# use the DE-MCMC-Z proposal scheme as soon as the history has 2 entries
if it > 1:
# differential evolution proposal
# select two random points from the chain's own history
iz1 = np.random.randint(it)
iz2 = np.random.randint(it)
while iz2 == iz1:
iz2 = np.random.randint(it)

z1 = self._history[iz1]
z2 = self._history[iz2]
# propose a jump
q = floatX(q0 + self.lamb * (z1 - z2) + epsilon)
else:
# propose just with noise in the first 2 iterations
q = floatX(q0 + epsilon)

accept = self.delta_logp(q, q0)
q_new, accepted = metrop_select(accept, q, q0)
self.accepted += accepted
self._history.append(q_new)

self.steps_until_tune -= 1

stats = {
'tune': self.tune,
'scaling': self.scaling,
'lambda': self.lamb,
'accept': np.exp(accept),
'accepted': accepted
}

return q_new, [stats]

def stop_tuning(self):
# remove the first x% of the tuning phase so future steps are not
# informed by bad unconverged tuning iterations
it = len(self._history)
n_drop = int(self.tune_drop_fraction * it)
self._history = self._history[n_drop:]
return super().stop_tuning()

@staticmethod
def competence(var, has_grad):
if var.dtype in pm.discrete_types:
return Competence.INCOMPATIBLE
return Competence.COMPATIBLE


def sample_except(limit, excluded):
candidate = nr.choice(limit - 1)
if candidate >= excluded:
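To make the proposal scheme above concrete, here is a self-contained NumPy sketch of DE-MCMC-Z steps against a toy standard-normal target (the target, seed and constants are assumptions for illustration; the real stepper additionally tunes `lambda` or `scaling` every `tune_interval` iterations and drops part of the tuning history):

```python
import numpy as np

def demz_propose(q0, history, lamb, scaling, rng):
    """One DE-MCMC-Z proposal: jump along the difference of two random
    points from the chain's own past, plus scaled uniform noise."""
    epsilon = rng.uniform(-1, 1, size=q0.shape) * scaling
    if len(history) > 1:
        iz1, iz2 = rng.choice(len(history), size=2, replace=False)
        return q0 + lamb * (history[iz1] - history[iz2]) + epsilon
    return q0 + epsilon  # pure noise until the history has two entries

rng = np.random.default_rng(42)
ndim = 3
lamb = 2.38 / np.sqrt(2 * ndim)      # same default as DEMetropolisZ
log_post = lambda q: -0.5 * q @ q    # standard-normal log-density
q, history = np.zeros(ndim), []
for _ in range(1000):
    q_prop = demz_propose(q, history, lamb, scaling=0.001, rng=rng)
    if np.log(rng.uniform()) < log_post(q_prop) - log_post(q):
        q = q_prop                   # Metropolis accept/reject
    history.append(q)
```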
132 changes: 132 additions & 0 deletions pymc3/tests/test_step.py
@@ -40,6 +40,7 @@
HamiltonianMC,
EllipticalSlice,
DEMetropolis,
DEMetropolisZ,
)
from pymc3.theanof import floatX
from pymc3.distributions import Binomial, Normal, Bernoulli, Categorical, Beta, HalfNormal
@@ -778,6 +779,137 @@ def test_parallelized_chains_are_random(self):
pass


class TestDEMetropolisZ:
def test_tuning_lambda_sequential(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
trace = sample(
tune=1000,
draws=500,
step=DEMetropolisZ(tune='lambda', lamb=0.92),
cores=1,
chains=3,
discard_tuned_samples=False
)
for c in range(trace.nchains):
# check that the tuned settings changed and were reset
assert trace.get_sampler_stats('lambda', chains=c)[0] == 0.92
assert trace.get_sampler_stats('lambda', chains=c)[-1] != 0.92
assert set(trace.get_sampler_stats('tune', chains=c)) == {True, False}
pass

def test_tuning_epsilon_parallel(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
trace = sample(
tune=1000,
draws=500,
step=DEMetropolisZ(tune='scaling', scaling=0.002),
cores=2,
chains=2,
discard_tuned_samples=False
)
for c in range(trace.nchains):
# check that the tuned settings changed and were reset
assert trace.get_sampler_stats('scaling', chains=c)[0] == 0.002
assert trace.get_sampler_stats('scaling', chains=c)[-1] != 0.002
assert set(trace.get_sampler_stats('tune', chains=c)) == {True, False}
pass

def test_tuning_none(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
trace = sample(
tune=1000,
draws=500,
step=DEMetropolisZ(tune=None),
cores=1,
chains=2,
discard_tuned_samples=False
)
for c in range(trace.nchains):
# check that all tunable parameters remained constant
assert len(set(trace.get_sampler_stats('lambda', chains=c))) == 1
assert len(set(trace.get_sampler_stats('scaling', chains=c))) == 1
assert set(trace.get_sampler_stats('tune', chains=c)) == {True, False}
pass

def test_tuning_reset(self):
"""Re-use of the step method instance with cores=1 must not leak tuning information between chains."""
with Model() as pmodel:
D = 3
Normal('n', 0, 2, shape=(D,))
trace = sample(
tune=1000,
draws=500,
step=DEMetropolisZ(tune='scaling', scaling=0.002),
cores=1,
chains=3,
discard_tuned_samples=False
)
for c in range(trace.nchains):
# check that the tuned settings changed and were reset
assert trace.get_sampler_stats('scaling', chains=c)[0] == 0.002
assert trace.get_sampler_stats('scaling', chains=c)[-1] != 0.002
# check that the variance of the first 50 iterations is much lower than the last 100
for d in range(D):
var_start = np.var(trace.get_values('n', chains=c)[:50,d])
var_end = np.var(trace.get_values('n', chains=c)[-100:,d])
assert var_start < 0.1 * var_end
pass

def test_tune_drop_fraction(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
step = DEMetropolisZ(tune_drop_fraction=0.85)
trace = sample(
tune=300,
draws=200,
step=step,
cores=1,
chains=1,
discard_tuned_samples=False
)
assert len(trace) == 500
assert len(step._history) == (300 - 300 * 0.85) + 200  # 45 kept tuning steps + 200 draws = 245
pass

def test_competence(self):
with Model() as pmodel:
n = Normal('n', 0, 2, shape=(3,))
b = Binomial('b', n=2, p=0.3)
assert DEMetropolisZ.competence(n, has_grad=True) == 1
assert DEMetropolisZ.competence(n, has_grad=False) == 1
assert DEMetropolisZ.competence(b, has_grad=True) == 0
assert DEMetropolisZ.competence(b, has_grad=False) == 0
pass

def test_invalid_tune(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
with pytest.raises(ValueError):
DEMetropolisZ(tune='foo')
with pytest.raises(ValueError):
DEMetropolisZ(tune=True)
with pytest.raises(ValueError):
DEMetropolisZ(tune=False)
pass

def test_custom_proposal_dist(self):
with Model() as pmodel:
D = 3
Normal('n', 0, 2, shape=(D,))
trace = sample(
tune=100,
draws=50,
step=DEMetropolisZ(proposal_dist=NormalProposal),
cores=1,
chains=3,
discard_tuned_samples=False
)
pass


@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
class TestNutsCheckTrace:
def test_multiple_samplers(self, caplog):