Add DEMetropolisZ stepper #3784

Merged · 16 commits · Feb 5, 2020
2 changes: 2 additions & 0 deletions RELEASE-NOTES.md
@@ -5,6 +5,8 @@
### New features
- use [fastprogress](https://github.com/fastai/fastprogress) instead of tqdm [#3693](https://github.com/pymc-devs/pymc3/pull/3693)
- `DEMetropolis` can now tune both `lambda` and `scaling` parameters, but by default neither of them is tuned. See [#3743](https://github.com/pymc-devs/pymc3/pull/3743) for more info.
- `DEMetropolisZ`, an improved variant of `DEMetropolis`, brings better parallelization and higher efficiency with fewer chains, at the cost of slower initial convergence. This implementation is experimental. See [#3784](https://github.com/pymc-devs/pymc3/pull/3784) for more info.
- Notebooks that give insight into `DEMetropolis`, `DEMetropolisZ` and the `DifferentialEquation` interface are now located in the [Tutorials/Deep Dive](https://docs.pymc.io/nb_tutorials/index.html) section.

### Maintenance
- Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6.
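For orientation before the code diffs: a minimal sketch of how the new stepper is used, assuming the 3.8-era API added in this PR. The model, draw counts, and keyword values are illustrative, not taken from the diff.

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal("x", mu=0.0, sigma=1.0, shape=10)
    # Select the stepper explicitly; by default it tunes `lambda` every
    # 100 iterations and, when tuning ends, drops the first 90% of the
    # proposal history (tune_drop_fraction=0.9).
    step = pm.DEMetropolisZ(tune='lambda', tune_drop_fraction=0.9)
    trace = pm.sample(draws=2000, tune=1000, step=step, cores=1)
```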
2,459 changes: 2,459 additions & 0 deletions docs/source/notebooks/DEMetropolisZ_EfficiencyComparison.ipynb

Large diffs are not rendered by default.

2,284 changes: 2,284 additions & 0 deletions docs/source/notebooks/DEMetropolisZ_tune_drop_fraction.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion docs/source/notebooks/table_of_contents_examples.js
@@ -56,6 +56,5 @@ Gallery.contents = {
"GLM-hierarchical-advi-minibatch": "Variational Inference",
"ODE_with_manual_gradients": "Inference in ODE models",
"ODE_API_introduction": "Inference in ODE models",
"ODE_API_shapes_and_benchmarking": "Inference in ODE models",
"probabilistic_matrix_factorization": "Case Studies"
}
3 changes: 3 additions & 0 deletions docs/source/notebooks/table_of_contents_tutorials.js
@@ -10,6 +10,9 @@ Gallery.contents = {
"Diagnosing_biased_Inference_with_Divergences": "Deep dives",
"Advanced_usage_of_Theano_in_PyMC3.rst": "Deep dives",
"getting_started": "Deep dives",
"ODE_API_shapes_and_benchmarking": "Deep dives",
"DEMetropolisZ_EfficiencyComparison": "Deep dives",
"DEMetropolisZ_tune_drop_fraction": "Deep dives",
"PyMC3_tips_and_heuristic": "How-To",
"blackbox_external_likelihood": "How-To",
"profiling": "How-To",
2 changes: 2 additions & 0 deletions pymc3/sampling.py
@@ -875,6 +875,8 @@ def _iter_sample(

    try:
        step.tune = bool(tune)
+        if hasattr(step, 'reset_tuning'):
+            step.reset_tuning()
        for i in range(draws):
            stats = None

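The guarded `reset_tuning()` call above matters because a step instance can be reused across `pm.sample()` calls, and `DEMetropolisZ` carries tuned parameters plus a proposal history between runs. A sketch of the scenario this protects against (model and numbers are illustrative):

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal("x")
    step = pm.DEMetropolisZ()
    # First run: tunes `lambda` and fills the proposal history.
    trace1 = pm.sample(draws=500, tune=500, step=step)
    # Second run with the same step instance: _iter_sample now calls
    # step.reset_tuning() (guarded by hasattr), so sampling restarts from
    # the untuned settings and an empty history instead of reusing
    # leftover state from the first run.
    trace2 = pm.sample(draws=500, tune=500, step=step)
```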
2 changes: 1 addition & 1 deletion pymc3/step_methods/__init__.py
@@ -17,7 +17,7 @@
from .hmc import HamiltonianMC, NUTS

from .metropolis import Metropolis
-from .metropolis import DEMetropolis
+from .metropolis import DEMetropolis, DEMetropolisZ
from .metropolis import BinaryMetropolis
from .metropolis import BinaryGibbsMetropolis
from .metropolis import CategoricalGibbsMetropolis
5 changes: 5 additions & 0 deletions pymc3/step_methods/compound.py
@@ -67,6 +67,11 @@ def stop_tuning(self):
        for method in self.methods:
            method.stop_tuning()

+    def reset_tuning(self):
+        for method in self.methods:
+            if hasattr(method, 'reset_tuning'):
+                method.reset_tuning()
+
    @property
    def vars_shape_dtype(self):
        dtype_shapes = {}
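`CompoundStep.reset_tuning()` simply forwards the call, and the `hasattr` guard skips sub-methods without tuning state. A sketch of a compound setup where this applies, assuming `BinaryMetropolis` has no `reset_tuning` in this era (the model is illustrative):

```python
import pymc3 as pm

with pm.Model():
    cont = pm.Normal("cont")
    disc = pm.Bernoulli("disc", p=0.5)
    # Two steppers yield a CompoundStep: reset_tuning() is forwarded to
    # DEMetropolisZ, while BinaryMetropolis (which has no reset_tuning)
    # is skipped by the hasattr guard.
    steps = [pm.DEMetropolisZ([cont]), pm.BinaryMetropolis([disc])]
    trace = pm.sample(draws=500, tune=500, step=steps)
```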
179 changes: 177 additions & 2 deletions pymc3/step_methods/metropolis.py
@@ -16,13 +16,14 @@
import numpy.random as nr
import theano
import scipy.linalg
+import warnings

from ..distributions import draw_values
from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
import pymc3 as pm
from pymc3.theanof import floatX

-__all__ = ['Metropolis', 'DEMetropolis', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
+__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
           'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
           'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']

@@ -572,9 +573,10 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0

        self.scaling = np.atleast_1d(scaling).astype('d')
        if lamb is None:
+            # default to the optimal lambda for normally distributed targets
            lamb = 2.38 / np.sqrt(2 * model.ndim)
        self.lamb = float(lamb)
-        if not tune in {None, 'scaling', 'lambda'}:
+        if tune not in {None, 'scaling', 'lambda'}:
            raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
        self.tune = tune
        self.tune_interval = tune_interval
@@ -630,6 +632,179 @@ def competence(var, has_grad):
        return Competence.COMPATIBLE


class DEMetropolisZ(ArrayStepShared):
    """
    Adaptive Differential Evolution Metropolis sampling step that uses the past to inform jumps.

    Parameters
    ----------
    lamb : float
        Lambda parameter of the DE proposal mechanism. Defaults to 2.38 / sqrt(2 * ndim).
    vars : list
        List of variables for the sampler.
    S : standard deviation or covariance matrix
        Some measure of variance to parameterize the proposal distribution.
    proposal_dist : function
        Function that returns zero-mean deviates when parameterized with
        S (and n). Defaults to Uniform(-S, +S).
    scaling : scalar or array
        Initial scale factor for epsilon. Defaults to 0.001.
    tune : str
        Which hyperparameter to tune. Defaults to 'lambda', but can also be 'scaling' or None.
    tune_interval : int
        The frequency of tuning. Defaults to 100 iterations.
    tune_drop_fraction : float
        Fraction of tuning steps that will be removed from the sampler's history when tuning ends.
        Defaults to 0.9 - keeping the last 10% of tuning steps for good mixing while removing
        90% of potentially unconverged tuning positions.
    model : PyMC Model
        Optional model for the sampling step. Defaults to None (taken from context).
    mode : string or `Mode` instance
        Compilation mode passed to Theano functions.

    References
    ----------
    .. [terBraakVrugt2008] Cajo J.F. ter Braak & Jasper A. Vrugt (2008).
        Differential Evolution Markov Chain with snooker updater and fewer chains.
        Statistics and Computing
        `link <https://doi.org/10.1007/s11222-008-9104-9>`__
    """
    name = 'DEMetropolisZ'

    default_blocked = True
    generates_stats = True
    stats_dtypes = [{
        'accept': np.float64,
        'accepted': np.bool,
        'tune': np.bool,
        'scaling': np.float64,
        'lambda': np.float64,
    }]

    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
                 tune='lambda', tune_interval=100, tune_drop_fraction: float = 0.9,
                 model=None, mode=None, **kwargs):

        warnings.warn(
            'The DEMetropolisZ implementation in PyMC3 is very young. You should be extra critical about its results.'
            ' See Pull Request #3784 for more information.'
        )

        model = pm.modelcontext(model)

        if vars is None:
            vars = model.cont_vars
        vars = pm.inputvars(vars)

        if S is None:
            S = np.ones(model.ndim)

        if proposal_dist is not None:
            self.proposal_dist = proposal_dist(S)
        else:
            self.proposal_dist = UniformProposal(S)

        self.scaling = np.atleast_1d(scaling).astype('d')
        if lamb is None:
            # default to the optimal lambda for normally distributed targets
            lamb = 2.38 / np.sqrt(2 * model.ndim)
        self.lamb = float(lamb)
        if tune not in {None, 'scaling', 'lambda'}:
            raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
        self.tune = True
        self.tune_target = tune
        self.tune_interval = tune_interval
        self.tune_drop_fraction = tune_drop_fraction
        self.steps_until_tune = tune_interval
        self.accepted = 0

        # cache local history for the Z-proposals
        self._history = []
        # remember initial settings before tuning so they can be reset
        self._untuned_settings = dict(
            scaling=self.scaling,
            lamb=self.lamb,
            steps_until_tune=tune_interval,
            accepted=self.accepted
        )

        self.mode = mode

        shared = pm.make_shared_replacements(vars, model)
        self.delta_logp = delta_logp(model.logpt, vars, shared)
        super().__init__(vars, shared)

    def reset_tuning(self):
        """Resets the tuned sampler parameters and history to their initial values."""
        # history can't be reset via the _untuned_settings dict because it's a list
        self._history = []
        for attr, initial_value in self._untuned_settings.items():
            setattr(self, attr, initial_value)
        return

    def astep(self, q0):
        # same tuning scheme as DEMetropolis
        if not self.steps_until_tune and self.tune:
            if self.tune_target == 'scaling':
                self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
            elif self.tune_target == 'lambda':
                self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
            # Reset counter
            self.steps_until_tune = self.tune_interval
            self.accepted = 0

        epsilon = self.proposal_dist() * self.scaling

        it = len(self._history)
        # use the DE-MCMC-Z proposal scheme as soon as the history has 2 entries
        if it > 1:
            # differential evolution proposal:
            # select two random, distinct points from this chain's history
            iz1 = np.random.randint(it)
            iz2 = np.random.randint(it)
            while iz2 == iz1:
                iz2 = np.random.randint(it)

            z1 = self._history[iz1]
            z2 = self._history[iz2]
            # propose a jump
            q = floatX(q0 + self.lamb * (z1 - z2) + epsilon)
        else:
            # propose just with noise in the first 2 iterations
            q = floatX(q0 + epsilon)

        accept = self.delta_logp(q, q0)
        q_new, accepted = metrop_select(accept, q, q0)
        self.accepted += accepted
        self._history.append(q_new)

        self.steps_until_tune -= 1

        stats = {
            'tune': self.tune,
            'scaling': self.scaling,
            'lambda': self.lamb,
            'accept': np.exp(accept),
            'accepted': accepted
        }

        return q_new, [stats]

    def stop_tuning(self):
        """At the end of the tuning phase, this method removes the first
        `tune_drop_fraction` of the history so that future proposals are not
        informed by unconverged tuning iterations.
        """
        it = len(self._history)
        n_drop = int(self.tune_drop_fraction * it)
        self._history = self._history[n_drop:]
        return super().stop_tuning()

    @staticmethod
    def competence(var, has_grad):
        if var.dtype in pm.discrete_types:
            return Competence.INCOMPATIBLE
        return Competence.COMPATIBLE


def sample_except(limit, excluded):
    candidate = nr.choice(limit - 1)
    if candidate >= excluded:
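To make the defaults in `DEMetropolisZ` concrete, a small standalone sketch of the arithmetic behind the docstring's `lamb` formula and `stop_tuning()`'s history dropping; the dimensionality and iteration counts below are illustrative:

```python
import numpy as np

# Default lambda for a 10-dimensional model: 2.38 / sqrt(2 * ndim)
ndim = 10
lamb = 2.38 / np.sqrt(2 * ndim)      # ~0.53

# History dropping at the end of tuning, assuming 1000 tuning iterations
# and the default tune_drop_fraction of 0.9:
history_length = 1000                # len(self._history) when tuning stops
n_drop = int(0.9 * history_length)   # 900 entries are discarded
# Only the last 100 tuning positions remain, so future z-proposals draw
# their (z1, z2) pairs from positions sampled after lambda had settled.
```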