Add DEMetropolisZ stepper (#3784)
* implement DEMetropolisZ

* use len instead of attribute

* introduce tune_drop_fraction
+ improves efficiency in situations with slow initial tuning convergence (see PR #3784)

* reset tuned settings in sequential single-process sampling

* tune DEMetropolisZ lambda by default

* use new empty list to reset history

* fix dual-use conflict of self.tune attribute

* add warning and initialize self.tune attribute

* add tests for DEMetropolisZ

* add tests to hit more of the diff

* add and move notebooks, update release notes

* address review feedback

* apply review suggestions

* more accurate wording

Co-authored-by: Thomas Wiecki <thomas.wiecki@gmail.com>
michaelosthege and twiecki authored Feb 5, 2020
1 parent dfc9102 commit bb574a7
Showing 10 changed files with 5,063 additions and 4 deletions.
2 changes: 2 additions & 0 deletions RELEASE-NOTES.md
@@ -5,6 +5,8 @@
### New features
- use [fastprogress](https://github.com/fastai/fastprogress) instead of tqdm [#3693](https://github.com/pymc-devs/pymc3/pull/3693)
- `DEMetropolis` can now tune both `lambda` and `scaling` parameters, but by default neither of them are tuned. See [#3743](https://github.com/pymc-devs/pymc3/pull/3743) for more info.
- `DEMetropolisZ`, an improved variant of `DEMetropolis`, brings better parallelization and higher efficiency with fewer chains, at the cost of slower initial convergence. This implementation is experimental. See [#3784](https://github.com/pymc-devs/pymc3/pull/3784) for more info.
- Notebooks that give insight into `DEMetropolis`, `DEMetropolisZ` and the `DifferentialEquation` interface are now located in the [Tutorials/Deep Dive](https://docs.pymc.io/nb_tutorials/index.html) section.

### Maintenance
- Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6.
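For reference, a minimal usage sketch of the new step method (illustrative only, not part of this diff): the sampler is exposed as `pm.DEMetropolisZ` and selected via the `step` argument of `pm.sample`; the model and settings below are placeholders.

import pymc3 as pm

with pm.Model():
    x = pm.Normal("x", mu=0.0, sigma=1.0, shape=10)
    # tune='lambda' is the default; most of the tuning history is dropped
    # afterwards according to tune_drop_fraction (0.9 by default).
    step = pm.DEMetropolisZ(tune='lambda', tune_drop_fraction=0.9)
    trace = pm.sample(draws=2000, tune=1000, step=step, chains=4, cores=1)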
2,459 changes: 2,459 additions & 0 deletions docs/source/notebooks/DEMetropolisZ_EfficiencyComparison.ipynb

Large diffs are not rendered by default.

2,284 changes: 2,284 additions & 0 deletions docs/source/notebooks/DEMetropolisZ_tune_drop_fraction.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion docs/source/notebooks/table_of_contents_examples.js
@@ -56,6 +56,5 @@ Gallery.contents = {
"GLM-hierarchical-advi-minibatch": "Variational Inference",
"ODE_with_manual_gradients": "Inference in ODE models",
"ODE_API_introduction": "Inference in ODE models",
"ODE_API_shapes_and_benchmarking": "Inference in ODE models",
"probabilistic_matrix_factorization": "Case Studies"
}
3 changes: 3 additions & 0 deletions docs/source/notebooks/table_of_contents_tutorials.js
@@ -10,6 +10,9 @@ Gallery.contents = {
"Diagnosing_biased_Inference_with_Divergences": "Deep dives",
"Advanced_usage_of_Theano_in_PyMC3.rst": "Deep dives",
"getting_started": "Deep dives",
"ODE_API_shapes_and_benchmarking": "Deep dives",
"DEMetropolisZ_EfficiencyComparison": "Deep dives",
"DEMetropolisZ_tune_drop_fraction": "Deep dives",
"PyMC3_tips_and_heuristic": "How-To",
"blackbox_external_likelihood": "How-To",
"profiling": "How-To",
2 changes: 2 additions & 0 deletions pymc3/sampling.py
@@ -875,6 +875,8 @@ def _iter_sample(

    try:
        step.tune = bool(tune)
        if hasattr(step, 'reset_tuning'):
            step.reset_tuning()
        for i in range(draws):
            stats = None

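The two added lines rely on a duck-typed hook: `_iter_sample` calls `reset_tuning()` on any step method that defines it, so tuned state from one sequential chain does not leak into the next. A hypothetical minimal step object satisfying that contract (illustrative only, not the PyMC3 API):

class ToyStep:
    def __init__(self):
        self.tune = True     # toggled by the sampling loop via `step.tune = bool(tune)`
        self._history = []   # per-chain state that should start fresh for each chain

    def reset_tuning(self):
        # called by _iter_sample before the first draw of each chain, if present
        self._history = []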
2 changes: 1 addition & 1 deletion pymc3/step_methods/__init__.py
@@ -17,7 +17,7 @@
from .hmc import HamiltonianMC, NUTS

from .metropolis import Metropolis
from .metropolis import DEMetropolis
from .metropolis import DEMetropolis, DEMetropolisZ
from .metropolis import BinaryMetropolis
from .metropolis import BinaryGibbsMetropolis
from .metropolis import CategoricalGibbsMetropolis
5 changes: 5 additions & 0 deletions pymc3/step_methods/compound.py
@@ -67,6 +67,11 @@ def stop_tuning(self):
        for method in self.methods:
            method.stop_tuning()

    def reset_tuning(self):
        for method in self.methods:
            if hasattr(method, 'reset_tuning'):
                method.reset_tuning()

    @property
    def vars_shape_dtype(self):
        dtype_shapes = {}
179 changes: 177 additions & 2 deletions pymc3/step_methods/metropolis.py
@@ -16,13 +16,14 @@
import numpy.random as nr
import theano
import scipy.linalg
import warnings

from ..distributions import draw_values
from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
import pymc3 as pm
from pymc3.theanof import floatX

__all__ = ['Metropolis', 'DEMetropolis', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']

@@ -572,9 +573,10 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0

        self.scaling = np.atleast_1d(scaling).astype('d')
        if lamb is None:
            # default to the optimal lambda for normally distributed targets
            lamb = 2.38 / np.sqrt(2 * model.ndim)
        self.lamb = float(lamb)
        if not tune in {None, 'scaling', 'lambda'}:
        if tune not in {None, 'scaling', 'lambda'}:
            raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
        self.tune = tune
        self.tune_interval = tune_interval
@@ -630,6 +632,179 @@ def competence(var, has_grad):
        return Competence.COMPATIBLE


class DEMetropolisZ(ArrayStepShared):
    """
    Adaptive Differential Evolution Metropolis sampling step that uses the past to inform jumps.

    Parameters
    ----------
    lamb : float
        Lambda parameter of the DE proposal mechanism. Defaults to 2.38 / sqrt(2 * ndim)
    vars : list
        List of variables for sampler
    S : standard deviation or covariance matrix
        Some measure of variance to parameterize proposal distribution
    proposal_dist : function
        Function that returns zero-mean deviates when parameterized with
        S (and n). Defaults to Uniform(-S,+S).
    scaling : scalar or array
        Initial scale factor for epsilon. Defaults to 0.001
    tune : str
        Which hyperparameter to tune. Defaults to 'lambda', but can also be 'scaling' or None.
    tune_interval : int
        The frequency of tuning. Defaults to 100 iterations.
    tune_drop_fraction : float
        Fraction of tuning steps that will be removed from the samplers history when the tuning ends.
        Defaults to 0.9 - keeping the last 10% of tuning steps for good mixing while removing 90% of
        potentially unconverged tuning positions.
    model : PyMC Model
        Optional model for sampling step. Defaults to None (taken from context).
    mode : string or `Mode` instance.
        compilation mode passed to Theano functions

    References
    ----------
    .. [Braak2006] Cajo C.F. ter Braak (2006).
       Differential Evolution Markov Chain with snooker updater and fewer chains.
       Statistics and Computing
       `link <https://doi.org/10.1007/s11222-008-9104-9>`__
    """
    name = 'DEMetropolisZ'

    default_blocked = True
    generates_stats = True
    stats_dtypes = [{
        'accept': np.float64,
        'accepted': np.bool,
        'tune': np.bool,
        'scaling': np.float64,
        'lambda': np.float64,
    }]

    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
                 tune='lambda', tune_interval=100, tune_drop_fraction: float = 0.9,
                 model=None, mode=None, **kwargs):

        warnings.warn(
            'The DEMetropolisZ implementation in PyMC3 is very young. You should be extra critical about its results.'
            ' See Pull Request #3784 for more information.'
        )

        model = pm.modelcontext(model)

        if vars is None:
            vars = model.cont_vars
        vars = pm.inputvars(vars)

        if S is None:
            S = np.ones(model.ndim)

        if proposal_dist is not None:
            self.proposal_dist = proposal_dist(S)
        else:
            self.proposal_dist = UniformProposal(S)

        self.scaling = np.atleast_1d(scaling).astype('d')
        if lamb is None:
            # default to the optimal lambda for normally distributed targets
            lamb = 2.38 / np.sqrt(2 * model.ndim)
        self.lamb = float(lamb)
        if tune not in {None, 'scaling', 'lambda'}:
            raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
        self.tune = True
        self.tune_target = tune
        self.tune_interval = tune_interval
        self.tune_drop_fraction = tune_drop_fraction
        self.steps_until_tune = tune_interval
        self.accepted = 0

        # cache local history for the Z-proposals
        self._history = []
        # remember initial settings before tuning so they can be reset
        self._untuned_settings = dict(
            scaling=self.scaling,
            lamb=self.lamb,
            steps_until_tune=tune_interval,
            accepted=self.accepted
        )

        self.mode = mode

        shared = pm.make_shared_replacements(vars, model)
        self.delta_logp = delta_logp(model.logpt, vars, shared)
        super().__init__(vars, shared)

    def reset_tuning(self):
        """Resets the tuned sampler parameters and history to their initial values."""
        # history can't be reset via the _untuned_settings dict because it's a list
        self._history = []
        for attr, initial_value in self._untuned_settings.items():
            setattr(self, attr, initial_value)
        return

    def astep(self, q0):
        # same tuning scheme as DEMetropolis
        if not self.steps_until_tune and self.tune:
            if self.tune_target == 'scaling':
                self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
            elif self.tune_target == 'lambda':
                self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
            # Reset counter
            self.steps_until_tune = self.tune_interval
            self.accepted = 0

        epsilon = self.proposal_dist() * self.scaling

        it = len(self._history)
        # use the DE-MCMC-Z proposal scheme as soon as the history has 2 entries
        if it > 1:
            # differential evolution proposal
            # select two other chains
            iz1 = np.random.randint(it)
            iz2 = np.random.randint(it)
            while iz2 == iz1:
                iz2 = np.random.randint(it)

            z1 = self._history[iz1]
            z2 = self._history[iz2]
            # propose a jump
            q = floatX(q0 + self.lamb * (z1 - z2) + epsilon)
        else:
            # propose just with noise in the first 2 iterations
            q = floatX(q0 + epsilon)

        accept = self.delta_logp(q, q0)
        q_new, accepted = metrop_select(accept, q, q0)
        self.accepted += accepted
        self._history.append(q_new)

        self.steps_until_tune -= 1

        stats = {
            'tune': self.tune,
            'scaling': self.scaling,
            'lambda': self.lamb,
            'accept': np.exp(accept),
            'accepted': accepted
        }

        return q_new, [stats]

    def stop_tuning(self):
        """At the end of the tuning phase, this method removes the first x% of the history
        so future proposals are not informed by unconverged tuning iterations.
        """
        it = len(self._history)
        n_drop = int(self.tune_drop_fraction * it)
        self._history = self._history[n_drop:]
        return super().stop_tuning()

    @staticmethod
    def competence(var, has_grad):
        if var.dtype in pm.discrete_types:
            return Competence.INCOMPATIBLE
        return Competence.COMPATIBLE


def sample_except(limit, excluded):
    candidate = nr.choice(limit - 1)
    if candidate >= excluded:
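Since the class sets `generates_stats = True` and `astep` returns the `stats` dict above, the tuned values can be inspected after sampling. A short sketch, assuming the standard `MultiTrace.get_sampler_stats` accessor and a placeholder model:

import pymc3 as pm

with pm.Model():
    pm.Normal("x", mu=0.0, sigma=1.0, shape=5)
    trace = pm.sample(1000, tune=500, step=pm.DEMetropolisZ(), cores=1)

print(trace.get_sampler_stats("lambda")[-1])       # lambda after tuning
print(trace.get_sampler_stats("accepted").mean())  # overall acceptance rate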
