Add DEMetropolisZ stepper #3784

Merged · Feb 5, 2020 · 16 commits
2 changes: 2 additions & 0 deletions RELEASE-NOTES.md
@@ -5,6 +5,8 @@
### New features
- use [fastprogress](https://github.com/fastai/fastprogress) instead of tqdm [#3693](https://github.com/pymc-devs/pymc3/pull/3693)
- `DEMetropolis` can now tune both `lambda` and `scaling` parameters, but by default neither of them are tuned. See [#3743](https://github.com/pymc-devs/pymc3/pull/3743) for more info.
- `DEMetropolisZ`, an improved variant of `DEMetropolis`, brings better parallelization and higher efficiency with fewer chains, at the cost of slower initial convergence. This implementation is experimental. See [#3784](https://github.com/pymc-devs/pymc3/pull/3784) for more info, and the usage sketch below.
- Notebooks that give insight into `DEMetropolis`, `DEMetropolisZ` and the `DifferentialEquation` interface are now located in the [Tutorials/Deep Dive]() section.
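
A minimal usage sketch of the new stepper (the toy model and tuning lengths are illustrative assumptions, not recommendations):

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal('x', mu=0, sigma=1, shape=10)
    # DEMetropolisZ needs a long enough tuning phase to build up its history;
    # by default 90% of the tuning history is dropped when tuning ends.
    trace = pm.sample(
        tune=5000, draws=2000, cores=1, chains=4,
        step=pm.DEMetropolisZ(tune='lambda'),
    )
```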

### Maintenance
- Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6.
2,066 changes: 2,066 additions & 0 deletions docs/source/notebooks/DEMetropolisZ_EfficiencyComparison.ipynb

Large diffs are not rendered by default.

1,795 changes: 1,795 additions & 0 deletions docs/source/notebooks/DEMetropolisZ_tune_drop_fraction.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion docs/source/notebooks/table_of_contents_examples.js
@@ -56,6 +56,5 @@ Gallery.contents = {
"GLM-hierarchical-advi-minibatch": "Variational Inference",
"ODE_with_manual_gradients": "Inference in ODE models",
"ODE_API_introduction": "Inference in ODE models",
"ODE_API_shapes_and_benchmarking": "Inference in ODE models",
"probabilistic_matrix_factorization": "Case Studies"
}
3 changes: 3 additions & 0 deletions docs/source/notebooks/table_of_contents_tutorials.js
@@ -10,6 +10,9 @@ Gallery.contents = {
"Diagnosing_biased_Inference_with_Divergences": "Deep dives",
"Advanced_usage_of_Theano_in_PyMC3.rst": "Deep dives",
"getting_started": "Deep dives",
"ODE_API_shapes_and_benchmarking": "Deep dives",
"DEMetropolisZ_EfficiencyComparison": "Deep dives",
"DEMetropolisZ_tune_drop_fraction": "Deep dives",
"PyMC3_tips_and_heuristic": "How-To",
"blackbox_external_likelihood": "How-To",
"profiling": "How-To",
2 changes: 2 additions & 0 deletions pymc3/sampling.py
@@ -875,6 +875,8 @@ def _iter_sample(

try:
step.tune = bool(tune)
if hasattr(step, 'reset_tuning'):
step.reset_tuning()
for i in range(draws):
stats = None

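Why this hook exists: with `cores=1` and multiple chains, `pm.sample` re-uses the same step method instance for every chain, so tuned state and history would otherwise leak from one chain into the next. The `hasattr` check keeps the hook duck-typed. A minimal sketch of a step method that opts in (this class and its attributes are hypothetical, purely for illustration):

```python
class MyTunableStep:
    """Hypothetical step method showing the duck-typed reset hook."""

    def __init__(self):
        self.scaling = 0.001
        # remember the pre-tuning state so each chain can start fresh
        self._untuned_settings = dict(scaling=self.scaling)

    def reset_tuning(self):
        # _iter_sample calls this before each chain begins
        for attr, value in self._untuned_settings.items():
            setattr(self, attr, value)
```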
2 changes: 1 addition & 1 deletion pymc3/step_methods/__init__.py
@@ -17,7 +17,7 @@
from .hmc import HamiltonianMC, NUTS

from .metropolis import Metropolis
from .metropolis import DEMetropolis
from .metropolis import DEMetropolis, DEMetropolisZ
from .metropolis import BinaryMetropolis
from .metropolis import BinaryGibbsMetropolis
from .metropolis import CategoricalGibbsMetropolis
5 changes: 5 additions & 0 deletions pymc3/step_methods/compound.py
@@ -67,6 +67,11 @@ def stop_tuning(self):
for method in self.methods:
method.stop_tuning()

def reset_tuning(self):
for method in self.methods:
if hasattr(method, 'reset_tuning'):
method.reset_tuning()

@property
def vars_shape_dtype(self):
dtype_shapes = {}
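The `CompoundStep` counterpart above forwards the reset only to sub-methods that define it. A hedged sketch of the effect when samplers are mixed (toy model; in practice `pm.sample` builds the compound step automatically):

```python
import pymc3 as pm
from pymc3.step_methods.compound import CompoundStep

with pm.Model():
    x = pm.Normal('x', mu=0, sigma=1)
    y = pm.Normal('y', mu=0, sigma=1)
    compound = CompoundStep([
        pm.DEMetropolisZ(vars=[x]),   # defines reset_tuning()
        pm.Metropolis(vars=[y]),      # does not; skipped by hasattr()
    ])
    compound.reset_tuning()           # only reaches DEMetropolisZ
```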
173 changes: 172 additions & 1 deletion pymc3/step_methods/metropolis.py
@@ -16,13 +16,14 @@
import numpy.random as nr
import theano
import scipy.linalg
import warnings

from ..distributions import draw_values
from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
import pymc3 as pm
from pymc3.theanof import floatX

__all__ = ['Metropolis', 'DEMetropolis', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']

@@ -630,6 +631,176 @@ def competence(var, has_grad):
return Competence.COMPATIBLE


class DEMetropolisZ(ArrayStepShared):
"""
Adaptive Differential Evolution Metropolis sampling step that uses the past to inform jumps.

Parameters
----------
lamb : float
Lambda parameter of the DE proposal mechanism. Defaults to 2.38 / sqrt(2 * ndim)
vars : list
List of variables for sampler
S : standard deviation or covariance matrix
Some measure of variance to parameterize proposal distribution
proposal_dist : function
Function that returns zero-mean deviates when parameterized with
S (and n). Defaults to Uniform(-S,+S).
scaling : scalar or array
Initial scale factor for epsilon. Defaults to 0.001
tune : str
Which hyperparameter to tune. Defaults to 'lambda', but can also be 'scaling' or None.
tune_interval : int
The frequency of tuning. Defaults to 100 iterations.
tune_drop_fraction : float
Fraction of tuning steps that will be removed from the sampler's history when tuning ends.
Defaults to 0.9, keeping the last 10% of tuning steps for good mixing while dropping 90% of
potentially unconverged tuning positions.
model : PyMC Model
Optional model for sampling step. Defaults to None (taken from context).
mode : string or `Mode` instance.
compilation mode passed to Theano functions

References
----------
.. [terBraak2008] Cajo C.F. ter Braak & Jasper A. Vrugt (2008).
Differential Evolution Markov Chain with snooker updater and fewer chains.
Statistics and Computing
`link <https://doi.org/10.1007/s11222-008-9104-9>`__
"""
name = 'DEMetropolisZ'

default_blocked = True
generates_stats = True
stats_dtypes = [{
'accept': np.float64,
'accepted': np.bool,
'tune': np.bool,
'scaling': np.float64,
'lambda': np.float64,
}]

def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
tune='lambda', tune_interval=100, tune_drop_fraction: float = 0.9, model=None, mode=None, **kwargs):

warnings.warn(
'The DEMetropolisZ implementation in PyMC3 is very young. You should be extra critical about its results.'
' See Pull Request #3784 for more information.'
)

model = pm.modelcontext(model)

if vars is None:
vars = model.cont_vars
vars = pm.inputvars(vars)

if S is None:
S = np.ones(model.ndim)

if proposal_dist is not None:
self.proposal_dist = proposal_dist(S)
else:
self.proposal_dist = UniformProposal(S)

self.scaling = np.atleast_1d(scaling).astype('d')
if lamb is None:
lamb = 2.38 / np.sqrt(2 * model.ndim)
self.lamb = float(lamb)
if tune not in {None, 'scaling', 'lambda'}:
raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
self.tune = True
self.tune_target = tune
self.tune_interval = tune_interval
self.tune_drop_fraction = tune_drop_fraction
self.steps_until_tune = tune_interval
self.accepted = 0

# cache local history for the Z-proposals
self._history = []
# remember initial settings before tuning so they can be reset
self._untuned_settings = dict(
scaling=self.scaling,
lamb=self.lamb,
steps_until_tune=tune_interval,
accepted=self.accepted
)

self.mode = mode

shared = pm.make_shared_replacements(vars, model)
self.delta_logp = delta_logp(model.logpt, vars, shared)
super().__init__(vars, shared)

def reset_tuning(self):
# history can't be reset via the _untuned_settings dict because it's a list
self._history = []
for attr, initial_value in self._untuned_settings.items():
setattr(self, attr, initial_value)
return

def astep(self, q0):
# same tuning scheme as DEMetropolis
if not self.steps_until_tune and self.tune:
if self.tune_target == 'scaling':
self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
elif self.tune_target == 'lambda':
self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
# Reset counter
self.steps_until_tune = self.tune_interval
self.accepted = 0

epsilon = self.proposal_dist() * self.scaling

it = len(self._history)
# use the DE-MCMC-Z proposal scheme as soon as the history has 2 entries
if it > 1:
# differential evolution proposal
# select two random points from the chain's own history
iz1 = np.random.randint(it)
iz2 = np.random.randint(it)
while iz2 == iz1:
iz2 = np.random.randint(it)

z1 = self._history[iz1]
z2 = self._history[iz2]
# propose a jump
q = floatX(q0 + self.lamb * (z1 - z2) + epsilon)
else:
# propose just with noise in the first 2 iterations
q = floatX(q0 + epsilon)

accept = self.delta_logp(q, q0)
q_new, accepted = metrop_select(accept, q, q0)
self.accepted += accepted
self._history.append(q_new)

self.steps_until_tune -= 1

stats = {
'tune': self.tune,
'scaling': self.scaling,
'lambda': self.lamb,
'accept': np.exp(accept),
'accepted': accepted
}

return q_new, [stats]

def stop_tuning(self):
# remove the first x% of the tuning phase so future steps are not
# informed by bad unconverged tuning iterations
it = len(self._history)
n_drop = int(self.tune_drop_fraction * it)
self._history = self._history[n_drop:]
return super().stop_tuning()

@staticmethod
def competence(var, has_grad):
if var.dtype in pm.discrete_types:
return Competence.INCOMPATIBLE
return Competence.COMPATIBLE


def sample_except(limit, excluded):
candidate = nr.choice(limit - 1)
if candidate >= excluded:
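To make the proposal scheme above concrete, here is a self-contained NumPy sketch of DE-MCMC-Z steps against a toy standard-normal target (the target, seed and constants are assumptions for illustration; the real stepper additionally tunes `lambda` or `scaling` every `tune_interval` iterations and drops part of the tuning history):

```python
import numpy as np

def demz_propose(q0, history, lamb, scaling, rng):
    """One DE-MCMC-Z proposal: jump along the difference of two random
    points from the chain's own past, plus scaled uniform noise."""
    epsilon = rng.uniform(-1, 1, size=q0.shape) * scaling
    if len(history) > 1:
        iz1, iz2 = rng.choice(len(history), size=2, replace=False)
        return q0 + lamb * (history[iz1] - history[iz2]) + epsilon
    return q0 + epsilon  # pure noise until the history has two entries

rng = np.random.default_rng(42)
ndim = 3
lamb = 2.38 / np.sqrt(2 * ndim)      # same default as DEMetropolisZ
log_post = lambda q: -0.5 * q @ q    # standard-normal log-density
q, history = np.zeros(ndim), []
for _ in range(1000):
    q_prop = demz_propose(q, history, lamb, scaling=0.001, rng=rng)
    if np.log(rng.uniform()) < log_post(q_prop) - log_post(q):
        q = q_prop                   # Metropolis accept/reject
    history.append(q)
```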
132 changes: 132 additions & 0 deletions pymc3/tests/test_step.py
@@ -40,6 +40,7 @@
HamiltonianMC,
EllipticalSlice,
DEMetropolis,
DEMetropolisZ,
)
from pymc3.theanof import floatX
from pymc3.distributions import Binomial, Normal, Bernoulli, Categorical, Beta, HalfNormal
@@ -778,6 +779,137 @@ def test_parallelized_chains_are_random(self):
pass


class TestDEMetropolisZ:
def test_tuning_lambda_sequential(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
trace = sample(
tune=1000,
draws=500,
step=DEMetropolisZ(tune='lambda', lamb=0.92),
cores=1,
chains=3,
discard_tuned_samples=False
)
for c in range(trace.nchains):
# check that the tuned settings changed and were reset
assert trace.get_sampler_stats('lambda', chains=c)[0] == 0.92
assert trace.get_sampler_stats('lambda', chains=c)[-1] != 0.92
assert set(trace.get_sampler_stats('tune', chains=c)) == {True, False}
pass

def test_tuning_epsilon_parallel(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
trace = sample(
tune=1000,
draws=500,
step=DEMetropolisZ(tune='scaling', scaling=0.002),
cores=2,
chains=2,
discard_tuned_samples=False
)
for c in range(trace.nchains):
# check that the tuned settings changed and were reset
assert trace.get_sampler_stats('scaling', chains=c)[0] == 0.002
assert trace.get_sampler_stats('scaling', chains=c)[-1] != 0.002
assert set(trace.get_sampler_stats('tune', chains=c)) == {True, False}
pass

def test_tuning_none(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
trace = sample(
tune=1000,
draws=500,
step=DEMetropolisZ(tune=None),
cores=1,
chains=2,
discard_tuned_samples=False
)
for c in range(trace.nchains):
# check that all tunable parameters remained constant
assert len(set(trace.get_sampler_stats('lambda', chains=c))) == 1
assert len(set(trace.get_sampler_stats('scaling', chains=c))) == 1
assert set(trace.get_sampler_stats('tune', chains=c)) == {True, False}
pass

def test_tuning_reset(self):
"""Re-use of the step method instance with cores=1 must not leak tuning information between chains."""
with Model() as pmodel:
D = 3
Normal('n', 0, 2, shape=(D,))
trace = sample(
tune=1000,
draws=500,
step=DEMetropolisZ(tune='scaling', scaling=0.002),
cores=1,
chains=3,
discard_tuned_samples=False
)
for c in range(trace.nchains):
# check that the tuned settings changed and were reset
assert trace.get_sampler_stats('scaling', chains=c)[0] == 0.002
assert trace.get_sampler_stats('scaling', chains=c)[-1] != 0.002
# check that the variance of the first 50 iterations is much lower than the last 100
for d in range(D):
var_start = np.var(trace.get_values('n', chains=c)[:50,d])
var_end = np.var(trace.get_values('n', chains=c)[-100:,d])
assert var_start < 0.1 * var_end
pass

def test_tune_drop_fraction(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
step = DEMetropolisZ(tune_drop_fraction=0.85)
trace = sample(
tune=300,
draws=200,
step=step,
cores=1,
chains=1,
discard_tuned_samples=False
)
assert len(trace) == 500
assert len(step._history) == (300 - 300 * 0.85) + 200  # 45 kept tuning steps + 200 draws = 245
pass

def test_competence(self):
with Model() as pmodel:
n = Normal('n', 0, 2, shape=(3,))
b = Binomial('b', n=2, p=0.3)
assert DEMetropolisZ.competence(n, has_grad=True) == 1
assert DEMetropolisZ.competence(n, has_grad=False) == 1
assert DEMetropolisZ.competence(b, has_grad=True) == 0
assert DEMetropolisZ.competence(b, has_grad=False) == 0
pass

def test_invalid_tune(self):
with Model() as pmodel:
Normal('n', 0, 2, shape=(3,))
with pytest.raises(ValueError):
DEMetropolisZ(tune='foo')
with pytest.raises(ValueError):
DEMetropolisZ(tune=True)
with pytest.raises(ValueError):
DEMetropolisZ(tune=False)
pass

def test_custom_proposal_dist(self):
with Model() as pmodel:
D = 3
Normal('n', 0, 2, shape=(D,))
trace = sample(
tune=100,
draws=50,
step=DEMetropolisZ(proposal_dist=NormalProposal),
cores=1,
chains=3,
discard_tuned_samples=False
)
pass


@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
class TestNutsCheckTrace:
def test_multiple_samplers(self, caplog):