emcee: expose and support plotting of autocorrelation function (#457)

* implements phoebe.helpers.process_acf, process_acf_from_solution, acf, acf_ci, acf_ci_bartlett * implement style='acf' and 'acf_lnprobabilities' to plot from emcee solutions
phoebe-project · May 30, 2021 · 1d92e69 · 1d92e69
1 parent 6d56052
commit 1d92e69
Show file tree

Hide file tree

Showing 6 changed files with 275 additions and 10 deletions.
diff --git a/phoebe/dependencies/autofig/call.py b/phoebe/dependencies/autofig/call.py
@@ -1471,6 +1471,8 @@ def __init__(self, x=None, y=None, c=None, i=None,
         cmap = kwargs.pop('colormap', cmap)
         self._c = CallDimensionC(self, c, None, cunit, clabel, cmap=cmap)
 
+        self.alpha = kwargs.pop('alpha', 0.6)
+
         self.linebreak = linebreak
 
         if x is None:
@@ -1579,6 +1581,19 @@ def get_cmap(self, cmapcycler=None):
 
         return cmap
 
+    @property
+    def alpha(self):
+        return self._alpha
+
+    @alpha.setter
+    def alpha(self, alpha):
+        if not isinstance(alpha, float):
+            raise TypeError("alpha must be of type float")
+        if alpha < 0 or alpha > 1:
+            raise ValueError("alpha must be between 0 and 1")
+
+        self._alpha = alpha
+
     @property
     def linebreak(self):
         if self._linebreak is None:
@@ -1710,7 +1725,7 @@ def draw(self, ax=None, i=None,
 
             fb_kwargs = {}
             fb_kwargs['color'] = color
-            fb_kwargs['alpha'] = 0.6 # TODO: make this an option
+            fb_kwargs['alpha'] = self.alpha # defaults to 0.6
             if do_colorscale:
                 fb_kwargs['norm'] = self.axes_c.get_norm(i=i) if self.axes_c is not None else None
                 fb_kwargs['cmap'] = self.axes_c.cmap if self.axes_c is not None else None

diff --git a/phoebe/frontend/bundle.py b/phoebe/frontend/bundle.py
@@ -906,7 +906,7 @@ def existing_value(param):
             b._attach_params(_setting.settings(**existing_values_settings), context='setting')
 
 
-            for compute in b.filter(context='compute').computes:
+            for compute in b.filter(context='compute', **_skip_filter_checks).computes:
                 logger.info("attempting to update compute='{}' to new version requirements".format(compute))
                 ps_compute = b.filter(context='compute', compute=compute, **_skip_filter_checks)
                 compute_kind = ps_compute.kind
@@ -920,14 +920,25 @@ def existing_value(param):
                     if param.component is None and param.dataset is None: continue
                     b.set_value(qualifier=param.qualifier, compute=compute, dataset=param.dataset, component=param.component, value=param.get_value(), **_skip_filter_checks)
 
-            for solver in b.filter(context='solver').solvers:
+            for solver in b.filter(context='solver', **_skip_filter_checks).solvers:
                 logger.info("attempting to update solver='{}' to new version requirements".format(solver))
                 ps_solver = b.filter(context='solver', solver=solver, **_skip_filter_checks)
                 solver_kind = ps_solver.kind
                 dict_solver = _ps_dict(ps_solver)
                 b.remove_solver(solver, context=['solver'])
                 b.add_solver(solver_kind, solver=solver, check_label=False, overwrite=True, **dict_solver)
 
+            for solution in b.filter(context='solution', kind='emcee', **_skip_filter_checks).solutions:
+                burnin = b.get_value(qualifier='burnin', solution=solution, **_skip_filter_checks)
+                niters = b.get_value(qualifier='niters', solution=solution, **_skip_filter_checks)
+                autocorr_times = b.get_value(qualifier='autocorr_times', solution=solution, **_skip_filter_checks)
+                nlags_default = 3 * np.max(autocorr_times)
+                if nlags_default > niters-burnin:
+                    nlags_default = niters-burnin
+
+                p = IntParameter(qualifier='nlags', value=nlags_default, limit=(1,1e6), description='number of lags to use when computing/plotting the autocorrelation function')
+                b._attach_params([p], context='solution', solution='round_1', kind='emcee')
+
 
         if conf_interactive_checks:
             logger.debug("re-enabling interactive_checks")

diff --git a/phoebe/helpers/__init__.py b/phoebe/helpers/__init__.py
@@ -1,5 +1,7 @@
 import numpy as np
 from copy import deepcopy as _deepcopy
+from scipy.special import erfinv as _erfinv
+from scipy.stats import norm as _norm
 
 from phoebe import u
 
@@ -155,6 +157,156 @@ def process_mcmc_chains(lnprobabilities, samples, burnin=0, thin=1, lnprob_cutof
 
     return lnprobabilities, samples
 
+def acf(ts, nlags=0, submean=False, normed=True):
+    """
+    Compute the autocorrelation function of any input array.
+
+    See also:
+    * <phoebe.helpers.process_acf>
+    * <phoebe.helpers.process_acf_from_solution>
+
+    Arguments
+    ------------
+    * `ts`
+    * `nlags`
+    * `submean`
+    * `normed`
+
+    Returns
+    -----------
+    * (array): autocorrelation function
+    """
+    nlags = len(ts) if nlags==0 else nlags
+    c0 = np.sum((ts-ts.mean())**2)/len(ts) if normed else 1.0
+    if submean:
+        acf = np.array([np.sum((ts[k:]-ts[k:].mean())*(ts[:len(ts)-k]-ts[:len(ts)-k].mean()))/c0/len(ts) for k in range(nlags)])
+    else:
+        acf = np.array([np.sum((ts[k:]-ts.mean())*(ts[:len(ts)-k]-ts.mean()))/c0/len(ts) for k in range(nlags)])
+
+    return acf
+
+def acf_ci(lents, p=0.05):
+    return np.sqrt(2)*_erfinv(1-p)/np.sqrt(lents)
+
+def acf_ci_bartlett(lents, acf, p=0.05):
+    vacf = np.ones_like(acf)/lents
+    vacf[0] = 0
+    vacf[1] = 1/lents
+    vacf[2:] *= 1+2*np.cumsum(acf[1:-1]**2)
+    return _norm.ppf(1-p/2) * np.sqrt(vacf)
+
+
+def process_acf(lnprobabilities, samples, nlags=0):
+    """
+    Compute the autocorrelation function from input lnprobabilities and samples.
+
+    To apply burnin, call <phoebe.helpers.process_mcmc_chains> first or use
+    <phoebe.helpers.process_acf_from_solution> instead.
+
+    See also:
+    * <phoebe.helpers.process_acf_from_solution>
+    * <phoebe.helpers.acf>
+    * <phoebe.helpers.acf_ci_bartlett>
+    * <phoebe.helpers.acf_ci>
+
+    Arguments
+    ---------------
+    * `lnprobabilities` (array): array of all lnprobabilites as returned by MCMC.
+        Should have shape: (`niters`, `nwalkers`).
+    * `samples` (array): array of all samples from the chains as returned by MCMC.
+        Should have shape: (`niters`, `nwalkers`, `nparams`).
+    * `nlags` (int, optional, default=0): number of lags to use.  Passed
+        directly to <phoebe.helpers.acf>.  If 0, will default to the number
+        of iterations in `lnprobabilities` and `samples`.
+
+    Returns
+    ---------------
+    * (list of arrays, list of arrays, float): output from <phoebe.helpers.acf>
+        for `lnprobabilities` and each parameter in `samples`, output from
+        <phoebe.helpers.acf_ci_bartlett> for `lnprobabilities_proc` and each
+        parameter in `samples`, output from <phoebe.helpers.acf_ci>.
+    """
+    niters, nwalkers, nparams = samples.shape
+
+    if nlags > niters:
+        raise ValueError("nlags must be <= number of remaining iterations (after burnin)")
+    if nlags == 1:
+        raise ValueError("nlags must be > 1")
+
+    acfs = [np.array([acf(lnprobabilities[:,iwalker], nlags=nlags) for iwalker in range(nwalkers)])]
+
+    for iparam in range(nparams):
+        acfs.append(np.array([acf(samples[:,iwalker,iparam], nlags=nlags) for iwalker in range(nwalkers)]))
+
+    ci_bartletts = [np.array([acf_ci_bartlett(niters, acfs[iparam][iwalker]) for iwalker in range(nwalkers)]) for iparam in range(len(acfs))]
+
+    # acfs list with length nparams+1 of arrays vs lag
+    # ci_bartletts with length nparams+1 of arrays vs lag
+    # acf_ci float
+    return acfs, ci_bartletts, acf_ci(niters)
+
+def process_acf_from_solution(b, solution, nlags=None, burnin=None, lnprob_cutoff=None, adopt_parameters=None):
+    """
+    Compute the autocorrelation function from an emcee solution.
+
+    See also:
+    * <phoebe.helpers.process_acf>
+    * <phoebe.helpers.acf>
+    * <phoebe.helpers.acf_ci_bartlett>
+    * <phoebe.helpers.acf_ci>
+
+    Arguments
+    --------------
+    * `b` (<phoebe.frontend.bundle.Bundle>): the Bundle
+    * `solution` (string): solution label
+    * `nlags` (int, optional, default=None): If not None, will override the
+        value in the solution.
+    * `burnin` (int, optional, default=None): If not None, will override
+        the value in the solution.
+    * `lnprob_cutoff` (float, optional, default=None): If not None, will override
+        the value in the solution.
+    * `adopt_parameters` (list, optional, default=None): If not None, will
+        override the value in the solution.
+
+    Returns
+    ------------
+    * (dictionary, dictionary, float) dictionary with keys being the parameter twigs
+        (or 'lnprobabilities') and values being the output of <phoebe.helpers.acf>
+        (array with shape (nwalkers, nlags)), dictionary with keys being the
+        parameter twigs (or 'lnprobabilities') and values being the output
+        of <phoebe.helpers.acf_ci_bartlett), and float being the
+        confidence intervale from <phoebe.helpers.acf_ci>.
+    """
+    solution_ps = b.get_solution(solution=solution, **_skip_filter_checks)
+    if solution_ps.kind != 'emcee':
+        raise TypeError("solution does not point to an emcee solution")
+
+    lnprobabilities, samples = process_mcmc_chains_from_solution(b,
+                                                                 solution,
+                                                                 burnin=burnin,
+                                                                 thin=1,
+                                                                 lnprob_cutoff=lnprob_cutoff,
+                                                                 adopt_parameters=adopt_parameters,
+                                                                 flatten=False)
+
+
+    nlags = solution_ps.get_value(qualifier='nlags', nlags=nlags, **_skip_filter_checks)
+
+    acfs, acf_bartletts, acf_ci = process_acf(lnprobabilities, samples, nlags)
+
+    if adopt_parameters is None:
+        adopt_parameters = solution_ps.get_value(qualifier='fitted_twigs', **_skip_filter_checks)
+
+    keys = ['lnprobabilities'] + list(adopt_parameters)
+    if len(keys) != len(acfs):
+        # TODO: if adopt_parameters has a wildcard, we may instead want to get the indices and manually retrieve the matching twigs/uniqueids
+        raise ValueError("adopt_parameters length does not match returned length of samples")
+
+    acf_dict = {k: acf for k,acf in zip(keys, acfs)}
+    acf_bartletts = {k: acf_bartlett for k,acf_bartlett in zip(keys, acf_bartletts)}
+    return acf_dict, acf_bartletts, acf_ci
+
+
 def get_emcee_object_from_solution(b, solution, adopt_parameters=None):
     """
     Expose the `EnsembleSampler` object in `emcee` from the solution <phoebe.parameters.ParameterSet>.

diff --git a/phoebe/parameters/parameters.py b/phoebe/parameters/parameters.py
@@ -264,7 +264,7 @@
                       'priors_combine', 'maxiter', 'maxfev', 'adaptive',
                       'xatol', 'fatol', 'bounds', 'bounds_combine', 'bounds_sigma',
                       'strategy', 'popsize', 'continue_from', 'init_from_combine',
-                      'burnin_factor', 'thin_factor', 'progress_every_niters',
+                      'burnin_factor', 'thin_factor', 'nlags_factor', 'progress_every_niters',
                       'nlive', 'maxcall', 'lc_geometry', 'rv_geometry', 'lc_periodogram', 'rv_periodogram', 'ebai',
                       'nelder_mead', 'differential_evolution', 'cg', 'powell', 'emcee', 'dynesty']
 
@@ -276,7 +276,7 @@
                       'fitted_uniqueids', 'fitted_twigs', 'fitted_values', 'fitted_units',
                       'adopt_parameters', 'adopt_distributions', 'distributions_convert', 'distributions_bins',
                       'failed_samples', 'lnprobabilities', 'acceptance_fractions',
-                      'autocorr_time', 'burnin', 'thin', 'lnprob_cutoff',
+                      'autocorr_time', 'burnin', 'thin', 'lnprob_cutoff', 'nlags',
                       'progress',
                       'period_factor', 'power',
                       'nlive', 'niter', 'ncall', 'eff', 'samples', 'samples_id', 'samples_it', 'samples_u',
@@ -5012,14 +5012,14 @@ def _phase_wrap(phase):
                     # but in order to handle the possibility of indexes in array parameters
                     # we need to find the matches in adopt_uniqueids which includes the index
                     if kwargs.get('y', None):
-                        y = kwargs.get('y')
+                        ys = kwargs.get('y')
                         if isinstance(ys, str):
                             ys = [ys]
 
                         # ys are currently assumed to twigs (with or without indices)
                         # we need a list of uniqueids, including indices when necessary
                         def _uniqueids_for_y(fitted_ps, twig=None):
-                            y, index = _extract_index_from_string(y)
+                            y, index = _extract_index_from_string(twig)
                             p = fitted_ps.get_parameter(twig=y, **_skip_filter_checks)
                             if index is None:
                                 if p.__class__.__name__ == 'FloatArrayParameter':
@@ -5082,6 +5082,85 @@ def _uniqueids_for_y(fitted_ps, twig=None):
 
                             # TODO: support for c = twig/lnprobabilities
                             return_ += [kwargs]
+                elif style in ['acf', 'acf_lnprobabilities']:
+                    nlags = ps.get_value(qualifier='nlags', nlags=kwargs.get('nlags', None), **_skip_filter_checks)
+
+                    kwargs['plot_package'] = 'autofig'
+                    if 'parameters' in kwargs.keys():
+                        raise ValueError("cannot currently plot {} while providing parameters.  Pass or set adopt_parameters to plot a subset of available parameters".format(style))
+                    kwargs['autofig_method'] = 'plot'
+                    kwargs.setdefault('marker', 'None')
+                    kwargs.setdefault('linestyle', 'solid')
+
+                    fitted_uniqueids = self._bundle.get_value(qualifier='fitted_uniqueids', context='solution', solution=ps.solution, **_skip_filter_checks)
+                    # fitted_twigs = self._bundle.get_value(qualifier='fitted_twigs', context='solution', solution=ps.solution, **_skip_filter_checks)
+                    fitted_units = self._bundle.get_value(qualifier='fitted_units', context='solution', solution=ps.solution, **_skip_filter_checks)
+                    fitted_ps = self._bundle.filter(uniqueid=list(fitted_uniqueids), **_skip_filter_checks)
+
+                    lnprobabilities_proc, samples_proc = _helpers.process_mcmc_chains(lnprobabilities, samples, burnin, 1, lnprob_cutoff, adopt_inds, flatten=False)
+                    if nlags == 0:
+                        nlags = samples_proc.shape[0]
+                    acf_proc, ci_b_proc, ci_proc = _helpers.process_acf(lnprobabilities_proc, samples_proc, nlags)
+                    # acf_proc [parameter, nwalkers, lag]
+                    # acf_b_proc [parameters, nwalkers, lag]
+                    # ci_proc (float)
+
+                    c = kwargs.get('c', None)
+                    kwargs_orig = _deepcopy(kwargs)
+
+                    plot_uniqueids = adopt_uniqueids if style=='acf' else [0]
+                    for plot_uniqueid in plot_uniqueids:
+                        if style=='acf':
+                            parameter_ind = list(adopt_uniqueids).index(plot_uniqueid)
+                            _, index = _extract_index_from_string(plot_uniqueid)
+                            yparam = fitted_ps.get_parameter(uniqueid=plot_uniqueid, **_skip_filter_checks)
+                        else:
+                            parameter_ind = -1  # to force the lnprobability in position 0
+
+                        nwalkers = acf_proc[parameter_ind+1].shape[0]
+                        for walker_ind in range(nwalkers):
+                            kwargs = _deepcopy(kwargs_orig)
+
+                            if c is None:
+                                pass
+                            else:
+                                # assume named color?
+                                kwargs['c'] = c
+
+                            # this needs to be the unflattened version
+                            acf_y = acf_proc[parameter_ind+1][walker_ind, :]
+                            x = np.arange(len(acf_y), dtype=float)
+                            kwargs['x'] = x
+                            kwargs['xlabel'] = 'lag (nlags={}, burnin={}, lnprob_cutoff={})'.format(nlags, burnin, lnprob_cutoff)
+
+                            kwargs['y'] = acf_y
+                            kwargs['ylabel'] = 'normalized autocorrelation ({})'.format('lnprobabilities' if style=='acf_lnprobabilities' else _corner_twig(yparam, index=index))
+
+                            ci_b = ci_b_proc[parameter_ind+1][walker_ind, :]
+                            ci_b_fb = {'plot_package': 'autofig',
+                                       'autofig_method': 'fill_between',
+                                       'color': 'k',
+                                       'alpha': 0.5 / nwalkers,
+                                       'x': x,
+                                       'y':np.array([-1*ci_b,ci_b]).T}
+
+                            return_ += [kwargs, ci_b_fb]
+
+                        axhline_u =  {'plot_package': 'autofig',
+                                      'autofig_method': 'plot',
+                                      'color': 'k',
+                                      'linestyle': 'dashed',
+                                      'axhline': True,
+                                      'y': ci_proc}
+                        axhline_l =  {'plot_package': 'autofig',
+                                      'autofig_method': 'plot',
+                                      'color': 'k',
+                                      'linestyle': 'dashed',
+                                      'axhline': True,
+                                      'y': -ci_proc}
+
+                        return_ += [axhline_u, axhline_l]
+
                 else:
                     raise NotImplementedError()
 
@@ -5715,7 +5794,7 @@ def _plot_failed_samples(mplfig, failed_samples):
 
                 elif plot_package == 'autofig':
                     y = plot_kwargs.get('y', [])
-                    axvline = plot_kwargs.pop('axvline', False)
+                    axvline = plot_kwargs.pop('axvline', False) or plot_kwargs.pop('axhline', False)
                     if axvline or (isinstance(y, u.Quantity) and isinstance(y.value, float)) or (hasattr(y, 'value') and isinstance(y.value, float)):
                         pass
                     elif not len(y):