Skip to content

Commit

Permalink
emcee: expose and support plotting of autocorrelation function (#457)
Browse files Browse the repository at this point in the history
* implements phoebe.helpers.process_acf, process_acf_from_solution, acf, acf_ci, acf_ci_bartlett
* implement style='acf' and 'acf_lnprobabilities' to plot from emcee solutions
  • Loading branch information
kecnry authored May 30, 2021
1 parent 6d56052 commit 1d92e69
Show file tree
Hide file tree
Showing 6 changed files with 275 additions and 10 deletions.
17 changes: 16 additions & 1 deletion phoebe/dependencies/autofig/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -1471,6 +1471,8 @@ def __init__(self, x=None, y=None, c=None, i=None,
cmap = kwargs.pop('colormap', cmap)
self._c = CallDimensionC(self, c, None, cunit, clabel, cmap=cmap)

self.alpha = kwargs.pop('alpha', 0.6)

self.linebreak = linebreak

if x is None:
Expand Down Expand Up @@ -1579,6 +1581,19 @@ def get_cmap(self, cmapcycler=None):

return cmap

@property
def alpha(self):
return self._alpha

@alpha.setter
def alpha(self, alpha):
if not isinstance(alpha, float):
raise TypeError("alpha must be of type float")
if alpha < 0 or alpha > 1:
raise ValueError("alpha must be between 0 and 1")

self._alpha = alpha

@property
def linebreak(self):
if self._linebreak is None:
Expand Down Expand Up @@ -1710,7 +1725,7 @@ def draw(self, ax=None, i=None,

fb_kwargs = {}
fb_kwargs['color'] = color
fb_kwargs['alpha'] = 0.6 # TODO: make this an option
fb_kwargs['alpha'] = self.alpha # defaults to 0.6
if do_colorscale:
fb_kwargs['norm'] = self.axes_c.get_norm(i=i) if self.axes_c is not None else None
fb_kwargs['cmap'] = self.axes_c.cmap if self.axes_c is not None else None
Expand Down
15 changes: 13 additions & 2 deletions phoebe/frontend/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,7 @@ def existing_value(param):
b._attach_params(_setting.settings(**existing_values_settings), context='setting')


for compute in b.filter(context='compute').computes:
for compute in b.filter(context='compute', **_skip_filter_checks).computes:
logger.info("attempting to update compute='{}' to new version requirements".format(compute))
ps_compute = b.filter(context='compute', compute=compute, **_skip_filter_checks)
compute_kind = ps_compute.kind
Expand All @@ -920,14 +920,25 @@ def existing_value(param):
if param.component is None and param.dataset is None: continue
b.set_value(qualifier=param.qualifier, compute=compute, dataset=param.dataset, component=param.component, value=param.get_value(), **_skip_filter_checks)

for solver in b.filter(context='solver').solvers:
for solver in b.filter(context='solver', **_skip_filter_checks).solvers:
logger.info("attempting to update solver='{}' to new version requirements".format(solver))
ps_solver = b.filter(context='solver', solver=solver, **_skip_filter_checks)
solver_kind = ps_solver.kind
dict_solver = _ps_dict(ps_solver)
b.remove_solver(solver, context=['solver'])
b.add_solver(solver_kind, solver=solver, check_label=False, overwrite=True, **dict_solver)

for solution in b.filter(context='solution', kind='emcee', **_skip_filter_checks).solutions:
burnin = b.get_value(qualifier='burnin', solution=solution, **_skip_filter_checks)
niters = b.get_value(qualifier='niters', solution=solution, **_skip_filter_checks)
autocorr_times = b.get_value(qualifier='autocorr_times', solution=solution, **_skip_filter_checks)
nlags_default = 3 * np.max(autocorr_times)
if nlags_default > niters-burnin:
nlags_default = niters-burnin

p = IntParameter(qualifier='nlags', value=nlags_default, limit=(1,1e6), description='number of lags to use when computing/plotting the autocorrelation function')
b._attach_params([p], context='solution', solution='round_1', kind='emcee')


if conf_interactive_checks:
logger.debug("re-enabling interactive_checks")
Expand Down
152 changes: 152 additions & 0 deletions phoebe/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import numpy as np
from copy import deepcopy as _deepcopy
from scipy.special import erfinv as _erfinv
from scipy.stats import norm as _norm

from phoebe import u

Expand Down Expand Up @@ -155,6 +157,156 @@ def process_mcmc_chains(lnprobabilities, samples, burnin=0, thin=1, lnprob_cutof

return lnprobabilities, samples

def acf(ts, nlags=0, submean=False, normed=True):
"""
Compute the autocorrelation function of any input array.
See also:
* <phoebe.helpers.process_acf>
* <phoebe.helpers.process_acf_from_solution>
Arguments
------------
* `ts`
* `nlags`
* `submean`
* `normed`
Returns
-----------
* (array): autocorrelation function
"""
nlags = len(ts) if nlags==0 else nlags
c0 = np.sum((ts-ts.mean())**2)/len(ts) if normed else 1.0
if submean:
acf = np.array([np.sum((ts[k:]-ts[k:].mean())*(ts[:len(ts)-k]-ts[:len(ts)-k].mean()))/c0/len(ts) for k in range(nlags)])
else:
acf = np.array([np.sum((ts[k:]-ts.mean())*(ts[:len(ts)-k]-ts.mean()))/c0/len(ts) for k in range(nlags)])

return acf

def acf_ci(lents, p=0.05):
return np.sqrt(2)*_erfinv(1-p)/np.sqrt(lents)

def acf_ci_bartlett(lents, acf, p=0.05):
vacf = np.ones_like(acf)/lents
vacf[0] = 0
vacf[1] = 1/lents
vacf[2:] *= 1+2*np.cumsum(acf[1:-1]**2)
return _norm.ppf(1-p/2) * np.sqrt(vacf)


def process_acf(lnprobabilities, samples, nlags=0):
"""
Compute the autocorrelation function from input lnprobabilities and samples.
To apply burnin, call <phoebe.helpers.process_mcmc_chains> first or use
<phoebe.helpers.process_acf_from_solution> instead.
See also:
* <phoebe.helpers.process_acf_from_solution>
* <phoebe.helpers.acf>
* <phoebe.helpers.acf_ci_bartlett>
* <phoebe.helpers.acf_ci>
Arguments
---------------
* `lnprobabilities` (array): array of all lnprobabilites as returned by MCMC.
Should have shape: (`niters`, `nwalkers`).
* `samples` (array): array of all samples from the chains as returned by MCMC.
Should have shape: (`niters`, `nwalkers`, `nparams`).
* `nlags` (int, optional, default=0): number of lags to use. Passed
directly to <phoebe.helpers.acf>. If 0, will default to the number
of iterations in `lnprobabilities` and `samples`.
Returns
---------------
* (list of arrays, list of arrays, float): output from <phoebe.helpers.acf>
for `lnprobabilities` and each parameter in `samples`, output from
<phoebe.helpers.acf_ci_bartlett> for `lnprobabilities_proc` and each
parameter in `samples`, output from <phoebe.helpers.acf_ci>.
"""
niters, nwalkers, nparams = samples.shape

if nlags > niters:
raise ValueError("nlags must be <= number of remaining iterations (after burnin)")
if nlags == 1:
raise ValueError("nlags must be > 1")

acfs = [np.array([acf(lnprobabilities[:,iwalker], nlags=nlags) for iwalker in range(nwalkers)])]

for iparam in range(nparams):
acfs.append(np.array([acf(samples[:,iwalker,iparam], nlags=nlags) for iwalker in range(nwalkers)]))

ci_bartletts = [np.array([acf_ci_bartlett(niters, acfs[iparam][iwalker]) for iwalker in range(nwalkers)]) for iparam in range(len(acfs))]

# acfs list with length nparams+1 of arrays vs lag
# ci_bartletts with length nparams+1 of arrays vs lag
# acf_ci float
return acfs, ci_bartletts, acf_ci(niters)

def process_acf_from_solution(b, solution, nlags=None, burnin=None, lnprob_cutoff=None, adopt_parameters=None):
"""
Compute the autocorrelation function from an emcee solution.
See also:
* <phoebe.helpers.process_acf>
* <phoebe.helpers.acf>
* <phoebe.helpers.acf_ci_bartlett>
* <phoebe.helpers.acf_ci>
Arguments
--------------
* `b` (<phoebe.frontend.bundle.Bundle>): the Bundle
* `solution` (string): solution label
* `nlags` (int, optional, default=None): If not None, will override the
value in the solution.
* `burnin` (int, optional, default=None): If not None, will override
the value in the solution.
* `lnprob_cutoff` (float, optional, default=None): If not None, will override
the value in the solution.
* `adopt_parameters` (list, optional, default=None): If not None, will
override the value in the solution.
Returns
------------
* (dictionary, dictionary, float) dictionary with keys being the parameter twigs
(or 'lnprobabilities') and values being the output of <phoebe.helpers.acf>
(array with shape (nwalkers, nlags)), dictionary with keys being the
parameter twigs (or 'lnprobabilities') and values being the output
of <phoebe.helpers.acf_ci_bartlett), and float being the
confidence intervale from <phoebe.helpers.acf_ci>.
"""
solution_ps = b.get_solution(solution=solution, **_skip_filter_checks)
if solution_ps.kind != 'emcee':
raise TypeError("solution does not point to an emcee solution")

lnprobabilities, samples = process_mcmc_chains_from_solution(b,
solution,
burnin=burnin,
thin=1,
lnprob_cutoff=lnprob_cutoff,
adopt_parameters=adopt_parameters,
flatten=False)


nlags = solution_ps.get_value(qualifier='nlags', nlags=nlags, **_skip_filter_checks)

acfs, acf_bartletts, acf_ci = process_acf(lnprobabilities, samples, nlags)

if adopt_parameters is None:
adopt_parameters = solution_ps.get_value(qualifier='fitted_twigs', **_skip_filter_checks)

keys = ['lnprobabilities'] + list(adopt_parameters)
if len(keys) != len(acfs):
# TODO: if adopt_parameters has a wildcard, we may instead want to get the indices and manually retrieve the matching twigs/uniqueids
raise ValueError("adopt_parameters length does not match returned length of samples")

acf_dict = {k: acf for k,acf in zip(keys, acfs)}
acf_bartletts = {k: acf_bartlett for k,acf_bartlett in zip(keys, acf_bartletts)}
return acf_dict, acf_bartletts, acf_ci


def get_emcee_object_from_solution(b, solution, adopt_parameters=None):
"""
Expose the `EnsembleSampler` object in `emcee` from the solution <phoebe.parameters.ParameterSet>.
Expand Down
89 changes: 84 additions & 5 deletions phoebe/parameters/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@
'priors_combine', 'maxiter', 'maxfev', 'adaptive',
'xatol', 'fatol', 'bounds', 'bounds_combine', 'bounds_sigma',
'strategy', 'popsize', 'continue_from', 'init_from_combine',
'burnin_factor', 'thin_factor', 'progress_every_niters',
'burnin_factor', 'thin_factor', 'nlags_factor', 'progress_every_niters',
'nlive', 'maxcall', 'lc_geometry', 'rv_geometry', 'lc_periodogram', 'rv_periodogram', 'ebai',
'nelder_mead', 'differential_evolution', 'cg', 'powell', 'emcee', 'dynesty']

Expand All @@ -276,7 +276,7 @@
'fitted_uniqueids', 'fitted_twigs', 'fitted_values', 'fitted_units',
'adopt_parameters', 'adopt_distributions', 'distributions_convert', 'distributions_bins',
'failed_samples', 'lnprobabilities', 'acceptance_fractions',
'autocorr_time', 'burnin', 'thin', 'lnprob_cutoff',
'autocorr_time', 'burnin', 'thin', 'lnprob_cutoff', 'nlags',
'progress',
'period_factor', 'power',
'nlive', 'niter', 'ncall', 'eff', 'samples', 'samples_id', 'samples_it', 'samples_u',
Expand Down Expand Up @@ -5012,14 +5012,14 @@ def _phase_wrap(phase):
# but in order to handle the possibility of indexes in array parameters
# we need to find the matches in adopt_uniqueids which includes the index
if kwargs.get('y', None):
y = kwargs.get('y')
ys = kwargs.get('y')
if isinstance(ys, str):
ys = [ys]

# ys are currently assumed to twigs (with or without indices)
# we need a list of uniqueids, including indices when necessary
def _uniqueids_for_y(fitted_ps, twig=None):
y, index = _extract_index_from_string(y)
y, index = _extract_index_from_string(twig)
p = fitted_ps.get_parameter(twig=y, **_skip_filter_checks)
if index is None:
if p.__class__.__name__ == 'FloatArrayParameter':
Expand Down Expand Up @@ -5082,6 +5082,85 @@ def _uniqueids_for_y(fitted_ps, twig=None):

# TODO: support for c = twig/lnprobabilities
return_ += [kwargs]
elif style in ['acf', 'acf_lnprobabilities']:
nlags = ps.get_value(qualifier='nlags', nlags=kwargs.get('nlags', None), **_skip_filter_checks)

kwargs['plot_package'] = 'autofig'
if 'parameters' in kwargs.keys():
raise ValueError("cannot currently plot {} while providing parameters. Pass or set adopt_parameters to plot a subset of available parameters".format(style))
kwargs['autofig_method'] = 'plot'
kwargs.setdefault('marker', 'None')
kwargs.setdefault('linestyle', 'solid')

fitted_uniqueids = self._bundle.get_value(qualifier='fitted_uniqueids', context='solution', solution=ps.solution, **_skip_filter_checks)
# fitted_twigs = self._bundle.get_value(qualifier='fitted_twigs', context='solution', solution=ps.solution, **_skip_filter_checks)
fitted_units = self._bundle.get_value(qualifier='fitted_units', context='solution', solution=ps.solution, **_skip_filter_checks)
fitted_ps = self._bundle.filter(uniqueid=list(fitted_uniqueids), **_skip_filter_checks)

lnprobabilities_proc, samples_proc = _helpers.process_mcmc_chains(lnprobabilities, samples, burnin, 1, lnprob_cutoff, adopt_inds, flatten=False)
if nlags == 0:
nlags = samples_proc.shape[0]
acf_proc, ci_b_proc, ci_proc = _helpers.process_acf(lnprobabilities_proc, samples_proc, nlags)
# acf_proc [parameter, nwalkers, lag]
# acf_b_proc [parameters, nwalkers, lag]
# ci_proc (float)

c = kwargs.get('c', None)
kwargs_orig = _deepcopy(kwargs)

plot_uniqueids = adopt_uniqueids if style=='acf' else [0]
for plot_uniqueid in plot_uniqueids:
if style=='acf':
parameter_ind = list(adopt_uniqueids).index(plot_uniqueid)
_, index = _extract_index_from_string(plot_uniqueid)
yparam = fitted_ps.get_parameter(uniqueid=plot_uniqueid, **_skip_filter_checks)
else:
parameter_ind = -1 # to force the lnprobability in position 0

nwalkers = acf_proc[parameter_ind+1].shape[0]
for walker_ind in range(nwalkers):
kwargs = _deepcopy(kwargs_orig)

if c is None:
pass
else:
# assume named color?
kwargs['c'] = c

# this needs to be the unflattened version
acf_y = acf_proc[parameter_ind+1][walker_ind, :]
x = np.arange(len(acf_y), dtype=float)
kwargs['x'] = x
kwargs['xlabel'] = 'lag (nlags={}, burnin={}, lnprob_cutoff={})'.format(nlags, burnin, lnprob_cutoff)

kwargs['y'] = acf_y
kwargs['ylabel'] = 'normalized autocorrelation ({})'.format('lnprobabilities' if style=='acf_lnprobabilities' else _corner_twig(yparam, index=index))

ci_b = ci_b_proc[parameter_ind+1][walker_ind, :]
ci_b_fb = {'plot_package': 'autofig',
'autofig_method': 'fill_between',
'color': 'k',
'alpha': 0.5 / nwalkers,
'x': x,
'y':np.array([-1*ci_b,ci_b]).T}

return_ += [kwargs, ci_b_fb]

axhline_u = {'plot_package': 'autofig',
'autofig_method': 'plot',
'color': 'k',
'linestyle': 'dashed',
'axhline': True,
'y': ci_proc}
axhline_l = {'plot_package': 'autofig',
'autofig_method': 'plot',
'color': 'k',
'linestyle': 'dashed',
'axhline': True,
'y': -ci_proc}

return_ += [axhline_u, axhline_l]

else:
raise NotImplementedError()

Expand Down Expand Up @@ -5715,7 +5794,7 @@ def _plot_failed_samples(mplfig, failed_samples):

elif plot_package == 'autofig':
y = plot_kwargs.get('y', [])
axvline = plot_kwargs.pop('axvline', False)
axvline = plot_kwargs.pop('axvline', False) or plot_kwargs.pop('axhline', False)
if axvline or (isinstance(y, u.Quantity) and isinstance(y.value, float)) or (hasattr(y, 'value') and isinstance(y.value, float)):
pass
elif not len(y):
Expand Down
Loading

0 comments on commit 1d92e69

Please sign in to comment.