From 1d92e69b892946f05a6e372c6a672160c55fa626 Mon Sep 17 00:00:00 2001
From: Kyle Conroy
Date: Sat, 29 May 2021 21:11:42 -0400
Subject: [PATCH] emcee: expose and support plotting of autocorrelation function (#457)

* implements phoebe.helpers.process_acf, process_acf_from_solution, acf, acf_ci, acf_ci_bartlett
* implements style='acf' and 'acf_lnprobabilities' to plot from emcee solutions
---
 phoebe/dependencies/autofig/call.py     |  17 ++-
 phoebe/frontend/bundle.py               |  15 ++-
 phoebe/helpers/__init__.py              | 152 ++++++++++++++++++++++++
 phoebe/parameters/parameters.py         |  89 +++++++++++++-
 phoebe/parameters/solver/sampler.py     |   5 +-
 phoebe/solverbackends/solverbackends.py |   7 ++
 6 files changed, 275 insertions(+), 10 deletions(-)

diff --git a/phoebe/dependencies/autofig/call.py b/phoebe/dependencies/autofig/call.py
index 50c971b9f..da48db321 100644
--- a/phoebe/dependencies/autofig/call.py
+++ b/phoebe/dependencies/autofig/call.py
@@ -1471,6 +1471,8 @@ def __init__(self, x=None, y=None, c=None, i=None,
         cmap = kwargs.pop('colormap', cmap)
         self._c = CallDimensionC(self, c, None, cunit, clabel, cmap=cmap)
 
+        self.alpha = kwargs.pop('alpha', 0.6)
+
         self.linebreak = linebreak
 
         if x is None:
@@ -1579,6 +1581,19 @@ def get_cmap(self, cmapcycler=None):
 
         return cmap
 
+    @property
+    def alpha(self):
+        return self._alpha
+
+    @alpha.setter
+    def alpha(self, alpha):
+        if not isinstance(alpha, float):
+            raise TypeError("alpha must be of type float")
+        if alpha < 0 or alpha > 1:
+            raise ValueError("alpha must be between 0 and 1")
+
+        self._alpha = alpha
+
     @property
     def linebreak(self):
         if self._linebreak is None:
@@ -1710,7 +1725,7 @@ def draw(self, ax=None, i=None,
 
             fb_kwargs = {}
             fb_kwargs['color'] = color
-            fb_kwargs['alpha'] = 0.6  # TODO: make this an option
+            fb_kwargs['alpha'] = self.alpha  # defaults to 0.6
             if do_colorscale:
                 fb_kwargs['norm'] = self.axes_c.get_norm(i=i) if self.axes_c is not None else None
                 fb_kwargs['cmap'] = self.axes_c.cmap if self.axes_c is not None else None
diff --git a/phoebe/frontend/bundle.py b/phoebe/frontend/bundle.py
index a10b482c5..dd8df9ced 100644
--- a/phoebe/frontend/bundle.py
+++ b/phoebe/frontend/bundle.py
@@ -906,7 +906,7 @@ def existing_value(param):
 
         b._attach_params(_setting.settings(**existing_values_settings), context='setting')
 
-        for compute in b.filter(context='compute').computes:
+        for compute in b.filter(context='compute', **_skip_filter_checks).computes:
             logger.info("attempting to update compute='{}' to new version requirements".format(compute))
             ps_compute = b.filter(context='compute', compute=compute, **_skip_filter_checks)
             compute_kind = ps_compute.kind
@@ -920,7 +920,7 @@ def existing_value(param):
                 if param.component is None and param.dataset is None: continue
                 b.set_value(qualifier=param.qualifier, compute=compute, dataset=param.dataset, component=param.component, value=param.get_value(), **_skip_filter_checks)
 
-        for solver in b.filter(context='solver').solvers:
+        for solver in b.filter(context='solver', **_skip_filter_checks).solvers:
             logger.info("attempting to update solver='{}' to new version requirements".format(solver))
             ps_solver = b.filter(context='solver', solver=solver, **_skip_filter_checks)
             solver_kind = ps_solver.kind
@@ -928,6 +928,17 @@ def existing_value(param):
             b.remove_solver(solver, context=['solver'])
             b.add_solver(solver_kind, solver=solver, check_label=False, overwrite=True, **dict_solver)
 
+        for solution in b.filter(context='solution', kind='emcee', **_skip_filter_checks).solutions:
+            burnin = b.get_value(qualifier='burnin', solution=solution, **_skip_filter_checks)
+            niters = b.get_value(qualifier='niters', solution=solution, **_skip_filter_checks)
+            autocorr_times = b.get_value(qualifier='autocorr_times', solution=solution, **_skip_filter_checks)
+            nlags_default = int(3 * np.max(autocorr_times))
+            if nlags_default > niters-burnin:
+                nlags_default = niters-burnin
+
+            p = IntParameter(qualifier='nlags', value=nlags_default, limits=(0,1e6), description='number of lags to use when computing/plotting the autocorrelation function')
+            b._attach_params([p], context='solution', solution=solution, kind='emcee')
+
         if conf_interactive_checks:
             logger.debug("re-enabling interactive_checks")
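For migrated bundles, the block above seeds `nlags` from the stored autocorrelation-time estimates: three times the slowest chain's autocorrelation time, capped at the number of post-burnin iterations. A small illustration of that arithmetic with made-up numbers (nothing below comes from a real solution):

    import numpy as np

    autocorr_times = np.array([35.2, 41.7, 28.9])  # hypothetical per-parameter estimates
    niters, burnin = 500, 84

    nlags_default = int(3 * np.max(autocorr_times))      # 3x the slowest-decaying chain
    nlags_default = min(nlags_default, niters - burnin)  # never more lags than iterations left
    print(nlags_default)  # 125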
+ """ + niters, nwalkers, nparams = samples.shape + + if nlags > niters: + raise ValueError("nlags must be <= number of remaining iterations (after burnin)") + if nlags == 1: + raise ValueError("nlags must be > 1") + + acfs = [np.array([acf(lnprobabilities[:,iwalker], nlags=nlags) for iwalker in range(nwalkers)])] + + for iparam in range(nparams): + acfs.append(np.array([acf(samples[:,iwalker,iparam], nlags=nlags) for iwalker in range(nwalkers)])) + + ci_bartletts = [np.array([acf_ci_bartlett(niters, acfs[iparam][iwalker]) for iwalker in range(nwalkers)]) for iparam in range(len(acfs))] + + # acfs list with length nparams+1 of arrays vs lag + # ci_bartletts with length nparams+1 of arrays vs lag + # acf_ci float + return acfs, ci_bartletts, acf_ci(niters) + +def process_acf_from_solution(b, solution, nlags=None, burnin=None, lnprob_cutoff=None, adopt_parameters=None): + """ + Compute the autocorrelation function from an emcee solution. + + See also: + * + * + * + * + + Arguments + -------------- + * `b` (): the Bundle + * `solution` (string): solution label + * `nlags` (int, optional, default=None): If not None, will override the + value in the solution. + * `burnin` (int, optional, default=None): If not None, will override + the value in the solution. + * `lnprob_cutoff` (float, optional, default=None): If not None, will override + the value in the solution. + * `adopt_parameters` (list, optional, default=None): If not None, will + override the value in the solution. + + Returns + ------------ + * (dictionary, dictionary, float) dictionary with keys being the parameter twigs + (or 'lnprobabilities') and values being the output of + (array with shape (nwalkers, nlags)), dictionary with keys being the + parameter twigs (or 'lnprobabilities') and values being the output + of . + """ + solution_ps = b.get_solution(solution=solution, **_skip_filter_checks) + if solution_ps.kind != 'emcee': + raise TypeError("solution does not point to an emcee solution") + + lnprobabilities, samples = process_mcmc_chains_from_solution(b, + solution, + burnin=burnin, + thin=1, + lnprob_cutoff=lnprob_cutoff, + adopt_parameters=adopt_parameters, + flatten=False) + + + nlags = solution_ps.get_value(qualifier='nlags', nlags=nlags, **_skip_filter_checks) + + acfs, acf_bartletts, acf_ci = process_acf(lnprobabilities, samples, nlags) + + if adopt_parameters is None: + adopt_parameters = solution_ps.get_value(qualifier='fitted_twigs', **_skip_filter_checks) + + keys = ['lnprobabilities'] + list(adopt_parameters) + if len(keys) != len(acfs): + # TODO: if adopt_parameters has a wildcard, we may instead want to get the indices and manually retrieve the matching twigs/uniqueids + raise ValueError("adopt_parameters length does not match returned length of samples") + + acf_dict = {k: acf for k,acf in zip(keys, acfs)} + acf_bartletts = {k: acf_bartlett for k,acf_bartlett in zip(keys, acf_bartletts)} + return acf_dict, acf_bartletts, acf_ci + + def get_emcee_object_from_solution(b, solution, adopt_parameters=None): """ Expose the `EnsembleSampler` object in `emcee` from the solution . 
diff --git a/phoebe/parameters/parameters.py b/phoebe/parameters/parameters.py
index 4754cf699..483322fa6 100644
--- a/phoebe/parameters/parameters.py
+++ b/phoebe/parameters/parameters.py
@@ -264,7 +264,7 @@
                         'priors_combine', 'maxiter', 'maxfev', 'adaptive', 'xatol', 'fatol',
                         'bounds', 'bounds_combine', 'bounds_sigma',
                         'strategy', 'popsize', 'continue_from', 'init_from_combine',
-                        'burnin_factor', 'thin_factor', 'progress_every_niters',
+                        'burnin_factor', 'thin_factor', 'nlags_factor', 'progress_every_niters',
                         'nlive', 'maxcall',
                         'lc_geometry', 'rv_geometry', 'lc_periodogram', 'rv_periodogram', 'ebai',
                         'nelder_mead', 'differential_evolution', 'cg', 'powell', 'emcee', 'dynesty']
@@ -276,7 +276,7 @@
                         'fitted_uniqueids', 'fitted_twigs', 'fitted_values', 'fitted_units',
                         'adopt_parameters', 'adopt_distributions', 'distributions_convert', 'distributions_bins',
                         'failed_samples', 'lnprobabilities', 'acceptance_fractions',
-                        'autocorr_time', 'burnin', 'thin', 'lnprob_cutoff',
+                        'autocorr_time', 'burnin', 'thin', 'lnprob_cutoff', 'nlags',
                         'progress',
                        'period_factor', 'power',
                         'nlive', 'niter', 'ncall', 'eff', 'samples', 'samples_id', 'samples_it', 'samples_u',
@@ -5012,14 +5012,14 @@ def _phase_wrap(phase):
         # but in order to handle the possibility of indexes in array parameters
         # we need to find the matches in adopt_uniqueids which includes the index
         if kwargs.get('y', None):
-            y = kwargs.get('y')
+            ys = kwargs.get('y')
             if isinstance(ys, str):
                 ys = [ys]
 
             # ys are currently assumed to twigs (with or without indices)
            # we need a list of uniqueids, including indices when necessary
            def _uniqueids_for_y(fitted_ps, twig=None):
-                y, index = _extract_index_from_string(y)
+                y, index = _extract_index_from_string(twig)
                 p = fitted_ps.get_parameter(twig=y, **_skip_filter_checks)
                 if index is None:
                     if p.__class__.__name__ == 'FloatArrayParameter':
@@ -5082,6 +5082,85 @@ def _uniqueids_for_y(fitted_ps, twig=None):
                     # TODO: support for c = twig/lnprobabilities
                     return_ += [kwargs]
 
+                elif style in ['acf', 'acf_lnprobabilities']:
+                    nlags = ps.get_value(qualifier='nlags', nlags=kwargs.get('nlags', None), **_skip_filter_checks)
+
+                    kwargs['plot_package'] = 'autofig'
+                    if 'parameters' in kwargs.keys():
+                        raise ValueError("cannot currently plot {} while providing parameters.  Pass or set adopt_parameters to plot a subset of available parameters".format(style))
+                    kwargs['autofig_method'] = 'plot'
+                    kwargs.setdefault('marker', 'None')
+                    kwargs.setdefault('linestyle', 'solid')
+
+                    fitted_uniqueids = self._bundle.get_value(qualifier='fitted_uniqueids', context='solution', solution=ps.solution, **_skip_filter_checks)
+                    # fitted_twigs = self._bundle.get_value(qualifier='fitted_twigs', context='solution', solution=ps.solution, **_skip_filter_checks)
+                    fitted_units = self._bundle.get_value(qualifier='fitted_units', context='solution', solution=ps.solution, **_skip_filter_checks)
+                    fitted_ps = self._bundle.filter(uniqueid=list(fitted_uniqueids), **_skip_filter_checks)
+
+                    lnprobabilities_proc, samples_proc = _helpers.process_mcmc_chains(lnprobabilities, samples, burnin, 1, lnprob_cutoff, adopt_inds, flatten=False)
+                    if nlags == 0:
+                        nlags = samples_proc.shape[0]
+                    acf_proc, ci_b_proc, ci_proc = _helpers.process_acf(lnprobabilities_proc, samples_proc, nlags)
+                    # acf_proc [parameter, nwalkers, lag]
+                    # ci_b_proc [parameter, nwalkers, lag]
+                    # ci_proc (float)
+
+                    c = kwargs.get('c', None)
+                    kwargs_orig = _deepcopy(kwargs)
+
+                    plot_uniqueids = adopt_uniqueids if style=='acf' else [0]
+                    for plot_uniqueid in plot_uniqueids:
+                        if style=='acf':
+                            parameter_ind = list(adopt_uniqueids).index(plot_uniqueid)
+                            _, index = _extract_index_from_string(plot_uniqueid)
+                            yparam = fitted_ps.get_parameter(uniqueid=plot_uniqueid, **_skip_filter_checks)
+                        else:
+                            parameter_ind = -1  # to force the lnprobability in position 0
+
+                        nwalkers = acf_proc[parameter_ind+1].shape[0]
+                        for walker_ind in range(nwalkers):
+                            kwargs = _deepcopy(kwargs_orig)
+
+                            if c is None:
+                                pass
+                            else:
+                                # assume named color?
+                                kwargs['c'] = c
+
+                            # this needs to be the unflattened version
+                            acf_y = acf_proc[parameter_ind+1][walker_ind, :]
+                            x = np.arange(len(acf_y), dtype=float)
+                            kwargs['x'] = x
+                            kwargs['xlabel'] = 'lag (nlags={}, burnin={}, lnprob_cutoff={})'.format(nlags, burnin, lnprob_cutoff)
+
+                            kwargs['y'] = acf_y
+                            kwargs['ylabel'] = 'normalized autocorrelation ({})'.format('lnprobabilities' if style=='acf_lnprobabilities' else _corner_twig(yparam, index=index))
+
+                            ci_b = ci_b_proc[parameter_ind+1][walker_ind, :]
+                            ci_b_fb = {'plot_package': 'autofig',
+                                       'autofig_method': 'fill_between',
+                                       'color': 'k',
+                                       'alpha': 0.5 / nwalkers,
+                                       'x': x,
+                                       'y': np.array([-1*ci_b, ci_b]).T}
+
+                            return_ += [kwargs, ci_b_fb]
+
+                        axhline_u = {'plot_package': 'autofig',
+                                     'autofig_method': 'plot',
+                                     'color': 'k',
+                                     'linestyle': 'dashed',
+                                     'axhline': True,
+                                     'y': ci_proc}
+                        axhline_l = {'plot_package': 'autofig',
+                                     'autofig_method': 'plot',
+                                     'color': 'k',
+                                     'linestyle': 'dashed',
+                                     'axhline': True,
+                                     'y': -ci_proc}
+
+                        return_ += [axhline_u, axhline_l]
+
                 else:
                     raise NotImplementedError()
@@ -5715,7 +5794,7 @@ def _plot_failed_samples(mplfig, failed_samples):
 
         elif plot_package == 'autofig':
             y = plot_kwargs.get('y', [])
-            axvline = plot_kwargs.pop('axvline', False)
+            axvline = plot_kwargs.pop('axvline', False) or plot_kwargs.pop('axhline', False)
             if axvline or (isinstance(y, u.Quantity) and isinstance(y.value, float)) or (hasattr(y, 'value') and isinstance(y.value, float)):
                 pass
             elif not len(y):
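The style='acf' / 'acf_lnprobabilities' branch above is driven through the usual solution-plotting call. A hedged usage sketch, assuming a bundle `b` that already contains a completed emcee solution labeled 'emcee_solution' (the label is hypothetical) and that the `nlags` keyword is forwarded through `b.plot` into the get_value call above:

    # autocorrelation of each sampled parameter, one line per walker
    b.plot(solution='emcee_solution', style='acf', show=True)

    # autocorrelation of the lnprobabilities chain only, with a custom lag cutoff
    b.plot(solution='emcee_solution', style='acf_lnprobabilities', nlags=200, show=True)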
diff --git a/phoebe/parameters/solver/sampler.py b/phoebe/parameters/solver/sampler.py
index 87173db2a..91079f4f7 100644
--- a/phoebe/parameters/solver/sampler.py
+++ b/phoebe/parameters/solver/sampler.py
@@ -143,8 +143,9 @@ def emcee(**kwargs):
     params += [IntParameter(visible_if='continue_from:None', qualifier='nwalkers', value=kwargs.get('nwalkers', 16), limits=(1,1e5), description='Number of walkers')]
     params += [IntParameter(qualifier='niters', value=kwargs.get('niters', 100), limits=(1,1e12), description='Number of iterations')]
 
-    params += [FloatParameter(qualifier='burnin_factor', value=kwargs.get('burnin_factor', 2), default_unit=u.dimensionless_unscaled, limits=(1, 1000), description='factor of max(autocorr_times) to apply for burnin (burnin not applied until adopting the solution)')]
-    params += [FloatParameter(qualifier='thin_factor', value=kwargs.get('thin_factor', 0.5), default_unit=u.dimensionless_unscaled, limits=(0.001, 1000), description='factor of min(autocorr_times) to apply for thinning (thinning not applied until adopting the solution)')]
+    params += [FloatParameter(qualifier='burnin_factor', value=kwargs.get('burnin_factor', 2), default_unit=u.dimensionless_unscaled, limits=(1, 1000), description='factor of max(autocorr_times) to apply for the default solution burnin (burnin not applied until adopting the solution)')]
+    params += [FloatParameter(qualifier='thin_factor', value=kwargs.get('thin_factor', 0.5), default_unit=u.dimensionless_unscaled, limits=(0.001, 1000), description='factor of min(autocorr_times) to apply for the default solution thinning (thinning not applied until adopting the solution)')]
+    params += [FloatParameter(qualifier='nlags_factor', value=kwargs.get('nlags_factor', 3), default_unit=u.dimensionless_unscaled, limits=(0, 1000), description='factor of max(autocorr_times) to apply for the default solution nlags.  nlags is only applied when accessing/plotting the autocorrelation function and will not default to larger than niters-burnin.  If 0, nlags will default to 0, which in turn falls back to nlags=niters-burnin.')]
 
     params += [IntParameter(qualifier='progress_every_niters', value=kwargs.get('progress_every_niters', 0), limits=(0,1e6), description='save the progress of the solution every n iterations.  The solution can only be recovered from an early termination by loading the bundle from a saved file and then calling b.import_solution(filename).  The filename of the saved file will default to solution.ps.progress within run_solver, or the output filename provided to export_solver suffixed with .progress.  If using detach=True within run_solver, attach job will load the progress and allow re-attaching until the job is completed.  If 0 will not save and will only return after completion.')]
diff --git a/phoebe/solverbackends/solverbackends.py b/phoebe/solverbackends/solverbackends.py
index 7fcae6495..d9d34d59c 100644
--- a/phoebe/solverbackends/solverbackends.py
+++ b/phoebe/solverbackends/solverbackends.py
@@ -1179,6 +1179,7 @@ def _get_packet_and_solution(self, b, solver, **kwargs):
         solution_params += [_parameters.ArrayParameter(qualifier='autocorr_times', value=[], readonly=True, description='measured autocorrelation time with shape (len(fitted_twigs)) before applying burnin/thin.  To access with a custom burnin/thin, see phoebe.helpers.get_emcee_object_from_solution')]
         solution_params += [_parameters.IntParameter(qualifier='burnin', value=0, limits=(0,1e6), description='burnin to use when adopting/plotting the solution')]
         solution_params += [_parameters.IntParameter(qualifier='thin', value=1, limits=(1,1e6), description='thin to use when adopting/plotting the solution')]
+        solution_params += [_parameters.IntParameter(qualifier='nlags', value=0, limits=(0,1e6), description='number of lags to use when computing/plotting the autocorrelation function.  If 0, will default to niters-burnin.')]
         solution_params += [_parameters.FloatParameter(qualifier='lnprob_cutoff', value=-np.inf, default_unit=u.dimensionless_unscaled, description='lower limit cuttoff on lnproabilities to use when adopting/plotting the solution')]
         solution_params += [_parameters.FloatParameter(qualifier='progress', value=0, limits=(0,100), default_unit=u.dimensionless_unscaled, advanced=True, readonly=True, description='percentage of requested iterations completed')]
@@ -1208,6 +1209,7 @@ def _get_packetlist():
                    {'qualifier': 'autocorr_times', 'value': autocorr_times},
                    {'qualifier': 'burnin', 'value': burnin},
                    {'qualifier': 'thin', 'value': thin},
+                   {'qualifier': 'nlags', 'value': nlags},
                    {'qualifier': 'progress', 'value': progress}]
 
         if expose_failed:
@@ -1296,6 +1298,7 @@ def mpi_failed_samples_callback(result):
 
         burnin_factor = kwargs.get('burnin_factor')
         thin_factor = kwargs.get('thin_factor')
+        nlags_factor = kwargs.get('nlags_factor')
 
         solution_ps = kwargs.get('solution_ps')
         solution = kwargs.get('solution')
@@ -1453,9 +1456,13 @@ def mpi_failed_samples_callback(result):
                 thin = int(thin_factor * np.nanmin(autocorr_times))
                 if thin==0:
                     thin = 1
+                nlags = int(nlags_factor * np.nanmax(autocorr_times))
+                if nlags > sampler.iteration - burnin:
+                    nlags = sampler.iteration - burnin
             else:
                 burnin = 0
                 thin = 1
+                nlags = 0
 
             if progress_every_niters > 0:
                 logger.info("emcee: saving output from iteration {}".format(sampler.iteration))
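Taken together with the sampler parameter above, the intended flow is: `nlags_factor` on the solver sets the default `nlags` stored in the solution (never larger than niters-burnin), and the stored value can be overridden afterwards without re-running the sampler. A sketch of that workflow, assuming hypothetical 'emcee_solver' and 'emcee_solution' labels in an existing bundle `b`:

    # before run_solver: scale the default number of lags written into the solution
    b.set_value(qualifier='nlags_factor', solver='emcee_solver', value=5)

    # after run_solver: tweak the per-solution value and re-plot the ACF
    b.set_value(qualifier='nlags', solution='emcee_solution', value=150)
    b.plot(solution='emcee_solution', style='acf', show=True)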