From 39bed4cefb7d13c46a208d973d8529514ac3e050 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Mon, 18 May 2020 10:31:41 -0300 Subject: [PATCH 1/3] replace hpd with hdi --- arviz/plots/__init__.py | 3 +- arviz/plots/backends/bokeh/__init__.py | 2 +- arviz/plots/backends/bokeh/forestplot.py | 2 +- .../backends/bokeh/{hpdplot.py => hdiplot.py} | 6 +- arviz/plots/backends/bokeh/loopitplot.py | 12 +- arviz/plots/backends/bokeh/posteriorplot.py | 14 +- arviz/plots/backends/matplotlib/__init__.py | 2 +- .../plots/backends/matplotlib/densityplot.py | 2 +- arviz/plots/backends/matplotlib/forestplot.py | 2 +- .../matplotlib/{hpdplot.py => hdiplot.py} | 8 +- arviz/plots/backends/matplotlib/loopitplot.py | 12 +- .../backends/matplotlib/posteriorplot.py | 20 +-- arviz/plots/densityplot.py | 13 +- arviz/plots/hpdplot.py | 136 ------------------ arviz/plots/loopitplot.py | 50 +++---- arviz/plots/posteriorplot.py | 6 +- arviz/plots/violinplot.py | 4 +- arviz/stats/stats.py | 2 +- arviz/tests/base_tests/test_plots_bokeh.py | 16 +-- .../tests/base_tests/test_plots_matplotlib.py | 16 +-- doc/api.rst | 4 +- .../{bokeh_plot_hpd.py => bokeh_plot_hdi.py} | 4 +- .../{mpl_plot_hpd.py => mpl_plot_hdi.py} | 4 +- 23 files changed, 103 insertions(+), 237 deletions(-) rename arviz/plots/backends/bokeh/{hpdplot.py => hdiplot.py} (92%) rename arviz/plots/backends/matplotlib/{hpdplot.py => hdiplot.py} (79%) delete mode 100644 arviz/plots/hpdplot.py rename examples/bokeh/{bokeh_plot_hpd.py => bokeh_plot_hdi.py} (84%) rename examples/matplotlib/{mpl_plot_hpd.py => mpl_plot_hdi.py} (80%) diff --git a/arviz/plots/__init__.py b/arviz/plots/__init__.py index 5cc2a6c087..62441edccd 100644 --- a/arviz/plots/__init__.py +++ b/arviz/plots/__init__.py @@ -7,7 +7,7 @@ from .energyplot import plot_energy from .essplot import plot_ess from .forestplot import plot_forest -from .hpdplot import plot_hpd +from .hdiplot import plot_hdi, plot_hpd from .jointplot import plot_joint from .kdeplot import plot_kde from 
.khatplot import plot_khat @@ -32,6 +32,7 @@ "plot_energy", "plot_ess", "plot_forest", + "plot_hdi", "plot_hpd", "plot_joint", "plot_kde", diff --git a/arviz/plots/backends/bokeh/__init__.py b/arviz/plots/backends/bokeh/__init__.py index 36a01fe950..b6c0049773 100644 --- a/arviz/plots/backends/bokeh/__init__.py +++ b/arviz/plots/backends/bokeh/__init__.py @@ -36,7 +36,7 @@ def backend_kwarg_defaults(*args, **kwargs): from .energyplot import plot_energy from .essplot import plot_ess from .forestplot import plot_forest -from .hpdplot import plot_hpd +from .hdiplot import plot_hdi from .jointplot import plot_joint from .kdeplot import plot_kde from .khatplot import plot_khat diff --git a/arviz/plots/backends/bokeh/forestplot.py b/arviz/plots/backends/bokeh/forestplot.py index 6432514afa..91819f6018 100644 --- a/arviz/plots/backends/bokeh/forestplot.py +++ b/arviz/plots/backends/bokeh/forestplot.py @@ -417,7 +417,7 @@ def forestplot(self, hdi_prob, quartiles, linewidth, markersize, ax, rope): x=values[mid], y=y, size=markersize * 0.75, fill_color=color, ) _title = Title() - _title.text = "{:.1%} hdi Interval".format(hdi_prob) + _title.text = "{:.1%} hdi".format(hdi_prob) ax.title = _title return ax diff --git a/arviz/plots/backends/bokeh/hpdplot.py b/arviz/plots/backends/bokeh/hdiplot.py similarity index 92% rename from arviz/plots/backends/bokeh/hpdplot.py rename to arviz/plots/backends/bokeh/hdiplot.py index a5a23dc8c4..cad0ab172d 100644 --- a/arviz/plots/backends/bokeh/hpdplot.py +++ b/arviz/plots/backends/bokeh/hdiplot.py @@ -1,4 +1,4 @@ -"""Bokeh hpdplot.""" +"""Bokeh hdiplot.""" from itertools import cycle import bokeh.plotting as bkp @@ -9,8 +9,8 @@ from .. 
import show_layout -def plot_hpd(ax, x_data, y_data, plot_kwargs, fill_kwargs, backend_kwargs, show): - """Bokeh hpd plot.""" +def plot_hdi(ax, x_data, y_data, plot_kwargs, fill_kwargs, backend_kwargs, show): + """Bokeh hdi plot.""" if backend_kwargs is None: backend_kwargs = {} diff --git a/arviz/plots/backends/bokeh/loopitplot.py b/arviz/plots/backends/bokeh/loopitplot.py index 78c9628561..11a00c8c3e 100644 --- a/arviz/plots/backends/bokeh/loopitplot.py +++ b/arviz/plots/backends/bokeh/loopitplot.py @@ -4,7 +4,7 @@ from . import backend_kwarg_defaults from .. import show_layout -from ...hpdplot import plot_hpd +from ...hdiplot import plot_hdi from ...kdeplot import _fast_kde @@ -19,10 +19,10 @@ def plot_loo_pit( p025, fill_kwargs, ecdf_fill, - use_hpd, + use_hdi, x_vals, unif_densities, - hpd_kwargs, + hdi_kwargs, n_unif, unif, plot_unif_kwargs, @@ -114,15 +114,15 @@ def plot_loo_pit( line_width=plot_unif_kwargs.get("linewidth", 1.0), ) else: - if use_hpd: - plot_hpd( + if use_hdi: + plot_hdi( x_vals, unif_densities, backend="bokeh", ax=ax, backend_kwargs={}, show=False, - **hpd_kwargs + **hdi_kwargs ) else: for idx in range(n_unif): diff --git a/arviz/plots/backends/bokeh/posteriorplot.py b/arviz/plots/backends/bokeh/posteriorplot.py index 17eb494bbb..c48d3a8c20 100644 --- a/arviz/plots/backends/bokeh/posteriorplot.py +++ b/arviz/plots/backends/bokeh/posteriorplot.py @@ -195,24 +195,24 @@ def display_point_estimate(max_data): ax.text(x=[point_value], y=[max_data * 0.8], text=[point_text], text_align="center") - def display_hpd(max_data): + def display_hdi(max_data): # np.ndarray with 2 entries, min and max # pylint: disable=line-too-long hdi_probs = hdi(values, hdi_prob=hdi_prob, multimodal=multimodal) # type: np.ndarray - for hpdi in np.atleast_2d(hdi_probs): + for hdi_i in np.atleast_2d(hdi_probs): ax.line( - hpdi, + hdi_i, (max_data * 0.02, max_data * 0.02), line_width=linewidth * 2, line_color="black", ) ax.text( - x=list(hpdi) + [(hpdi[0] + hpdi[1]) / 2], + 
x=list(hdi_i) + [(hdi_i[0] + hdi_i[1]) / 2], y=[max_data * 0.07, max_data * 0.07, max_data * 0.3], - text=list(map(str, map(lambda x: round_num(x, round_to), hpdi))) - + [format_as_percent(hdi_prob) + " HPD"], + text=list(map(str, map(lambda x: round_num(x, round_to), hdi_i))) + + [format_as_percent(hdi_prob) + " HDI"], text_align="center", ) @@ -254,7 +254,7 @@ def format_axes(): format_axes() max_data = hist.max() if hdi_prob != "hide": - display_hpd(max_data) + display_hdi(max_data) display_point_estimate(max_data) display_ref_val(max_data) display_rope(max_data) diff --git a/arviz/plots/backends/matplotlib/__init__.py b/arviz/plots/backends/matplotlib/__init__.py index c28c4f3023..8c3026c334 100644 --- a/arviz/plots/backends/matplotlib/__init__.py +++ b/arviz/plots/backends/matplotlib/__init__.py @@ -31,7 +31,7 @@ def backend_show(show): from .energyplot import plot_energy from .essplot import plot_ess from .forestplot import plot_forest -from .hpdplot import plot_hpd +from .hdiplot import plot_hdi from .jointplot import plot_joint from .kdeplot import plot_kde from .khatplot import plot_khat diff --git a/arviz/plots/backends/matplotlib/densityplot.py b/arviz/plots/backends/matplotlib/densityplot.py index 8f279d423a..acb4053cb6 100644 --- a/arviz/plots/backends/matplotlib/densityplot.py +++ b/arviz/plots/backends/matplotlib/densityplot.py @@ -122,7 +122,7 @@ def _d_helper( markersize : float Size of markers hdi_prob : float - hdi intervals. Defaults to 0.94 + Probability for the highest density interval. Defaults to 0.94 point_estimate : Optional[str] Plot point estimate per variable. Values should be 'mean', 'median', 'mode' or None. Defaults to 'auto' i.e. it falls back to default set in rcParams. 
diff --git a/arviz/plots/backends/matplotlib/forestplot.py b/arviz/plots/backends/matplotlib/forestplot.py index 1ba5519143..d16786497a 100644 --- a/arviz/plots/backends/matplotlib/forestplot.py +++ b/arviz/plots/backends/matplotlib/forestplot.py @@ -338,7 +338,7 @@ def forestplot( color=color, ) ax.tick_params(labelsize=xt_labelsize) - ax.set_title("{:.1%} hdi Interval".format(hdi_prob), fontsize=titlesize, wrap=True) + ax.set_title("{:.1%} hdi".format(hdi_prob), fontsize=titlesize, wrap=True) if rope is None or isinstance(rope, dict): return elif len(rope) == 2: diff --git a/arviz/plots/backends/matplotlib/hpdplot.py b/arviz/plots/backends/matplotlib/hdiplot.py similarity index 79% rename from arviz/plots/backends/matplotlib/hpdplot.py rename to arviz/plots/backends/matplotlib/hdiplot.py index 2293407708..db786ee4a2 100644 --- a/arviz/plots/backends/matplotlib/hpdplot.py +++ b/arviz/plots/backends/matplotlib/hdiplot.py @@ -1,16 +1,16 @@ -"""Matplotlib hpdplot.""" +"""Matplotlib hdiplot.""" import warnings import matplotlib.pyplot as plt from . import backend_show -def plot_hpd(ax, x_data, y_data, plot_kwargs, fill_kwargs, backend_kwargs, show): - """Matplotlib hpd plot.""" +def plot_hdi(ax, x_data, y_data, plot_kwargs, fill_kwargs, backend_kwargs, show): + """Matplotlib hdi plot.""" if backend_kwargs is not None: warnings.warn( ( - "Argument backend_kwargs has not effect in matplotlib.plot_hpd" + "Argument backend_kwargs has not effect in matplotlib.plot_hdi" "Supplied value won't be used" ) ) diff --git a/arviz/plots/backends/matplotlib/loopitplot.py b/arviz/plots/backends/matplotlib/loopitplot.py index f50fc908e8..d21070fe37 100644 --- a/arviz/plots/backends/matplotlib/loopitplot.py +++ b/arviz/plots/backends/matplotlib/loopitplot.py @@ -4,7 +4,7 @@ from . 
import backend_kwarg_defaults, backend_show from ....numeric_utils import _fast_kde -from ...hpdplot import plot_hpd +from ...hdiplot import plot_hdi def plot_loo_pit( @@ -18,10 +18,10 @@ def plot_loo_pit( p025, fill_kwargs, ecdf_fill, - use_hpd, + use_hdi, x_vals, unif_densities, - hpd_kwargs, + hdi_kwargs, n_unif, unif, plot_unif_kwargs, @@ -54,8 +54,8 @@ def plot_loo_pit( else: ax.plot(unif_ecdf, p975 - unif_ecdf, unif_ecdf, p025 - unif_ecdf, **plot_unif_kwargs) else: - if use_hpd: - plot_hpd(x_vals, unif_densities, **hpd_kwargs) + if use_hdi: + plot_hdi(x_vals, unif_densities, **hdi_kwargs) else: for idx in range(n_unif): unif_density, _, _ = _fast_kde(unif[idx, :], xmin=0, xmax=1) @@ -64,7 +64,7 @@ def plot_loo_pit( ax.tick_params(labelsize=xt_labelsize) if legend: - if not (use_hpd or (ecdf and ecdf_fill)): + if not (use_hdi or (ecdf and ecdf_fill)): label = "{:.3g}% credible interval".format(credible_interval) if ecdf else "Uniform" ax.plot([], label=label, **plot_unif_kwargs) ax.legend() diff --git a/arviz/plots/backends/matplotlib/posteriorplot.py b/arviz/plots/backends/matplotlib/posteriorplot.py index b8e21aa9d3..fa9ff17fec 100644 --- a/arviz/plots/backends/matplotlib/posteriorplot.py +++ b/arviz/plots/backends/matplotlib/posteriorplot.py @@ -195,37 +195,37 @@ def display_point_estimate(): horizontalalignment="center", ) - def display_hpd(): + def display_hdi(): # np.ndarray with 2 entries, min and max # pylint: disable=line-too-long hdi_probs = hdi(values, hdi_prob=hdi_prob, multimodal=multimodal) # type: np.ndarray - for hpdi in np.atleast_2d(hdi_probs): + for hdi_i in np.atleast_2d(hdi_probs): ax.plot( - hpdi, + hdi_i, (plot_height * 0.02, plot_height * 0.02), lw=linewidth * 2, color="k", solid_capstyle="butt", ) ax.text( - hpdi[0], + hdi_i[0], plot_height * 0.07, - round_num(hpdi[0], round_to), + round_num(hdi_i[0], round_to), size=ax_labelsize, horizontalalignment="center", ) ax.text( - hpdi[1], + hdi_i[1], plot_height * 0.07, - round_num(hpdi[1], 
round_to), + round_num(hdi_i[1], round_to), size=ax_labelsize, horizontalalignment="center", ) ax.text( - (hpdi[0] + hpdi[1]) / 2, + (hdi_i[0] + hdi_i[1]) / 2, plot_height * 0.3, - format_as_percent(hdi_prob) + " HPD", + format_as_percent(hdi_prob) + " HDI", size=ax_labelsize, horizontalalignment="center", ) @@ -270,7 +270,7 @@ def format_axes(): format_axes() if hdi_prob != "hide": - display_hpd() + display_hdi() display_point_estimate() display_ref_val() display_rope() diff --git a/arviz/plots/densityplot.py b/arviz/plots/densityplot.py index 10bce051e6..9e31de1a34 100644 --- a/arviz/plots/densityplot.py +++ b/arviz/plots/densityplot.py @@ -40,8 +40,8 @@ def plot_density( ): """Generate KDE plots for continuous variables and histograms for discrete ones. - Plots are truncated at their 100*(1-alpha)% hpd intervals. Plots are grouped per variable - and colors assigned to models. + Plots are truncated at their 100*(1-alpha)% highest density intervals. Plots are grouped per + variable and colors assigned to models. Parameters ---------- @@ -63,7 +63,8 @@ def plot_density( transform : callable Function to transform data (defaults to None i.e. the identity function) hdi_prob : float - hpd interval. Should be in the interval (0, 1]. Defaults to 0.94. + Probability for the highest density interval. Should be in the interval (0, 1]. + Defaults to 0.94. point_estimate : Optional[str] Plot point estimate per variable. Values should be 'mean', 'median', 'mode' or None. Defaults to 'auto' i.e. it falls back to default set in rcParams. @@ -75,8 +76,8 @@ def plot_density( outline : bool Use a line to draw KDEs and histograms. Default to True hdi_markers : str - A valid `matplotlib.markers` like 'v', used to indicate the limits of the hpd interval. - Defaults to empty string (no marker). + A valid `matplotlib.markers` like 'v', used to indicate the limits of the highest density + interval. Defaults to empty string (no marker). 
shade : Optional[float] Alpha blending value for the shaded area under the curve, between 0 (no shade) and 1 (opaque). Defaults to 0. @@ -132,7 +133,7 @@ def plot_density( >>> az.plot_density([centered, non_centered], var_names=["mu"], group="prior") - Specify hpd interval + Specify highest density interval .. plot:: :context: close-figs diff --git a/arviz/plots/hpdplot.py b/arviz/plots/hpdplot.py deleted file mode 100644 index b6ac0f6f67..0000000000 --- a/arviz/plots/hpdplot.py +++ /dev/null @@ -1,136 +0,0 @@ -"""Plot hpd intervals for regression data.""" -import numpy as np -from scipy.interpolate import griddata -from scipy.signal import savgol_filter - -from ..stats import hdi -from .plot_utils import get_plotting_function, matplotlib_kwarg_dealiaser -from ..rcparams import rcParams -from ..utils import credible_interval_warning - - -def plot_hpd( - x, - y, - hdi_prob=None, - color="C1", - circular=False, - smooth=True, - smooth_kwargs=None, - fill_kwargs=None, - plot_kwargs=None, - ax=None, - backend=None, - backend_kwargs=None, - show=None, - credible_interval=None, -): - r""" - Plot hdi intervals for regression data. - - Parameters - ---------- - x : array-like - Values to plot - y : array-like - values from which to compute the hpd. Assumed shape (chain, draw, \*shape). - hdi_prob : float, optional - HDI interval to plot. Defaults to 0.94. - color : str - Color used for the limits of the HPD interval and fill. Should be a valid matplotlib color - circular : bool, optional - Whether to compute the hpd taking into account `x` is a circular variable - (in the range [-np.pi, np.pi]) or not. Defaults to False (i.e non-circular variables). - smooth : boolean - If True the result will be smoothed by first computing a linear interpolation of the data - over a regular grid and then applying the Savitzky-Golay filter to the interpolated data. - Defaults to True. - smooth_kwargs : dict, optional - Additional keywords modifying the Savitzky-Golay filter. 
See Scipy's documentation for - details - fill_kwargs : dict - Keywords passed to `fill_between` (use fill_kwargs={'alpha': 0} to disable fill). - plot_kwargs : dict - Keywords passed to HPD limits - ax: axes, optional - Matplotlib axes or bokeh figures. - backend: str, optional - Select plotting backend {"matplotlib","bokeh"}. Default "matplotlib". - backend_kwargs: bool, optional - These are kwargs specific to the backend being used. For additional documentation - check the plotting method of the backend. - show : bool, optional - Call backend show function. - credible_interval: float, optional - deprecated: Please see hdi_prob - - Returns - ------- - axes : matplotlib axes or bokeh figures - """ - if credible_interval: - hdi_prob = credible_interval_warning(credible_interval, hdi_prob) - - plot_kwargs = matplotlib_kwarg_dealiaser(plot_kwargs, "plot") - plot_kwargs.setdefault("color", color) - plot_kwargs.setdefault("alpha", 0) - - fill_kwargs = matplotlib_kwarg_dealiaser(fill_kwargs, "hexbin") - fill_kwargs.setdefault("color", color) - fill_kwargs.setdefault("alpha", 0.5) - - x = np.asarray(x) - y = np.asarray(y) - - x_shape = x.shape - y_shape = y.shape - if y_shape[-len(x_shape) :] != x_shape: - msg = "Dimension mismatch for x: {} and y: {}." 
- msg += " y-dimensions should be (chain, draw, *x.shape) or" - msg += " (draw, *x.shape)" - raise TypeError(msg.format(x_shape, y_shape)) - - if len(y_shape[: -len(x_shape)]) > 1: - new_shape = tuple([-1] + list(x_shape)) - y = y.reshape(new_shape) - - if hdi_prob is None: - hdi_prob = rcParams["stats.hdi_prob"] - else: - if not 1 >= hdi_prob > 0: - raise ValueError("The value of hdi_prob should be in the interval (0, 1]") - - hdi_ = hdi(y, hdi_prob=hdi_prob, circular=circular, multimodal=False) - - if smooth: - if smooth_kwargs is None: - smooth_kwargs = {} - smooth_kwargs.setdefault("window_length", 55) - smooth_kwargs.setdefault("polyorder", 2) - x_data = np.linspace(x.min(), x.max(), 200) - x_data[0] = (x_data[0] + x_data[1]) / 2 - hpd_interp = griddata(x, hdi_, x_data) - y_data = savgol_filter(hpd_interp, axis=0, **smooth_kwargs) - else: - idx = np.argsort(x) - x_data = x[idx] - y_data = hdi_[idx] - - hpdplot_kwargs = dict( - ax=ax, - x_data=x_data, - y_data=y_data, - plot_kwargs=plot_kwargs, - fill_kwargs=fill_kwargs, - backend_kwargs=backend_kwargs, - show=show, - ) - - if backend is None: - backend = rcParams["plot.backend"] - backend = backend.lower() - - # TODO: Add backend kwargs - plot = get_plotting_function("plot_hpd", "hpdplot", backend) - ax = plot(**hpdplot_kwargs) - return ax diff --git a/arviz/plots/loopitplot.py b/arviz/plots/loopitplot.py index 4a1cf9e72e..e4cff48f6d 100644 --- a/arviz/plots/loopitplot.py +++ b/arviz/plots/loopitplot.py @@ -22,7 +22,7 @@ def plot_loo_pit( ecdf=False, ecdf_fill=True, n_unif=100, - use_hpd=False, + use_hdi=False, credible_interval=None, figsize=None, textsize=None, @@ -31,7 +31,7 @@ def plot_loo_pit( ax=None, plot_kwargs=None, plot_unif_kwargs=None, - hpd_kwargs=None, + hdi_kwargs=None, fill_kwargs=None, backend=None, backend_kwargs=None, @@ -64,10 +64,10 @@ def plot_loo_pit( border lines. n_unif : int, optional Number of datasets to simulate and overlay from the uniform distribution. 
- use_hpd : bool, optional - Use plot_hpd to fill between hpd values instead of overlaying the uniform distributions. + use_hdi : bool, optional + Use plot_hdi to fill between hdi values instead of overlaying the uniform distributions. credible_interval : float, optional - Credible interval of the hpd or of the ECDF theoretical credible interval + Credible interval of the hdi or of the ECDF theoretical credible interval figsize : figure size tuple, optional If None, size is (8 + numvars, 8 + numvars) textsize: int, optional @@ -85,8 +85,8 @@ def plot_loo_pit( plot_unif_kwargs : dict, optional Additional keywords passed to ax.plot for overlaid uniform distributions or for beta credible interval lines if ``ecdf=True`` - hpd_kwargs : dict, optional - Additional keywords passed to az.plot_hpd + hdi_kwargs : dict, optional + Additional keywords passed to az.plot_hdi fill_kwargs : dict, optional Additional kwargs passed to ax.fill_between backend: str, optional @@ -119,7 +119,7 @@ def plot_loo_pit( >>> idata = az.load_arviz_data("centered_eight") >>> az.plot_loo_pit(idata=idata, y="obs") - Fill the area containing the 94% credible interval of the difference between uniform + Fill the area containing the 94% highest density interval of the difference between uniform variables empirical CDF and the real uniform CDF. A LOO-PIT ECDF clearly outside of these theoretical boundaries indicates that the observations and the posterior predictive samples do not follow the same distribution. 
@@ -130,8 +130,8 @@ def plot_loo_pit( >>> az.plot_loo_pit(idata=idata, y="obs", ecdf=True) """ - if ecdf and use_hpd: - raise ValueError("use_hpd is incompatible with ecdf plot") + if ecdf and use_hdi: + raise ValueError("use_hdi is incompatible with ecdf plot") (figsize, _, _, xt_labelsize, linewidth, _) = _scale_fig_size(figsize, textsize, 1, 1) @@ -210,14 +210,14 @@ def plot_loo_pit( unif = np.random.uniform(size=(n_unif, loo_pit.size)) x_vals = np.linspace(0, 1, len(loo_pit_kde)) - if use_hpd: - if hpd_kwargs is None: - hpd_kwargs = {} - hpd_kwargs.setdefault("color", to_hex(hsv_to_rgb(light_color))) - hpd_fill_kwargs = hpd_kwargs.pop("fill_kwargs", {}) - hpd_fill_kwargs.setdefault("label", "Uniform HPD") - hpd_kwargs["fill_kwargs"] = hpd_fill_kwargs - hpd_kwargs["credible_interval"] = credible_interval + if use_hdi: + if hdi_kwargs is None: + hdi_kwargs = {} + hdi_kwargs.setdefault("color", to_hex(hsv_to_rgb(light_color))) + hdi_fill_kwargs = hdi_kwargs.pop("fill_kwargs", {}) + hdi_fill_kwargs.setdefault("label", "Uniform hdi") + hdi_kwargs["fill_kwargs"] = hdi_fill_kwargs + hdi_kwargs["credible_interval"] = credible_interval unif_densities = np.empty((n_unif, len(loo_pit_kde))) @@ -232,10 +232,10 @@ def plot_loo_pit( p025=p025, fill_kwargs=fill_kwargs, ecdf_fill=ecdf_fill, - use_hpd=use_hpd, + use_hdi=use_hdi, x_vals=x_vals, unif_densities=unif_densities, - hpd_kwargs=hpd_kwargs, + hdi_kwargs=hdi_kwargs, n_unif=n_unif, unif=unif, plot_unif_kwargs=plot_unif_kwargs, @@ -255,12 +255,12 @@ def plot_loo_pit( if backend == "bokeh": if ( - loo_pit_kwargs["hpd_kwargs"] is not None - and "fill_kwargs" in loo_pit_kwargs["hpd_kwargs"] - and loo_pit_kwargs["hpd_kwargs"]["fill_kwargs"] is not None - and "label" in loo_pit_kwargs["hpd_kwargs"]["fill_kwargs"] + loo_pit_kwargs["hdi_kwargs"] is not None + and "fill_kwargs" in loo_pit_kwargs["hdi_kwargs"] + and loo_pit_kwargs["hdi_kwargs"]["fill_kwargs"] is not None + and "label" in loo_pit_kwargs["hdi_kwargs"]["fill_kwargs"] 
): - loo_pit_kwargs["hpd_kwargs"]["fill_kwargs"].pop("label") + loo_pit_kwargs["hdi_kwargs"]["fill_kwargs"].pop("label") loo_pit_kwargs.pop("legend") loo_pit_kwargs.pop("xt_labelsize") loo_pit_kwargs.pop("credible_interval") diff --git a/arviz/plots/posteriorplot.py b/arviz/plots/posteriorplot.py index 6c6b0c4c98..7245df0740 100644 --- a/arviz/plots/posteriorplot.py +++ b/arviz/plots/posteriorplot.py @@ -64,8 +64,8 @@ def plot_posterior( Text size scaling factor for labels, titles and lines. If None it will be autoscaled based on figsize. hdi_prob: float, optional - Plots highest posterior density interval for chosen percentage of density. - Use 'hide' to hide the HPD interval. Defaults to 0.94. + Plots highest density interval for chosen percentage of density. + Use 'hide' to hide the highest density interval. Defaults to 0.94. multimodal: bool If true (default) it may compute more than one credible interval if the distribution is multimodal and the modes are well separated. @@ -184,7 +184,7 @@ def plot_posterior( >>> az.plot_posterior(data, var_names=['mu'], kind='hist') - Change size of HPD interval + Change size of highest density interval .. plot:: :context: close-figs diff --git a/arviz/plots/violinplot.py b/arviz/plots/violinplot.py index 0daad2087c..39ea40231d 100644 --- a/arviz/plots/violinplot.py +++ b/arviz/plots/violinplot.py @@ -72,8 +72,8 @@ def plot_violin( figsize: tuple Figure size. If None it will be defined automatically. textsize: int - Text size of the point_estimates, axis ticks, and HPD. If None it will be autoscaled - based on figsize. + Text size of the point_estimates, axis ticks, and highest density interval. If None it will + be autoscaled based on figsize. sharex: bool Defaults to True, violinplots share a common x-axis scale. 
sharey: bool diff --git a/arviz/stats/stats.py b/arviz/stats/stats.py index e87031e00c..a6c112d47f 100644 --- a/arviz/stats/stats.py +++ b/arviz/stats/stats.py @@ -387,7 +387,7 @@ def hdi( skipna: bool If true ignores nan values when computing the hdi interval. Defaults to false. group: str, optional - Specifies which InferenceData group should be used to calculate hpd. + Specifies which InferenceData group should be used to calculate hdi. Defaults to 'posterior' var_names: list, optional Names of variables to include in the hdi report. Prefix the variables by `~` diff --git a/arviz/tests/base_tests/test_plots_bokeh.py b/arviz/tests/base_tests/test_plots_bokeh.py index 3c4d8799de..cb98bf67d1 100644 --- a/arviz/tests/base_tests/test_plots_bokeh.py +++ b/arviz/tests/base_tests/test_plots_bokeh.py @@ -25,7 +25,7 @@ plot_energy, plot_ess, plot_forest, - plot_hpd, + plot_hdi, plot_joint, plot_kde, plot_khat, @@ -488,8 +488,8 @@ def test_plot_forest_bad(models, model_fits): {"smooth": False}, ], ) -def test_plot_hpd(models, data, kwargs): - axis = plot_hpd( +def test_plot_hdi(models, data, kwargs): + axis = plot_hdi( data["y"], models.model_1.posterior["theta"], backend="bokeh", show=False, **kwargs ) assert axis @@ -602,9 +602,9 @@ def test_plot_khat_bad_input(models): [ {}, {"n_unif": 50}, - {"use_hpd": True, "color": "gray"}, - {"use_hpd": True, "credible_interval": 0.68, "plot_kwargs": {"alpha": 0.9}}, - {"use_hpd": True, "hpd_kwargs": {"smooth": False}}, + {"use_hdi": True, "color": "gray"}, + {"use_hdi": True, "credible_interval": 0.68, "plot_kwargs": {"alpha": 0.9}}, + {"use_hdi": True, "hdi_kwargs": {"smooth": False}}, {"ecdf": True}, {"ecdf": True, "ecdf_fill": False, "plot_unif_kwargs": {"line_dash": "--"}}, {"ecdf": True, "credible_interval": 0.97, "fill_kwargs": {"color": "red"}}, @@ -616,10 +616,10 @@ def test_plot_loo_pit(models, kwargs): def test_plot_loo_pit_incompatible_args(models): - """Test error when both ecdf and use_hpd are True.""" + """Test error 
when both ecdf and use_hdi are True.""" with pytest.raises(ValueError, match="incompatible"): plot_loo_pit( - idata=models.model_1, y="y", ecdf=True, use_hpd=True, backend="bokeh", show=False + idata=models.model_1, y="y", ecdf=True, use_hdi=True, backend="bokeh", show=False ) diff --git a/arviz/tests/base_tests/test_plots_matplotlib.py b/arviz/tests/base_tests/test_plots_matplotlib.py index eebba74586..a01a62465a 100644 --- a/arviz/tests/base_tests/test_plots_matplotlib.py +++ b/arviz/tests/base_tests/test_plots_matplotlib.py @@ -34,7 +34,7 @@ plot_compare, plot_kde, plot_khat, - plot_hpd, + plot_hdi, plot_dist, plot_rank, plot_elpd, @@ -805,8 +805,8 @@ def test_plot_compare_no_ic(models): {"smooth": False}, ], ) -def test_plot_hpd(models, data, kwargs): - plot_hpd(data["y"], models.model_1.posterior["theta"], **kwargs) +def test_plot_hdi(models, data, kwargs): + plot_hdi(data["y"], models.model_1.posterior["theta"], **kwargs) @pytest.mark.parametrize("limits", [(-10.0, 10.0), (-5, 5), (None, None)]) @@ -1076,9 +1076,9 @@ def test_plot_ess_no_divergences(models): [ {}, {"n_unif": 50, "legend": False}, - {"use_hpd": True, "color": "gray"}, - {"use_hpd": True, "credible_interval": 0.68, "plot_kwargs": {"ls": "--"}}, - {"use_hpd": True, "hpd_kwargs": {"smooth": False}}, + {"use_hdi": True, "color": "gray"}, + {"use_hdi": True, "credible_interval": 0.68, "plot_kwargs": {"ls": "--"}}, + {"use_hdi": True, "hdi_kwargs": {"smooth": False}}, {"ecdf": True}, {"ecdf": True, "ecdf_fill": False, "plot_unif_kwargs": {"ls": "--"}}, {"ecdf": True, "credible_interval": 0.97, "fill_kwargs": {"hatch": "/"}}, @@ -1090,9 +1090,9 @@ def test_plot_loo_pit(models, kwargs): def test_plot_loo_pit_incompatible_args(models): - """Test error when both ecdf and use_hpd are True.""" + """Test error when both ecdf and use_hdi are True.""" with pytest.raises(ValueError, match="incompatible"): - plot_loo_pit(idata=models.model_1, y="y", ecdf=True, use_hpd=True) + plot_loo_pit(idata=models.model_1, 
y="y", ecdf=True, use_hdi=True) @pytest.mark.parametrize( diff --git a/doc/api.rst b/doc/api.rst index 35dffd86d3..d21abb9fdb 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -21,7 +21,7 @@ Plots plot_energy plot_ess plot_forest - plot_hpd + plot_hdi plot_joint plot_kde plot_khat @@ -45,7 +45,7 @@ Stats apply_test_function compare - hpd + hdi loo loo_pit psislw diff --git a/examples/bokeh/bokeh_plot_hpd.py b/examples/bokeh/bokeh_plot_hdi.py similarity index 84% rename from examples/bokeh/bokeh_plot_hpd.py rename to examples/bokeh/bokeh_plot_hdi.py index f1a0a54ab2..f6d222475e 100644 --- a/examples/bokeh/bokeh_plot_hpd.py +++ b/examples/bokeh/bokeh_plot_hdi.py @@ -1,5 +1,5 @@ """ -Plot HPD +Plot HDI ======== _thumb: .8, .8 @@ -13,7 +13,7 @@ y_data_rep = np.random.normal(y_data, 0.5, (200, 100)) x_data_sorted = np.sort(x_data) -ax = az.plot_hpd(x_data, y_data_rep, color="red", backend="bokeh", show=False) +ax = az.plot_hdi(x_data, y_data_rep, color="red", backend="bokeh", show=False) ax.line(x_data_sorted, 2 + x_data_sorted * 0.5, line_color="black", line_width=3) if az.rcParams["plot.bokeh.show"]: diff --git a/examples/matplotlib/mpl_plot_hpd.py b/examples/matplotlib/mpl_plot_hdi.py similarity index 80% rename from examples/matplotlib/mpl_plot_hpd.py rename to examples/matplotlib/mpl_plot_hdi.py index 5b4df904a3..249ba363cf 100644 --- a/examples/matplotlib/mpl_plot_hpd.py +++ b/examples/matplotlib/mpl_plot_hdi.py @@ -1,5 +1,5 @@ """ -Plot HPD +Plot HDI ======== _thumb: .8, .8 @@ -14,6 +14,6 @@ y_data = 2 + x_data * 0.5 y_data_rep = np.random.normal(y_data, 0.5, (200, 100)) plt.plot(x_data, y_data, "C6") -az.plot_hpd(x_data, y_data_rep, color="k", plot_kwargs={"ls": "--"}) +az.plot_hdi(x_data, y_data_rep, color="k", plot_kwargs={"ls": "--"}) plt.show() From 1b05ad3481968e9fe2d5cb32410a011a432552cc Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Mon, 18 May 2020 10:50:39 -0300 Subject: [PATCH 2/3] add plot_hdi and update changelog --- CHANGELOG.md | 3 +- 
arviz/plots/hdiplot.py | 143 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 arviz/plots/hdiplot.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cbc1254b0..33809b738d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,7 +38,8 @@ * plot_ppc animation: improve docs and error handling (#1162) ### Deprecation -* `credible_interval` argument replaced by `hdi_prob`throughout with exception of `plot_loo_pit` (#1176) +* `hpd` function deprecated in favor of `hdi`. `credible_interval` argument replaced by `hdi_prob` throughout with exception of `plot_loo_pit` (#1176) +* `plot_hpd` function deprecated in favor of `plot_hdi`. (#1190) ### Documentation * Add classifier to `setup.py` including Matplotlib framework (#1133) diff --git a/arviz/plots/hdiplot.py b/arviz/plots/hdiplot.py new file mode 100644 index 0000000000..336cd7baa2 --- /dev/null +++ b/arviz/plots/hdiplot.py @@ -0,0 +1,143 @@ +"""Plot highest density intervals for regression data.""" +import warnings + +import numpy as np +from scipy.interpolate import griddata +from scipy.signal import savgol_filter + +from ..stats import hdi +from .plot_utils import get_plotting_function, matplotlib_kwarg_dealiaser +from ..rcparams import rcParams +from ..utils import credible_interval_warning + + +def plot_hdi( + x, + y, + hdi_prob=None, + color="C1", + circular=False, + smooth=True, + smooth_kwargs=None, + fill_kwargs=None, + plot_kwargs=None, + ax=None, + backend=None, + backend_kwargs=None, + show=None, + credible_interval=None, +): + r""" + Plot hdi intervals for regression data. + + Parameters + ---------- + x : array-like + Values to plot + y : array-like + values from which to compute the hdi. Assumed shape (chain, draw, \*shape). + hdi_prob : float, optional + Probability for the highest density interval. Defaults to 0.94. + color : str + Color used for the limits of the hdi and fill. 
Should be a valid matplotlib color + circular : bool, optional + Whether to compute the hdi taking into account `x` is a circular variable + (in the range [-np.pi, np.pi]) or not. Defaults to False (i.e. non-circular variables). + smooth : boolean + If True the result will be smoothed by first computing a linear interpolation of the data + over a regular grid and then applying the Savitzky-Golay filter to the interpolated data. + Defaults to True. + smooth_kwargs : dict, optional + Additional keywords modifying the Savitzky-Golay filter. See Scipy's documentation for + details + fill_kwargs : dict + Keywords passed to `fill_between` (use fill_kwargs={'alpha': 0} to disable fill). + plot_kwargs : dict + Keywords passed to hdi limits + ax: axes, optional + Matplotlib axes or bokeh figures. + backend: str, optional + Select plotting backend {"matplotlib","bokeh"}. Default "matplotlib". + backend_kwargs: dict, optional + These are kwargs specific to the backend being used. For additional documentation + check the plotting method of the backend. + show : bool, optional + Call backend show function. + credible_interval: float, optional + deprecated: Please see hdi_prob + + Returns + ------- + axes : matplotlib axes or bokeh figures + """ + if credible_interval: + hdi_prob = credible_interval_warning(credible_interval, hdi_prob) + + plot_kwargs = matplotlib_kwarg_dealiaser(plot_kwargs, "plot") + plot_kwargs.setdefault("color", color) + plot_kwargs.setdefault("alpha", 0) + + fill_kwargs = matplotlib_kwarg_dealiaser(fill_kwargs, "hexbin") + fill_kwargs.setdefault("color", color) + fill_kwargs.setdefault("alpha", 0.5) + + x = np.asarray(x) + y = np.asarray(y) + + x_shape = x.shape + y_shape = y.shape + if y_shape[-len(x_shape) :] != x_shape: + msg = "Dimension mismatch for x: {} and y: {}." 
+ msg += " y-dimensions should be (chain, draw, *x.shape) or" + msg += " (draw, *x.shape)" + raise TypeError(msg.format(x_shape, y_shape)) + + if len(y_shape[: -len(x_shape)]) > 1: + new_shape = tuple([-1] + list(x_shape)) + y = y.reshape(new_shape) + + if hdi_prob is None: + hdi_prob = rcParams["stats.hdi_prob"] + else: + if not 1 >= hdi_prob > 0: + raise ValueError("The value of hdi_prob should be in the interval (0, 1]") + + hdi_ = hdi(y, hdi_prob=hdi_prob, circular=circular, multimodal=False) + + if smooth: + if smooth_kwargs is None: + smooth_kwargs = {} + smooth_kwargs.setdefault("window_length", 55) + smooth_kwargs.setdefault("polyorder", 2) + x_data = np.linspace(x.min(), x.max(), 200) + x_data[0] = (x_data[0] + x_data[1]) / 2 + hdi_interp = griddata(x, hdi_, x_data) + y_data = savgol_filter(hdi_interp, axis=0, **smooth_kwargs) + else: + idx = np.argsort(x) + x_data = x[idx] + y_data = hdi_[idx] + + hdiplot_kwargs = dict( + ax=ax, + x_data=x_data, + y_data=y_data, + plot_kwargs=plot_kwargs, + fill_kwargs=fill_kwargs, + backend_kwargs=backend_kwargs, + show=show, + ) + + if backend is None: + backend = rcParams["plot.backend"] + backend = backend.lower() + + # TODO: Add backend kwargs + plot = get_plotting_function("plot_hdi", "hdiplot", backend) + ax = plot(**hdiplot_kwargs) + return ax + + +def plot_hpd(*args, **kwargs): + warnings.warn("plot_hpd has been deprecated, please use plot_hdi", DeprecationWarning) + return plot_hdi(*args, **kwargs) From f0c69ab2f23c6ea254b867300b2adfc2f6197c24 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Mon, 18 May 2020 11:21:37 -0300 Subject: [PATCH 3/3] skip pydocstyle error --- arviz/plots/hdiplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arviz/plots/hdiplot.py b/arviz/plots/hdiplot.py index 336cd7baa2..ba5be26b8c 100644 --- a/arviz/plots/hdiplot.py +++ b/arviz/plots/hdiplot.py @@ -138,6 +138,6 @@ def plot_hdi( return ax -def plot_hpd(*args, **kwargs): +def plot_hpd(*args, **kwargs): # noqa: 
D103 warnings.warn("plot_hpd has been deprecated, please use plot_hdi", DeprecationWarning) return plot_hdi(*args, **kwargs)