diff --git a/arviz/plots/__init__.py b/arviz/plots/__init__.py index 26006dd5df..ae417351c7 100644 --- a/arviz/plots/__init__.py +++ b/arviz/plots/__init__.py @@ -9,7 +9,7 @@ from .forestplot import plot_forest from .hpdplot import plot_hpd from .jointplot import plot_joint -from .kdeplot import plot_kde, _fast_kde, _fast_kde_2d +from .kdeplot import plot_kde from .khatplot import plot_khat from .loopitplot import plot_loo_pit from .mcseplot import plot_mcse @@ -20,6 +20,7 @@ from .rankplot import plot_rank from .traceplot import plot_trace from .violinplot import plot_violin +from .plot_utils import _fast_kde, _fast_kde_2d __all__ = [ diff --git a/arviz/plots/backends/bokeh/densityplot.py b/arviz/plots/backends/bokeh/densityplot.py index 7022ba2c85..5063bc3492 100644 --- a/arviz/plots/backends/bokeh/densityplot.py +++ b/arviz/plots/backends/bokeh/densityplot.py @@ -5,8 +5,12 @@ from bokeh.models.annotations import Title from . import backend_kwarg_defaults, backend_show -from ...kdeplot import _fast_kde -from ...plot_utils import _create_axes_grid, make_label +from ...plot_utils import ( + make_label, + _create_axes_grid, + calculate_point_estimate, + _fast_kde, +) from ....stats import hpd from ....stats.stats_utils import histogram @@ -186,10 +190,7 @@ def _d_helper( ax.diamond(xmax, 0, line_color="black", fill_color=color, size=markersize) if point_estimate is not None: - if point_estimate == "mean": - est = np.mean(vec) - elif point_estimate == "median": - est = np.median(vec) + est = calculate_point_estimate(point_estimate, vec, bw) ax.circle(est, 0, fill_color=color, line_color="black", size=markersize) _title = Title() diff --git a/arviz/plots/backends/bokeh/forestplot.py b/arviz/plots/backends/bokeh/forestplot.py index c5a4a74554..f7aa58b101 100644 --- a/arviz/plots/backends/bokeh/forestplot.py +++ b/arviz/plots/backends/bokeh/forestplot.py @@ -12,8 +12,7 @@ from bokeh.models.tickers import FixedTicker from . import backend_kwarg_defaults, backend_show -from ...kdeplot import _fast_kde -from ...plot_utils import _scale_fig_size, xarray_var_iter, make_label, get_bins +from ...plot_utils import _scale_fig_size, xarray_var_iter, make_label, get_bins, _fast_kde from ....rcparams import rcParams from ....stats import hpd from ....stats.diagnostics import _ess, _rhat diff --git a/arviz/plots/backends/bokeh/posteriorplot.py b/arviz/plots/backends/bokeh/posteriorplot.py index 16da53c895..10b0384b9e 100644 --- a/arviz/plots/backends/bokeh/posteriorplot.py +++ b/arviz/plots/backends/bokeh/posteriorplot.py @@ -6,15 +6,15 @@ import numpy as np from bokeh.layouts import gridplot from bokeh.models.annotations import Title -from scipy.stats import mode from . 
import backend_kwarg_defaults, backend_show -from ...kdeplot import plot_kde, _fast_kde +from ...kdeplot import plot_kde from ...plot_utils import ( make_label, _create_axes_grid, format_sig_figs, round_num, + calculate_point_estimate, ) from ....stats import hpd @@ -189,19 +189,7 @@ def display_rope(max_data): def display_point_estimate(max_data): if not point_estimate: return - if point_estimate not in ("mode", "mean", "median"): - raise ValueError("Point Estimate should be in ('mode','mean','median')") - if point_estimate == "mean": - point_value = values.mean() - elif point_estimate == "mode": - if isinstance(values[0], float): - density, lower, upper = _fast_kde(values, bw=bw) - x = np.linspace(lower, upper, len(density)) - point_value = x[np.argmax(density)] - else: - point_value = mode(values)[0][0] - elif point_estimate == "median": - point_value = np.median(values) + point_value = calculate_point_estimate(point_estimate, values, bw) sig_figs = format_sig_figs(point_value, round_to) point_text = "{point_estimate}={point_value:.{sig_figs}g}".format( point_estimate=point_estimate, point_value=point_value, sig_figs=sig_figs diff --git a/arviz/plots/backends/matplotlib/densityplot.py b/arviz/plots/backends/matplotlib/densityplot.py index e320ed6326..18d130c110 100644 --- a/arviz/plots/backends/matplotlib/densityplot.py +++ b/arviz/plots/backends/matplotlib/densityplot.py @@ -4,8 +4,12 @@ from . import backend_show from ....stats import hpd -from ...kdeplot import _fast_kde -from ...plot_utils import _create_axes_grid, make_label +from ...plot_utils import ( + make_label, + _create_axes_grid, + calculate_point_estimate, + _fast_kde, +) def plot_density( @@ -115,8 +119,9 @@ def _d_helper( Size of markers credible_interval : float Credible intervals. Defaults to 0.94 - point_estimate : str or None - 'mean' or 'median' + point_estimate : Optional[str] + Plot point estimate per variable. Values should be 'mean', 'median', 'mode' or None. + Defaults to 'auto' i.e. it falls back to default set in rcParams. shade : float Alpha blending value for the shaded area under the curve, between 0 (no shade) and 1 (opaque). Defaults to 0. @@ -156,10 +161,7 @@ def _d_helper( ax.plot(xmax, 0, hpd_markers, color=color, markeredgecolor="k", markersize=markersize) if point_estimate is not None: - if point_estimate == "mean": - est = np.mean(vec) - elif point_estimate == "median": - est = np.median(vec) + est = calculate_point_estimate(point_estimate, vec, bw) ax.plot(est, 0, "o", color=color, markeredgecolor="k", markersize=markersize) ax.set_yticks([]) diff --git a/arviz/plots/backends/matplotlib/forestplot.py b/arviz/plots/backends/matplotlib/forestplot.py index 0e37defc34..6fec8e375e 100644 --- a/arviz/plots/backends/matplotlib/forestplot.py +++ b/arviz/plots/backends/matplotlib/forestplot.py @@ -10,8 +10,7 @@ from ....stats import hpd from ....stats.diagnostics import _ess, _rhat from ....stats.stats_utils import histogram -from ...plot_utils import _scale_fig_size, xarray_var_iter, make_label, get_bins -from ...kdeplot import _fast_kde +from ...plot_utils import _scale_fig_size, xarray_var_iter, make_label, get_bins, _fast_kde from ....utils import conditional_jit diff --git a/arviz/plots/backends/matplotlib/loopitplot.py b/arviz/plots/backends/matplotlib/loopitplot.py index f607fae474..e375a7a4c7 100644 --- a/arviz/plots/backends/matplotlib/loopitplot.py +++ b/arviz/plots/backends/matplotlib/loopitplot.py @@ -3,7 +3,7 @@ import numpy as np from . 
import backend_kwarg_defaults, backend_show -from ...kdeplot import _fast_kde +from ...plot_utils import _fast_kde from ...hpdplot import plot_hpd diff --git a/arviz/plots/backends/matplotlib/posteriorplot.py b/arviz/plots/backends/matplotlib/posteriorplot.py index d1524f2306..e43a45cd11 100644 --- a/arviz/plots/backends/matplotlib/posteriorplot.py +++ b/arviz/plots/backends/matplotlib/posteriorplot.py @@ -3,16 +3,16 @@ from numbers import Number import matplotlib.pyplot as plt import numpy as np -from scipy.stats import mode from . import backend_show from ....stats import hpd -from ...kdeplot import plot_kde, _fast_kde +from ...kdeplot import plot_kde from ...plot_utils import ( make_label, _create_axes_grid, format_sig_figs, round_num, + calculate_point_estimate, ) @@ -181,19 +181,7 @@ def display_rope(): def display_point_estimate(): if not point_estimate: return - if point_estimate not in ("mode", "mean", "median"): - raise ValueError("Point Estimate should be in ('mode','mean','median')") - if point_estimate == "mean": - point_value = values.mean() - elif point_estimate == "mode": - if isinstance(values[0], float): - density, lower, upper = _fast_kde(values, bw=bw) - x = np.linspace(lower, upper, len(density)) - point_value = x[np.argmax(density)] - else: - point_value = mode(values)[0][0] - elif point_estimate == "median": - point_value = np.median(values) + point_value = calculate_point_estimate(point_estimate, values, bw) sig_figs = format_sig_figs(point_value, round_to) point_text = "{point_estimate}={point_value:.{sig_figs}g}".format( point_estimate=point_estimate, point_value=point_value, sig_figs=sig_figs diff --git a/arviz/plots/backends/matplotlib/ppcplot.py b/arviz/plots/backends/matplotlib/ppcplot.py index 008c4066c5..e80280c15a 100644 --- a/arviz/plots/backends/matplotlib/ppcplot.py +++ b/arviz/plots/backends/matplotlib/ppcplot.py @@ -4,11 +4,12 @@ import numpy as np from . import backend_show -from ...kdeplot import plot_kde, _fast_kde +from ...kdeplot import plot_kde from ...plot_utils import ( make_label, _create_axes_grid, get_bins, + _fast_kde, ) from ....stats.stats_utils import histogram diff --git a/arviz/plots/backends/matplotlib/violinplot.py b/arviz/plots/backends/matplotlib/violinplot.py index 4ae1e033f9..4556207fcc 100644 --- a/arviz/plots/backends/matplotlib/violinplot.py +++ b/arviz/plots/backends/matplotlib/violinplot.py @@ -5,8 +5,7 @@ from . import backend_show from ....stats import hpd from ....stats.stats_utils import histogram -from ...kdeplot import _fast_kde -from ...plot_utils import get_bins, make_label, _create_axes_grid +from ...plot_utils import get_bins, make_label, _create_axes_grid, _fast_kde def plot_violin( diff --git a/arviz/plots/densityplot.py b/arviz/plots/densityplot.py index d1baf386fa..e8256b75d8 100644 --- a/arviz/plots/densityplot.py +++ b/arviz/plots/densityplot.py @@ -23,7 +23,7 @@ def plot_density( data_labels=None, var_names=None, credible_interval=0.94, - point_estimate="mean", + point_estimate="auto", colors="cycle", outline=True, hpd_markers="", @@ -61,8 +61,8 @@ def plot_density( credible_interval : float Credible intervals. Should be in the interval (0, 1]. Defaults to 0.94. point_estimate : Optional[str] - Plot point estimate per variable. Values should be 'mean', 'median' or None. - Defaults to 'mean'. + Plot point estimate per variable. Values should be 'mean', 'median', 'mode' or None. + Defaults to 'auto' i.e. it falls back to default set in rcParams. 
colors : Optional[Union[List[str],str]] List with valid matplotlib colors, one color per model. Alternative a string can be passed. If the string is `cycle`, it will automatically choose a color per model from matplotlib's @@ -153,12 +153,6 @@ def plot_density( datasets = [convert_to_dataset(datum, group=group) for datum in data] var_names = _var_names(var_names, datasets) - - if point_estimate not in ("mean", "median", None): - raise ValueError( - "Point estimate should be 'mean'," "median' or None, not {}".format(point_estimate) - ) - n_data = len(datasets) if data_labels is None: diff --git a/arviz/plots/kdeplot.py b/arviz/plots/kdeplot.py index ec1e36c57f..f1e62ce058 100644 --- a/arviz/plots/kdeplot.py +++ b/arviz/plots/kdeplot.py @@ -1,15 +1,9 @@ # pylint: disable=unexpected-keyword-arg """One-dimensional kernel density estimate plots.""" -import warnings -import numpy as np -from scipy.signal import gaussian, convolve, convolve2d # pylint: disable=no-name-in-module -from scipy.sparse import coo_matrix import xarray as xr from ..data import InferenceData -from ..utils import conditional_jit, _stack -from ..stats.stats_utils import histogram -from .plot_utils import get_plotting_function +from .plot_utils import get_plotting_function, _fast_kde, _fast_kde_2d def plot_kde( @@ -226,171 +220,3 @@ def plot_kde( ax = plot(**kde_plot_args) return ax - - -def _fast_kde(x, cumulative=False, bw=4.5, xmin=None, xmax=None): - """Fast Fourier transform-based Gaussian kernel density estimate (KDE). - - The code was adapted from https://github.com/mfouesneau/faststats - - Parameters - ---------- - x : Numpy array or list - cumulative : bool - If true, estimate the cdf instead of the pdf - bw : float - Bandwidth scaling factor for the KDE. Should be larger than 0. The higher this number the - smoother the KDE will be. Defaults to 4.5 which is essentially the same as the Scott's rule - of thumb (the default rule used by SciPy). - xmin : float - Manually set lower limit. - xmax : float - Manually set upper limit. 
- - Returns - ------- - density: A gridded 1D KDE of the input points (x) - xmin: minimum value of x - xmax: maximum value of x - """ - x = np.asarray(x, dtype=float) - x = x[np.isfinite(x)] - if x.size == 0: - warnings.warn("kde plot failed, you may want to check your data") - return np.array([np.nan]), np.nan, np.nan - - len_x = len(x) - n_points = 200 if (xmin or xmax) is None else 500 - - if xmin is None: - xmin = np.min(x) - if xmax is None: - xmax = np.max(x) - - assert np.min(x) >= xmin - assert np.max(x) <= xmax - - log_len_x = np.log(len_x) * bw - - n_bins = min(int(len_x ** (1 / 3) * log_len_x * 2), n_points) - if n_bins < 2: - warnings.warn("kde plot failed, you may want to check your data") - return np.array([np.nan]), np.nan, np.nan - - _, grid, _ = histogram(x, n_bins, range_hist=(xmin, xmax)) - - scotts_factor = len_x ** (-0.2) - kern_nx = int(scotts_factor * 2 * np.pi * log_len_x) - kernel = gaussian(kern_nx, scotts_factor * log_len_x) - - npad = min(n_bins, 2 * kern_nx) - grid = np.concatenate([grid[npad:0:-1], grid, grid[n_bins : n_bins - npad : -1]]) - density = convolve(grid, kernel, mode="same", method="direct")[npad : npad + n_bins] - norm_factor = (2 * np.pi * log_len_x ** 2 * scotts_factor ** 2) ** 0.5 - - density /= norm_factor - - if cumulative: - density = density.cumsum() / density.sum() - - return density, xmin, xmax - - -def _cov_1d(x): - x = x - x.mean(axis=0) - ddof = x.shape[0] - 1 - return np.dot(x.T, x.conj()) / ddof - - -def _cov(data): - if data.ndim == 1: - return _cov_1d(data) - elif data.ndim == 2: - x = data.astype(float) - avg, _ = np.average(x, axis=1, weights=None, returned=True) - ddof = x.shape[1] - 1 - if ddof <= 0: - warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning, stacklevel=2) - ddof = 0.0 - x -= avg[:, None] - prod = _dot(x, x.T.conj()) - prod *= np.true_divide(1, ddof) - return prod.squeeze() - else: - raise ValueError("{} dimension arrays are not supported".format(data.ndim)) - - -@conditional_jit(cache=True) -def _dot(x, y): - return np.dot(x, y) - - -def _fast_kde_2d(x, y, gridsize=(128, 128), circular=False): - """ - 2D fft-based Gaussian kernel density estimate (KDE). - - The code was adapted from https://github.com/mfouesneau/faststats - - Parameters - ---------- - x : Numpy array or list - y : Numpy array or list - gridsize : tuple - Number of points used to discretize data. Use powers of 2 for fft optimization - circular: bool - If True, use circular boundaries. 
Defaults to False - Returns - ------- - grid: A gridded 2D KDE of the input points (x, y) - xmin: minimum value of x - xmax: maximum value of x - ymin: minimum value of y - ymax: maximum value of y - """ - x = np.asarray(x, dtype=float) - x = x[np.isfinite(x)] - y = np.asarray(y, dtype=float) - y = y[np.isfinite(y)] - - xmin, xmax = x.min(), x.max() - ymin, ymax = y.min(), y.max() - - len_x = len(x) - weights = np.ones(len_x) - n_x, n_y = gridsize - - d_x = (xmax - xmin) / (n_x - 1) - d_y = (ymax - ymin) / (n_y - 1) - - xyi = _stack(x, y).T - xyi -= [xmin, ymin] - xyi /= [d_x, d_y] - xyi = np.floor(xyi, xyi).T - - scotts_factor = len_x ** (-1 / 6) - cov = _cov(xyi) - std_devs = np.diag(cov ** 0.5) - kern_nx, kern_ny = np.round(scotts_factor * 2 * np.pi * std_devs) - - inv_cov = np.linalg.inv(cov * scotts_factor ** 2) - - x_x = np.arange(kern_nx) - kern_nx / 2 - y_y = np.arange(kern_ny) - kern_ny / 2 - x_x, y_y = np.meshgrid(x_x, y_y) - - kernel = _stack(x_x.flatten(), y_y.flatten()) - kernel = _dot(inv_cov, kernel) * kernel - kernel = np.exp(-kernel.sum(axis=0) / 2) - kernel = kernel.reshape((int(kern_ny), int(kern_nx))) - - boundary = "wrap" if circular else "symm" - - grid = coo_matrix((weights, xyi), shape=(n_x, n_y)).toarray() - grid = convolve2d(grid, kernel, mode="same", boundary=boundary) - - norm_factor = np.linalg.det(2 * np.pi * cov * scotts_factor ** 2) - norm_factor = len_x * d_x * d_y * norm_factor ** 0.5 - - grid /= norm_factor - - return grid, xmin, xmax, ymin, ymax diff --git a/arviz/plots/loopitplot.py b/arviz/plots/loopitplot.py index b8c2197a29..306d3b7635 100644 --- a/arviz/plots/loopitplot.py +++ b/arviz/plots/loopitplot.py @@ -5,8 +5,11 @@ from xarray import DataArray from ..stats import loo_pit as _loo_pit -from .plot_utils import _scale_fig_size, get_plotting_function -from .kdeplot import _fast_kde +from .plot_utils import ( + _scale_fig_size, + get_plotting_function, + _fast_kde, +) def plot_loo_pit( diff --git a/arviz/plots/plot_utils.py b/arviz/plots/plot_utils.py index 898cbc2e98..603f70bbeb 100644 --- a/arviz/plots/plot_utils.py +++ b/arviz/plots/plot_utils.py @@ -2,6 +2,10 @@ import warnings from itertools import product, tee import importlib +from scipy.signal import convolve, convolve2d +from scipy.sparse import coo_matrix +from scipy.signal.windows import gaussian +from scipy.stats import mode import packaging import numpy as np @@ -10,7 +14,7 @@ import xarray as xr -from ..utils import conditional_jit +from ..utils import conditional_jit, _stack from ..rcparams import rcParams @@ -674,3 +678,217 @@ def get_plotting_function(plot_name, plot_module, backend): plotting_method = getattr(module, plot_name) return plotting_method + + +def calculate_point_estimate(point_estimate, values, bw): + """Validate and calculate the point estimate. + + Parameters + ---------- + point_estimate : Optional[str] + Plot point estimate per variable. Values should be 'mean', 'median', 'mode' or None. + Defaults to 'auto' i.e. it falls back to default set in rcParams. + values : 1-d array + bw : float + Bandwidth scaling factor. Should be larger than 0. The higher this number the smoother the + KDE will be. Defaults to 4.5 which is essentially the same as the Scott's rule of thumb + (the default rule used by SciPy).
+ + Returns + ------- + point_value : float + best estimate of data distribution + """ + point_value = None + if point_estimate == "auto": + point_estimate = rcParams["plot.point_estimate"] + elif point_estimate not in ("mean", "median", "mode", None): + raise ValueError( + "Point estimate should be 'mean', 'median', 'mode' or None, not {}".format( + point_estimate + ) + ) + if point_estimate == "mean": + point_value = values.mean() + elif point_estimate == "mode": + if isinstance(values[0], float): + density, lower, upper = _fast_kde(values, bw=bw) + x = np.linspace(lower, upper, len(density)) + point_value = x[np.argmax(density)] + else: + point_value = mode(values)[0][0] + elif point_estimate == "median": + point_value = np.median(values) + + return point_value + + +def _fast_kde(x, cumulative=False, bw=4.5, xmin=None, xmax=None): + """Fast Fourier transform-based Gaussian kernel density estimate (KDE). + + The code was adapted from https://github.com/mfouesneau/faststats + + Parameters + ---------- + x : Numpy array or list + cumulative : bool + If true, estimate the cdf instead of the pdf + bw : float + Bandwidth scaling factor for the KDE. Should be larger than 0. The higher this number the + smoother the KDE will be. Defaults to 4.5 which is essentially the same as the Scott's rule + of thumb (the default rule used by SciPy). + xmin : float + Manually set lower limit. + xmax : float + Manually set upper limit. + + Returns + ------- + density: A gridded 1D KDE of the input points (x) + xmin: minimum value of x + xmax: maximum value of x + """ + x = np.asarray(x, dtype=float) + x = x[np.isfinite(x)] + if x.size == 0: + warnings.warn("kde plot failed, you may want to check your data") + return np.array([np.nan]), np.nan, np.nan + + len_x = len(x) + n_points = 200 if (xmin or xmax) is None else 500 + + if xmin is None: + xmin = np.min(x) + if xmax is None: + xmax = np.max(x) + + assert np.min(x) >= xmin + assert np.max(x) <= xmax + + log_len_x = np.log(len_x) * bw + + n_bins = min(int(len_x ** (1 / 3) * log_len_x * 2), n_points) + if n_bins < 2: + warnings.warn("kde plot failed, you may want to check your data") + return np.array([np.nan]), np.nan, np.nan + + hist, bin_edges = np.histogram(x, bins=n_bins, range=(xmin, xmax)) + grid = hist / (hist.sum() * np.diff(bin_edges)) + + # _, grid, _ = histogram(x, n_bins, range_hist=(xmin, xmax)) + + scotts_factor = len_x ** (-0.2) + kern_nx = int(scotts_factor * 2 * np.pi * log_len_x) + kernel = gaussian(kern_nx, scotts_factor * log_len_x) + + npad = min(n_bins, 2 * kern_nx) + grid = np.concatenate([grid[npad:0:-1], grid, grid[n_bins : n_bins - npad : -1]]) + density = convolve(grid, kernel, mode="same", method="direct")[npad : npad + n_bins] + norm_factor = (2 * np.pi * log_len_x ** 2 * scotts_factor ** 2) ** 0.5 + + density /= norm_factor + + if cumulative: + density = density.cumsum() / density.sum() + + return density, xmin, xmax + + +def _cov_1d(x): + x = x - x.mean(axis=0) + ddof = x.shape[0] - 1 + return np.dot(x.T, x.conj()) / ddof + + +def _cov(data): + if data.ndim == 1: + return _cov_1d(data) + elif data.ndim == 2: + x = data.astype(float) + avg, _ = np.average(x, axis=1, weights=None, returned=True) + ddof = x.shape[1] - 1 + if ddof <= 0: + warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning, stacklevel=2) + ddof = 0.0 + x -= avg[:, None] + prod = _dot(x, x.T.conj()) + prod *= np.true_divide(1, ddof) + return prod.squeeze() + else: + raise ValueError("{} dimension arrays are not supported".format(data.ndim)) + + 
+@conditional_jit(cache=True) +def _dot(x, y): + return np.dot(x, y) + + +def _fast_kde_2d(x, y, gridsize=(128, 128), circular=False): + """ + 2D fft-based Gaussian kernel density estimate (KDE). + + The code was adapted from https://github.com/mfouesneau/faststats + + Parameters + ---------- + x : Numpy array or list + y : Numpy array or list + gridsize : tuple + Number of points used to discretize data. Use powers of 2 for fft optimization + circular: bool + If True, use circular boundaries. Defaults to False + Returns + ------- + grid: A gridded 2D KDE of the input points (x, y) + xmin: minimum value of x + xmax: maximum value of x + ymin: minimum value of y + ymax: maximum value of y + """ + x = np.asarray(x, dtype=float) + x = x[np.isfinite(x)] + y = np.asarray(y, dtype=float) + y = y[np.isfinite(y)] + + xmin, xmax = x.min(), x.max() + ymin, ymax = y.min(), y.max() + + len_x = len(x) + weights = np.ones(len_x) + n_x, n_y = gridsize + + d_x = (xmax - xmin) / (n_x - 1) + d_y = (ymax - ymin) / (n_y - 1) + + xyi = _stack(x, y).T + xyi -= [xmin, ymin] + xyi /= [d_x, d_y] + xyi = np.floor(xyi, xyi).T + + scotts_factor = len_x ** (-1 / 6) + cov = _cov(xyi) + std_devs = np.diag(cov ** 0.5) + kern_nx, kern_ny = np.round(scotts_factor * 2 * np.pi * std_devs) + + inv_cov = np.linalg.inv(cov * scotts_factor ** 2) + + x_x = np.arange(kern_nx) - kern_nx / 2 + y_y = np.arange(kern_ny) - kern_ny / 2 + x_x, y_y = np.meshgrid(x_x, y_y) + + kernel = _stack(x_x.flatten(), y_y.flatten()) + kernel = _dot(inv_cov, kernel) * kernel + kernel = np.exp(-kernel.sum(axis=0) / 2) + kernel = kernel.reshape((int(kern_ny), int(kern_nx))) + + boundary = "wrap" if circular else "symm" + + grid = coo_matrix((weights, xyi), shape=(n_x, n_y)).toarray() + grid = convolve2d(grid, kernel, mode="same", boundary=boundary) + + norm_factor = np.linalg.det(2 * np.pi * cov * scotts_factor ** 2) + norm_factor = len_x * d_x * d_y * norm_factor ** 0.5 + + grid /= norm_factor + + return grid, xmin, xmax, ymin, ymax diff --git a/arviz/plots/posteriorplot.py b/arviz/plots/posteriorplot.py index 0f9a4ae47b..99f5454b7d 100644 --- a/arviz/plots/posteriorplot.py +++ b/arviz/plots/posteriorplot.py @@ -22,7 +22,7 @@ def plot_posterior( credible_interval=0.94, multimodal=False, round_to: Optional[int] = None, - point_estimate="mean", + point_estimate="auto", group="posterior", rope=None, ref_val=None, @@ -58,8 +58,9 @@ def plot_posterior( multimodal and the modes are well separated. round_to : int, optional Controls formatting of floats. Defaults to 2 or the integer part, whichever is bigger. - point_estimate: str - Must be in ('mode', 'mean', 'median', None) + point_estimate : Optional[str] + Plot point estimate per variable. Values should be 'mean', 'median', 'mode' or None. + Defaults to 'auto' i.e. it falls back to default set in rcParams. group : str, optional Specifies which InferenceData group should be plotted. Defaults to ‘posterior’. rope: tuple or dictionary of tuples diff --git a/arviz/stats/diagnostics.py b/arviz/stats/diagnostics.py index eae8692165..21577b62f4 100644 --- a/arviz/stats/diagnostics.py +++ b/arviz/stats/diagnostics.py @@ -46,6 +46,7 @@ def bfmi(data): Examples -------- Compute the BFMI of an InferenceData object + .. 
ipython:: In [1]: import arviz as az @@ -526,9 +527,9 @@ def _bfmi(energy): energy_mat = np.atleast_2d(energy) num = np.square(np.diff(energy_mat, axis=1)).mean(axis=1) # pylint: disable=no-member if energy_mat.ndim == 2: - den = _numba_var(svar, np.var, energy_mat, axis=1, ddof=0) + den = _numba_var(svar, np.var, energy_mat, axis=1, ddof=1) else: - den = np.var(energy, axis=1) + den = np.var(energy, axis=1, ddof=1) return num / den diff --git a/arviz/stats/stats.py b/arviz/stats/stats.py index 6e4226d5ed..68b98e4901 100644 --- a/arviz/stats/stats.py +++ b/arviz/stats/stats.py @@ -12,9 +12,8 @@ from scipy.optimize import minimize import xarray as xr +from ..plots.plot_utils import _fast_kde, get_bins from ..data import convert_to_inference_data, convert_to_dataset, InferenceData, CoordSpec, DimSpec -from ..plots.kdeplot import _fast_kde -from ..plots.plot_utils import get_bins from .diagnostics import _multichain_statistics, _mc_error, ess, _circular_standard_deviation from .stats_utils import ( make_ufunc as _make_ufunc, @@ -22,9 +21,9 @@ logsumexp as _logsumexp, ELPDData, stats_variance_2d as svar, + histogram, ) from ..utils import _var_names, Numba, _numba_var -from ..stats.stats_utils import histogram from ..rcparams import rcParams _log = logging.getLogger(__name__) diff --git a/arviz/tests/test_diagnostics.py b/arviz/tests/test_diagnostics.py index 1ba514ad5d..4100cdbd19 100644 --- a/arviz/tests/test_diagnostics.py +++ b/arviz/tests/test_diagnostics.py @@ -45,7 +45,7 @@ def data(): class TestDiagnostics: def test_bfmi(self): energy = np.array([1, 2, 3, 4]) - assert_almost_equal(bfmi(energy), 0.8) + assert_almost_equal(bfmi(energy), 0.6) def test_bfmi_dataset(self): data = load_arviz_data("centered_eight") diff --git a/arviz/tests/test_plots_bokeh.py b/arviz/tests/test_plots_bokeh.py index f55bfa769e..e484911b12 100644 --- a/arviz/tests/test_plots_bokeh.py +++ b/arviz/tests/test_plots_bokeh.py @@ -883,7 +883,7 @@ def test_plot_ppc_ax(models, kind): {"rope": {"mu": [{"rope": (-2, 2)}], "theta": [{"school": "Choate", "rope": (2, 4)}]}}, {"point_estimate": "mode"}, {"point_estimate": "median"}, - {"point_estimate": False}, + {"point_estimate": None}, {"ref_val": 0}, {"ref_val": None}, {"ref_val": {"mu": [{"ref_val": 1}]}}, diff --git a/arviz/tests/test_plots_matplotlib.py b/arviz/tests/test_plots_matplotlib.py index 3076e44973..6d32eaf402 100644 --- a/arviz/tests/test_plots_matplotlib.py +++ b/arviz/tests/test_plots_matplotlib.py @@ -33,7 +33,6 @@ plot_violin, plot_compare, plot_kde, - _fast_kde, plot_khat, plot_hpd, plot_dist, @@ -42,7 +41,7 @@ plot_loo_pit, plot_mcse, ) -from ..plots.kdeplot import _cov +from ..plots.plot_utils import _fast_kde, _cov rcParams["data.load"] = "eager" @@ -657,7 +656,7 @@ def test_plot_rank(models, kwargs): {"rope": {"mu": [{"rope": (-2, 2)}], "theta": [{"school": "Choate", "rope": (2, 4)}]}}, {"point_estimate": "mode"}, {"point_estimate": "median"}, - {"point_estimate": False}, + {"point_estimate": None}, {"ref_val": 0}, {"ref_val": None}, {"ref_val": {"mu": [{"ref_val": 1}]}},
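A minimal usage sketch of the relocated helpers, assuming `calculate_point_estimate(point_estimate, values, bw)` and `_fast_kde(x, bw=...)` keep the signatures shown in the `plot_utils.py` hunk above. Both are private utilities, so user code would normally reach them only through `plot_posterior(..., point_estimate="auto")` or `plot_density`; this is illustration only, not part of the patch:

```python
# Sketch only: exercises the helpers now exposed from arviz.plots.plot_utils.
import numpy as np
from arviz.plots.plot_utils import _fast_kde, calculate_point_estimate

values = np.random.default_rng(0).normal(size=1000)

# "auto" resolves to rcParams["plot.point_estimate"]; anything outside
# ("mean", "median", "mode", None) raises ValueError.
for estimate in ("auto", "mean", "median", "mode"):
    print(estimate, calculate_point_estimate(estimate, values, bw=4.5))

# The KDE grid the "mode" branch searches: density evaluated on [lower, upper].
density, lower, upper = _fast_kde(values, bw=4.5)
grid = np.linspace(lower, upper, len(density))
print("mode via KDE grid:", grid[np.argmax(density)])
```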
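One detail of `_fast_kde` after the move: the explicit `np.histogram` call followed by `hist / (hist.sum() * np.diff(bin_edges))` (which replaces the commented-out `stats_utils.histogram` call) is simply a density-normalized histogram. A quick equivalence check, illustration only:

```python
# Illustration only: the manual normalization used in _fast_kde matches
# NumPy's own density histogram.
import numpy as np

x = np.random.default_rng(1).normal(size=500)
bins, bounds = 50, (x.min(), x.max())

hist, bin_edges = np.histogram(x, bins=bins, range=bounds)
grid = hist / (hist.sum() * np.diff(bin_edges))

expected, _ = np.histogram(x, bins=bins, range=bounds, density=True)
np.testing.assert_allclose(grid, expected)
```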
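The new `test_bfmi` expectation follows directly from the `ddof=1` change in `_bfmi`: for `energy = [1, 2, 3, 4]` the numerator `mean(diff(energy) ** 2)` is 1.0, and switching the denominator from the population to the sample variance moves the ratio from 0.8 to 0.6. A worked check, illustration only:

```python
# Illustration only: why bfmi([1, 2, 3, 4]) is now expected to be 0.6.
import numpy as np

energy = np.array([1.0, 2.0, 3.0, 4.0])
num = np.square(np.diff(energy)).mean()   # 1.0

old = num / np.var(energy, ddof=0)        # 1.0 / 1.25   = 0.8
new = num / np.var(energy, ddof=1)        # 1.0 / 1.6667 = 0.6
print(old, new)
```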