arviz-devs · aloctavodia · Apr 2, 2020 · Mar 11, 2020 · Mar 13, 2020 · Mar 13, 2020
diff --git a/arviz/stats/stats.py b/arviz/stats/stats.py
@@ -305,16 +305,32 @@ def _ic_matrix(ics, ic_i):
     return rows, cols, ic_i_val
 
 
-def hpd(ary, credible_interval=None, circular=False, multimodal=False, skipna=False):
+def hpd(
+    ary,
+    *,
+    group="posterior",
+    var_names=None,
+    credible_interval=None,
+    circular=False,
+    multimodal=False,
+    skipna=False,
+    **kwargs
+):
     """
     Calculate highest posterior density (HPD) of array for given credible_interval.
 
     The HPD is the minimum width Bayesian credible interval (BCI).
 
     Parameters
     ----------
-    ary : Numpy array
-        An array containing posterior samples
+    ary : obj
+        object containing posterior samples.
+        Any object that can be converted to an az.InferenceData object.
+        Refer to documentation of az.convert_to_dataset for details.
+    group : str, optional
+         Specifies which InferenceData group should be used to calculate hpd. Defaults to 'posterior'
+    var_names : list
+        Names of variables to include in the hpd report
     credible_interval : float, optional
         Credible interval to compute. Defaults to 0.94.
     circular : bool, optional
@@ -326,10 +342,14 @@ def hpd(ary, credible_interval=None, circular=False, multimodal=False, skipna=Fa
         modes are well separated.
     skipna : bool
         If true ignores nan values when computing the hpd interval. Defaults to false.
+    kwargs : dict, optional
+        Additional keywords passed to ax.scatter
+        For example, to calculate hpd over "chain" dimension, Pass "input_core_dims" to "chain" in
+        the dictionary.
 
     Returns
     -------
-    np.ndarray
+    np.ndarray or xarray.Dataset, depending upon input
         lower(s) and upper(s) values of the interval(s).
 
     Examples
@@ -349,86 +369,103 @@ def hpd(ary, credible_interval=None, circular=False, multimodal=False, skipna=Fa
         if not 1 >= credible_interval > 0:
             raise ValueError("The value of credible_interval should be in the interval (0, 1]")
 
-    if ary.ndim > 1:
-        hpd_array = np.array(
-            [
-                hpd(
-                    row,
-                    credible_interval=credible_interval,
-                    circular=circular,
-                    multimodal=multimodal,
-                )
-                for row in ary.T
-            ]
-        )
-        return hpd_array
-
     if multimodal:
-        if skipna:
-            ary = ary[~np.isnan(ary)]
-
-        if ary.dtype.kind == "f":
-            density, lower, upper = _fast_kde(ary)
-            range_x = upper - lower
-            dx = range_x / len(density)
-            bins = np.linspace(lower, upper, len(density))
-        else:
-            bins = get_bins(ary)
-            _, density, _ = histogram(ary, bins=bins)
-            dx = np.diff(bins)[0]
+        return _hpd_multimodal(ary, credible_interval, skipna)
 
-        density *= dx
+    func_kwargs = {
+        "credible_interval": credible_interval,
+        "circular": circular,
+        "skipna": skipna,
+        "out_shape": (2,),
+    }
+    kwargs.setdefault("output_core_dims", [["hpd"]])
 
-        idx = np.argsort(-density)
-        intervals = bins[idx][density[idx].cumsum() <= credible_interval]
-        intervals.sort()
+    if isinstance(ary, np.ndarray):
+        if len(ary.shape) == 1:
+            return _hpd(ary, credible_interval, circular, skipna)
+        ary = convert_to_dataset(ary)
+        kwargs.setdefault("input_core_dims", [["chain"]])
+        return _wrap_xarray_ufunc(_hpd, ary, func_kwargs=func_kwargs, **kwargs).x.values
 
-        intervals_splitted = np.split(intervals, np.where(np.diff(intervals) >= dx * 1.1)[0] + 1)
+    ary = convert_to_dataset(ary, group=group)
+    var_names = _var_names(var_names, ary)
 
-        hpd_intervals = []
-        for interval in intervals_splitted:
-            if interval.size == 0:
-                hpd_intervals.append((bins[0], bins[0]))
-            else:
-                hpd_intervals.append((interval[0], interval[-1]))
+    ary = ary if var_names is None else ary[var_names]
 
-        hpd_intervals = np.array(hpd_intervals)
+    return _wrap_xarray_ufunc(_hpd, ary, func_kwargs=func_kwargs, **kwargs)
 
-    else:
-        if skipna:
-            nans = np.isnan(ary)
-            if not nans.all():
-                ary = ary[~nans]
-        n = len(ary)
 
-        if circular:
-            mean = st.circmean(ary, high=np.pi, low=-np.pi)
-            ary = ary - mean
-            ary = np.arctan2(np.sin(ary), np.cos(ary))
+def _hpd(ary, credible_interval, circular, skipna):
+    """Compute hpd over the flattened array."""
+    ary = ary.flatten()
+    if skipna:
+        nans = np.isnan(ary)
+        if not nans.all():
+            ary = ary[~nans]
+    n = len(ary)
+
+    if circular:
+        mean = st.circmean(ary, high=np.pi, low=-np.pi)
+        ary = ary - mean
+        ary = np.arctan2(np.sin(ary), np.cos(ary))
 
-        ary = np.sort(ary)
-        interval_idx_inc = int(np.floor(credible_interval * n))
-        n_intervals = n - interval_idx_inc
-        interval_width = ary[interval_idx_inc:] - ary[:n_intervals]
+    ary = np.sort(ary)
+    interval_idx_inc = int(np.floor(credible_interval * n))
+    n_intervals = n - interval_idx_inc
+    interval_width = ary[interval_idx_inc:] - ary[:n_intervals]
 
-        if len(interval_width) == 0:
-            raise ValueError("Too few elements for interval calculation. ")
+    if len(interval_width) == 0:
+        raise ValueError("Too few elements for interval calculation. ")
 
-        min_idx = np.argmin(interval_width)
-        hdi_min = ary[min_idx]
-        hdi_max = ary[min_idx + interval_idx_inc]
+    min_idx = np.argmin(interval_width)
+    hdi_min = ary[min_idx]
+    hdi_max = ary[min_idx + interval_idx_inc]
 
-        if circular:
-            hdi_min = hdi_min + mean
-            hdi_max = hdi_max + mean
-            hdi_min = np.arctan2(np.sin(hdi_min), np.cos(hdi_min))
-            hdi_max = np.arctan2(np.sin(hdi_max), np.cos(hdi_max))
+    if circular:
+        hdi_min = hdi_min + mean
+        hdi_max = hdi_max + mean
+        hdi_min = np.arctan2(np.sin(hdi_min), np.cos(hdi_min))
+        hdi_max = np.arctan2(np.sin(hdi_max), np.cos(hdi_max))
 
-        hpd_intervals = np.array([hdi_min, hdi_max])
+    hpd_intervals = np.array([hdi_min, hdi_max])
 
     return hpd_intervals
 
 
+def _hpd_multimodal(ary, credible_interval, skipna):
+    """Compute hpd if the distribution is multimodal"""
+    ary = ary.flatten()
+    if skipna:
+        ary = ary[~np.isnan(ary)]
+
+    if ary.dtype.kind == "f":
+        density, lower, upper = _fast_kde(ary)
+        range_x = upper - lower
+        dx = range_x / len(density)
+        bins = np.linspace(lower, upper, len(density))
+    else:
+        bins = get_bins(ary)
+        _, density, _ = histogram(ary, bins=bins)
+        dx = np.diff(bins)[0]
+
+    density *= dx
+
+    idx = np.argsort(-density)
+    intervals = bins[idx][density[idx].cumsum() <= credible_interval]
+    intervals.sort()
+
+    intervals_splitted = np.split(intervals, np.where(np.diff(intervals) >= dx * 1.1)[0] + 1)
+
+    hpd_intervals = []
+    for interval in intervals_splitted:
+        if interval.size == 0:
+            hpd_intervals.append((bins[0], bins[0]))
+        else:
+            hpd_intervals.append((interval[0], interval[-1]))
+
+    return np.array(hpd_intervals)
+
+
 def loo(data, pointwise=False, reff=None, scale=None):
     """Pareto-smoothed importance sampling leave-one-out cross-validation.
 

diff --git a/arviz/tests/base_tests/test_stats.py b/arviz/tests/base_tests/test_stats.py
@@ -47,6 +47,35 @@ def test_hpd():
     assert_array_almost_equal(interval, [-1.88, 1.88], 2)
 
 
+def test_hpd_2darray():
+    normal_sample = np.random.randn(12000, 5)
+    result = hpd(normal_sample)
+    assert result.shape == (5, 2,)
+
+
+def test_hpd_multidimension():
+    normal_sample = np.random.randn(12000, 10, 3)
+    result = hpd(normal_sample)
+    assert result.shape == (10, 3, 2,)
+
+
+def test_hpd_idata():
+    data = load_arviz_data("centered_eight")
+    normal_sample = data.posterior
+    result = hpd(normal_sample)
+    assert isinstance(result, Dataset)
+    assert result.dims == {"school": 8, "hpd": 2}
+
+    result = hpd(normal_sample, **{"input_core_dims": [["chain"]]})
+    assert isinstance(result, Dataset)
+    assert result.dims == {"draw": 500, "hpd": 2, "school": 8}
+
+    result = hpd(normal_sample, var_names=["mu", "theta"])
+    assert isinstance(result, Dataset)
+    assert result.dims == {"hpd": 2, "school": 8}
+    assert list(result.data_vars.keys()) == ["mu", "theta"]
+
+
 def test_hpd_multimodal():
     normal_sample = np.concatenate(
         (np.random.normal(-4, 1, 2500000), np.random.normal(2, 0.5, 2500000))