arviz-devs · aloctavodia · Aug 30, 2024 · Aug 2, 2024 · Aug 2, 2024 · Aug 3, 2024
diff --git a/docs/source/api/plots.rst b/docs/source/api/plots.rst
@@ -17,6 +17,7 @@ A complementary introduction and guide to ``plot_...`` functions is available at
 .. autosummary::
    :toctree: generated/
 
+   plot_compare
    plot_dist
    plot_forest
    plot_ridge

diff --git a/docs/source/gallery/model_comparison/plot_compare.py b/docs/source/gallery/model_comparison/plot_compare.py
@@ -0,0 +1,34 @@
+"""
+(gallery_compare)=
+# Model comparison plot
+
+Summary plot comparing the ELPD of several models, drawn from a hand-written
+comparison table
+
+---
+
+:::{seealso}
+API Documentation: {func}`~arviz_plots.plot_compare`
+:::
+"""
+from importlib import import_module
+
+import pandas as pd
+from arviz_base import load_arviz_data
+
+import arviz_plots as azp
+
+azp.style.use("arviz-clean")
+
+backend = "none"  # change to preferred backend
+
+cmp_df = pd.DataFrame({"elpd_loo": [-4.575778, -14.309050, -16],
+                       "p_loo": [2.646204, 2.399241, 2],
+                       "elpd_diff": [0.000000, 9.733272, 11],
+                       "weight": [1.000000e+00, 3.215206e-13, 0],
+                       "se": [2.318739, 2.673219, 2],
+                       "dse": [0.00000, 2.68794, 2],
+                       "warning": [False, False, False],
+                       "scale": ["log", "log", "log"]}, index=["modelo_p", "modelo_l", "modelo_d"])
+
+azp.plot_compare(cmp_df, backend=backend)
diff --git a/docs/sphinxext/gallery_generator.py b/docs/sphinxext/gallery_generator.py
@@ -14,6 +14,7 @@
     "distribution_comparison": "Distribution comparison",
     "inference_diagnostics": "Inference diagnostics",
     "model_criticism": "Model criticism",
+    "model_comparison": "Model comparison",
 }
 
 toctree_template = """

diff --git a/src/arviz_plots/plots/__init__.py b/src/arviz_plots/plots/__init__.py
@@ -1,9 +1,17 @@
 """Batteries-included ArviZ plots."""
 
+from .compareplot import plot_compare
 from .distplot import plot_dist
 from .forestplot import plot_forest
 from .ridgeplot import plot_ridge
 from .tracedistplot import plot_trace_dist
 from .traceplot import plot_trace
 
-__all__ = ["plot_dist", "plot_forest", "plot_trace", "plot_trace_dist", "plot_ridge"]
+__all__ = [
+    "plot_compare",
+    "plot_dist",
+    "plot_forest",
+    "plot_trace",
+    "plot_trace_dist",
+    "plot_ridge",
+]
diff --git a/src/arviz_plots/plots/compareplot.py b/src/arviz_plots/plots/compareplot.py
@@ -0,0 +1,137 @@
+"""Compare plot code."""
+from importlib import import_module
+
+from arviz_base import rcParams
+
+
+def plot_compare(
+    cmp_df,
+    color="black",
+    similar_band=True,
+    relative_scale=False,
+    figsize=None,
+    target=None,
+    backend=None,
+):
+    r"""Summary plot for model comparison.
+
+    Models are compared based on their expected log pointwise predictive density (ELPD).
+
+    Notes
+    -----
+    The ELPD is estimated either by Pareto smoothed importance sampling leave-one-out
+    cross-validation (LOO) or using the widely applicable information criterion (WAIC).
+    We recommend LOO in line with the work presented by [1]_.
+
+    Parameters
+    ----------
+    cmp_df : pandas.DataFrame
+        Result of the :func:`arviz.compare` method. Must contain an "elpd_loo" or
+        "elpd_waic" column together with "se" and "scale" columns. The first row
+        is treated as the best model.
+    color : str, optional
+        Color for the plot elements. Defaults to "black".
+    similar_band : bool, optional
+        If True, a band is drawn to indicate models with similar
+        predictive performance to the best model. Defaults to True.
+    relative_scale : bool, optional
+        If True scale the ELPD values relative to the best model.
+        Defaults to False.
+    figsize : (float, float), optional
+        If `None`, size is (10, num of models) inches. Ignored when `target` is given.
+    target : bokeh figure, matplotlib axes, or plotly figure, optional
+        Existing plot to draw on. If `None`, a new plot is created.
+    backend : {"bokeh", "matplotlib", "plotly"}, optional
+        Select plotting backend. Defaults to rcParams["plot.backend"].
+
+    Returns
+    -------
+    target : bokeh figure, matplotlib axes or plotly figure
+        The plot that was drawn on.
+
+    Raises
+    ------
+    ValueError
+        If `cmp_df` has neither an "elpd_loo" nor an "elpd_waic" column.
+
+    See Also
+    --------
+    plot_elpd : Plot pointwise elpd differences between two or more models.
+    compare : Compare models based on PSIS-LOO loo or WAIC waic cross-validation.
+    loo : Compute Pareto-smoothed importance sampling leave-one-out cross-validation (PSIS-LOO-CV).
+    waic : Compute the widely applicable information criterion.
+
+    References
+    ----------
+    .. [1] Vehtari et al. (2016). Practical Bayesian model evaluation using leave-one-out
+       cross-validation and WAIC https://arxiv.org/abs/1507.04544
+    """
+    information_criterion = ["elpd_loo", "elpd_waic"]
+    # NOTE(review): detection is case-insensitive, but the lookups below use the lower-case name
+    column_index = [c.lower() for c in cmp_df.columns]
+    for i_c in information_criterion:
+        if i_c in column_index:
+            break
+    else:
+        raise ValueError(
+            "cmp_df must contain one of the following "
+            f"information criterion: {information_criterion}"
+        )
+
+    if backend is None:
+        backend = rcParams["plot.backend"]
+
+    if relative_scale:
+        cmp_df = cmp_df.copy()
+        cmp_df[i_c] = cmp_df[i_c] - cmp_df[i_c].iloc[0]
+
+    p_be = import_module(f"arviz_plots.backend.{backend}")
+    # Draw on the caller's target when one is given; only create a plot otherwise
+    if target is None:
+        if figsize is None:
+            figsize = (10, len(cmp_df))
+        _, target = p_be.create_plotting_grid(1, figsize=figsize)
+    linestyle = p_be.get_default_aes("linestyle", 2, {})[-1]
+
+    # Compute positions of yticks
+    yticks_pos = list(range(len(cmp_df), 0, -1))
+    y_top, y_bottom = yticks_pos[0], yticks_pos[-1]
+
+    # Get scale and adjust it if necessary
+    scale = cmp_df["scale"].iloc[0]
+    if scale == "negative_log":
+        scale = "-log"
+
+    elpd = cmp_df[i_c]
+    best = elpd.iloc[0]  # the first row is taken as the best model
+
+    # Plot ELPD point estimates
+    p_be.scatter(elpd, yticks_pos, target, color=color)
+    # Plot ELPD standard error bars
+    for lower, upper, ytick in zip(elpd - cmp_df["se"], elpd + cmp_df["se"], yticks_pos):
+        p_be.line((lower, upper), (ytick, ytick), target, color=color)
+
+    # Add reference line for the best model
+    p_be.line(
+        (best, best), (y_top, y_bottom), target, color=color, linestyle=linestyle, alpha=0.5
+    )
+
+    # Add band for statistically indistinguishable models
+    if similar_band:
+        if scale == "log":
+            x_0, x_1 = best - 4, best
+        else:
+            x_0, x_1 = best, best + 4
+
+        p_be.fill_between_y(
+            x=[x_0, x_1], y_bottom=y_bottom, y_top=y_top, target=target, color=color, alpha=0.1
+        )
+
+    # Add title and labels
+    better = "higher" if scale == "log" else "lower"
+    p_be.title(f"Model comparison\n{better} is better", target)
+    p_be.ylabel("ranked models", target)
+    p_be.xlabel(f"ELPD ({scale})", target)
+    p_be.yticks(yticks_pos, cmp_df.index, target)
+
+    return target