🧷persistence with initialized lead=0 (#706)

* add failing test * implemented for PerfectModel * doctest * rm add_attrs * docstring * implement for PM.bootstrap * add warning before control missing persistence * show plots in CHANGELOG
pangeo-data · Dec 8, 2021 · 5e01fdb · 5e01fdb
1 parent bbd9f9b
commit 5e01fdb
Show file tree

Hide file tree

Showing 14 changed files with 373 additions and 91 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -2,6 +2,16 @@
 What's New
 ==========
 
+.. ipython:: python
+    :suppress:
+
+    import climpred
+    from climpred import HindcastEnsemble
+    import matplotlib as mpl
+    mpl.rcdefaults()
+    mpl.use('Agg')
+    # cut border when saving (for maps)
+    mpl.rcParams["savefig.bbox"] = "tight"
 
 climpred unreleased (202x-xx-xx)
 ================================
@@ -22,9 +32,8 @@ New Features
 
   .. code-block:: python
 
-      >>> import climpred
       >>> hind = climpred.tutorial.load_dataset('CESM-DP-SST')
-      >>> hind.lead.attrs['units'] = 'years'
+      >>> hind.lead.attrs["units"] = "years"
       >>> climpred.HindcastEnsemble(hind).get_initialized()
       <xarray.Dataset>
       Dimensions:     (lead: 10, member: 10, init: 64)
@@ -75,20 +84,36 @@ New Features
       >>> skill.sst.plot(hue="model", col="month", col_wrap=3)
 
   (:issue:`635`, :pr:`690`) `Aaron Spring`_.
-- :py:meth:`~climpred.classes.HindcastEnsemble.plot_alignment` shows how forecast and
-  observations are aligned based on the `alignment <alignment.html>`_ keyword.
-  This may help understanding which dates are matched for the different ``alignment``
-  approaches. (:issue:`701`, :pr:`702`) `Aaron Spring`_.
+- :py:meth:`~climpred.classes.HindcastEnsemble.plot_alignment` shows how forecast and observations are
+  aligned based on the `alignment <alignment.html>`_ keyword. This may help
+  understanding which dates are matched for the different ``alignment`` approaches.
+  (:issue:`701`, :pr:`702`) `Aaron Spring`_.
+
+  .. ipython:: python
+      :okwarning:
+
+      from climpred.tutorial import load_dataset
+      hindcast = climpred.HindcastEnsemble(load_dataset("CESM-DP-SST")).add_observations(load_dataset("ERSST"))
+      @savefig plotting_MEOW.png width=100%
+      hindcast.plot_alignment(edgecolor="w")
+
 - Add ``attrs`` to new ``coordinates`` created by ``climpred``.
   (:issue:`695`, :pr:`697`) `Aaron Spring`_.
 - Add ``seasonality="weekofyear"`` in ``reference="climatology"``.
   (:pr:`703`) `Aaron Spring`_.
+- Compute ``reference="persistence"`` in
+  :py:class:`~climpred.classes.PerfectModelEnsemble` from ``initialized`` first ``lead``
+  if :py:class:`~climpred.options.set_options`
+  ``(PerfectModel_persistence_from_initialized_lead_0=True)`` (``False`` by default)
+  using :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
+  (:issue:`637`, :pr:`706`) `Aaron Spring`_.
 
 
 Internals/Minor Fixes
 ---------------------
-- Reduce dependencies (:pr:`686`) `Aaron Spring`_.
-- Add `typing <https://docs.python.org/3/library/typing.html>`_ (:issue:`685`, :pr:`692`) `Aaron Spring`_.
+- Reduce dependencies. (:pr:`686`) `Aaron Spring`_.
+- Add `typing <https://docs.python.org/3/library/typing.html>`_.
+  (:issue:`685`, :pr:`692`) `Aaron Spring`_.
 - refactor ``add_attrs`` into :py:meth:`~climpred.classes.HindcastEnsemble.verify` and
   :py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`. Now all keywords are
   captured in the skill dataset attributes ``.attrs``.

diff --git a/climpred/alignment.py b/climpred/alignment.py
@@ -59,7 +59,7 @@ def return_inits_and_verif_dates(forecast, verif, alignment, reference=None, his
     if "valid_time" not in forecast.coords:  # old: create init_lead_matrix
         init_lead_matrix = _construct_init_lead_matrix(forecast, n, freq, leads)
     else:  # new: use valid_time(init, lead)
-        init_lead_matrix = forecast["valid_time"].drop("valid_time").rename(None)
+        init_lead_matrix = forecast["valid_time"].drop_vars("valid_time").rename(None)
     if dask.is_dask_collection(init_lead_matrix):
         init_lead_matrix = init_lead_matrix.compute()
 

diff --git a/climpred/bootstrap.py b/climpred/bootstrap.py
@@ -19,8 +19,13 @@
 )
 from .exceptions import KeywordError
 from .metrics import ALL_METRICS, METRIC_ALIASES
+from .options import OPTIONS
 from .prediction import compute_hindcast, compute_perfect_model
-from .reference import compute_climatology, compute_persistence
+from .reference import (
+    compute_climatology,
+    compute_persistence,
+    compute_persistence_from_first_lead,
+)
 from .stats import dpp
 
 try:
@@ -752,6 +757,20 @@ def bootstrap_compute(
     if reference is None:
         reference = []
 
+    compute_persistence_func = compute_persistence_from_first_lead
+    if (
+        OPTIONS["PerfectModel_persistence_from_initialized_lead_0"]
+        and compute.__name__ == "compute_perfect_model"
+    ):
+        compute_persistence_func = compute_persistence_from_first_lead
+        if hind.lead[0] != 0:
+            if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
+                warnings.warn(
+                    f"Calculate persistence from lead={int(hind.lead[0].values)} instead of lead=0 (recommended)."
+                )
+    else:
+        compute_persistence_func = compute_persistence
+
     p, ci_low, ci_high = _p_ci_from_sig(sig)
     p_pers, ci_low_pers, ci_high_pers = _p_ci_from_sig(pers_sig)
 
@@ -886,7 +905,7 @@ def bootstrap_compute(
             **metric_kwargs,
         )
         if "persistence" in reference:
-            pers_skill = compute_persistence(
+            pers_skill = compute_persistence_func(
                 hind,
                 verif,
                 metric=metric,
@@ -895,7 +914,7 @@ def bootstrap_compute(
             )
             # bootstrap pers
             if resample_dim == "init":
-                bootstrapped_pers_skill = compute_persistence(
+                bootstrapped_pers_skill = compute_persistence_func(
                     bootstrapped_hind,
                     verif,
                     metric=metric,
@@ -920,7 +939,7 @@ def bootstrap_compute(
         # uninit skill as mean resampled uninit skill
         unin_skill = bootstrapped_uninit_skill.mean("iteration")  # noqa: F841
     if "persistence" in reference:
-        pers_skill = compute_persistence(
+        pers_skill = compute_persistence_func(
             hind, verif, metric=metric, dim=dim, **metric_kwargs_reference
         )
     if "climatology" in reference:
@@ -1191,7 +1210,9 @@ def bootstrap_perfect_model(
              to initialization and external forcing
             - `uninitialized` for the uninitialized/historical and approximates skill
              from external forcing
-            - `persistence` for the persistence forecast
+            - `persistence` for the persistence forecast computed by
+             `compute_persistence` or `compute_persistence_from_first_lead` depending
+             on set_options('PerfectModel_persistence_from_initialized_lead_0')
             - `climatology`
 
         the different results:

diff --git a/climpred/classes.py b/climpred/classes.py
@@ -66,7 +66,11 @@
     _get_metric_comparison_dim,
     compute_perfect_model,
 )
-from .reference import compute_climatology, compute_persistence
+from .reference import (
+    compute_climatology,
+    compute_persistence,
+    compute_persistence_from_first_lead,
+)
 from .smoothing import (
     _reset_temporal_axis,
     smooth_goddard_2013,
@@ -1051,7 +1055,8 @@ def verify(
                 ``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
                 other than ``lead`` are reduced.
             reference (str, list of str): Type of reference forecasts with which to
-                verify. One or more of ['uninitialized', 'persistence', 'climatology'].
+                verify. One or more of ``['uninitialized', 'persistence', 'climatology']``.
+                For ``persistence``, choose between ``set_options(PerfectModel_persistence_from_initialized_lead_0)`` ``=False`` (default) using `climpred.reference.compute_persistence <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence.html#climpred.reference.compute_persistence>`_ or ``=True`` using `climpred.reference.compute_persistence_from_first_lead <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence_from_first_lead.html#climpred.reference.compute_persistence_from_first_lead>`_.
             groupby (str, xr.DataArray): group ``init`` before passing ``initialized`` to ``verify``.
             **metric_kwargs (optional): Arguments passed to ``metric``.
 
@@ -1099,14 +1104,15 @@ def verify(
             Data variables:
                 tos      (skill, lead) float64 0.7941 0.7489 0.5623 ... 0.1327 0.4547 0.3253
             Attributes:
-                prediction_skill_software:     climpred https://climpred.readthedocs.io/
-                skill_calculated_by_function:  PerfectModelEnsemble.verify()
-                number_of_initializations:     12
-                number_of_members:             10
-                metric:                        pearson_r
-                comparison:                    m2m
-                dim:                           ['init', 'member']
-                reference:                     ['persistence', 'climatology', 'uninitiali...
+                prediction_skill_software:                         climpred https://clim...
+                skill_calculated_by_function:                      PerfectModelEnsemble....
+                number_of_initializations:                         12
+                number_of_members:                                 10
+                metric:                                            pearson_r
+                comparison:                                        m2m
+                dim:                                               ['init', 'member']
+                reference:                                         ['persistence', 'clim...
+                PerfectModel_persistence_from_initialized_lead_0:  False
         """
         if groupby is not None:
             return self._groupby(
@@ -1144,7 +1150,10 @@ def verify(
                 dim_orig = deepcopy(dim)  # preserve dim, because
                 ref_compute_kwargs = metric_kwargs.copy()  # persistence changes dim
                 ref_compute_kwargs.update({"dim": dim_orig, "metric": metric})
-                if r != "persistence":
+                if (
+                    not OPTIONS["PerfectModel_persistence_from_initialized_lead_0"]
+                    and r != "persistence"
+                ):
                     ref_compute_kwargs["comparison"] = comparison
                 ref = getattr(self, f"_compute_{r}")(**ref_compute_kwargs)
                 result = xr.concat([result, ref], dim="skill", **CONCAT_KWARGS)
@@ -1226,6 +1235,9 @@ def _compute_persistence(
     ):
         """Verify a simple persistence forecast of the control run against itself.
 
+        Note: uses climpred.reference.compute_persistence_from_first_lead
+        if OPTIONS["PerfectModel_persistence_from_initialized_lead_0"] else climpred.reference.compute_persistence.
+
         Args:
             metric (str, :py:class:`~climpred.metrics.Metric`): Metric to use when
             verifying skill of the persistence forecast. See `metrics </metrics.html>`_.
@@ -1245,18 +1257,33 @@ def _compute_persistence(
               Van den Dool, Huug. Empirical methods in short-term climate
               prediction. Oxford University Press, 2007.
         """
-        has_dataset(
-            self._datasets["control"], "control", "compute a persistence forecast"
-        )
+        if dim is None:
+            dim = list(self._datasets["initialized"].isel(lead=0).dims)
+        compute_persistence_func = compute_persistence_from_first_lead
+        if OPTIONS["PerfectModel_persistence_from_initialized_lead_0"]:
+            compute_persistence_func = compute_persistence_from_first_lead
+            if self.get_initialized().lead[0] != 0:
+                if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
+                    warnings.warn(
+                        f"Calculate persistence from lead={int(self.get_initialized().lead[0].values)} instead of lead=0 (recommended)."
+                    )
+        else:
+            compute_persistence_func = compute_persistence
+            if self._datasets["control"] == {}:
+                warnings.warn(
+                    "You may also calculate persistence based on ``initialized.isel(lead=0)`` by changing ``OPTIONS['PerfectModel_persistence_from_initialized_lead_0']=True``."
+                )
+            has_dataset(
+                self._datasets["control"], "control", "compute a persistence forecast"
+            )
         input_dict = {
             "ensemble": self._datasets["initialized"],
             "control": self._datasets["control"],
             "init": True,
         }
-        if dim is None:
-            dim = list(self._datasets["initialized"].isel(lead=0).dims)
+
         res = self._apply_climpred_function(
-            compute_persistence,
+            compute_persistence_func,
             input_dict=input_dict,
             metric=metric,
             alignment="same_inits",
@@ -1345,8 +1372,9 @@ def bootstrap(
                 ``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
                 other than ``lead`` are reduced.
             reference (str, list of str): Type of reference forecasts with which to
-                verify. One or more of ['uninitialized', 'persistence', 'climatology'].
+                verify. One or more of ``['uninitialized', 'persistence', 'climatology']``.
                 If None or empty, returns no p value.
+                For ``persistence``, choose between ``set_options(PerfectModel_persistence_from_initialized_lead_0)`` ``=False`` (default) using `climpred.reference.compute_persistence <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence.html#climpred.reference.compute_persistence>`_ or ``=True`` using `climpred.reference.compute_persistence_from_first_lead <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence_from_first_lead.html#climpred.reference.compute_persistence_from_first_lead>`_.
             iterations (int): Number of resampling iterations for bootstrapping with
                 replacement. Recommended >= 500.
             resample_dim (str or list): dimension to resample from. default: 'member'.
@@ -1408,19 +1436,20 @@ def bootstrap(
               * skill    (skill) <U13 'initialized' 'persistence' ... 'uninitialized'
             Data variables:
                 tos      (skill, results, lead) float64 0.7941 0.7489 ... 0.1494 0.1466
-            Attributes:
-                prediction_skill_software:     climpred https://climpred.readthedocs.io/
-                skill_calculated_by_function:  PerfectModelEnsemble.bootstrap()
-                number_of_initializations:     12
-                number_of_members:             10
-                metric:                        pearson_r
-                comparison:                    m2m
-                dim:                           ['init', 'member']
-                reference:                     ['persistence', 'climatology', 'uninitiali...
-                resample_dim:                  member
-                sig:                           95
-                iterations:                    50
-                confidence_interval_levels:    0.975-0.025
+            Attributes: (12/13)
+                prediction_skill_software:                         climpred https://clim...
+                skill_calculated_by_function:                      PerfectModelEnsemble...
+                number_of_initializations:                         12
+                number_of_members:                                 10
+                metric:                                            pearson_r
+                comparison:                                        m2m
+                ...
+                reference:                                         ['persistence', 'clim...
+                PerfectModel_persistence_from_initialized_lead_0:  False
+                resample_dim:                                      member
+                sig:                                               95
+                iterations:                                        50
+                confidence_interval_levels:                        0.975-0.025
 
         """
         if groupby is not None:
@@ -1696,7 +1725,7 @@ def plot_alignment(
 
         Example:
             >>> HindcastEnsemble.plot_alignment(alignment=None, return_xr=True)
-            <xarray.DataArray 'days since 1960-01-01' (alignment: 3, lead: 10, init: 61)>
+            <xarray.DataArray 'valid_time' (alignment: 3, lead: 10, init: 61)>
             array([[[-1826., -1461., -1095., ...,    nan,    nan,    nan],
                     [-1461., -1095.,  -730., ...,    nan,    nan,    nan],
                     [-1095.,  -730.,  -365., ...,    nan,    nan,    nan],
@@ -1724,6 +1753,8 @@ def plot_alignment(
               * init       (init) object 1954-01-01 00:00:00 ... 2014-01-01 00:00:00
               * lead       (lead) int32 1 2 3 4 5 6 7 8 9 10
               * alignment  (alignment) <U10 'same_init' 'same_verif' 'maximize'
+            Attributes:
+                units:    days since 1960-01-01
 
             >>> HindcastEnsemble.plot_alignment(alignment="same_verifs")  # doctest: +SKIP
             <matplotlib.collections.QuadMesh object at 0x1405c1520>

diff --git a/climpred/graphics.py b/climpred/graphics.py
@@ -410,7 +410,7 @@ def plot_ensemble_perfect_model(
 
 
 def _verif_dates_xr(hindcast, alignment, reference, date2num_units):
-    """Create verif dates xr.DataArray with dims lead and init."""
+    """Create valid_time xr.DataArray with dims lead and init in units passed to cftime.date2num."""
     inits, verif_dates = return_inits_and_verif_dates(
         hindcast.get_initialized().rename({"init": "time"}),
         hindcast.get_observations(),
@@ -427,7 +427,8 @@ def _verif_dates_xr(hindcast, alignment, reference, date2num_units):
                 cftime.date2num(verif_dates[k], date2num_units),
                 dims="init",
                 coords={"init": v.rename({"time": "init"}).to_index()},
-                name=date2num_units,
+                name="valid_time",
+                attrs=dict(units=date2num_units),
             )
             for k, v in inits.items()
         ],

diff --git a/climpred/metrics.py b/climpred/metrics.py
@@ -941,13 +941,13 @@ def _spread(forecast, verif, dim=None, **metric_kwargs):
     """Ensemble spread taking the standard deviation over the member dimension.
 
     .. math::
-        spread = \\std{f}
+        spread = std(f) = \\sigma^2(f) = \\sqrt\\frac{\\sum{(f-\\overline{f})^2}}{N}
 
     Args:
         forecast (xarray object): Forecast.
         verif (xarray object): Verification data (not used).
         dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xr.std`
+        metric_kwargs (dict): see :py:func:`~xarray.std`
 
     Details:
         +-----------------+-----------+