Skip to content

Commit

Permalink
🧷persistence with initialized lead=0 (#706)
Browse files Browse the repository at this point in the history
* add failing test

* implemented for PerfectModel

* doctest

* rm add_attrs

* docstring

* implement for PM.bootstrap

* add warning before control missing persistence

* show plots in CHANGELOG
  • Loading branch information
aaronspring authored Dec 8, 2021
1 parent bbd9f9b commit 5e01fdb
Show file tree
Hide file tree
Showing 14 changed files with 373 additions and 91 deletions.
41 changes: 33 additions & 8 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@
What's New
==========

.. ipython:: python
:suppress:
import climpred
from climpred import HindcastEnsemble
import matplotlib as mpl
mpl.rcdefaults()
mpl.use('Agg')
# cut border when saving (for maps)
mpl.rcParams["savefig.bbox"] = "tight"
climpred unreleased (202x-xx-xx)
================================
Expand All @@ -22,9 +32,8 @@ New Features

.. code-block:: python
>>> import climpred
>>> hind = climpred.tutorial.load_dataset('CESM-DP-SST')
>>> hind.lead.attrs['units'] = 'years'
>>> hind.lead.attrs["units"] = "years"
>>> climpred.HindcastEnsemble(hind).get_initialized()
<xarray.Dataset>
Dimensions: (lead: 10, member: 10, init: 64)
Expand Down Expand Up @@ -75,20 +84,36 @@ New Features
>>> skill.sst.plot(hue="model", col="month", col_wrap=3)
(:issue:`635`, :pr:`690`) `Aaron Spring`_.
- :py:meth:`~climpred.classes.HindcastEnsemble.plot_alignment` shows how forecast and
observations are aligned based on the `alignment <alignment.html>`_ keyword.
This may help understanding which dates are matched for the different ``alignment``
approaches. (:issue:`701`, :pr:`702`) `Aaron Spring`_.
- :py:meth:`~climpred.classes.HindcastEnsemble.plot_alignment` shows how forecast and observations are
aligned based on the `alignment <alignment.html>`_ keyword. This may help
understanding which dates are matched for the different ``alignment`` approaches.
(:issue:`701`, :pr:`702`) `Aaron Spring`_.

.. ipython:: python
:okwarning:
from climpred.tutorial import load_dataset
hindcast = climpred.HindcastEnsemble(load_dataset("CESM-DP-SST")).add_observations(load_dataset("ERSST"))
@savefig plotting_MEOW.png width=100%
hindcast.plot_alignment(edgecolor="w")
- Add ``attrs`` to new ``coordinates`` created by ``climpred``.
(:issue:`695`, :pr:`697`) `Aaron Spring`_.
- Add ``seasonality="weekofyear"`` in ``reference="climatology"``.
(:pr:`703`) `Aaron Spring`_.
- Compute ``reference="persistence"`` in
:py:class:`~climpred.classes.PerfectModelEnsemble` from ``initialized`` first ``lead``
if :py:class:`~climpred.options.set_options`
``(PerfectModel_persistence_from_initialized_lead_0=True)`` (``False`` by default)
using :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
(:issue:`637`, :pr:`706`) `Aaron Spring`_.


Internals/Minor Fixes
---------------------
- Reduce dependencies (:pr:`686`) `Aaron Spring`_.
- Add `typing <https://docs.python.org/3/library/typing.html>`_ (:issue:`685`, :pr:`692`) `Aaron Spring`_.
- Reduce dependencies. (:pr:`686`) `Aaron Spring`_.
- Add `typing <https://docs.python.org/3/library/typing.html>`_.
(:issue:`685`, :pr:`692`) `Aaron Spring`_.
- refactor ``add_attrs`` into :py:meth:`~climpred.classes.HindcastEnsemble.verify` and
:py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`. Now all keywords are
captured in the skill dataset attributes ``.attrs``.
Expand Down
2 changes: 1 addition & 1 deletion climpred/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def return_inits_and_verif_dates(forecast, verif, alignment, reference=None, his
if "valid_time" not in forecast.coords: # old: create init_lead_matrix
init_lead_matrix = _construct_init_lead_matrix(forecast, n, freq, leads)
else: # new: use valid_time(init, lead)
init_lead_matrix = forecast["valid_time"].drop("valid_time").rename(None)
init_lead_matrix = forecast["valid_time"].drop_vars("valid_time").rename(None)
if dask.is_dask_collection(init_lead_matrix):
init_lead_matrix = init_lead_matrix.compute()

Expand Down
31 changes: 26 additions & 5 deletions climpred/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@
)
from .exceptions import KeywordError
from .metrics import ALL_METRICS, METRIC_ALIASES
from .options import OPTIONS
from .prediction import compute_hindcast, compute_perfect_model
from .reference import compute_climatology, compute_persistence
from .reference import (
compute_climatology,
compute_persistence,
compute_persistence_from_first_lead,
)
from .stats import dpp

try:
Expand Down Expand Up @@ -752,6 +757,20 @@ def bootstrap_compute(
if reference is None:
reference = []

compute_persistence_func = compute_persistence_from_first_lead
if (
OPTIONS["PerfectModel_persistence_from_initialized_lead_0"]
and compute.__name__ == "compute_perfect_model"
):
compute_persistence_func = compute_persistence_from_first_lead
if hind.lead[0] != 0:
if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
warnings.warn(
f"Calculate persistence from lead={int(hind.lead[0].values)} instead of lead=0 (recommended)."
)
else:
compute_persistence_func = compute_persistence

p, ci_low, ci_high = _p_ci_from_sig(sig)
p_pers, ci_low_pers, ci_high_pers = _p_ci_from_sig(pers_sig)

Expand Down Expand Up @@ -886,7 +905,7 @@ def bootstrap_compute(
**metric_kwargs,
)
if "persistence" in reference:
pers_skill = compute_persistence(
pers_skill = compute_persistence_func(
hind,
verif,
metric=metric,
Expand All @@ -895,7 +914,7 @@ def bootstrap_compute(
)
# bootstrap pers
if resample_dim == "init":
bootstrapped_pers_skill = compute_persistence(
bootstrapped_pers_skill = compute_persistence_func(
bootstrapped_hind,
verif,
metric=metric,
Expand All @@ -920,7 +939,7 @@ def bootstrap_compute(
# uninit skill as mean resampled uninit skill
unin_skill = bootstrapped_uninit_skill.mean("iteration") # noqa: F841
if "persistence" in reference:
pers_skill = compute_persistence(
pers_skill = compute_persistence_func(
hind, verif, metric=metric, dim=dim, **metric_kwargs_reference
)
if "climatology" in reference:
Expand Down Expand Up @@ -1191,7 +1210,9 @@ def bootstrap_perfect_model(
to initialization and external forcing
- `uninitialized` for the uninitialized/historical and approximates skill
from external forcing
- `persistence` for the persistence forecast
- `persistence` for the persistence forecast computed by
`compute_persistence` or `compute_persistence_from_first_lead` depending
on set_options('PerfectModel_persistence_from_initialized_lead_0')
- `climatology`
the different results:
Expand Down
95 changes: 63 additions & 32 deletions climpred/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@
_get_metric_comparison_dim,
compute_perfect_model,
)
from .reference import compute_climatology, compute_persistence
from .reference import (
compute_climatology,
compute_persistence,
compute_persistence_from_first_lead,
)
from .smoothing import (
_reset_temporal_axis,
smooth_goddard_2013,
Expand Down Expand Up @@ -1051,7 +1055,8 @@ def verify(
``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
other than ``lead`` are reduced.
reference (str, list of str): Type of reference forecasts with which to
verify. One or more of ['uninitialized', 'persistence', 'climatology'].
verify. One or more of ``['uninitialized', 'persistence', 'climatology']``.
For ``persistence``, choose between ``set_options(PerfectModel_persistence_from_initialized_lead_0)`` ``=False`` (default) using `climpred.reference.compute_persistence <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence.html#climpred.reference.compute_persistence>`_ or ``=True`` using `climpred.reference.compute_persistence_from_first_lead <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence_from_first_lead.html#climpred.reference.compute_persistence_from_first_lead>`_.
groupby (str, xr.DataArray): group ``init`` before passing ``initialized`` to ``verify``.
**metric_kwargs (optional): Arguments passed to ``metric``.
Expand Down Expand Up @@ -1099,14 +1104,15 @@ def verify(
Data variables:
tos (skill, lead) float64 0.7941 0.7489 0.5623 ... 0.1327 0.4547 0.3253
Attributes:
prediction_skill_software: climpred https://climpred.readthedocs.io/
skill_calculated_by_function: PerfectModelEnsemble.verify()
number_of_initializations: 12
number_of_members: 10
metric: pearson_r
comparison: m2m
dim: ['init', 'member']
reference: ['persistence', 'climatology', 'uninitiali...
prediction_skill_software: climpred https://clim...
skill_calculated_by_function: PerfectModelEnsemble....
number_of_initializations: 12
number_of_members: 10
metric: pearson_r
comparison: m2m
dim: ['init', 'member']
reference: ['persistence', 'clim...
PerfectModel_persistence_from_initialized_lead_0: False
"""
if groupby is not None:
return self._groupby(
Expand Down Expand Up @@ -1144,7 +1150,10 @@ def verify(
dim_orig = deepcopy(dim) # preserve dim, because
ref_compute_kwargs = metric_kwargs.copy() # persistence changes dim
ref_compute_kwargs.update({"dim": dim_orig, "metric": metric})
if r != "persistence":
if (
not OPTIONS["PerfectModel_persistence_from_initialized_lead_0"]
and r != "persistence"
):
ref_compute_kwargs["comparison"] = comparison
ref = getattr(self, f"_compute_{r}")(**ref_compute_kwargs)
result = xr.concat([result, ref], dim="skill", **CONCAT_KWARGS)
Expand Down Expand Up @@ -1226,6 +1235,9 @@ def _compute_persistence(
):
"""Verify a simple persistence forecast of the control run against itself.
Note: uses climpred.reference.compute_persistence_from_first_lead
if OPTIONS["PerfectModel_persistence_from_initialized_lead_0"] else climpred.reference.compute_persistence.
Args:
metric (str, :py:class:`~climpred.metrics.Metric`): Metric to use when
verifying skill of the persistence forecast. See `metrics </metrics.html>`_.
Expand All @@ -1245,18 +1257,33 @@ def _compute_persistence(
Van den Dool, Huug. Empirical methods in short-term climate
prediction. Oxford University Press, 2007.
"""
has_dataset(
self._datasets["control"], "control", "compute a persistence forecast"
)
if dim is None:
dim = list(self._datasets["initialized"].isel(lead=0).dims)
compute_persistence_func = compute_persistence_from_first_lead
if OPTIONS["PerfectModel_persistence_from_initialized_lead_0"]:
compute_persistence_func = compute_persistence_from_first_lead
if self.get_initialized().lead[0] != 0:
if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
warnings.warn(
f"Calculate persistence from lead={int(self.get_initialized().lead[0].values)} instead of lead=0 (recommended)."
)
else:
compute_persistence_func = compute_persistence
if self._datasets["control"] == {}:
warnings.warn(
"You may also calculate persistence based on ``initialized.isel(lead=0)`` by changing ``OPTIONS['PerfectModel_persistence_from_initialized_lead_0']=True``."
)
has_dataset(
self._datasets["control"], "control", "compute a persistence forecast"
)
input_dict = {
"ensemble": self._datasets["initialized"],
"control": self._datasets["control"],
"init": True,
}
if dim is None:
dim = list(self._datasets["initialized"].isel(lead=0).dims)

res = self._apply_climpred_function(
compute_persistence,
compute_persistence_func,
input_dict=input_dict,
metric=metric,
alignment="same_inits",
Expand Down Expand Up @@ -1345,8 +1372,9 @@ def bootstrap(
``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
other than ``lead`` are reduced.
reference (str, list of str): Type of reference forecasts with which to
verify. One or more of ['uninitialized', 'persistence', 'climatology'].
verify. One or more of ``['uninitialized', 'persistence', 'climatology']``.
If None or empty, returns no p value.
For ``persistence``, choose between ``set_options(PerfectModel_persistence_from_initialized_lead_0)`` ``=False`` (default) using `climpred.reference.compute_persistence <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence.html#climpred.reference.compute_persistence>`_ or ``=True`` using `climpred.reference.compute_persistence_from_first_lead <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence_from_first_lead.html#climpred.reference.compute_persistence_from_first_lead>`_.
iterations (int): Number of resampling iterations for bootstrapping with
replacement. Recommended >= 500.
resample_dim (str or list): dimension to resample from. default: 'member'.
Expand Down Expand Up @@ -1408,19 +1436,20 @@ def bootstrap(
* skill (skill) <U13 'initialized' 'persistence' ... 'uninitialized'
Data variables:
tos (skill, results, lead) float64 0.7941 0.7489 ... 0.1494 0.1466
Attributes:
prediction_skill_software: climpred https://climpred.readthedocs.io/
skill_calculated_by_function: PerfectModelEnsemble.bootstrap()
number_of_initializations: 12
number_of_members: 10
metric: pearson_r
comparison: m2m
dim: ['init', 'member']
reference: ['persistence', 'climatology', 'uninitiali...
resample_dim: member
sig: 95
iterations: 50
confidence_interval_levels: 0.975-0.025
Attributes: (12/13)
prediction_skill_software: climpred https://clim...
skill_calculated_by_function: PerfectModelEnsemble...
number_of_initializations: 12
number_of_members: 10
metric: pearson_r
comparison: m2m
...
reference: ['persistence', 'clim...
PerfectModel_persistence_from_initialized_lead_0: False
resample_dim: member
sig: 95
iterations: 50
confidence_interval_levels: 0.975-0.025
"""
if groupby is not None:
Expand Down Expand Up @@ -1696,7 +1725,7 @@ def plot_alignment(
Example:
>>> HindcastEnsemble.plot_alignment(alignment=None, return_xr=True)
<xarray.DataArray 'days since 1960-01-01' (alignment: 3, lead: 10, init: 61)>
<xarray.DataArray 'valid_time' (alignment: 3, lead: 10, init: 61)>
array([[[-1826., -1461., -1095., ..., nan, nan, nan],
[-1461., -1095., -730., ..., nan, nan, nan],
[-1095., -730., -365., ..., nan, nan, nan],
Expand Down Expand Up @@ -1724,6 +1753,8 @@ def plot_alignment(
* init (init) object 1954-01-01 00:00:00 ... 2014-01-01 00:00:00
* lead (lead) int32 1 2 3 4 5 6 7 8 9 10
* alignment (alignment) <U10 'same_init' 'same_verif' 'maximize'
Attributes:
units: days since 1960-01-01
>>> HindcastEnsemble.plot_alignment(alignment="same_verifs") # doctest: +SKIP
<matplotlib.collections.QuadMesh object at 0x1405c1520>
Expand Down
5 changes: 3 additions & 2 deletions climpred/graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def plot_ensemble_perfect_model(


def _verif_dates_xr(hindcast, alignment, reference, date2num_units):
"""Create verif dates xr.DataArray with dims lead and init."""
"""Create valid_time xr.DataArray with dims lead and init in units passed to cftime.date2num."""
inits, verif_dates = return_inits_and_verif_dates(
hindcast.get_initialized().rename({"init": "time"}),
hindcast.get_observations(),
Expand All @@ -427,7 +427,8 @@ def _verif_dates_xr(hindcast, alignment, reference, date2num_units):
cftime.date2num(verif_dates[k], date2num_units),
dims="init",
coords={"init": v.rename({"time": "init"}).to_index()},
name=date2num_units,
name="valid_time",
attrs=dict(units=date2num_units),
)
for k, v in inits.items()
],
Expand Down
4 changes: 2 additions & 2 deletions climpred/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,13 +941,13 @@ def _spread(forecast, verif, dim=None, **metric_kwargs):
"""Ensemble spread taking the standard deviation over the member dimension.
.. math::
spread = \\std{f}
spread = std(f) = \\sigma^2(f) = \\sqrt\\frac{\\sum{(f-\\overline{f})^2}}{N}
Args:
forecast (xarray object): Forecast.
verif (xarray object): Verification data (not used).
dim (str): Dimension(s) to perform metric over.
metric_kwargs (dict): see :py:func:`~xr.std`
metric_kwargs (dict): see :py:func:`~xarray.std`
Details:
+-----------------+-----------+
Expand Down
Loading

0 comments on commit 5e01fdb

Please sign in to comment.