From d781ef3c07dcbe9e358a02c070c328e1d80919c0 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Thu, 9 Dec 2021 01:40:04 +0100
Subject: [PATCH 01/56] intersphinx xclim

---
 .pre-commit-config.yaml |  19 +++-
 CHANGELOG.rst           |   1 +
 climpred/classes.py     | 200 ++++++++++++++++++++++++++++------------
 docs/source/conf.py     |   1 +
 4 files changed, 160 insertions(+), 61 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 36d242fd2..d118bc405 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,9 +29,9 @@ repos:
       hooks:
       -   id: isort
 
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.910-1
-    hooks:
+  -   repo: https://github.com/pre-commit/mirrors-mypy
+      rev: v0.910-1
+      hooks:
       - id: mypy
         exclude: "asv_bench"
         additional_dependencies: [
@@ -44,3 +44,16 @@ repos:
             # Dependencies that are typed
             numpy,
           ]
+
+  -   repo: https://github.com/pycqa/pydocstyle
+      rev: 6.1.1
+      hooks:
+      -   id: pydocstyle
+      args:
+      - --ignore=W503
+      - --max-line-length=93
+
+  -   repo: https://github.com/pycqa/doc8
+      rev: 0.10.1
+      hooks:
+      -   id: doc8
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index b92746538..6cb27c4b1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -13,6 +13,7 @@ What's New
     # cut border when saving (for maps)
     mpl.rcParams["savefig.bbox"] = "tight"
 
+
 climpred unreleased (202x-xx-xx)
 ================================
 
diff --git a/climpred/classes.py b/climpred/classes.py
index 81f2f9fed..fefdb11a5 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -1,6 +1,7 @@
 import warnings
 from copy import deepcopy
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Dict,
@@ -13,7 +14,7 @@
     Union,
 )
 
-import cf_xarray
+import cf_xarray  # noqa
 import numpy as np
 import xarray as xr
 from dask import is_dask_collection
@@ -93,8 +94,6 @@
 groupbyType = Optional[Union[str, xr.DataArray]]
 metric_kwargsType = Optional[Any]
 
-from typing import TYPE_CHECKING
-
 if TYPE_CHECKING:
     import matplotlib.pyplot as plt
 
@@ -237,9 +236,10 @@ def _groupby(self, call: str, groupby: Union[str, xr.DataArray], **kwargs: Any):
         skill_group = xr.concat(skill_group, new_dim_name).assign_coords(
             {new_dim_name: group_label}
         )
-        skill_group[new_dim_name] = skill_group[new_dim_name].assign_attrs(  # type: ignore
+        skill_group[new_dim_name] = skill_group[new_dim_name].assign_attrs(  # type: ignore # noqa: E501
             {
-                "description": "new dimension showing skill grouped by init.{groupby} created by .verify(groupby) or .bootstrap(groupby)"
+                "description": "new dimension showing skill grouped by init.{groupby}"
+                " created by .verify(groupby) or .bootstrap(groupby)"
             }
         )
         return skill_group
@@ -268,7 +268,8 @@ def nbytes(self) -> int:
 
     @property
     def sizes(self) -> Mapping[Hashable, int]:
-        """Mapping from dimension names to lengths for all PredictionEnsemble._datasets."""
+        """Mapping from dimension names to lengths for all
+        PredictionEnsemble._datasets."""
         pe_dims = dict(self.get_initialized().dims)
         for ds in self._datasets.values():
             if isinstance(ds, xr.Dataset):
@@ -293,8 +294,8 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
 
     @property
     def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
-        """Mapping from dimension names to block lengths for this dataset's data, or None if
-        the underlying data is not a dask array.
+        """Mapping from dimension names to block lengths for this dataset's data, or
+        None if the underlying data is not a dask array.
         Cannot be modified directly, but can be modified by calling .chunk().
         Same as Dataset.chunks.
         """
@@ -302,7 +303,8 @@ def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
 
     @property
     def data_vars(self) -> DataVariables:
-        """Dictionary of DataArray objects corresponding to data variables available in all PredictionEnsemble._datasets."""
+        """Dictionary of DataArray objects corresponding to data variables available in
+        all PredictionEnsemble._datasets."""
         varset = set(self.get_initialized().data_vars)
         for ds in self._datasets.values():
             if isinstance(ds, xr.Dataset):
@@ -324,7 +326,8 @@ def __len__(self) -> int:
         return len(self.data_vars)
 
     def __iter__(self) -> Iterator[Hashable]:
-        """Iterate over underlying xr.Datasets for initialized, uninitialized, observations."""
+        """Iterate over underlying xr.Datasets for initialized, uninitialized,
+        observations or initialized, uninitialized, control."""
         return iter(self._datasets.values())
 
     def __delitem__(self, key: Hashable) -> None:
@@ -495,7 +498,7 @@ def div(a, b):
             )
         # catch other dimensions in other
         if isinstance(other, tuple([xr.Dataset, xr.DataArray])):
-            if not set(other.dims).issubset(self._datasets["initialized"].dims):  # type: ignore
+            if not set(other.dims).issubset(self._datasets["initialized"].dims):  # type: ignore # noqa: E501
                 raise DimensionError(f"{error_str} containing new dimensions.")
         # catch xr.Dataset with different data_vars
         if isinstance(other, xr.Dataset):
@@ -608,7 +611,7 @@ def _apply_xr_func(v, name, *args, **kwargs):
                     error_type = type(e).__name__
                     if func_name:
                         if len(args) > 1:
-                            msg = f"{func_name}({args[1:]}, {kwargs}) failed\n{error_type}: {e}"
+                            msg = f"{func_name}({args[1:]}, {kwargs}) failed\n{error_type}: {e}"  # noqa: E501
                         else:
                             msg = f"{func_name}({kwargs}) failed\n{error_type}: {e}"
                     else:
@@ -623,7 +626,7 @@ def _apply_xr_func(v, name, *args, **kwargs):
                         if dim not in v.dims:
                             if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
                                 warnings.warn(
-                                    f"Error due to verification/control/uninitialized: {msg}"
+                                    f"Error due to verification/control/uninitialized: {msg}"  # noqa: E501
                                 )
                     else:
                         if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
@@ -658,7 +661,8 @@ def _apply_func(
 
         # More explicit than nested dictionary comprehension.
         for key, ds in datasets.items():
-            # If ds is xr.Dataset, apply the function directly to it. else, e.g. for {} ignore
+            # If ds is xr.Dataset, apply the function directly to it
+            # else, e.g. for {} ignore
             if isinstance(ds, xr.Dataset):
                 dim = kwargs.get("dim", "")
                 if "_or_" in dim:
@@ -724,7 +728,8 @@ def smooth(
             Uninitialized:
                 None
 
-            ``smooth`` simultaneously aggregates spatially listening to ``lon`` and ``lat`` and temporally listening to ``lead`` or ``time``.
+            ``smooth`` simultaneously aggregates spatially listening to ``lon`` and
+            ``lat`` and temporally listening to ``lead`` or ``time``.
 
             >>> HindcastEnsemble_3D.smooth({'lead': 2, 'lat': 5, 'lon': 4}).get_initialized().coords
             Coordinates:
@@ -860,8 +865,8 @@ def _remove_seasonality(ds, initialized_dim="init", seasonality=None):
         )
 
     def _warn_if_chunked_along_init_member_time(self) -> None:
-        """Warn upon instantiation when CLIMPRED_DIMS except ``lead`` are chunked with
-        more than one chunk to show how to circumvent ``xskillscore`` chunking
+        """Warn upon instantiation when ``CLIMPRED_DIMS`` except ``lead`` are chunked
+        with more than one chunk to show how to circumvent ``xskillscore`` chunking
         ``ValueError``."""
         suggest_one_chunk = []
         for d in self.chunks:
@@ -879,17 +884,32 @@ def _warn_if_chunked_along_init_member_time(self) -> None:
             if "time" in suggest_one_chunk_time_to_init:
                 suggest_one_chunk_time_to_init.remove("time")
                 suggest_one_chunk_time_to_init.append("init")
-            msg = f"{name} is chunked along dimensions {suggest_one_chunk} with more than one chunk. `{name}.chunks={self.chunks}`.\nYou cannot call `{name}.verify` or `{name}.bootstrap` in combination with any of {suggest_one_chunk_time_to_init} passed as `dim`. In order to do so, please rechunk {suggest_one_chunk} with `{name}.chunk({{dim:-1}}).verify(dim=dim).`\nIf you do not want to use dimensions {suggest_one_chunk_time_to_init} in `{name}.verify(dim=dim)`, you can disregard this warning."
+            msg = (
+                f"{name} is chunked along dimensions {suggest_one_chunk} with more "
+                f"than one chunk. `{name}.chunks={self.chunks}`.\nYou cannot call "
+                f"`{name}.verify` or `{name}.bootstrap` in combination with any of "
+                f"{suggest_one_chunk_time_to_init} passed as `dim`. In order to do "
+                f"so, please rechunk {suggest_one_chunk} with `{name}.chunk("
+                "{dim:-1}).verify(dim=dim).`\nIf you do not want to use dimensions "
+                f"{suggest_one_chunk_time_to_init} in `{name}.verify(dim=dim)`, you "
+                "can disregard this warning."
+            )
             # chunk lead:1 in HindcastEnsemble
             if self.kind == "hindcast":
-                msg += '\nIn `HindcastEnsemble`s you may also create one chunk per lead, as the `climpred` internally loops over lead, e.g. `.chunk({{"lead": 1}}).verify().`'
+                msg += "\nIn `HindcastEnsemble`s you may also create one chunk " \
+                    "per lead, as the `climpred` internally loops over lead, e.g. " \
+                    '`.chunk({"lead": 1}).verify().`'
             # chunk auto on non-climpred dims
             ndims = list(self.sizes)
             for d in CLIMPRED_DIMS:
                 if d in ndims:
                     ndims.remove(d)
             if len(ndims) > 0:
-                msg += f'\nConsider chunking embarassingly parallel dimensions such as {ndims} automatically, i.e. `{name}.chunk({ndims[0]}="auto").verify(...).'
+                msg += (
+                    f"\nConsider chunking embarassingly parallel dimensions such as "
+                    f"{ndims} automatically, i.e. "
+                    f'`{name}.chunk({ndims[0]}="auto").verify(...).'
+                )
             warnings.warn(msg)
 
 
@@ -1055,8 +1075,15 @@ def verify(
                 ``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
                 other than ``lead`` are reduced.
             reference (str, list of str): Type of reference forecasts with which to
-                verify. One or more of ``['uninitialized', 'persistence', 'climatology']``.
-                For ``persistence``, choose between ``set_options(PerfectModel_persistence_from_initialized_lead_0)`` ``=False`` (default) using `climpred.reference.compute_persistence <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence.html#climpred.reference.compute_persistence>`_ or ``=True`` using `climpred.reference.compute_persistence_from_first_lead <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence_from_first_lead.html#climpred.reference.compute_persistence_from_first_lead>`_.
+                verify against.
+                One or more of ``["uninitialized", "persistence", "climatology"]``.
+                Defaults to ``None`` meaning no reference.
+                For ``persistence``, choose between
+                ``set_options(PerfectModel_persistence_from_initialized_lead_0=False)``
+                (default) using :py:func:`~climpred.reference.compute_persistence` or
+                ``set_options(PerfectModel_persistence_from_initialized_lead_0=True)``
+                using
+                :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
             groupby (str, xr.DataArray): group ``init`` before passing ``initialized`` to ``verify``.
             **metric_kwargs (optional): Arguments passed to ``metric``.
 
@@ -1235,8 +1262,9 @@ def _compute_persistence(
     ):
         """Verify a simple persistence forecast of the control run against itself.
 
-        Note: uses climpred.reference.compute_persistence_from_first_lead
-        if OPTIONS["PerfectModel_persistence_from_initialized_lead_0"] else climpred.reference.compute_persistence.
+        Note: uses :py:func:`~climpred.reference.compute_persistence_from_first_lead`
+        if ``set_options(PerfectModel_persistence_from_initialized_lead_0=True)`` else
+        :py:func:`~climpred.reference.compute_persistence`.
 
         Args:
             metric (str, :py:class:`~climpred.metrics.Metric`): Metric to use when
@@ -1265,13 +1293,17 @@ def _compute_persistence(
             if self.get_initialized().lead[0] != 0:
                 if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
                     warnings.warn(
-                        f"Calculate persistence from lead={int(self.get_initialized().lead[0].values)} instead of lead=0 (recommended)."
+                        "Calculate persistence from "
+                        f"lead={int(self.get_initialized().lead[0].values)} instead "
+                        "of lead=0 (recommended)."
                     )
         else:
             compute_persistence_func = compute_persistence
             if self._datasets["control"] == {}:
                 warnings.warn(
-                    "You may also calculate persistence based on ``initialized.isel(lead=0)`` by changing ``OPTIONS['PerfectModel_persistence_from_initialized_lead_0']=True``."
+                    "You may also calculate persistence based on "
+                    "``initialized.isel(lead=0)`` by changing "
+                    " ``set_options(PerfectModel_persistence_from_initialized_lead_0=True)``."
                 )
             has_dataset(
                 self._datasets["control"], "control", "compute a persistence forecast"
@@ -1372,12 +1404,20 @@ def bootstrap(
                 ``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
                 other than ``lead`` are reduced.
             reference (str, list of str): Type of reference forecasts with which to
-                verify. One or more of ``['uninitialized', 'persistence', 'climatology']``.
+                verify against.
+                One or more of ``["uninitialized", "persistence", "climatology"]``.
+                Defaults to ``None`` meaning no reference.
                 If None or empty, returns no p value.
-                For ``persistence``, choose between ``set_options(PerfectModel_persistence_from_initialized_lead_0)`` ``=False`` (default) using `climpred.reference.compute_persistence <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence.html#climpred.reference.compute_persistence>`_ or ``=True`` using `climpred.reference.compute_persistence_from_first_lead <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence_from_first_lead.html#climpred.reference.compute_persistence_from_first_lead>`_.
+                For ``persistence``, choose between
+                ``set_options(PerfectModel_persistence_from_initialized_lead_0=False)``
+                (default) using :py:func:`~climpred.reference.compute_persistence` or
+                ``set_options(PerfectModel_persistence_from_initialized_lead_0=True)``
+                using
+                :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
             iterations (int): Number of resampling iterations for bootstrapping with
                 replacement. Recommended >= 500.
-            resample_dim (str or list): dimension to resample from. default: 'member'.
+            resample_dim (str or list of str): dimension to resample from.
+                Defaults to ``"member"``.
 
                 - 'member': select a different set of members from hind
                 - 'init': select a different set of initializations from hind
@@ -1386,7 +1426,8 @@ def bootstrap(
                 uninitialized and persistence beat initialized skill.
             pers_sig (int): If not ``None``, the separate significance level for
                 persistence. Defaults to ``None``, or the same significance as ``sig``.
-            groupby (str, xr.DataArray): group ``init`` before passing ``initialized`` to ``bootstrap``.
+            groupby (str, xr.DataArray): group ``init`` before passing ``initialized``
+                to ``bootstrap``.
             **metric_kwargs (optional): arguments passed to ``metric``.
 
         Returns:
@@ -1605,6 +1646,18 @@ def add_observations(
         datasets.update({"observations": xobj})
         return self._construct_direct(datasets, kind="hindcast")
 
+    def add_verification(
+        self, xobj: Union[xr.DataArray, xr.Dataset]
+    ) -> "HindcastEnsemble":
+        """Add verification data against which to verify the initialized ensemble.
+        Same as add_observations()
+
+        Args:
+            xobj (xarray object): Dataset/DataArray to append to the
+                ``HindcastEnsemble`` object.
+        """
+        return self.add_observations(xobj)
+
     @is_xarray(1)
     def add_uninitialized(
         self, xobj: Union[xr.DataArray, xr.Dataset]
@@ -1707,9 +1760,10 @@ def plot_alignment(
                   each lead should be based on the same set of verification dates.
 
                 - None defaults to the three above
-            reference (str, list of str): Type of reference forecasts to also verify against the
-                observations. Choose one or more of ['uninitialized', 'persistence', 'climatology'].
-                Defaults to None.
+            reference (str, list of str): Type of reference forecasts with which to
+                verify against.
+                One or more of ``["uninitialized", "persistence", "climatology"]``.
+                Defaults to ``None`` meaning no reference.
             date2num_units : str
                 passed to cftime.date2num as units
             return_xr : bool
@@ -1793,13 +1847,15 @@ def plot_alignment(
         if return_xr:
             return verif_dates_xr
         try:
-            import nc_time_axis
+            import nc_time_axis  # noqa:
 
             assert int(nc_time_axis.__version__.replace(".", "")) >= 140
             return verif_dates_xr.plot(cmap=cmap, edgecolors=edgecolors, **plot_kwargs)
         except ImportError:
             raise ValueError("nc_time_axis>1.4.0 required for plotting.")
 
+    from .docstrings import comparison_docstring
+
     def verify(
         self,
         metric: metricType = None,
@@ -1817,9 +1873,6 @@ def verify(
             between the initialized ensemble and observations/verification data.
 
         Args:
-            reference (str, list of str): Type of reference forecasts to also verify against the
-                observations. Choose one or more of ['uninitialized', 'persistence', 'climatology'].
-                Defaults to None.
             metric (str, :py:class:`~climpred.metrics.Metric`): Metric to apply for
                 verification. see `metrics </metrics.html>`_.
             comparison (str, :py:class:`~climpred.comparisons.Comparison`): How to
@@ -1843,6 +1896,10 @@ def verify(
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
 
+            reference (str, list of str): Type of reference forecasts with which to
+                verify against.
+                One or more of ``["uninitialized", "persistence", "climatology"]``.
+                Defaults to ``None`` meaning no reference.
             groupby (str): group ``init`` before passing ``initialized`` to ``verify``.
             **metric_kwargs (optional): arguments passed to ``metric``.
 
@@ -2090,7 +2147,9 @@ def bootstrap(
                 but should not contain ``member`` when ``comparison='e2o'``. Defaults to
                 ``None`` meaning that all dimensions other than ``lead`` are reduced.
             reference (str, list of str): Type of reference forecasts with which to
-                verify. One or more of ['uninitialized', 'persistence', 'climatology'].
+                verify against.
+                One or more of ``["uninitialized", "persistence", "climatology"]``.
+                Defaults to ``None`` meaning no reference.
                 If None or empty, returns no p value.
             alignment (str): which inits or verification times should be aligned?
 
@@ -2202,7 +2261,8 @@ def bootstrap(
             self.get_uninitialized(), xr.Dataset
         ):
             raise ValueError(
-                "reference uninitialized requires uninitialized dataset. Use HindcastEnsemble.add_uninitialized(uninitialized_ds)."
+                "`reference='uninitialized'` requires `uninitialized` dataset. "
+                "Use `HindcastEnsemble.add_uninitialized(uninitialized_ds)`."
             )
         bootstrapped_skill = bootstrap_hindcast(
             self.get_initialized(),
@@ -2250,7 +2310,9 @@ def remove_bias(
     ) -> "HindcastEnsemble":
         """Calculate and remove bias from
         :py:class:`~climpred.classes.HindcastEnsemble`.
-        Bias is grouped by ``seasonality`` set via :py:class:`~climpred.options.set_options`. When wrapping xclim.sbda.adjustment use ``group`` instead.
+        Bias is grouped by ``seasonality`` set via
+        :py:class:`~climpred.options.set_options`.
+        When wrapping ``xclim.sdba.adjustment`` use ``group`` instead.
 
         Args:
             alignment (str): which inits or verification times should be aligned?
@@ -2266,23 +2328,27 @@ def remove_bias(
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
 
-            how (str): what kind of bias removal to perform. Defaults to 'additive_mean'. Select from:
+            how (str): what kind of bias removal to perform.
+                Defaults to 'additive_mean'. Select from:
 
                 - 'additive_mean': correcting the mean forecast additively
                 - 'multiplicative_mean': correcting the mean forecast multiplicatively
-                - 'multiplicative_std': correcting the standard deviation multiplicatively
+                - 'multiplicative_std': correcting the standard deviation
+                    multiplicatively
                 - 'modified_quantile': `Reference <https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub>`_
                 - 'basic_quantile': `Reference <https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168>`_
                 - 'gamma_mapping': `Reference <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
                 - 'normal_mapping': `Reference <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
-                - 'EmpiricalQuantileMapping': `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.EmpiricalQuantileMapping>`_
-                - 'DetrendedQuantileMapping': `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.DetrendedQuantileMapping>`_
-                - 'PrincipalComponents': `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.PrincipalComponents>`_
-                - 'QuantileDeltaMapping': `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.QuantileDeltaMapping>`_
-                - 'Scaling': `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.Scaling>`_
-                - 'LOCI': `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.LOCI>`_
+                - :py:class:`~xclim.sdba.adjustment.EmpiricalQuantileMapping`
+                - :py:class:`~xclim.sdba.adjustment.DetrendedQuantileMapping`
+                - :py:class:`~xclim.sdba.adjustment.PrincipalComponents`
+                - :py:class:`~xclim.sdba.adjustment.QuantileDeltaMapping`
+                - :py:class:`~xclim.sdba.adjustment.Scaling`
+                - :py:class:`~xclim.sdba.adjustment.LOCI`
 
-            train_test_split (str): How to separate train period to calculate the bias and test period to apply bias correction to? For a detailed description, see `Risbey et al. 2021 <http://www.nature.com/articles/s41467-021-23771-z>`_:
+            train_test_split (str): How to separate train period to calculate the bias
+                and test period to apply bias correction to? For a detailed
+                description, see `Risbey et al. 2021 <http://www.nature.com/articles/s41467-021-23771-z>`_:
 
                 - `fair`: no overlap between `train` and `test` (recommended).
                     Set either `train_init` or `train_time`.
@@ -2296,14 +2362,16 @@ def remove_bias(
                 when ``alignment='same_inits/maximize'``.
             train_time (xr.DataArray, slice): Define time for training
                 when ``alignment='same_verif'``.
-            cv (bool or str): Only relevant when `train_test_split='unfair-cv'`. Defaults to False.
+            cv (bool or str): Only relevant when `train_test_split='unfair-cv'`.
+                Defaults to False.
 
                 - True/'LOO': Calculate bias by `leaving given initialization out <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation>`_
                 - False: include all initializations in the calculation of bias, which
                     is much faster and but yields similar skill with a large N of
                     initializations.
 
-            **metric_kwargs (dict): passed to ``xclim.sdba`` (including ``group``) or ``XBias_Correction``
+            **metric_kwargs (dict): passed to ``xclim.sdba`` (including ``group``)
+                or ``XBias_Correction``
 
         Returns:
             HindcastEnsemble: bias removed HindcastEnsemble.
@@ -2386,7 +2454,9 @@ def remove_bias(
                 dim:                           init
                 reference:                     []
 
-            Wrapping methods ``how`` from `xclim <https://xclim.readthedocs.io/en/stable/sdba_api.html>`_ and providing ``group`` for ``groupby``:
+            Wrapping methods ``how`` from
+            `xclim <https://xclim.readthedocs.io/en/stable/sdba_api.html>`_ and
+            providing ``group`` for ``groupby``:
 
             >>> HindcastEnsemble.remove_bias(alignment='same_init', group='init',
             ...     how='DetrendedQuantileMapping', train_test_split='unfair',
@@ -2436,7 +2506,12 @@ def remove_bias(
         """
         if train_test_split not in BIAS_CORRECTION_TRAIN_TEST_SPLIT_METHODS:
             raise NotImplementedError(
-                f"train_test_split='{train_test_split}' not implemented. Please choose `train_test_split` from {BIAS_CORRECTION_TRAIN_TEST_SPLIT_METHODS}, see Risbey et al. 2021 http://www.nature.com/articles/s41467-021-23771-z for description and https://github.com/pangeo-data/climpred/issues/648 for implementation status."
+                f"train_test_split='{train_test_split}' not implemented. Please choose "
+                f" `train_test_split` from {BIAS_CORRECTION_TRAIN_TEST_SPLIT_METHODS}, "
+                "see Risbey et al. 2021 "
+                "http://www.nature.com/articles/s41467-021-23771-z for description and "
+                "https://github.com/pangeo-data/climpred/issues/648 for implementation "
+                " status."
             )
 
         alignment = _check_valud_alignment(alignment)
@@ -2447,14 +2522,20 @@ def remove_bias(
                 or not isinstance(train_init, (slice, xr.DataArray))
             ) and (alignment in ["same_inits", "maximize"]):
                 raise ValueError(
-                    f'When alignment="{alignment}", please provide `train_init` as xr.DataArray, e.g. `hindcast.coords["init"].slice(start, end)` or slice, e.g. `slice(start, end)`, got `train_init={train_init}`.'
+                    f'When alignment="{alignment}", please provide `train_init` as '
+                    '`xr.DataArray`, e.g. `HindcastEnsemble.coords["init"]'
+                    ".slice(start, end)` or slice, e.g. `slice(start, end)`, "
+                    f"got `train_init={train_init}`."
                 )
             if (
                 (train_time is None)
                 or not isinstance(train_time, (slice, xr.DataArray))
             ) and (alignment in ["same_verif"]):
                 raise ValueError(
-                    f'When alignment="{alignment}", please provide `train_time` as xr.DataArray, e.g. `hindcast.coords["time"].slice(start, end)` or slice, e.g. `slice(start, end)`, got `train_time={train_time}`'
+                    f'When alignment="{alignment}", please provide `train_time` as '
+                    '`xr.DataArray`, e.g. `HindcastEnsemble.coords["time"]'
+                    ".slice(start, end)` or slice, e.g. `slice(start, end)`, "
+                    f"got `train_time={train_time}`."
                 )
 
             if isinstance(train_init, slice):
@@ -2472,19 +2553,22 @@ def remove_bias(
             func = xclim_sdba
         else:
             raise NotImplementedError(
-                f"bias removal '{how}' is not implemented, please choose from {INTERNAL_BIAS_CORRECTION_METHODS+BIAS_CORRECTION_BIAS_CORRECTION_METHODS}."
+                f"bias removal '{how}' is not implemented, please choose from "
+                f" {INTERNAL_BIAS_CORRECTION_METHODS+BIAS_CORRECTION_BIAS_CORRECTION_METHODS}."
             )
 
         if train_test_split in ["unfair-cv"]:
             if cv not in [True, "LOO"]:
                 raise ValueError(
-                    f"Please provide `cv='LOO'` when train_test_split='unfair-cv', found `cv='{cv}'`"
+                    f"Please provide cross-validation keyword `cv='LOO'` when using "
+                    f"`train_test_split='unfair-cv'`, found `cv='{cv}'`."
                 )
             else:
                 cv = "LOO"  # backward compatibility
             if cv not in CROSS_VALIDATE_METHODS:
                 raise NotImplementedError(
-                    f"cross validation method {cv} not implemented. Please choose cv from {CROSS_VALIDATE_METHODS}."
+                    f"Cross validation method {cv} not implemented. "
+                    f"Please choose cv from {CROSS_VALIDATE_METHODS}."
                 )
             metric_kwargs["cv"] = cv
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 2e5a8495e..f5860f46b 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -89,6 +89,7 @@
     "xarray": ("https://xarray.pydata.org/en/stable/", None),
     "numpy": ("https://docs.scipy.org/doc/numpy/", None),
     "xskillscore": ("https://xskillscore.readthedocs.io/en/stable", None),
+    "xclim": ("https://xclim.readthedocs.io/en/latest/", None),
 }
 
 # Should only be uncommented when testing page development while notebooks

From feb88e15d411e53f54612ff681e92692c5951bd2 Mon Sep 17 00:00:00 2001
From: Aaron Spring <aaronspring@users.noreply.github.com>
Date: Thu, 9 Dec 2021 03:28:36 +0100
Subject: [PATCH 02/56] Update climpred/classes.py

---
 climpred/classes.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index fefdb11a5..5a073abd1 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -1854,7 +1854,6 @@ def plot_alignment(
         except ImportError:
             raise ValueError("nc_time_axis>1.4.0 required for plotting.")
 
-    from .docstrings import comparison_docstring
 
     def verify(
         self,

From f38064d8d396a05d5ff58f190d53fd8854c1440f Mon Sep 17 00:00:00 2001
From: Aaron Spring <aaronspring@users.noreply.github.com>
Date: Thu, 9 Dec 2021 03:29:01 +0100
Subject: [PATCH 03/56] Update climpred/classes.py

---
 climpred/classes.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index 5a073abd1..87ba5eaed 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -1854,7 +1854,6 @@ def plot_alignment(
         except ImportError:
             raise ValueError("nc_time_axis>1.4.0 required for plotting.")
 
-
     def verify(
         self,
         metric: metricType = None,

From 445f439324f9154f444f156d6ee16afa2308fe25 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Thu, 9 Dec 2021 13:10:14 +0100
Subject: [PATCH 04/56] smoothing almost done

---
 .pre-commit-config.yaml  |   8 +-
 climpred/bias_removal.py | 109 +++++----
 climpred/bootstrap.py    |  52 +++--
 climpred/checks.py       |  23 +-
 climpred/classes.py      | 127 ++++++----
 climpred/comparisons.py  |  48 ++--
 climpred/conftest.py     |   6 +-
 climpred/constants.py    |   2 +-
 climpred/graphics.py     |   3 +-
 climpred/horizon.py      |  26 ++-
 climpred/logging.py      |   4 +-
 climpred/metrics.py      | 485 +++++++++++++++++++++++++++------------
 climpred/options.py      |  54 +++--
 climpred/prediction.py   |   3 +-
 climpred/reference.py    |  89 ++++---
 climpred/smoothing.py    | 194 ++++++++--------
 climpred/tutorial.py     |   8 +-
 setup.cfg                |   8 +
 18 files changed, 787 insertions(+), 462 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d118bc405..0bac364c5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,10 +50,16 @@ repos:
       hooks:
       -   id: pydocstyle
       args:
-      - --ignore=W503
+      #- --ignore=W503
       - --max-line-length=93
+      - --convention google # https://google.github.io/styleguide/pyguide.html#Comments
 
   -   repo: https://github.com/pycqa/doc8
       rev: 0.10.1
       hooks:
       -   id: doc8
+
+  - repo: https://github.com/keewis/blackdoc
+    rev: v0.3.4
+    hooks:
+    -   id: blackdoc
diff --git a/climpred/bias_removal.py b/climpred/bias_removal.py
index 40ca5a6c1..cdeb6fb5c 100644
--- a/climpred/bias_removal.py
+++ b/climpred/bias_removal.py
@@ -37,7 +37,9 @@ def div(a, b):
 def leave_one_out(bias, dim):
     """Leave-one-out creating a new dimension 'sample' and fill with np.NaN.
 
-    See also: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.LeaveOneOut.html"""
+    See also:
+        * https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.LeaveOneOut.html # noqa: E501
+    """
     bias_nan = []
     for i in range(bias[dim].size):
         bias_nan.append(
@@ -277,27 +279,31 @@ def gaussian_bias_removal(
     train_init=None,
     **metric_kwargs,
 ):
-    """Calc bias based on OPTIONS['seasonality'] and remove bias from py:class:`~climpred.classes.HindcastEnsemble`.
+    """Calc bias based on ``OPTIONS['seasonality']`` and remove bias from
+    :py:class:`~climpred.classes.HindcastEnsemble`.
 
     Args:
         hindcast (HindcastEnsemble): hindcast.
         alignment (str): which inits or verification times should be aligned?
-            - maximize/None: maximize the degrees of freedom by slicing ``hind`` and
-            ``verif`` to a common time frame at each lead.
-            - same_inits: slice to a common init frame prior to computing
-            metric. This philosophy follows the thought that each lead should be
-            based on the same set of initializations.
-            - same_verif: slice to a common/consistent verification time frame prior
-            to computing metric. This philosophy follows the thought that each lead
-            should be based on the same set of verification dates.
+
+            - ``maximize``: maximize the degrees of freedom by slicing ``initialized``
+                and ``verif`` to a common time frame at each lead.
+            - ``same_inits``: slice to a common ``init`` frame prior to computing
+                metric. This philosophy follows the thought that each lead should be
+                based on the same set of initializations.
+            - ``same_verif``: slice to a common/consistent verification time frame prior
+                to computing metric. This philosophy follows the thought that each lead
+                should be based on the same set of verification dates.
+
         how (str): what kind of bias removal to perform. Select
-            from ['additive_mean', 'multiplicative_mean','multiplicative_std']. Defaults to 'additive_mean'.
-        cv (bool or str): Defaults to True.
+            from ``['additive_mean', 'multiplicative_mean','multiplicative_std']``.
+            Defaults to ``'additive_mean'``.
+        cv (bool or str): Defaults to ``True``.
 
-            - True: Use cross validation in bias removal function.
+            - ``True``: Use cross validation in bias removal function.
                 This excludes the given initialization from the bias calculation.
-            - 'LOO': see True
-            - False: include the given initialization in the calculation, which
+            - ``'LOO'``: see ``True``
+            - ``False``: include the given initialization in the calculation, which
                 is much faster and but yields similar skill with a large N of
                 initializations.
 
@@ -401,26 +407,30 @@ def bias_correction(
     train_init=None,
     **metric_kwargs,
 ):
-    """Calc bias based on OPTIONS['seasonality'] and remove bias from py:class:`~climpred.classes.HindcastEnsemble`.
+    """Calc bias based on OPTIONS['seasonality'] and remove bias from
+    :py:class:`~climpred.classes.HindcastEnsemble`.
 
     Args:
         hindcast (HindcastEnsemble): hindcast.
         alignment (str): which inits or verification times should be aligned?
-            - maximize/None: maximize the degrees of freedom by slicing ``hind`` and
-            ``verif`` to a common time frame at each lead.
-            - same_inits: slice to a common init frame prior to computing
-            metric. This philosophy follows the thought that each lead should be
-            based on the same set of initializations.
-            - same_verif: slice to a common/consistent verification time frame prior
-            to computing metric. This philosophy follows the thought that each lead
-            should be based on the same set of verification dates.
+
+            - ``maximize``: maximize the degrees of freedom by slicing ``initialized``
+                and ``verif`` to a common time frame at each lead.
+            - ``same_inits``: slice to a common ``init`` frame prior to computing
+                metric. This philosophy follows the thought that each lead should be
+                based on the same set of initializations.
+            - ``same_verif``: slice to a common/consistent verification time frame prior
+                to computing metric. This philosophy follows the thought that each lead
+                should be based on the same set of verification dates.
+
         how (str): what kind of bias removal to perform. Select
-            from ['additive_mean', 'multiplicative_mean','multiplicative_std']. Defaults to 'additive_mean'.
+            from ``['additive_mean', 'multiplicative_mean','multiplicative_std']``.
+            Defaults to ``'additive_mean'``.
         cv (bool): Use cross validation in bias removal function. This
             excludes the given initialization from the bias calculation. With False,
             include the given initialization in the calculation, which is much faster
             but yields similar skill with a large N of initializations.
-            Defaults to True.
+            Defaults to ``True``.
 
     Returns:
         HindcastEnsemble: bias removed hindcast.
@@ -435,12 +445,16 @@ def bc_func(
         cv=False,
         **metric_kwargs,
     ):
-        """Wrapping https://github.com/pankajkarman/bias_correction/blob/master/bias_correction.py.
+        """Wrapping
+        https://github.com/pankajkarman/bias_correction/blob/master/bias_correction.py.
 
-        Functions to perform bias correction of datasets to remove biases across datasets. Implemented methods include:
-        - quantile mapping: https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168)
+        Functions to perform bias correction of datasets to remove biases across
+        datasets. Implemented methods include:
+        - quantile_mapping:
+            https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168
         - modified quantile mapping: https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub
-        - scaled distribution mapping (Gamma and Normal Corrections): https://www.hydrol-earth-syst-sci.net/21/2649/2017/
+        - scaled distribution mapping (Gamma and Normal Corrections):
+            https://www.hydrol-earth-syst-sci.net/21/2649/2017/
         """
         corrected = []
         seasonality = OPTIONS["seasonality"]
@@ -507,7 +521,6 @@ def bc_func(
                 )
             dim_used = dim2 if "member" in forecast.dims else dim
 
-            # using bias-correction: https://github.com/pankajkarman/bias_correction/blob/master/bias_correction.py
             with warnings.catch_warnings():
                 warnings.simplefilter("ignore", category=RuntimeWarning)
                 bc = XBiasCorrection(
@@ -574,22 +587,27 @@ def xclim_sdba(
     train_init=None,
     **metric_kwargs,
 ):
-    """Calc bias based on grouper to be passed as metric_kwargs and remove bias from py:class:`~climpred.classes.HindcastEnsemble`.
+    """Calc bias based on ``grouper`` to be passed as ``metric_kwargs`` and remove bias
+    from :py:class:`~climpred.classes.HindcastEnsemble`.
 
-    See climpred.constants.XCLIM_BIAS_CORRECTION_METHODS for implemented methods for ``how``.
+    See :py:func:`~climpred.constants.XCLIM_BIAS_CORRECTION_METHODS` for implemented
+    methods for ``how``.
 
     Args:
         hindcast (HindcastEnsemble): hindcast.
         alignment (str): which inits or verification times should be aligned?
-            - maximize/None: maximize the degrees of freedom by slicing ``hind`` and
-            ``verif`` to a common time frame at each lead.
-            - same_inits: slice to a common init frame prior to computing
-            metric. This philosophy follows the thought that each lead should be
-            based on the same set of initializations.
-            - same_verif: slice to a common/consistent verification time frame prior
-            to computing metric. This philosophy follows the thought that each lead
-            should be based on the same set of verification dates.
-        how (str): not used
+
+            - ``maximize``: maximize the degrees of freedom by slicing ``initialized``
+                and ``verif`` to a common time frame at each lead.
+            - ``same_inits``: slice to a common ``init`` frame prior to computing
+                metric. This philosophy follows the thought that each lead should be
+                based on the same set of initializations.
+            - ``same_verif``: slice to a common/consistent verification time frame prior
+                to computing metric. This philosophy follows the thought that each lead
+                should be based on the same set of verification dates.
+
+        how (str): methods for bias reduction, see
+            :py:func:`~climpred.constants.XCLIM_BIAS_CORRECTION_METHODS`
         cv (bool): Use cross validation in removal function. This
             excludes the given initialization from the bias calculation. With False,
             include the given initialization in the calculation, which is much faster
@@ -609,9 +627,12 @@ def bc_func(
         cv=False,
         **metric_kwargs,
     ):
-        """Wrapping https://github.com/Ouranosinc/xclim/blob/master/xclim/sdba/adjustment.py.
+        """Wrapping
+        https://github.com/Ouranosinc/xclim/blob/master/xclim/sdba/adjustment.py.
 
-        Functions to perform bias correction of datasets to remove biases across datasets. See climpred.constants.XCLIM_BIAS_CORRECTION_METHODS for implemented methods.
+        Functions to perform bias correction of datasets to remove biases across
+        datasets. See :py:func:`~climpred.constants.XCLIM_BIAS_CORRECTION_METHODS`
+        for implemented methods.
         """
         seasonality = OPTIONS["seasonality"]
         dim = "time"
diff --git a/climpred/bootstrap.py b/climpred/bootstrap.py
index 965888122..6babab7c8 100644
--- a/climpred/bootstrap.py
+++ b/climpred/bootstrap.py
@@ -158,7 +158,7 @@ def _resample_iterations_idx(
         init = init.copy(deep=True)
 
     def select_bootstrap_indices_ufunc(x, idx):
-        """Selects multi-level indices ``idx`` from xarray object ``x`` for all
+        """Selects multi-level indices ``idx`` from xr.Dataset ``x`` for all
         iterations."""
         # `apply_ufunc` sometimes adds a singleton dimension on the end, so we squeeze
         # it out here. This leverages multi-level indexing from numpy, so we can
@@ -210,13 +210,13 @@ def _distribution_to_ci(ds, ci_low, ci_high, dim="iteration"):
     Needed for bootstrapping confidence intervals and p_values of a metric.
 
     Args:
-        ds (xarray object): distribution.
+        ds (xr.Dataset): distribution.
         ci_low (float): low confidence interval.
         ci_high (float): high confidence interval.
         dim (str): dimension to apply xr.quantile to. Default: 'iteration'
 
     Returns:
-        uninit_hind (xarray object): uninitialize hindcast with hind.coords.
+        uninit_hind (xr.Dataset): uninitialize hindcast with hind.coords.
     """
     ds = rechunk_to_single_chunk_if_more_than_one_chunk_along_dim(ds, dim)
     if isinstance(ds, xr.Dataset):
@@ -234,18 +234,18 @@ def _pvalue_from_distributions(ref_skill, init_skill, metric=None):
     uninitialized skill) is larger than initialized skill.
 
     Needed for bootstrapping confidence intervals and p_values of a metric in
-    the hindcast framework. Checks whether a simple forecast like persistence, climatology
-    or uninitialized performs better than initialized forecast. Need to keep in
-    mind the orientation of metric (whether larger values are better or worse
+    the hindcast framework. Checks whether a simple forecast like persistence,
+    climatology or uninitialized performs better than initialized forecast. Need to
+    keep in mind the orientation of metric (whether larger values are better or worse
     than smaller ones.)
 
     Args:
-        ref_skill (xarray object): persistence or uninitialized skill.
-        init_skill (xarray object): initialized skill.
+        ref_skill (xr.Dataset): persistence or uninitialized skill.
+        init_skill (xr.Dataset): initialized skill.
         metric (Metric): metric class Metric
 
     Returns:
-        pv (xarray object): probability that simple forecast performs better
+        pv (xr.Dataset): probability that simple forecast performs better
                             than initialized forecast.
     """
     pv = ((ref_skill - init_skill) > 0).mean("iteration")
@@ -263,11 +263,11 @@ def bootstrap_uninitialized_ensemble(hind, hist):
         same forcing and rearranges them into ensemble and member dimensions.
 
     Args:
-        hind (xarray object): hindcast.
-        hist (xarray object): historical uninitialized.
+        hind (xr.Dataset): hindcast.
+        hist (xr.Dataset): historical uninitialized.
 
     Returns:
-        uninit_hind (xarray object): uninitialize hindcast with hind.coords.
+        uninit_hind (xr.Dataset): uninitialize hindcast with hind.coords.
     """
     has_dims(hist, "member", "historical ensemble")
     has_dims(hind, "member", "initialized hindcast ensemble")
@@ -324,11 +324,11 @@ def bootstrap_uninit_pm_ensemble_from_control_cftime(init_pm, control):
         them into ensemble and member dimensions.
 
     Args:
-        init_pm (xarray object): initialized ensemble simulation.
-        control (xarray object): control simulation.
+        init_pm (xr.Dataset): initialized ensemble simulation.
+        control (xr.Dataset): control simulation.
 
     Returns:
-        uninit_pm (xarray object): uninitialized ensemble generated from control run.
+        uninit_pm (xr.Dataset): uninitialized ensemble generated from control run.
     """
     lead_units_equal_control_time_stride(init_pm, control)
     # short cut if annual leads
@@ -388,16 +388,20 @@ def create_pseudo_members(init):
 
 def resample_uninitialized_from_initialized(init, resample_dim=["init", "member"]):
     """
-    Generate an uninitialized ensemble by resampling without replacement from the initialized prediction ensemble.
-    Full years of the first lead present from the initialized are relabeled to a different year.
+    Generate an uninitialized ensemble by resampling without replacement from the
+    initialized prediction ensemble. Full years of the first lead present from the
+    initialized are relabeled to a different year.
     """
     if (init.init.dt.year.groupby("init.year").count().diff("year") != 0).any():
         raise ValueError(
-            f'`resample_uninitialized_from_initialized` only works if the same number of initializations is present each year, found {init.init.dt.year.groupby("init.year").count()}'
+            "`resample_uninitialized_from_initialized` only works if the same number "
+            " of initializations is present each year, found "
+            f'{init.init.dt.year.groupby("init.year").count()}.'
         )
     if "init" not in resample_dim:
         raise ValueError(
-            f"Only resampling on `init` makes forecasts uninitialzed. Found resample_dim={resample_dim}."
+            f"Only resampling on `init` makes forecasts uninitialzed."
+            f"Found resample_dim={resample_dim}."
         )
     init = init.isel(lead=0, drop=True)
     # resample init
@@ -435,8 +439,11 @@ def resample_uninitialized_from_initialized(init, resample_dim=["init", "member"
 
     resampled_uninit.attrs.update(
         {
-            "description": "created by `HindcastEnsemble.generate_uninitialized()` resampling years without replacement from initialized",
-            "documentation": f"https://climpred.readthedocs.io/en/v{version}/api/climpred.classes.HindcastEnsemble.generate_uninitialized.html#climpred.classes.HindcastEnsemble.generate_uninitialized",
+            "description": (
+                "created by `HindcastEnsemble.generate_uninitialized()` "
+                " resampling years without replacement from initialized"
+            ),
+            "documentation": f"https://climpred.readthedocs.io/en/v{version}/api/climpred.classes.HindcastEnsemble.generate_uninitialized.html#climpred.classes.HindcastEnsemble.generate_uninitialized",  # noqa: E501
         }
     )
     return resampled_uninit
@@ -766,7 +773,8 @@ def bootstrap_compute(
         if hind.lead[0] != 0:
             if OPTIONS["warn_for_failed_PredictionEnsemble_xr_call"]:
                 warnings.warn(
-                    f"Calculate persistence from lead={int(hind.lead[0].values)} instead of lead=0 (recommended)."
+                    f"Calculate persistence from lead={int(hind.lead[0].values)} "
+                    "instead of lead=0 (recommended)."
                 )
     else:
         compute_persistence_func = compute_persistence
diff --git a/climpred/checks.py b/climpred/checks.py
index 6dc8e91d7..0239bed5a 100644
--- a/climpred/checks.py
+++ b/climpred/checks.py
@@ -1,5 +1,6 @@
 import warnings
 from functools import wraps
+from typing import List, Optional, Union
 
 import dask
 import xarray as xr
@@ -193,11 +194,13 @@ def match_initialized_dims(init, verif, uninitialized=False):
     if (set(verif.dims) - set(init_dims)) != set():
         unmatch_dims = set(verif.dims) ^ set(init_dims)
         raise DimensionError(
-            f"Verification contains more dimensions than initialized. These dimensions do not match: {unmatch_dims}."
+            "Verification contains more dimensions than initialized. These dimensions "
+            f" do not match: {unmatch_dims}."
         )
     if (set(init_dims) - set(verif.dims)) != set():
         warnings.warn(
-            f"Initialized contains more dimensions than verification. Dimension(s) {set(init_dims) - set(verif.dims)} will be broadcasted."
+            "Initialized contains more dimensions than verification. "
+            f"Dimension(s) {set(init_dims) - set(verif.dims)} will be broadcasted."
         )
     return True
 
@@ -254,11 +257,15 @@ def rename_to_climpred_dims(xobj):
                     xobj[climpred_d].attrs["standard_name"] = cf_standard_name
                     if OPTIONS["warn_for_rename_to_climpred_dims"]:
                         warnings.warn(
-                            f'Did not find dimension "{climpred_d}", but renamed dimension {d} with CF-complying standard_name "{cf_standard_name}" to {climpred_d}.'
+                            f'Did not find dimension "{climpred_d}", but renamed '
+                            f"dimension {d} with CF-complying standard_name "
+                            f'"{cf_standard_name}" to {climpred_d}.'
                         )
     if not set(["init", "lead"]).issubset(set(xobj.dims)):
         warnings.warn(
-            f'Could not find dimensions ["init", "lead"] in initialized, found dimension {xobj.dims}. Also searched coordinates for CF-complying standard_names {CF_STANDARD_NAMES}.'
+            'Could not find dimensions ["init", "lead"] in initialized, found '
+            f"dimension {xobj.dims}. Also searched coordinates for CF-complying "
+            f"standard_names {CF_STANDARD_NAMES}."
         )
     return xobj
 
@@ -307,10 +314,7 @@ def warn_if_chunking_would_increase_performance(ds, crit_size_in_MB=100):
             )
 
 
-from typing import List, Optional, Union
-
-
-def _check_valid_reference(reference: Optional[Union[List[str], str]]) -> List:
+def _check_valid_reference(reference: Optional[Union[List[str], str]]) -> List[str]:
     """Enforce reference as list and check for valid entries."""
     if reference is None:
         reference = []
@@ -328,7 +332,8 @@ def _check_valid_reference(reference: Optional[Union[List[str], str]]) -> List:
 def _check_valud_alignment(alignment):
     if alignment not in VALID_ALIGNMENTS:
         raise ValueError(
-            f"Please provide alignment from {VALID_ALIGNMENTS}, found alignment='{alignment}'."
+            f"Please provide alignment from {VALID_ALIGNMENTS}, "
+            f"found alignment='{alignment}'."
         )
     else:
         if alignment == "same_init":
diff --git a/climpred/classes.py b/climpred/classes.py
index fefdb11a5..6a0d6bf37 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -716,10 +716,12 @@ def smooth(
         Examples:
             >>> PerfectModelEnsemble.get_initialized().lead.size
             20
-            >>> PerfectModelEnsemble.smooth({'lead':4}, how='sum').get_initialized().lead.size
+            >>> PerfectModelEnsemble.smooth(
+            ...     {"lead": 4}, how="sum"
+            ... ).get_initialized().lead.size
             17
 
-            >>> HindcastEnsemble_3D.smooth({'lon':1, 'lat':1})
+            >>> HindcastEnsemble_3D.smooth({"lon": 1, "lat": 1})
             <climpred.HindcastEnsemble>
             Initialized Ensemble:
                 SST      (init, lead, lat, lon) float32 -0.3236 -0.3161 -0.3083 ... 0.0 0.0
@@ -731,14 +733,16 @@ def smooth(
             ``smooth`` simultaneously aggregates spatially listening to ``lon`` and
             ``lat`` and temporally listening to ``lead`` or ``time``.
 
-            >>> HindcastEnsemble_3D.smooth({'lead': 2, 'lat': 5, 'lon': 4}).get_initialized().coords
+            >>> HindcastEnsemble_3D.smooth(
+            ...     {"lead": 2, "lat": 5, "lon": 4}
+            ... ).get_initialized().coords
             Coordinates:
               * init        (init) object 1954-01-01 00:00:00 ... 2017-01-01 00:00:00
               * lead        (lead) int32 1 2 3 4 5 6 7 8 9
               * lon         (lon) float64 250.8 254.8 258.8 262.8
               * lat         (lat) float64 -9.75 -4.75
                 valid_time  (lead, init) object 1955-01-01 00:00:00 ... 2026-01-01 00:00:00
-            >>> HindcastEnsemble_3D.smooth('goddard2013').get_initialized().coords
+            >>> HindcastEnsemble_3D.smooth("goddard2013").get_initialized().coords
             Coordinates:
               * init        (init) object 1954-01-01 00:00:00 ... 2017-01-01 00:00:00
               * lead        (lead) int32 1 2 3 4 5 6 7
@@ -1096,8 +1100,9 @@ def verify(
             ensemble mean forecast (``m2e``) for all leads reducing dimensions
             ``init`` and ``member``:
 
-            >>> PerfectModelEnsemble.verify(metric='rmse', comparison='m2e',
-            ...     dim=['init','member'])
+            >>> PerfectModelEnsemble.verify(
+            ...     metric="rmse", comparison="m2e", dim=["init", "member"]
+            ... )
             <xarray.Dataset>
             Dimensions:  (lead: 20)
             Coordinates:
@@ -1120,9 +1125,12 @@ def verify(
             also calculating reference skill for the ``persistence``, ``climatology``
             and ``uninitialized`` forecast.
 
-            >>> PerfectModelEnsemble.verify(metric='acc', comparison='m2m',
-            ...     dim=['init', 'member'],
-            ...     reference=['persistence', 'climatology' ,'uninitialized'])
+            >>> PerfectModelEnsemble.verify(
+            ...     metric="acc",
+            ...     comparison="m2m",
+            ...     dim=["init", "member"],
+            ...     reference=["persistence", "climatology", "uninitialized"],
+            ... )
             <xarray.Dataset>
             Dimensions:  (skill: 4, lead: 20)
             Coordinates:
@@ -1466,9 +1474,14 @@ def bootstrap(
             reference forecast performs better than initialized and the lower and
             upper bound of the resample.
 
-            >>> PerfectModelEnsemble.bootstrap(metric='acc', comparison='m2m',
-            ...     dim=['init', 'member'], iterations=50, resample_dim='member',
-            ...     reference=['persistence', 'climatology' ,'uninitialized'])
+            >>> PerfectModelEnsemble.bootstrap(
+            ...     metric="acc",
+            ...     comparison="m2m",
+            ...     dim=["init", "member"],
+            ...     iterations=50,
+            ...     resample_dim="member",
+            ...     reference=["persistence", "climatology", "uninitialized"],
+            ... )
             <xarray.Dataset>
             Dimensions:  (skill: 4, results: 4, lead: 20)
             Coordinates:
@@ -1810,7 +1823,9 @@ def plot_alignment(
             Attributes:
                 units:    days since 1960-01-01
 
-            >>> HindcastEnsemble.plot_alignment(alignment="same_verifs")  # doctest: +SKIP
+            >>> HindcastEnsemble.plot_alignment(
+            ...     alignment="same_verifs"
+            ... )  # doctest: +SKIP
             <matplotlib.collections.QuadMesh object at 0x1405c1520>
 
         See also:
@@ -1912,8 +1927,12 @@ def verify(
             verification (``m2o``) over the same verification time (``same_verifs``)
             for all leads reducing dimensions ``init`` and ``member``:
 
-            >>> HindcastEnsemble.verify(metric='rmse', comparison='m2o',
-            ...     alignment='same_verifs', dim=['init','member'])
+            >>> HindcastEnsemble.verify(
+            ...     metric="rmse",
+            ...     comparison="m2o",
+            ...     alignment="same_verifs",
+            ...     dim=["init", "member"],
+            ... )
             <xarray.Dataset>
             Dimensions:  (lead: 10)
             Coordinates:
@@ -1938,9 +1957,13 @@ def verify(
             skill for the ``persistence``, ``climatology`` and ``uninitialized``
             forecast.
 
-            >>> HindcastEnsemble.verify(metric='acc', comparison='e2o',
-            ...     alignment='same_inits', dim='init',
-            ...     reference=['persistence', 'climatology' ,'uninitialized'])
+            >>> HindcastEnsemble.verify(
+            ...     metric="acc",
+            ...     comparison="e2o",
+            ...     alignment="same_inits",
+            ...     dim="init",
+            ...     reference=["persistence", "climatology", "uninitialized"],
+            ... )
             <xarray.Dataset>
             Dimensions:  (skill: 4, lead: 10)
             Coordinates:
@@ -2208,10 +2231,15 @@ def bootstrap(
             reference forecast performs better than initialized and the lower and
             upper bound of the resample.
 
-            >>> HindcastEnsemble.bootstrap(metric='acc', comparison='e2o',
-            ...     dim='init', iterations=50, resample_dim='member',
-            ...     alignment='same_verifs',
-            ...     reference=['persistence', 'climatology' ,'uninitialized'])
+            >>> HindcastEnsemble.bootstrap(
+            ...     metric="acc",
+            ...     comparison="e2o",
+            ...     dim="init",
+            ...     iterations=50,
+            ...     resample_dim="member",
+            ...     alignment="same_verifs",
+            ...     reference=["persistence", "climatology", "uninitialized"],
+            ... )
             <xarray.Dataset>
             Dimensions:  (skill: 4, results: 4, lead: 10)
             Coordinates:
@@ -2311,8 +2339,8 @@ def remove_bias(
         """Calculate and remove bias from
         :py:class:`~climpred.classes.HindcastEnsemble`.
         Bias is grouped by ``seasonality`` set via
-        :py:class:`~climpred.options.set_options`.
-        When wrapping `xclim.sbda.adjustment` use ``group`` instead.
+        :py:class:`~climpred.options.set_options`. When wrapping
+        :py:class:`~xclim.sdba.adjustment.TrainAdjust` use ``group`` instead.
 
         Args:
             alignment (str): which inits or verification times should be aligned?
@@ -2348,7 +2376,7 @@ def remove_bias(
 
             train_test_split (str): How to separate train period to calculate the bias
                 and test period to apply bias correction to? For a detailed
-                description, see `Risbey et al. 2021 <http://www.nature.com/articles/s41467-021-23771-z>`_:
+                description, see `Risbey et al. 2021 <http://www.nature.com/articles/s41467-021-23771-z>`_:  # noqa: E501
 
                 - `fair`: no overlap between `train` and `test` (recommended).
                     Set either `train_init` or `train_time`.
@@ -2380,8 +2408,9 @@ def remove_bias(
 
             Skill from raw model output without bias reduction:
 
-            >>> HindcastEnsemble.verify(metric='rmse', comparison='e2o',
-            ...     alignment='maximize', dim='init')
+            >>> HindcastEnsemble.verify(
+            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
+            ... )
             <xarray.Dataset>
             Dimensions:  (lead: 10)
             Coordinates:
@@ -2404,10 +2433,11 @@ def remove_bias(
             ``train_test_split='unfair'`` has hardly any effect. Use all
             initializations to calculate bias and verify skill:
 
-            >>> HindcastEnsemble.remove_bias(alignment='maximize',
-            ...     how='additive_mean', test_train_split='unfair'
-            ... ).verify(metric='rmse', comparison='e2o', alignment='maximize',
-            ... dim='init')
+            >>> HindcastEnsemble.remove_bias(
+            ...     alignment="maximize", how="additive_mean", train_test_split="unfair"
+            ... ).verify(
+            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
+            ... )
             <xarray.Dataset>
             Dimensions:  (lead: 10)
             Coordinates:
@@ -2432,10 +2462,14 @@ def remove_bias(
             ``train_test_split='fair'`` is recommended to use for a fair
             comparison against real-time forecasts.
 
-            >>> HindcastEnsemble.remove_bias(alignment='maximize',
-            ...     how='additive_mean', train_test_split='fair',
-            ...     train_init=slice('1954', '1980')).verify(metric='rmse',
-            ...     comparison='e2o', alignment='maximize', dim='init')
+            >>> HindcastEnsemble.remove_bias(
+            ...     alignment="maximize",
+            ...     how="additive_mean",
+            ...     train_test_split="fair",
+            ...     train_init=slice("1954", "1980"),
+            ... ).verify(
+            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
+            ... )
             <xarray.Dataset>
             Dimensions:  (lead: 10)
             Coordinates:
@@ -2458,10 +2492,14 @@ def remove_bias(
             `xclim <https://xclim.readthedocs.io/en/stable/sdba_api.html>`_ and
             providing ``group`` for ``groupby``:
 
-            >>> HindcastEnsemble.remove_bias(alignment='same_init', group='init',
-            ...     how='DetrendedQuantileMapping', train_test_split='unfair',
-            ...     ).verify(metric='rmse',
-            ...     comparison='e2o', alignment='maximize', dim='init')
+            >>> HindcastEnsemble.remove_bias(
+            ...     alignment="same_init",
+            ...     group="init",
+            ...     how="DetrendedQuantileMapping",
+            ...     train_test_split="unfair",
+            ... ).verify(
+            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
+            ... )
             <xarray.Dataset>
             Dimensions:  (lead: 10)
             Coordinates:
@@ -2482,10 +2520,13 @@ def remove_bias(
 
             Wrapping methods ``how`` from `bias_correction <https://github.com/pankajkarman/bias_correction/blob/master/bias_correction.py>`_:
 
-            >>> HindcastEnsemble.remove_bias(alignment='same_init',
-            ...     how='modified_quantile', train_test_split='unfair',
-            ...     ).verify(metric='rmse',
-            ...     comparison='e2o', alignment='maximize', dim='init')
+            >>> HindcastEnsemble.remove_bias(
+            ...     alignment="same_init",
+            ...     how="modified_quantile",
+            ...     train_test_split="unfair",
+            ... ).verify(
+            ...     metric="rmse", comparison="e2o", alignment="maximize", dim="init"
+            ... )
             <xarray.Dataset>
             Dimensions:  (lead: 10)
             Coordinates:
diff --git a/climpred/comparisons.py b/climpred/comparisons.py
index 5616344c8..955ea3089 100644
--- a/climpred/comparisons.py
+++ b/climpred/comparisons.py
@@ -54,15 +54,15 @@ def __init__(
 
         Args:
             name (str): name of comparison.
-            function (function): comparison function.
-            hindcast (bool): Can comparison be used in `compute_hindcast`?
-                `False` means `compute_perfect_model`
+            function (Callable): comparison function.
+            hindcast (bool): Can comparison be used in ``HindcastEnsemble``?
+                ``False`` means ``PerfectModelEnsemble``
             probabilistic (bool): Can this comparison be used for probabilistic
                 metrics also? Probabilistic metrics require multiple forecasts.
                 `False` means that comparison is only deterministic.
                 `True` means that comparison can be used both deterministic and
                 probabilistic.
-            long_name (str, optional): longname of comparison. Defaults to None.
+            long_name (str, optional): long name of comparison. Defaults to ``None``.
             aliases (list of str, optional): Allowed aliases for this comparison.
                 Defaults to ``None``.
 
@@ -92,13 +92,13 @@ def _m2m(ds, metric=None):
     ``member``.
 
     Args:
-        ds (xarray object): xr.Dataset/xr.DataArray with ``member`` dimension.
+        ds (xr.Dataset): ``initialized`` with ``member`` dimension.
         metric (Metric):
             If deterministic, forecast and reference have ``member`` dim.
             If probabilistic, only forecast has ``member`` dim.
 
     Returns:
-        xr.object: forecast, reference.
+        (xr.Dataset, xr.Dataset): forecast, reference.
     """
     reference_list = []
     forecast_list = []
@@ -134,14 +134,13 @@ def _m2e(ds, metric=None):
      ensemble mean.
 
     Args:
-        ds (xarray object): xr.Dataset/xr.DataArray with member and ensemble
-                            dimension.
+        ds (xr.Dataset): ``initialized`` with ``member`` dimension.
         metric (Metric): needed for probabilistic metrics.
-                      therefore useless in m2e comparison,
+                      therefore useless in ``m2e`` comparison,
                       but expected by internal API.
 
     Returns:
-        xr.object: forecast, reference.
+        (xr.Dataset, xr.Dataset): forecast, reference.
     """
     reference_list = []
     forecast_list = []
@@ -180,13 +179,12 @@ def _m2c(ds, metric=None):
     to the control simulation.
 
     Args:
-        ds (xarray object): xr.Dataset/xr.DataArray with member and ensemble
-                            dimension.
+        ds (xr.Dataset): ``initialized`` with ``member`` dimension.
         metric (Metric): if deterministic, forecast and reference both have member dim
-                      if probabilistic, only forecast has member dim
+                      if probabilistic, only forecast has ``member`` dim
 
     Returns:
-        xr.object: forecast, reference.
+        (xr.Dataset, xr.Dataset): forecast, reference.
     """
     control_member = ds.member.values[0]
     reference = ds.sel(member=control_member, drop=True)
@@ -214,14 +212,13 @@ def _e2c(ds, metric=None):
     other member forecasts to the control simulation.
 
     Args:
-        ds (xarray object): xr.Dataset/xr.DataArray with member and ensemble
-                            dimension.
+        ds (xr.Dataset): ``initialized`` with ``member`` dimension.
         metric (Metric): needed for probabilistic metrics.
-                      therefore useless in e2c comparison,
+                      therefore useless in ``e2c`` comparison,
                       but expected by internal API.
 
     Returns:
-        xr.object: forecast, reference.
+        (xr.Dataset, xr.Dataset): forecast, reference.
     """
     control_member = ds.member.values[0]
     reference = ds.sel(member=control_member, drop=True)
@@ -247,8 +244,8 @@ def _e2o(hind, verif, metric=None):
     ``HindcastEnsemble`` setup.
 
     Args:
-        hind (xarray object): Hindcast with optional ``member`` dimension.
-        verif (xarray object): Verification data.
+        hind (xr.Dataset): Hindcast with optional ``member`` dimension.
+        verif (xr.Dataset): Verification data.
         metric (Metric): needed for probabilistic metrics.
                       therefore useless in ``e2o`` comparison,
                       but expected by internal API.
@@ -278,14 +275,14 @@ def _m2o(hind, verif, metric=None):
     ``HindcastEnsemble`` setup.
 
     Args:
-        hind (xarray object): Hindcast with ``member`` dimension.
-        verif (xarray object): Verification data.
+        hind (xr.Dataset): ``initialized`` with ``member`` dimension.
+        verif (xr.Dataset): Verification data.
         metric (Metric):
             If deterministic, forecast and verif both have ``member`` dim;
             If probabilistic, only forecast has ``member`` dim.
 
     Returns:
-        xr.object: forecast, verif.
+        (xr.Dataset, xr.Dataset): forecast, verif.
     """
     # check that this contains more than one member
     has_dims(hind, "member", "decadal prediction ensemble")
@@ -317,20 +314,15 @@ def _m2o(hind, verif, metric=None):
             COMPARISON_ALIASES[a] = c.name
 
 # Which comparisons work with which set of metrics.
-# ['e2o', 'm2o']
 HINDCAST_COMPARISONS = [c.name for c in __ALL_COMPARISONS__ if c.hindcast]
-# ['m2c', 'e2c', 'm2m', 'm2e']
 PM_COMPARISONS = [c.name for c in __ALL_COMPARISONS__ if not c.hindcast]
 ALL_COMPARISONS = HINDCAST_COMPARISONS + PM_COMPARISONS
-# ['m2c', 'm2m']
 PROBABILISTIC_PM_COMPARISONS = [
     c.name for c in __ALL_COMPARISONS__ if (not c.hindcast and c.probabilistic)
 ]
 NON_PROBABILISTIC_PM_COMPARISONS = [
     c.name for c in __ALL_COMPARISONS__ if (not c.hindcast and not c.probabilistic)
 ]
-
-# ['m2o']
 PROBABILISTIC_HINDCAST_COMPARISONS = [
     c.name for c in __ALL_COMPARISONS__ if (c.hindcast and c.probabilistic)
 ]
diff --git a/climpred/conftest.py b/climpred/conftest.py
index 5eaad9468..451890d9f 100644
--- a/climpred/conftest.py
+++ b/climpred/conftest.py
@@ -384,7 +384,8 @@ def hindcast_recon_1d_dm(hindcast_recon_1d_ym):
 
 @pytest.fixture()
 def hindcast_S2S_Germany():
-    """S2S ECMWF on-the-fly hindcasts with daily leads and weekly inits and related observations from CPC (t2m, pr) and ERA5 (gh_500)."""
+    """S2S ECMWF on-the-fly hindcasts with daily leads and weekly inits and related
+    observations from CPC (t2m, pr) and ERA5 (gh_500)."""
     init = load_dataset("ECMWF_S2S_Germany")
     obs = load_dataset("Observations_Germany")
     return HindcastEnsemble(init).add_observations(obs)
@@ -392,7 +393,8 @@ def hindcast_S2S_Germany():
 
 @pytest.fixture()
 def hindcast_NMME_Nino34():
-    """NMME hindcasts with monthly leads and monthly inits and related IOv2 observations for SST of the Nino34 region."""
+    """NMME hindcasts with monthly leads and monthly inits and related IOv2
+    observations for SST of the Nino34 region."""
     init = load_dataset("NMME_hindcast_Nino34_sst")
     obs = load_dataset("NMME_OIv2_Nino34_sst")
     init["sst"].attrs["units"] = "C"
diff --git a/climpred/constants.py b/climpred/constants.py
index 6c93c5007..7b6f3a46a 100644
--- a/climpred/constants.py
+++ b/climpred/constants.py
@@ -1,6 +1,6 @@
 # for general checks of climpred-required dimensions
 CLIMPRED_ENSEMBLE_DIMS = ["init", "member", "lead"]
-# corresponding CF-complying standard_names from http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html to rename from
+# corresponding CF-complying standard_names from http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html to rename from  # noqa: E501
 CF_STANDARD_NAMES = {
     "init": "forecast_reference_time",
     "member": "realization",
diff --git a/climpred/graphics.py b/climpred/graphics.py
index cff6f920f..cd6bb783e 100644
--- a/climpred/graphics.py
+++ b/climpred/graphics.py
@@ -410,7 +410,8 @@ def plot_ensemble_perfect_model(
 
 
 def _verif_dates_xr(hindcast, alignment, reference, date2num_units):
-    """Create valid_time xr.DataArray with dims lead and init in units passed to cftime.date2num."""
+    """Create ``valid_time`` ``xr.DataArray`` with dims lead and init in units passed to
+    cftime.date2num."""
     inits, verif_dates = return_inits_and_verif_dates(
         hindcast.get_initialized().rename({"init": "time"}),
         hindcast.get_observations(),
diff --git a/climpred/horizon.py b/climpred/horizon.py
index 21a4975cc..f6c7ce488 100644
--- a/climpred/horizon.py
+++ b/climpred/horizon.py
@@ -29,9 +29,9 @@ def _last_item_cond_true(cond, dim):
     # reset where always true to len(lead)
     reached = reached.where(~cond.all("lead"), other=cond[dim].size)
     # fix locations where always nan to nan
-    mask = cond.notnull().all("lead")  # ~(cond == False).all("lead")
+    mask = cond.notnull().all("lead")
     reached = reached.where(mask, other=np.nan)
-    ## shift back into coordinate space ##
+    # shift back into coordinate space
     # problem: cannot convert nan to idx in isel
     # therefore set to dim:0 and mask again afterwards
     reached_notnull = reached.notnull()  # remember where not masked
@@ -67,10 +67,13 @@ def horizon(cond):
         xr.DataArray, xr.Dataset: predictability horizon reduced by ``lead`` dimension.
 
     Example:
-        >>> skill = PerfectModelEnsemble.verify(metric='acc', comparison='m2e',
-        ...     dim=['init','member'], reference=['persistence'])
-        >>> horizon(skill.sel(skill='initialized') >
-        ...     skill.sel(skill='persistence'))
+        >>> skill = PerfectModelEnsemble.verify(
+        ...     metric="acc",
+        ...     comparison="m2e",
+        ...     dim=["init", "member"],
+        ...     reference=["persistence"],
+        ... )
+        >>> horizon(skill.sel(skill="initialized") > skill.sel(skill="persistence"))
         <xarray.Dataset>
         Dimensions:  ()
         Data variables:
@@ -82,9 +85,14 @@ def horizon(cond):
             description:    Forecast period is the time interval between the forecast...
 
 
-        >>> bskill = PerfectModelEnsemble.bootstrap(metric='acc', comparison='m2e',
-        ...     dim=['init','member'], reference='uninitialized', iterations=201)
-        >>> horizon(bskill.sel(skill='uninitialized', results='p') <= 0.05)
+        >>> bskill = PerfectModelEnsemble.bootstrap(
+        ...     metric="acc",
+        ...     comparison="m2e",
+        ...     dim=["init", "member"],
+        ...     reference="uninitialized",
+        ...     iterations=201,
+        ... )
+        >>> horizon(bskill.sel(skill="uninitialized", results="p") <= 0.05)
         <xarray.Dataset>
         Dimensions:  ()
         Coordinates:
diff --git a/climpred/logging.py b/climpred/logging.py
index 65943210a..d9eb9518b 100644
--- a/climpred/logging.py
+++ b/climpred/logging.py
@@ -6,8 +6,8 @@ def log_compute_hindcast_header(metric, comparison, dim, alignment, reference):
     """Add header to the log for a `compute_hindcast` instance."""
     logging.info(
         f"`compute_hindcast` for metric {metric.name}, "
-        f"comparison {comparison.name}, dim {dim}, alignment {alignment} and reference {reference} at "
-        f"{str(datetime.now())}\n"
+        f"comparison {comparison.name}, dim {dim}, alignment {alignment} and "
+        f"reference {reference} at {str(datetime.now())}\n"
         f"++++++++++++++++++++++++++++++++++++++++++++++++"
     )
 
diff --git a/climpred/metrics.py b/climpred/metrics.py
index 159e71000..7ed35cd04 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -64,9 +64,10 @@ def _get_norm_factor(comparison):
 
     Example:
         >>> # check skill saturation value of roughly 1 for different comparisons
-        >>> for c in ['m2m', 'm2e', 'm2c', 'e2c']:  # doctest: +ELLIPSIS
-        ...        s = PerfectModelEnsemble.verify(metric='nrmse', dim=None, comparison=c)
-        ...        s.tos.plot(label='nrmse {c}')
+        >>> for c in ["m2m", "m2e", "m2c", "e2c"]:  # doctest: +ELLIPSIS
+        ...     s = PerfectModelEnsemble.verify(metric="nrmse", dim=None, comparison=c)
+        ...     s.tos.plot(label=f"nrmse {c}")
+        ...
         [...
 
     Reference:
@@ -214,26 +215,27 @@ def __init__(
         Args:
             name (str): name of metric.
             function (function): metric function.
-            positive (bool or None): Is metric positively oriented? If True, higher metric
-             value means better skill. If False, lower metric value means better skill.
-             None if different differentiation.
-            probabilistic (bool): Is metric probabilistic? `False` means
-             deterministic.
+            positive (bool or None): Is metric positively oriented?
+                If ``True``, higher metric value means better skill.
+                If ``False``, lower metric value means better skill.
+                ``None`` if different differentiation.
+            probabilistic (bool): Is metric probabilistic?
+                ``False`` means deterministic.
             unit_power (float, int): Power of the unit of skill based on unit
-             of input, e.g. input unit [m]: skill unit [(m)**unit_power]
-            long_name (str, optional): long_name of metric. Defaults to None.
+                of input, e.g. input unit [m]: skill unit [(m)**unit_power]
+            long_name (str, optional): long name of metric. Defaults to ``None``.
             aliases (list of str, optional): Allowed aliases for this metric.
-             Defaults to None.
-            min (float, optional): Minimum skill for metric. Defaults to None.
-            max (float, optional): Maxmimum skill for metric. Defaults to None.
-            perfect (float, optional): Perfect skill for metric. Defaults to None.
+                Defaults to ``None``.
+            min (float, optional): Minimum skill for metric. Defaults to ``None``.
+            max (float, optional): Maximum skill for metric. Defaults to ``None``.
+            perfect (float, optional): Perfect skill for metric. Defaults to ``None``.
             normalize (bool, optional): Will the metric be normalized? Then metric
-             function will require to get Comparison passed. Defaults to False.
+                function will require to get Comparison passed. Defaults to ``False``.
             allows_logical (bool, optional): Does the metric allow a logical to be
-              passed in metric_kwargs? Some probabilistic metrics allow this. Defaults
-              to False.
+                passed in metric_kwargs? Some probabilistic metrics allow this.
+                Defaults to ``False``.
             requires_member_dim (bool, optional):
-              Does xskillscore.metric expect a member dimension?
+                Does xskillscore.metric expect a member dimension?
 
         Returns:
             Metric: metric class Metric.
@@ -302,8 +304,12 @@ def _pearson_r(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~climpred.metrics._pearson_r_eff_p_value`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='pearson_r', comparison='e2o',
-        ...     alignment='same_verifs', dim=['init'])
+        >>> HindcastEnsemble.verify(
+        ...     metric="pearson_r",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim=["init"],
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -373,8 +379,12 @@ def _pearson_r_p_value(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~climpred.metrics._pearson_r_eff_p_value`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='pearson_r_p_value', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="pearson_r_p_value",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -462,8 +472,12 @@ def _effective_sample_size(forecast, verif, dim=None, **metric_kwargs):
           freedom of a time-varying field." Journal of climate 12.7 (1999): 1990-2009.
 
     Example:
-        >>> HindcastEnsemble.verify(metric='effective_sample_size', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="effective_sample_size",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -501,8 +515,8 @@ def _effective_sample_size(forecast, verif, dim=None, **metric_kwargs):
 
 
 def _pearson_r_eff_p_value(forecast, verif, dim=None, **metric_kwargs):
-    """Probability that forecast and verification data are linearly uncorrelated, accounting
-    for autocorrelation.
+    """Probability that forecast and verification data are linearly uncorrelated,
+    accounting for autocorrelation.
 
     .. note::
         Weights are not included here due to the dependence on temporal autocorrelation.
@@ -552,8 +566,12 @@ def _pearson_r_eff_p_value(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~climpred.metrics._spearman_r_eff_p_value`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='pearson_r_eff_p_value', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="pearson_r_eff_p_value",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -644,8 +662,12 @@ def _spearman_r(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~climpred.metrics._spearman_r_eff_p_value`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='spearman_r', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="spearman_r",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -715,8 +737,12 @@ def _spearman_r_p_value(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~climpred.metrics._spearman_r_eff_p_value`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='spearman_r_p_value', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="spearman_r_p_value",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -813,8 +839,12 @@ def _spearman_r_eff_p_value(forecast, verif, dim=None, **metric_kwargs):
           freedom of a time-varying field." Journal of climate 12.7 (1999): 1990-2009.
 
     Example:
-        >>> HindcastEnsemble.verify(metric='spearman_r_eff_p_value', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="spearman_r_eff_p_value",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -901,8 +931,9 @@ def _mse(forecast, verif, dim=None, **metric_kwargs):
           URL: http://doi.wiley.com/10.1002/9781119960003.
 
     Example:
-        >>> HindcastEnsemble.verify(metric='mse', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="mse", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -962,8 +993,12 @@ def _spread(forecast, verif, dim=None, **metric_kwargs):
 
 
     Example:
-        >>> HindcastEnsemble.verify(metric='spread', comparison='m2o', alignment='same_verifs',
-        ...     dim=['member','init'])
+        >>> HindcastEnsemble.verify(
+        ...     metric="spread",
+        ...     comparison="m2o",
+        ...     alignment="same_verifs",
+        ...     dim=["member", "init"],
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1028,8 +1063,9 @@ def _rmse(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~xskillscore.rmse`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='rmse', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="rmse", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1102,8 +1138,9 @@ def _mae(forecast, verif, dim=None, **metric_kwargs):
 
 
     Example:
-        >>> HindcastEnsemble.verify(metric='mae', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="mae", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1168,8 +1205,12 @@ def _median_absolute_error(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~xskillscore.median_absolute_error`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='median_absolute_error', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="median_absolute_error",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1261,8 +1302,9 @@ def _nmse(forecast, verif, dim=None, **metric_kwargs):
           https://doi.org/10/fc7mxd.
 
     Example:
-        >>> HindcastEnsemble.verify(metric='nmse', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="nmse", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1364,8 +1406,9 @@ def _nmae(forecast, verif, dim=None, **metric_kwargs):
           https://doi.org/10/fc7mxd.
 
     Example:
-        >>> HindcastEnsemble.verify(metric='nmae', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="nmae", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1474,8 +1517,9 @@ def _nrmse(forecast, verif, dim=None, **metric_kwargs):
         https://doi.org/10/fc7mxd.
 
     Example:
-        >>> HindcastEnsemble.verify(metric='nrmse', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="nrmse", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1587,8 +1631,9 @@ def _msess(forecast, verif, dim=None, **metric_kwargs):
 
 
     Example:
-        >>> HindcastEnsemble.verify(metric='msess', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="msess", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1665,8 +1710,9 @@ def _mape(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~xskillscore.mape`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='mape', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="mape", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1731,8 +1777,9 @@ def _smape(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~xskillscore.smape`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='smape', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="smape", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1825,8 +1872,9 @@ def _uacc(forecast, verif, dim=None, **metric_kwargs):
           116(12):2417–2424, December 1988. https://doi.org/10/fc7mxd.
 
     Example:
-        >>> HindcastEnsemble.verify(metric='uacc', comparison='e2o', alignment='same_verifs',
-        ...     dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="uacc", comparison="e2o", alignment="same_verifs", dim="init"
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1897,8 +1945,12 @@ def _std_ratio(forecast, verif, dim=None, **metric_kwargs):
         * https://www-miklip.dkrz.de/about/murcss/
 
     Example:
-        >>> HindcastEnsemble.verify(metric='std_ratio', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="std_ratio",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1961,8 +2013,12 @@ def _unconditional_bias(forecast, verif, dim=None, **metric_kwargs):
         * https://www-miklip.dkrz.de/about/murcss/
 
     Example:
-        >>> HindcastEnsemble.verify(metric='unconditional_bias', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="unconditional_bias",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -1984,9 +2040,13 @@ def _unconditional_bias(forecast, verif, dim=None, **metric_kwargs):
         Conditional bias is removed by
         :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`.
 
-        >>> HindcastEnsemble = HindcastEnsemble.remove_bias(alignment='same_verifs')
-        >>> HindcastEnsemble.verify(metric='unconditional_bias', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble = HindcastEnsemble.remove_bias(alignment="same_verifs")
+        >>> HindcastEnsemble.verify(
+        ...     metric="unconditional_bias",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -2048,8 +2108,12 @@ def _mul_bias(forecast, verif, dim=None, **metric_kwargs):
 
     Example:
 
-        >>> HindcastEnsemble.verify(metric='multiplicative_bias', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="multiplicative_bias",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -2116,8 +2180,12 @@ def _conditional_bias(forecast, verif, dim=None, **metric_kwargs):
         * https://www-miklip.dkrz.de/about/murcss/
 
     Example:
-        >>> HindcastEnsemble.verify(metric='conditional_bias', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="conditional_bias",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -2187,8 +2255,12 @@ def _bias_slope(forecast, verif, dim=None, **metric_kwargs):
         * https://www-miklip.dkrz.de/about/murcss/
 
     Example:
-        >>> HindcastEnsemble.verify(metric='bias_slope', comparison='e2o',
-        ...     alignment='same_verifs', dim='init')
+        >>> HindcastEnsemble.verify(
+        ...     metric="bias_slope",
+        ...     comparison="e2o",
+        ...     alignment="same_verifs",
+        ...     dim="init",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -2269,9 +2341,13 @@ def _msess_murphy(forecast, verif, dim=None, **metric_kwargs):
           https://doi.org/10/fc7mxd.
 
     Example:
-        >>> HindcastEnsemble = HindcastEnsemble.remove_bias(alignment='same_verifs')
-        >>> HindcastEnsemble.verify(metric='msess_murphy', comparison='e2o',
-        ...     dim='init', alignment='same_verifs')
+        >>> HindcastEnsemble = HindcastEnsemble.remove_bias(alignment="same_verifs")
+        >>> HindcastEnsemble.verify(
+        ...     metric="msess_murphy",
+        ...     comparison="e2o",
+        ...     dim="init",
+        ...     alignment="same_verifs",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -2377,14 +2453,21 @@ def _brier_score(forecast, verif, dim=None, **metric_kwargs):
     Example:
         Define a boolean/logical function for binary scoring:
 
-        >>> def pos(x): return x > 0  # checking binary outcomes
+        >>> def pos(x):
+        ...     return x > 0  # checking binary outcomes
+        ...
 
         Option 1. Pass with keyword ``logical``: (specifically designed for
         :py:class:`~climpred.classes.PerfectModelEnsemble`, where binary verification
         can only be created after comparison)
 
-        >>> HindcastEnsemble.verify(metric='brier_score', comparison='m2o',
-        ...     dim=['member', 'init'], alignment='same_verifs', logical=pos)
+        >>> HindcastEnsemble.verify(
+        ...     metric="brier_score",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ...     logical=pos,
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -2407,8 +2490,12 @@ def _brier_score(forecast, verif, dim=None, **metric_kwargs):
         Option 2. Pre-process to generate a binary multi-member forecast and
         binary verification product:
 
-        >>> HindcastEnsemble.map(pos).verify(metric='brier_score',
-        ...     comparison='m2o', dim=['member', 'init'], alignment='same_verifs')
+        >>> HindcastEnsemble.map(pos).verify(
+        ...     metric="brier_score",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -2428,11 +2515,15 @@ def _brier_score(forecast, verif, dim=None, **metric_kwargs):
             reference:                     []
 
         Option 3. Pre-process to generate a probability forecast and binary
-        verification product. because ``member`` not present in ``hindcast`` anymore, use
-        ``comparison='e2o'`` and ``dim='init'``:
-
-        >>> HindcastEnsemble.map(pos).mean('member').verify(metric='brier_score',
-        ...     comparison='e2o', dim='init', alignment='same_verifs')
+        verification product. Because ``member`` is no longer present in
+        ``hindcast``, use ``comparison="e2o"`` and ``dim="init"``:
+
+        >>> HindcastEnsemble.map(pos).mean("member").verify(
+        ...     metric="brier_score",
+        ...     comparison="e2o",
+        ...     dim="init",
+        ...     alignment="same_verifs",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10)
         Coordinates:
@@ -2520,8 +2611,13 @@ def _threshold_brier_score(forecast, verif, dim=None, **metric_kwargs):
     Example:
 
         >>> # get threshold brier score for each init
-        >>> HindcastEnsemble.verify(metric='threshold_brier_score', comparison='m2o',
-        ...     dim='member', threshold=.2, alignment='same_inits')
+        >>> HindcastEnsemble.verify(
+        ...     metric="threshold_brier_score",
+        ...     comparison="m2o",
+        ...     dim="member",
+        ...     threshold=0.2,
+        ...     alignment="same_inits",
+        ... )
         <xarray.Dataset>
         Dimensions:     (lead: 10, init: 52)
         Coordinates:
@@ -2544,8 +2640,13 @@ def _threshold_brier_score(forecast, verif, dim=None, **metric_kwargs):
             threshold:                     0.2
 
         >>> # multiple thresholds averaging over init dimension
-        >>> HindcastEnsemble.verify(metric='threshold_brier_score', comparison='m2o',
-        ...     dim=['member', 'init'], threshold=[.2, .3], alignment='same_verifs')
+        >>> HindcastEnsemble.verify(
+        ...     metric="threshold_brier_score",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     threshold=[0.2, 0.3],
+        ...     alignment="same_verifs",
+        ... )
         <xarray.Dataset>
         Dimensions:    (lead: 10, threshold: 2)
         Coordinates:
@@ -2642,8 +2743,9 @@ def _crps(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~xskillscore.crps_ensemble`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='crps', comparison='m2o', dim='member',
-        ...     alignment='same_inits')
+        >>> HindcastEnsemble.verify(
+        ...     metric="crps", comparison="m2o", dim="member", alignment="same_inits"
+        ... )
         <xarray.Dataset>
         Dimensions:     (lead: 10, init: 52)
         Coordinates:
@@ -2759,8 +2861,9 @@ def _crpss(forecast, verif, dim=None, **metric_kwargs):
           https://doi.org/10/c6758w.
 
     Example:
-        >>> HindcastEnsemble.verify(metric='crpss', comparison='m2o',
-        ...     alignment='same_inits', dim='member')
+        >>> HindcastEnsemble.verify(
+        ...     metric="crpss", comparison="m2o", alignment="same_inits", dim="member"
+        ... )
         <xarray.Dataset>
         Dimensions:     (init: 52, lead: 10)
         Coordinates:
@@ -2781,9 +2884,16 @@ def _crpss(forecast, verif, dim=None, **metric_kwargs):
             reference:                     []
 
         >>> import scipy
-        >>> PerfectModelEnsemble..isel(lead=[0, 1]).verify(metric='crpss', comparison='m2m',
-        ...     dim='member', gaussian=False, cdf_or_dist=scipy.stats.norm, xmin=-10,
-        ...     xmax=10, tol=1e-6)  # doctest: +SKIP
+        >>> PerfectModelEnsemble.isel(lead=[0, 1]).verify(
+        ...     metric="crpss",
+        ...     comparison="m2m",
+        ...     dim="member",
+        ...     gaussian=False,
+        ...     cdf_or_dist=scipy.stats.norm,
+        ...     xmin=-10,
+        ...     xmax=10,
+        ...     tol=1e-6,
+        ... )  # doctest: +SKIP
         <xarray.Dataset>
         Dimensions:  (init: 12, lead: 2, member: 9)
         Coordinates:
@@ -2884,8 +2994,12 @@ def _crpss_es(forecast, verif, dim=None, **metric_kwargs):
           631–43. https://doi.org/10/f9jrhw.
 
     Example:
-        >>> HindcastEnsemble.verify(metric='crpss_es', comparison='m2o',
-        ...     alignment='same_verifs', dim='member')
+        >>> HindcastEnsemble.verify(
+        ...     metric="crpss_es",
+        ...     comparison="m2o",
+        ...     alignment="same_verifs",
+        ...     dim="member",
+        ... )
         <xarray.Dataset>
         Dimensions:     (init: 52, lead: 10)
         Coordinates:
@@ -2916,7 +3030,8 @@ def _crpss_es(forecast, verif, dim=None, **metric_kwargs):
     ensemble_spread = forecast.std("member").mean(dim=dim_no_member, **metric_kwargs)
     if forecast.member.size == 1:
         warnings.warn(
-            "Ensemble spread is 0. CRPSS_ES yields NaNs for persistence and climatology reference skill."
+            f"Ensemble spread is 0 because only {forecast.member.size} members."
+            "`CRPSS_ES` yields NaNs for persistence and climatology reference skill."
         )
     mse_h = _mse(forecast.mean("member"), verif, dim=dim_no_member, **metric_kwargs)
     crps_h = crps_gaussian(verif, mu, mse_h, dim=dim_no_member, **metric_kwargs)
@@ -2984,14 +3099,21 @@ def _discrimination(forecast, verif, dim=None, **metric_kwargs):
     Example:
         Define a boolean/logical function for binary scoring:
 
-        >>> def pos(x): return x > 0  # checking binary outcomes
+        >>> def pos(x):
+        ...     return x > 0  # checking binary outcomes
+        ...
 
         Option 1. Pass with keyword ``logical``: (especially designed for
         :py:class:`~climpred.classes.PerfectModelEnsemble`, where binary verification
         can only be created after comparison)
 
-        >>> HindcastEnsemble.verify(metric='discrimination', comparison='m2o',
-        ...     dim=['member', 'init'], alignment='same_verifs', logical=pos)
+        >>> HindcastEnsemble.verify(
+        ...     metric="discrimination",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ...     logical=pos,
+        ... )
         <xarray.Dataset>
         Dimensions:               (lead: 10, forecast_probability: 5, event: 2)
         Coordinates:
@@ -3004,8 +3126,12 @@ def _discrimination(forecast, verif, dim=None, **metric_kwargs):
 
         Option 2. Pre-process to generate a binary forecast and verification product:
 
-        >>> HindcastEnsemble.map(pos).verify(metric='discrimination',
-        ...     comparison='m2o', dim=['member','init'], alignment='same_verifs')
+        >>> HindcastEnsemble.map(pos).verify(
+        ...     metric="discrimination",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ... )
         <xarray.Dataset>
         Dimensions:               (lead: 10, forecast_probability: 5, event: 2)
         Coordinates:
@@ -3020,8 +3146,12 @@ def _discrimination(forecast, verif, dim=None, **metric_kwargs):
         verification product. because ``member`` not present in ``hindcast``, use
         ``comparison='e2o'`` and ``dim='init'``:
 
-        >>> HindcastEnsemble.map(pos).mean('member').verify(metric='discrimination',
-        ...     comparison='e2o', dim='init', alignment='same_verifs')
+        >>> HindcastEnsemble.map(pos).mean("member").verify(
+        ...     metric="discrimination",
+        ...     comparison="e2o",
+        ...     dim="init",
+        ...     alignment="same_verifs",
+        ... )
         <xarray.Dataset>
         Dimensions:               (lead: 10, forecast_probability: 5, event: 2)
         Coordinates:
@@ -3092,14 +3222,21 @@ def _reliability(forecast, verif, dim=None, **metric_kwargs):
     Example:
         Define a boolean/logical function for binary scoring:
 
-        >>> def pos(x): return x > 0  # checking binary outcomes
+        >>> def pos(x):
+        ...     return x > 0  # checking binary outcomes
+        ...
 
         Option 1. Pass with keyword ``logical``: (especially designed for
         :py:class:`~climpred.classes.PerfectModelEnsemble`, where binary verification
         can only be created after comparison))
 
-        >>> HindcastEnsemble.verify(metric='reliability', comparison='m2o',
-        ...     dim=['member','init'], alignment='same_verifs', logical=pos)
+        >>> HindcastEnsemble.verify(
+        ...     metric="reliability",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ...     logical=pos,
+        ... )
         <xarray.Dataset>
         Dimensions:               (lead: 10, forecast_probability: 5)
         Coordinates:
@@ -3123,8 +3260,12 @@ def _reliability(forecast, verif, dim=None, **metric_kwargs):
 
         Option 2. Pre-process to generate a binary forecast and verification product:
 
-        >>> HindcastEnsemble.map(pos).verify(metric='reliability',
-        ...     comparison='m2o', dim=['init', 'member'], alignment='same_verifs')
+        >>> HindcastEnsemble.map(pos).verify(
+        ...     metric="reliability",
+        ...     comparison="m2o",
+        ...     dim=["init", "member"],
+        ...     alignment="same_verifs",
+        ... )
         <xarray.Dataset>
         Dimensions:               (lead: 10, forecast_probability: 5)
         Coordinates:
@@ -3149,8 +3290,12 @@ def _reliability(forecast, verif, dim=None, **metric_kwargs):
         verification product. because ``member`` not present in ``hindcast``, use
         ``comparison='e2o'`` and ``dim='init'``:
 
-        >>> HindcastEnsemble.map(pos).mean('member').verify(metric='reliability',
-        ...     comparison='e2o', dim='init', alignment='same_verifs')
+        >>> HindcastEnsemble.map(pos).mean("member").verify(
+        ...     metric="reliability",
+        ...     comparison="e2o",
+        ...     dim="init",
+        ...     alignment="same_verifs",
+        ... )
         <xarray.Dataset>
         Dimensions:               (lead: 10, forecast_probability: 5)
         Coordinates:
@@ -3216,8 +3361,12 @@ def _rank_histogram(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~xskillscore.rank_histogram`
 
     Example:
-        >>> HindcastEnsemble.verify(metric='rank_histogram', comparison='m2o',
-        ...     dim=['member', 'init'], alignment='same_verifs')
+        >>> HindcastEnsemble.verify(
+        ...     metric="rank_histogram",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 10, rank: 11)
         Coordinates:
@@ -3237,8 +3386,9 @@ def _rank_histogram(forecast, verif, dim=None, **metric_kwargs):
             dim:                           ['member', 'init']
             reference:                     []
 
-        >>> PerfectModelEnsemble.verify(metric='rank_histogram', comparison='m2c',
-        ...     dim=['member', 'init'])
+        >>> PerfectModelEnsemble.verify(
+        ...     metric="rank_histogram", comparison="m2c", dim=["member", "init"]
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 20, rank: 10)
         Coordinates:
@@ -3306,10 +3456,14 @@ def _rps(forecast, verif, dim=None, **metric_kwargs):
         * :py:func:`~xskillscore.rps`
 
     Example:
-        >>> category_edges = np.array([-.5, 0., .5, 1.])
-        >>> HindcastEnsemble.verify(metric='rps', comparison='m2o',
-        ...     dim=['member', 'init'], alignment='same_verifs',
-        ...     category_edges=category_edges)
+        >>> category_edges = np.array([-0.5, 0.0, 0.5, 1.0])
+        >>> HindcastEnsemble.verify(
+        ...     metric="rps",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ...     category_edges=category_edges,
+        ... )
         <xarray.Dataset>
         Dimensions:                     (lead: 10)
         Coordinates:
@@ -3335,10 +3489,18 @@ def _rps(forecast, verif, dim=None, **metric_kwargs):
         Provide ``category_edges`` as ``xr.Dataset`` for category edges varying along
         dimensions.
 
-        >>> category_edges = xr.DataArray([9.5, 10., 10.5, 11.], dims='category_edge').assign_coords(category_edge=[9.5, 10., 10.5, 11.]).to_dataset(name='tos')
+        >>> category_edges = (
+        ...     xr.DataArray([9.5, 10.0, 10.5, 11.0], dims="category_edge")
+        ...     .assign_coords(category_edge=[9.5, 10.0, 10.5, 11.0])
+        ...     .to_dataset(name="tos")
+        ... )
         >>> # category_edges = np.array([9.5, 10., 10.5, 11.]) # identical
-        >>> PerfectModelEnsemble.verify(metric='rps', comparison='m2c',
-        ...     dim=['member', 'init'], category_edges=category_edges)
+        >>> PerfectModelEnsemble.verify(
+        ...     metric="rps",
+        ...     comparison="m2c",
+        ...     dim=["member", "init"],
+        ...     category_edges=category_edges,
+        ... )
         <xarray.Dataset>
         Dimensions:                     (lead: 20)
         Coordinates:
@@ -3363,12 +3525,26 @@ def _rps(forecast, verif, dim=None, **metric_kwargs):
         forecasts and observations.
 
         >>> q = [1 / 3, 2 / 3]  # terciles by month
-        >>> forecast_edges = HindcastEnsemble.get_initialized().groupby('init.month').quantile(q=q, dim=['init', 'member']).rename({'quantile':'category_edge'})
-        >>> obs_edges = HindcastEnsemble.get_observations().groupby('time.month').quantile(q=q, dim='time').rename({'quantile':'category_edge'})
+        >>> forecast_edges = (
+        ...     HindcastEnsemble.get_initialized()
+        ...     .groupby("init.month")
+        ...     .quantile(q=q, dim=["init", "member"])
+        ...     .rename({"quantile": "category_edge"})
+        ... )
+        >>> obs_edges = (
+        ...     HindcastEnsemble.get_observations()
+        ...     .groupby("time.month")
+        ...     .quantile(q=q, dim="time")
+        ...     .rename({"quantile": "category_edge"})
+        ... )
         >>> category_edges = (obs_edges, forecast_edges)
-        >>> HindcastEnsemble.verify(metric='rps', comparison='m2o',
-        ...     dim=['member', 'init'], alignment='same_verifs',
-        ...     category_edges=category_edges)
+        >>> HindcastEnsemble.verify(
+        ...     metric="rps",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ...     category_edges=category_edges,
+        ... )
         <xarray.Dataset>
         Dimensions:                     (lead: 10)
         Coordinates:
@@ -3468,10 +3644,15 @@ def _contingency(forecast, verif, score="table", dim=None, **metric_kwargs):
 
     Example:
         >>> category_edges = np.array([-0.5, 0.0, 0.5, 1.0])
-        >>> HindcastEnsemble.verify(metric='contingency', score='table', comparison='m2o',
-        ...     dim=['member', 'init'], alignment='same_verifs',
+        >>> HindcastEnsemble.verify(
+        ...     metric="contingency",
+        ...     score="table",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
         ...     observation_category_edges=category_edges,
-        ...     forecast_category_edges=category_edges).isel(lead=[0, 1]).SST
+        ...     forecast_category_edges=category_edges,
+        ... ).isel(lead=[0, 1]).SST
         <xarray.DataArray 'SST' (lead: 2, observations_category: 3, forecasts_category: 3)>
         array([[[221,  29,   0],
                 [ 53, 217,   0],
@@ -3492,10 +3673,14 @@ def _contingency(forecast, verif, score="table", dim=None, **metric_kwargs):
 
         >>> # contingency-based dichotomous accuracy score
         >>> category_edges = np.array([9.5, 10.0, 10.5])
-        >>> PerfectModelEnsemble.verify(metric='contingency', score='hit_rate',
-        ...     comparison='m2c', dim=['member','init'],
+        >>> PerfectModelEnsemble.verify(
+        ...     metric="contingency",
+        ...     score="hit_rate",
+        ...     comparison="m2c",
+        ...     dim=["member", "init"],
         ...     observation_category_edges=category_edges,
-        ...     forecast_category_edges=category_edges)
+        ...     forecast_category_edges=category_edges,
+        ... )
         <xarray.Dataset>
         Dimensions:  (lead: 20)
         Coordinates:
@@ -3592,10 +3777,13 @@ def _roc(forecast, verif, dim=None, **metric_kwargs):
 
     Example:
         >>> bin_edges = np.array([-0.5, 0.0, 0.5, 1.0])
-        >>> HindcastEnsemble.verify(metric='roc', comparison='m2o',
-        ...     dim=['member', 'init'], alignment='same_verifs',
+        >>> HindcastEnsemble.verify(
+        ...     metric="roc",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
         ...     bin_edges=bin_edges,
-        ...     ).SST
+        ... ).SST
         <xarray.DataArray 'SST' (lead: 10)>
         array([0.84385185, 0.82841667, 0.81358547, 0.8393463 , 0.82551752,
                0.81987778, 0.80719573, 0.80081909, 0.79046553, 0.78037564])
@@ -3607,11 +3795,17 @@ def _roc(forecast, verif, dim=None, **metric_kwargs):
 
         Get area under the curve, false positive rate and true positive rate as ``metric`` dimension by specifying ``return_results='all_as_metric_dim'``:
 
-        >>> def f(ds): return ds > 0
-        >>> HindcastEnsemble.map(f).verify(metric='roc', comparison='m2o',
-        ...     dim=['member', 'init'], alignment='same_verifs',
-        ...     bin_edges='continuous', return_results='all_as_metric_dim'
-        ...     ).SST.isel(lead=[0, 1])
+        >>> def f(ds):
+        ...     return ds > 0
+        ...
+        >>> HindcastEnsemble.map(f).verify(
+        ...     metric="roc",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ...     bin_edges="continuous",
+        ...     return_results="all_as_metric_dim",
+        ... ).SST.isel(lead=[0, 1])
         <xarray.DataArray 'SST' (lead: 2, metric: 3, probability_bin: 3)>
         array([[[0.        , 0.116     , 1.        ],
                 [0.        , 0.8037037 , 1.        ],
@@ -3685,8 +3879,11 @@ def _less(forecast, verif, dim=None, **metric_kwargs):
         >>> # better detrend before
         >>> from climpred.stats import rm_poly
         >>> HindcastEnsemble.map(rm_poly, dim="init_or_time", deg=2).verify(
-        ...     metric='less', comparison='m2o', dim=['member', 'init'],
-        ...     alignment='same_verifs').SST
+        ...     metric="less",
+        ...     comparison="m2o",
+        ...     dim=["member", "init"],
+        ...     alignment="same_verifs",
+        ... ).SST
         <xarray.DataArray 'SST' (lead: 10)>
         array([ 0.12633664, -0.12707636, -0.26143181, -0.25096537, -0.29267366,
                -0.2905725 , -0.43579508, -0.33774947, -0.46008438, -0.61010386])
diff --git a/climpred/options.py b/climpred/options.py
index f3e2392ad..94df35c0c 100644
--- a/climpred/options.py
+++ b/climpred/options.py
@@ -27,7 +27,7 @@
 
 class set_options:
     """Set options for climpred in a controlled context. Analogous to
-    `xarray.set_options(**option) <http://xarray.pydata.org/en/stable/generated/xarray.set_options.html>`_.
+    :py:class:`~xarray.options.set_options`.
 
     Currently supported options:
 
@@ -38,36 +38,46 @@ class set_options:
         - Allowed: [``"dayofyear"``, ``"weekofyear"``, ``"month"``, ``"season"``]
         - Default: ``dayofyear``.
     - ``PerfectModel_persistence_from_initialized_lead_0``
-        - Use ``climpred.reference.compute_persistence_from_first_lead`` in PerfectModelEnsemble.verify/bootstrap(reference='persistence'). If ``False`` uses `climpred.reference.compute_persistence <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence.html#climpred.reference.compute_persistence>`_ and if ``True`` uses  `climpred.reference.compute_persistence_from_first_lead <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence_from_first_lead.html#climpred.reference.compute_persistence_from_first_lead>`_, see `example <https://climpred.readthedocs.io/en/stable/api/climpred.reference.compute_persistence_from_first_lead.html#climpred.reference.compute_persistence_from_first_lead>`_.
-        - Allowed: [True, False]
-        - Default: False
+        - Which persistence function to use in
+            ``PerfectModelEnsemble.verify/bootstrap(reference="persistence")``.
+            If ``False`` use :py:func:`~climpred.reference.compute_persistence`.
+            If ``True`` use
+            :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
+        - Allowed: [``True``, ``False``]
+        - Default: ``False``
     - ``warn_for_failed_PredictionEnsemble_xr_call``
-        - Raise UserWarning when PredictionEnsemble.xr_call,
+        - Raise ``UserWarning`` when ``PredictionEnsemble.xr_call``,
             e.g. ``.sel(lead=[1])`` fails on one of the datasets.
-        - Allowed: [True, False]
-        - Default: True
+        - Allowed: [``True``, ``False``]
+        - Default: ``True``
     - ``warn_for_rename_to_climpred_dims``
-        - Raise UserWarning when dimensions are renamed to ``CLIMPRED_DIMS`` when
-            PredictionEnsemble is instantiated.
-        - Allowed: [True, False]
-        - Default: True
+        - Raise ``UserWarning`` when dimensions are renamed to ``CLIMPRED_DIMS`` when
+            :py:class:`~climpred.classes.PredictionEnsemble` is instantiated.
+        - Allowed: [``True``, ``False``]
+        - Default: ``True``
     - ``warn_for_init_coords_int_to_annual``
-        - Raise UserWarning when ``init`` coordinate is of type integer and gets
-            converted to annual cftime_range when PredictionEnsemble is instantiated.
-        - Allowed: [True, False]
-        - Default: True
+        - Raise ``UserWarning`` when ``init`` coordinate is of type integer and gets
+            converted to annual cftime_range when :py:class:`~climpred.classes.PredictionEnsemble` is instantiated.
+        - Allowed: [``True``, ``False``]
+        - Default: ``True``
     - ``climpred_warnings``
         - Overwrites all options containing ``"*warn*"``.
-        - Allowed: [True, False]
-        - Default: True
+        - Allowed: [``True``, ``False``]
+        - Default: ``True``
 
     Examples:
         You can use ``set_options`` either as a context manager:
 
-        >>> kw = dict(metric='mse', comparison='e2o', dim='init',
-        ...           alignment='same_verifs', reference='climatology')
-        >>> with climpred.set_options(seasonality='month'):
-        ...     HindcastEnsemble.verify(**kw).SST.sel(skill='climatology')
+        >>> kw = dict(
+        ...     metric="mse",
+        ...     comparison="e2o",
+        ...     dim="init",
+        ...     alignment="same_verifs",
+        ...     reference="climatology",
+        ... )
+        >>> with climpred.set_options(seasonality="month"):
+        ...     HindcastEnsemble.verify(**kw).SST.sel(skill="climatology")
+        ...
         <xarray.DataArray 'SST' (lead: 10)>
         array([0.03712573, 0.03712573, 0.03712573, 0.03712573, 0.03712573,
                0.03712573, 0.03712573, 0.03712573, 0.03712573, 0.03712573])
@@ -79,7 +89,7 @@ class set_options:
 
         Or to set global options:
 
-        >>> climpred.set_options(seasonality='month')  # doctest: +ELLIPSIS
+        >>> climpred.set_options(seasonality="month")  # doctest: +ELLIPSIS
         <climpred.options.set_options object at 0x...>
     """
 
diff --git a/climpred/prediction.py b/climpred/prediction.py
index 1bdba5a17..98dcc5db8 100644
--- a/climpred/prediction.py
+++ b/climpred/prediction.py
@@ -83,7 +83,8 @@ def _apply_metric_at_given_lead(
 
     lforecast["time"] = lverif[
         "time"
-    ]  # a bit dangerous: what if different? more clear once https://github.com/pangeo-data/climpred/issues/523#issuecomment-728951645 implemented
+    ]  # a bit dangerous: what if different? more clear once implemented
+    # https://github.com/pangeo-data/climpred/issues/523#issuecomment-728951645
     dim = _rename_dim(
         dim, hind, verif
     )  # dim should be much clearer once time in initialized.coords
diff --git a/climpred/reference.py b/climpred/reference.py
index 2a0b0f3d2..62bc50c0e 100644
--- a/climpred/reference.py
+++ b/climpred/reference.py
@@ -110,7 +110,8 @@ def climatology(verif, inits, verif_dates, lead):
 
 
 def uninitialized(hist, verif, verif_dates, lead):
-    """Uninitialized forecast uses a simulation without any initialization (assimilation/nudging). Also called historical in some communities."""
+    """Uninitialized forecast uses a simulation without any initialization
+    (assimilation/nudging). Also called historical in some communities."""
     lforecast = hist.sel(time=verif_dates[lead])
     lverif = verif.sel(time=verif_dates[lead])
     return lforecast, lverif
@@ -121,8 +122,10 @@ def uninitialized(hist, verif, verif_dates, lead):
 
 
 def _adapt_member_for_reference_forecast(lforecast, lverif, metric, comparison, dim):
-    """Maybe drop member from dim or add single-member dimension. Used in reference forecasts: climatology, uninitialized, persistence."""
-    # persistence or climatology forecasts wont have member dimension, create if required
+    """Maybe drop member from dim or add single-member dimension. Used in
+    reference forecasts: climatology, uninitialized, persistence."""
+    # persistence or climatology forecasts wont have member dimension, create if
+    # required
     # some metrics dont allow member dimension, remove and try mean
     # delete member from dim if needed
     if "member" in dim:
@@ -251,14 +254,14 @@ def compute_persistence(
             Default: 'pearson_r'
         alignment (str): which inits or verification times should be aligned?
 
-            - maximize/None: maximize the degrees of freedom by slicing ``initialized`` and
-            ``verif`` to a common time frame at each lead.
-            - same_inits: slice to a common init frame prior to computing
-            metric. This philosophy follows the thought that each lead should be based
-            on the same set of initializations.
-            - same_verif: slice to a common/consistent verification time frame prior to
-            computing metric. This philosophy follows the thought that each lead
-            should be based on the same set of verification dates.
+            - ``maximize``: maximize the degrees of freedom by slicing
+                ``initialized`` and ``verif`` to a common time frame at each lead.
+            - ``same_inits``: slice to a common init frame prior to computing
+                metric. This philosophy follows the thought that each lead should be
+                based on the same set of initializations.
+            - ``same_verifs``: slice to a common/consistent verification time frame
+                prior to computing metric. This philosophy follows the thought that
+                each lead should be based on the same set of verification dates.
 
         dim (str or list of str): dimension to apply metric over.
         ** metric_kwargs (dict): additional keywords to be passed to metric
@@ -351,14 +354,14 @@ def compute_persistence_from_first_lead(
             Default: 'pearson_r'
         alignment (str): which inits or verification times should be aligned?
 
-            - ``maximize``: maximize the degrees of freedom by slicing ``initialized`` and
-              ``verif`` to a common time frame at each lead.
-            - ``same_inits``: slice to a common init frame prior to computing
-              metric. This philosophy follows the thought that each lead should be based
-              on the same set of initializations.
-            - ``same_verif``: slice to a common/consistent verification time frame prior to
-              computing metric. This philosophy follows the thought that each lead
-              should be based on the same set of verification dates.
+            - ``maximize``: maximize the degrees of freedom by slicing ``initialized``
+                and ``verif`` to a common time frame at each lead.
+            - ``same_inits``: slice to a common ``init`` frame prior to computing
+                metric. This philosophy follows the thought that each lead should be
+                based on the same set of initializations.
+            - ``same_verifs``: slice to a common/consistent verification time frame
+                prior to computing metric. This philosophy follows the thought that
+                each lead should be based on the same set of verification dates.
 
         dim (str or list of str): dimension to apply metric over.
         ** metric_kwargs (dict): additional keywords to be passed to metric
@@ -369,10 +372,17 @@ def compute_persistence_from_first_lead(
             applied.
 
     Example:
-        >>> with climpred.set_options(PerfectModel_persistence_from_initialized_lead_0=True):
-        ...     PerfectModelEnsemble.verify(metric="mse", comparison="m2e",
-        ...         dim=["init", "member"], reference="persistence"
-        ...     ).sel(skill='persistence')  # persistence sensitive to comparison
+        >>> with climpred.set_options(
+        ...     PerfectModel_persistence_from_initialized_lead_0=True
+        ... ):
+        ...     PerfectModelEnsemble.verify(
+        ...         metric="mse",
+        ...         comparison="m2e",
+        ...         dim=["init", "member"],
+        ...         reference="persistence",
+        ...     ).sel(
+        ...         skill="persistence"
+        ...     )  # persistence sensitive to comparison
         <xarray.Dataset>
         Dimensions:  (lead: 20)
         Coordinates:
@@ -392,10 +402,17 @@ def compute_persistence_from_first_lead(
             PerfectModel_persistence_from_initialized_lead_0:  True
 
 
-        >>> with climpred.set_options(PerfectModel_persistence_from_initialized_lead_0=False):
-        ...     PerfectModelEnsemble.verify(metric="mse", comparison="m2e",
-        ...         dim=["init", "member"], reference="persistence"
-        ...     ).sel(skill='persistence')  # persistence not sensitive to comparison
+        >>> with climpred.set_options(
+        ...     PerfectModel_persistence_from_initialized_lead_0=False
+        ... ):
+        ...     PerfectModelEnsemble.verify(
+        ...         metric="mse",
+        ...         comparison="m2e",
+        ...         dim=["init", "member"],
+        ...         reference="persistence",
+        ...     ).sel(
+        ...         skill="persistence"
+        ...     )  # persistence not sensitive to comparison
         <xarray.Dataset>
         Dimensions:  (lead: 20)
         Coordinates:
@@ -475,24 +492,24 @@ def compute_uninitialized(
             data.
         comparison (str):
             How to compare the uninitialized ensemble to the verification data:
-                * e2o : ensemble mean to verification data (Default)
-                * m2o : each member to the verification data
+                * `"e2o"` : ensemble mean to verification data (Default)
+                * `"m2o"` : each member to the verification data
         dim (str or list of str): dimension to apply metric over.
         alignment (str): which inits or verification times should be aligned?
 
-            - maximize/None: maximize the degrees of freedom by slicing ``initialized`` and
-            ``verif`` to a common time frame at each lead.
-            - same_inits: slice to a common init frame prior to computing
+            - ``maximize``: maximize the degrees of freedom by slicing ``initialized``
+                and ``verif`` to a common time frame at each lead.
+            - ``same_inits``: slice to a common init frame prior to computing
             metric. This philosophy follows the thought that each lead should be based
             on the same set of initializations.
-            - same_verif: slice to a common/consistent verification time frame prior to
-            computing metric. This philosophy follows the thought that each lead
-            should be based on the same set of verification dates.
+            - ``same_verif``: slice to a common/consistent verification time frame
+                prior to computing metric. This philosophy follows the thought that
+                each lead should be based on the same set of verification dates.
 
         ** metric_kwargs (dict): additional keywords to be passed to metric
 
     Returns:
-        u (xarray.Dataset): Results from comparison at the first lag.
+        uninit_skill (xarray.Dataset): Results from comparison at the first lag.
 
     """
     if isinstance(dim, str):
diff --git a/climpred/smoothing.py b/climpred/smoothing.py
index 5476e0705..883b36ed1 100644
--- a/climpred/smoothing.py
+++ b/climpred/smoothing.py
@@ -1,3 +1,7 @@
+"""Spatial/temporal smoothing implemented in PredictionEnsemble.smooth()."""
+
+from typing import Any, Dict, Optional
+
 import numpy as np
 import xarray as xr
 
@@ -9,55 +13,52 @@
     xe = None
 
 
-@is_xarray(0)
 def spatial_smoothing_xesmf(
-    ds,
-    d_lon_lat_kws={"lon": 5, "lat": 5},
-    method="bilinear",
-    periodic=False,
-    filename=None,
-    reuse_weights=False,
-    tsmooth_kws=None,
-    how=None,
-):
-    """
-    Quick regridding function. Adapted from
-    https://github.com/JiaweiZhuang/xESMF/pull/27/files#diff-b537ef68c98c2ec11e64e4803fe4a113R105.
+    ds: xr.Dataset,
+    d_lon_lat_kws: dict = {"lon": 5, "lat": 5},
+    method: str = "bilinear",
+    periodic: bool = False,
+    filename: str = None,
+    reuse_weights: bool = False,
+    tsmooth_kws: Optional[dict] = None,
+    how: str = None,
+) -> xr.Dataset:
+    """Quick regridding function.
+
+    Adapted from https://github.com/JiaweiZhuang/xESMF/pull/27/files#diff-b537ef68c98c2ec11e64e4803fe4a113R105.
 
     Args:
-        ds (xarray-object): Contain input and output grid coordinates.
+        ds: Contain input and output grid coordinates.
             Look for coordinates ``lon``, ``lat``, and optionally ``lon_b``,
             ``lat_b`` for conservative method. Also any coordinate which is C/F
             compliant, .i.e. standard_name in ['longitude', 'latitude'] is allowed.
             Shape can be 1D (Nlon,) and (Nlat,) for rectilinear grids,
             or 2D (Ny, Nx) for general curvilinear grids.
             Shape of bounds should be (N+1,) or (Ny+1, Nx+1).
-        d_lon_lat_kws (dict): optional
-            Longitude/Latitude step size (grid resolution); if not provided,
+        d_lon_lat_kws: Longitude/Latitude step size (grid resolution); if not provided,
             lon will equal 5 and lat will equal lon
-            (optional)
-        method (str): Regridding method. Options are:
+        method: Regridding method. Options are:
+
             - 'bilinear'
-            - 'conservative', **need grid corner information**
+            - 'conservative', **requires grid corner information**
             - 'patch'
             - 'nearest_s2d'
             - 'nearest_d2s'
-        periodic (bool): Periodic in longitude? Default to False. optional
+
+        periodic: Periodic in longitude? Defaults to ``False``.
             Only useful for global grids with non-conservative regridding.
             Will be forced to False for conservative regridding.
-        filename (str): Name for the weight file. (optional)
-            The default naming scheme is:
-                {method}_{Ny_in}x{Nx_in}_{Ny_out}x{Nx_out}.nc
-                e.g. bilinear_400x600_300x400.nc
-        reuse_weights (bool) Whether to read existing weight file to save
-            computing time. False by default. (optional)
-        tsmooth_kws (None): leads nowhere but consistent with `temporal_smoothing`.
-        how (None): leads nowhere but consistent with `temporal_smoothing`.
+        filename: Name for the weight file.
+            The default naming scheme is "{method}_{Ny_in}x{Nx_in}_{Ny_out}x{Nx_out}.nc"
+            e.g. "bilinear_400x600_300x400.nc"
+        reuse_weights: Whether to read existing weight file to save
+            computing time. Defaults to ``False``.
+        tsmooth_kws: leads nowhere but consistent with ``temporal_smoothing``.
+        how: leads nowhere but consistent with ``temporal_smoothing``.
 
-        Returns:
-            ds (xarray.object) regridded
+    Returns:
+        regridded
     """
-
     if xe is None:
         raise ImportError(
             "xesmf is not installed; see"
@@ -65,8 +66,7 @@ def spatial_smoothing_xesmf(
         )
 
     def _regrid_it(da, d_lon, d_lat, **kwargs):
-        """
-        Global 2D rectilinear grid centers and bounds
+        """Global 2D rectilinear grid centers and bounds.
 
         Args:
             da (xarray.DataArray): Contain input and output grid coords.
@@ -77,10 +77,10 @@ def _regrid_it(da, d_lon, d_lat, **kwargs):
                 Shape of bounds should be (N+1,) or (Ny+1, Nx+1).
             d_lon (float): Longitude step size, i.e. grid resolution
             d_lat (float): Latitude step size, i.e. grid resolution
+
         Returns:
-            da : xarray DataArray with coordinate values
+            da : xarray.DataArray with coordinate values
         """
-
         if "lon" in da.coords:
             lon = da.lon
         else:
@@ -88,7 +88,9 @@ def _regrid_it(da, d_lon, d_lat, **kwargs):
                 lon = da.cf["longitude"]
             except KeyError:
                 raise KeyError(
-                    "Could not find `lon` as coordinate or any C/F compliant `latitude` coordinate, see https://pangeo-xesmf.readthedocs.io and https://cf-xarray.readthedocs.io"
+                    "Could not find `lon` as coordinate or any C/F compliant "
+                    "`longitude` coordinate, see https://pangeo-xesmf.readthedocs.io "
+                    "and https://cf-xarray.readthedocs.io"
                 )
 
         if "lat" in da.coords:
@@ -98,7 +100,8 @@ def _regrid_it(da, d_lon, d_lat, **kwargs):
                 lat = da.cf["latitude"]
             except KeyError:
                 raise KeyError(
-                    "C/F compliant or `lat` as coordinate, see https://pangeo-xesmf.readthedocs.io"
+                    "C/F compliant or `lat` as coordinate, see "
+                    "https://pangeo-xesmf.readthedocs.io"
                 )
 
         grid_out = xr.Dataset(
@@ -134,38 +137,41 @@ def _regrid_it(da, d_lon, d_lat, **kwargs):
     return ds
 
 
-@is_xarray(0)
-def temporal_smoothing(ds, tsmooth_kws=None, how="mean", d_lon_lat_kws=None):
+def temporal_smoothing(
+    ds: xr.Dataset,
+    tsmooth_kws: Optional[dict] = None,
+    how: str = "mean",
+    d_lon_lat_kws: Optional[dict] = None,
+) -> xr.Dataset:
     """Apply temporal smoothing by creating rolling smooth-timestep means.
 
-    Reference:
-    * Goddard, L., A. Kumar, A. Solomon, D. Smith, G. Boer, P.
-     Gonzalez, V. Kharin, et al. “A Verification Framework for
-     Interannual - to - Decadal Predictions Experiments.” Climate
-     Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
-     https://doi.org/10/f4jjvf.
-
     Args:
-        ds(xr.object): input.
-        tsmooth_kws(dict): length of smoothing of timesteps.
-            Defaults to {'time': 4} (see Goddard et al. 2013).
-        how(str): aggregation type for smoothing. default: 'mean'
-        d_lon_lat_kws (None): leads nowhere but consistent with
-            `spatial_smoothing_xesmf`.
+        ds: input to be smoothed.
+        tsmooth_kws: length of smoothing of timesteps.
+            Defaults to ``{'time': 4}`` (see Goddard et al. 2013).
+        how: aggregation type for smoothing. Allowed: ``["mean", "sum"]``.
+            Default: 'mean'
+        d_lon_lat_kws: leads nowhere but consistent with ``spatial_smoothing_xesmf``.
 
     Returns:
-        ds_smoothed(xr.object): input with `smooth` timesteps less
-            and labeling '1-(smooth-1)', '...', ... .
+        input with ``smooth`` timesteps less and
+        labeling ``'1-(smooth-1)', '...', ...`` .
+
+    Reference:
+        Goddard, L., A. Kumar, A. Solomon et al.
+        “A Verification Framework for Interannual to Decadal Predictions Experiments.”
+        Climate Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
+        https://doi.org/10/f4jjvf.
 
     """
     # unpack dict
     if not isinstance(tsmooth_kws, dict):
         raise ValueError(
-            "Please provide tsmooth_kws as dict, found ", type(tsmooth_kws)
+            "Please provide `tsmooth_kws` as dict, found ", type(tsmooth_kws)
         )
     if not ("time" in tsmooth_kws or "lead" in tsmooth_kws):
         raise ValueError(
-            'tsmooth_kws doesnt contain a time dimension \
+            '`tsmooth_kws` doesnt contain a `time` dimension \
             (either "lead" or "time").',
             tsmooth_kws,
         )
@@ -187,19 +193,24 @@ def temporal_smoothing(ds, tsmooth_kws=None, how="mean", d_lon_lat_kws=None):
     return ds_smoothed
 
 
-def _reset_temporal_axis(ds_smoothed, tsmooth_kws, dim="lead", set_lead_center=True):
-    """Reduce and reset temporal axis. See temporal_smoothing(). Should be
-    used after calculation of skill to maintain readable labels for skill
+def _reset_temporal_axis(
+    ds_smoothed: xr.Dataset,
+    tsmooth_kws: Dict[str, int],
+    dim: str = "lead",
+    set_lead_center: bool = True,
+) -> xr.Dataset:
+    """Reduce and reset temporal axis. See temporal_smoothing.
+
+    Should be used after calculation of skill to maintain readable labels for skill
     computation.
 
     Args:
-        ds_smoothed (xarray object): Smoothed dataset.
-        tsmooth_kws (dict): Keywords smoothing is performed over.
-        dim (str): Dimension smoothing is performed over. Defaults to 'lead'.
-        set_center (bool): Whether to set new coord `{dim}_center`.
-            Defaults to True.
+        ds_smoothed: Smoothed dataset.
+        tsmooth_kws: Keywords smoothing is performed over.
+        dim: Dimension smoothing is performed over. Defaults to ``'lead'``.
+        set_lead_center: Whether to set new coord `{dim}_center`. Defaults to `True`.
 
-    Returns:
+    Returns:
         Smoothed Dataset with updated labels for smoothed temporal dimension.
     """
     # bugfix: actually tsmooth_kws should only dict
@@ -216,50 +227,47 @@ def _reset_temporal_axis(ds_smoothed, tsmooth_kws, dim="lead", set_lead_center=T
     return ds_smoothed
 
 
-def _set_center_coord(ds, dim="lead"):
+def _set_center_coord(ds: xr.Dataset, dim: str = "lead") -> xr.Dataset:
     """Set lead_center as a new coordinate."""
     new_dim = []
     old_dim = ds[dim].values
     for i in old_dim:
         new_dim.append(eval(i.replace("-", "+")) / 2)
-    new_dim = np.array(new_dim)
-    ds.coords[f"{dim}_center"] = (dim, new_dim)
+    ds.coords[f"{dim}_center"] = (dim, np.array(new_dim))
     return ds
 
 
-@is_xarray(0)
 def smooth_goddard_2013(
-    ds,
-    tsmooth_kws={"lead": 4},
-    d_lon_lat_kws={"lon": 5, "lat": 5},
-    how="mean",
-    **xesmf_kwargs,
-):
-    """Wrapper to smooth as suggested by Goddard et al. 2013:
-        - 4-year composites
-        - 5x5 degree regridding
+    ds: xr.Dataset,
+    tsmooth_kws: Dict[str, int] = {"lead": 4},
+    d_lon_lat_kws: Dict[str, int] = {"lon": 5, "lat": 5},
+    how: str = "mean",
+    **xesmf_kwargs: Any,
+) -> xr.Dataset:
+    """Wrapper to smooth as suggested by Goddard et al. 2013.
 
-    Reference:
-    * Goddard, L., A. Kumar, A. Solomon, D. Smith, G. Boer, P.
-        Gonzalez, V. Kharin, et al. “A Verification Framework for
-        Interannual - to - Decadal Predictions Experiments.” Climate
-        Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
-        https: // doi.org / 10 / f4jjvf.
+    - 4-year composites
+    - 5x5 degree regridding
 
     Args:
-        ds(xr.object): input.
-        tsmooth_kws(dict): length of smoothing of timesteps (applies to ``lead``
-                          in forecast and ``time`` in verification data).
-                          Default: {'time': 4} (see Goddard et al. 2013).
-        d_lon_lat_kws (dict): target grid for regridding.
-                              Default: {'lon':5 , 'lat': 5}
-        how(str): aggregation type for smoothing. default: 'mean'
+        ds: input to be smoothed.
+        tsmooth_kws: length of smoothing of timesteps (applies to ``lead``
+            in forecast and ``time`` in verification data).
+            Default: ``{'lead': 4}`` (see Goddard et al. 2013).
+        d_lon_lat_kws: target grid for regridding.
+            Default: ``{'lon':5 , 'lat': 5}``.
+        how: aggregation type for smoothing. Allowed: ``["mean", "sum"]``.
+            Default: ``'mean'``.
         **xesmf_kwargs (kwargs): kwargs passed to `spatial_smoothing_xesmf`.
 
     Returns:
-        ds_smoothed_regridded (xr.object): input with `smooth` timesteps less
-                                           and labeling '1-(smooth-1)', '...' .
+        input with `smooth` timesteps less and labeling '1-(smooth-1)', '...' .
 
+    Reference:
+        Goddard, L., A. Kumar, A. Solomon et al.
+        “A Verification Framework for Interannual to Decadal Predictions Experiments.”
+        Climate Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
+        https://doi.org/10/f4jjvf.
     """
     # first temporal smoothing
     ds_smoothed = temporal_smoothing(ds, tsmooth_kws=tsmooth_kws)
diff --git a/climpred/tutorial.py b/climpred/tutorial.py
index 9b487f228..62de3da75 100644
--- a/climpred/tutorial.py
+++ b/climpred/tutorial.py
@@ -73,8 +73,8 @@
     "observed RMM with interannual variablity included",
     "S2S ECMWF on-the-fly hindcasts from the S2S Project for Germany",
     "CPC/ERA5 observations for S2S forecasts over Germany",
-    "monthly multi-member hindcasts of sea-surface temperature averaged over the Nino3.4 region from the NMME project from IRIDL",
-    "monthly Reyn_SmithOIv2 sea-surface temperature observations averaged over the Nino3.4 region",
+    "monthly multi-member hindcasts of sea-surface temperature averaged over the Nino3.4 region from the NMME project from IRIDL",  # noqa: E501
+    "monthly Reyn_SmithOIv2 sea-surface temperature observations averaged over the Nino3.4 region",  # noqa: E501
 ]
 
 FILE_ALIAS_DICT = dict(zip(aliases, true_file_names))
@@ -152,8 +152,8 @@ def load_dataset(
 
     Examples:
         >>> from climpred.tutorial import load_dataset
-        >>> proxy_dict = {'http': '127.0.0.1'}
-        >>> ds = load_dataset('FOSI-SST', cache=False, proxy_dict=proxy_dict)
+        >>> proxy_dict = {"http": "127.0.0.1"}
+        >>> ds = load_dataset("FOSI-SST", cache=False, proxy_dict=proxy_dict)
     """
     if name is None:
         return _get_datasets()
diff --git a/setup.cfg b/setup.cfg
index c27bc77c5..f31e4bcf4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -56,6 +56,14 @@ markers =
 [aliases]
 test = pytest
 
+[pydocstyle]
+inherit = false
+ignore = W503
+# match = .*\.py
+
+[doc8]
+# ignore-path=/tmp/stuff,/tmp/other_stuff
+max-line-length=93
 
 [mypy]
 exclude = asv_bench|doc

From 70c8314e9e20f7109dd510c2c61cbd0017045493 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Thu, 9 Dec 2021 13:14:40 +0100
Subject: [PATCH 05/56] smoothing done

---
 climpred/classes.py   | 3 +--
 climpred/smoothing.py | 4 ++--
 setup.cfg             | 5 +----
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index 6a0d6bf37..517ae6b4e 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -756,6 +756,7 @@ def smooth(
             return self
         tsmooth_kws: Optional[Union[str, Dict[str, int]]] = None
         d_lon_lat_kws: Optional[Union[str, Dict[str, int]]] = None
+        smooth_fct: Callable[..., xr.Dataset]
         # get proper smoothing function based on smooth args
         if isinstance(smooth_kws, str):
             if "goddard" in smooth_kws:
@@ -1869,8 +1870,6 @@ def plot_alignment(
         except ImportError:
             raise ValueError("nc_time_axis>1.4.0 required for plotting.")
 
-    from .docstrings import comparison_docstring
-
     def verify(
         self,
         metric: metricType = None,
diff --git a/climpred/smoothing.py b/climpred/smoothing.py
index 883b36ed1..999e1097e 100644
--- a/climpred/smoothing.py
+++ b/climpred/smoothing.py
@@ -244,7 +244,7 @@ def smooth_goddard_2013(
     how: str = "mean",
     **xesmf_kwargs: Any,
 ) -> xr.Dataset:
-    """Wrapper to smooth as suggested by Goddard et al. 2013.
+    """Wrap to smooth as suggested by Goddard et al. 2013.
 
     - 4-year composites
     - 5x5 degree regridding
@@ -258,7 +258,7 @@ def smooth_goddard_2013(
             Default: ``{'lon':5 , 'lat': 5}``.
         how: aggregation type for smoothing. Allowed: ``["mean", "sum"]``.
             Default: ``'mean'``.
-        **xesmf_kwargs (kwargs): kwargs passed to `spatial_smoothing_xesmf`.
+        **xesmf_kwargs: kwargs passed to `spatial_smoothing_xesmf`.
 
     Returns:
         input with `smooth` timesteps less and labeling '1-(smooth-1)', '...' .
diff --git a/setup.cfg b/setup.cfg
index f31e4bcf4..ccb9bf454 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -57,12 +57,9 @@ markers =
 test = pytest
 
 [pydocstyle]
-inherit = false
-ignore = W503
-# match = .*\.py
+
 
 [doc8]
-# ignore-path=/tmp/stuff,/tmp/other_stuff
 max-line-length=93
 
 [mypy]

From 555ff85819428f0c3f04b4f1f7cf039a50bb9c6a Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Thu, 9 Dec 2021 17:27:56 +0100
Subject: [PATCH 06/56] classes.py

---
 climpred/checks.py    |   2 +-
 climpred/classes.py   | 604 ++++++++++++++++++++++--------------------
 climpred/smoothing.py |  20 +-
 climpred/tutorial.py  |  57 ++--
 docs/source/conf.py   |  16 +-
 5 files changed, 369 insertions(+), 330 deletions(-)

diff --git a/climpred/checks.py b/climpred/checks.py
index 0239bed5a..6152218e4 100644
--- a/climpred/checks.py
+++ b/climpred/checks.py
@@ -329,7 +329,7 @@ def _check_valid_reference(reference: Optional[Union[List[str], str]]) -> List[s
     return reference
 
 
-def _check_valud_alignment(alignment):
+def _check_valid_alignment(alignment):
     if alignment not in VALID_ALIGNMENTS:
         raise ValueError(
             f"Please provide alignment from {VALID_ALIGNMENTS}, "
diff --git a/climpred/classes.py b/climpred/classes.py
index 517ae6b4e..1b28e72ff 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -1,3 +1,5 @@
+"""Main module instantiating ``PerfectModelEnsemble`` and ``HindcastEnsemble``."""
+
 import warnings
 from copy import deepcopy
 from typing import (
@@ -34,14 +36,13 @@
     resample_uninitialized_from_initialized,
 )
 from .checks import (
+    _check_valid_alignment,
     _check_valid_reference,
-    _check_valud_alignment,
     attach_long_names,
     attach_standard_names,
     has_dataset,
     has_dims,
     has_valid_lead_units,
-    is_xarray,
     match_calendars,
     match_initialized_dims,
     match_initialized_vars,
@@ -104,12 +105,13 @@
 
 def _display_metadata(self) -> str:
     """
-    This is called in the following case:
+    Print the contents of the ``PredictionEnsemble`` as text.
+
+    Example:
+        >>> init = climpred.tutorial.load_dataset("CESM-DP-SST")
+        >>> hindcast = climpred.HindcastEnsemble(init)
+        >>> print(hindcast)
 
-    ```
-    dp = cp.HindcastEnsemble(dple)
-    print(dp)
-    ```
     """
     SPACE = "    "
     header = f"<climpred.{type(self).__name__}>"
@@ -155,6 +157,7 @@ def _display_metadata(self) -> str:
 
 
 def _display_metadata_html(self) -> str:
+    """Print the contents of the ``PredictionEnsemble`` as html."""
     header = f"<h4>climpred.{type(self).__name__}</h4>"
     display_html(header, raw=True)
     init_repr_str = dataset_repr(self._datasets["initialized"])
@@ -186,43 +189,45 @@ def _display_metadata_html(self) -> str:
 
 class PredictionEnsemble:
     """
-    The main object. This is the super of both `PerfectModelEnsemble` and
-    `HindcastEnsemble`. This cannot be called directly by a user, but
+    The main object ``PredictionEnsemble``.
+
+    This is the super of both ``PerfectModelEnsemble`` and
+    ``HindcastEnsemble``. This cannot be called directly by a user, but
     should house functions that both ensemble types can use.
     """
 
-    @is_xarray(1)
-    def __init__(self, xobj: Union[xr.DataArray, xr.Dataset]):
-        if isinstance(xobj, xr.DataArray):
+    def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]):
+        """Create a ``PredictionEnsemble`` object."""
+        if isinstance(initialized, xr.DataArray):
             # makes applying prediction functions easier, etc.
-            xobj = xobj.to_dataset()
-        xobj = rename_to_climpred_dims(xobj)
-        has_dims(xobj, ["init", "lead"], "PredictionEnsemble")
+            initialized = initialized.to_dataset()
+        initialized = rename_to_climpred_dims(initialized)
+        has_dims(initialized, ["init", "lead"], "PredictionEnsemble")
         # Check that init is int, cftime, or datetime; convert ints or cftime to
         # datetime.
-        xobj = convert_time_index(xobj, "init", "xobj[init]")
+        initialized = convert_time_index(initialized, "init", "initialized[init]")
         # Put this after `convert_time_index` since it assigns 'years' attribute if the
         # `init` dimension is a `float` or `int`.
-        xobj = convert_Timedelta_to_lead_units(xobj)
-        has_valid_lead_units(xobj)
-        xobj = add_time_from_init_lead(xobj)
+        initialized = convert_Timedelta_to_lead_units(initialized)
+        has_valid_lead_units(initialized)
+        initialized = add_time_from_init_lead(initialized)
         # add metadata
-        xobj = attach_standard_names(xobj)
-        xobj = attach_long_names(xobj)
-        xobj = xobj.cf.add_canonical_attributes(
+        initialized = attach_standard_names(initialized)
+        initialized = attach_long_names(initialized)
+        initialized = initialized.cf.add_canonical_attributes(
             verbose=False, override=True, skip="units"
         )
-        del xobj.attrs["history"]  # better only delete xclim message or not?
+        del initialized.attrs["history"]  # better only delete xclim message or not?
         # Add initialized dictionary and reserve sub-dictionary for an uninitialized
         # run.
-        self._datasets = {"initialized": xobj, "uninitialized": {}}
+        self._datasets = {"initialized": initialized, "uninitialized": {}}
         self.kind = "prediction"
         self._temporally_smoothed: Optional[Dict[str, int]] = None
         self._is_annual_lead = None
         self._warn_if_chunked_along_init_member_time()
 
     def _groupby(self, call: str, groupby: Union[str, xr.DataArray], **kwargs: Any):
-        """Helper for verify/bootstrap(groupby='month')"""
+        """Help for verify/bootstrap(groupby="month")."""
         skill_group, group_label = [], []
         groupby_str = f"init.{groupby}" if isinstance(groupby, str) else groupby
         for group, hind_group in self.get_initialized().init.groupby(groupby_str):
@@ -246,8 +251,13 @@ def _groupby(self, call: str, groupby: Union[str, xr.DataArray], **kwargs: Any):
 
     @property
     def coords(self) -> DatasetCoordinates:
-        """Dictionary of xarray.DataArray objects corresponding to coordinate
+        """Return coordinates of ``PredictionEnsemble``.
+
+        Dictionary of xarray.DataArray objects corresponding to coordinate
         variables available in all PredictionEnsemble._datasets.
+
+        See also:
+            :py:meth:`~xarray.Dataset.coords`
         """
         pe_coords = self.get_initialized().coords.to_dataset()
         for ds in self._datasets.values():
@@ -257,7 +267,11 @@ def coords(self) -> DatasetCoordinates:
 
     @property
     def nbytes(self) -> int:
-        """Bytes sizes of all PredictionEnsemble._datasets."""
+        """Bytes sizes of all PredictionEnsemble._datasets.
+
+        See also:
+            :py:meth:`~xarray.Dataset.nbytes`
+        """
         return sum(
             [
                 sum(v.nbytes for v in ds.variables.values())
@@ -268,8 +282,14 @@ def nbytes(self) -> int:
 
     @property
     def sizes(self) -> Mapping[Hashable, int]:
-        """Mapping from dimension names to lengths for all
-        PredictionEnsemble._datasets."""
+        """
+        Return sizes of ``PredictionEnsemble``.
+
+        Mapping from dimension names to lengths for all PredictionEnsemble._datasets.
+
+        See also:
+            :py:meth:`~xarray.Dataset.sizes`
+        """
         pe_dims = dict(self.get_initialized().dims)
         for ds in self._datasets.values():
             if isinstance(ds, xr.Dataset):
@@ -278,12 +298,26 @@ def sizes(self) -> Mapping[Hashable, int]:
 
     @property
     def dims(self) -> Mapping[Hashable, int]:
-        """Mapping from dimension names to lengths all PredictionEnsemble._datasets."""
+        """
+        Return dimension of ``PredictionEnsemble``.
+
+        Mapping from dimension names to lengths for all PredictionEnsemble._datasets.
+
+        See also:
+            :py:meth:`~xarray.Dataset.dims`
+        """
         return Frozen(self.sizes)
 
     @property
     def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
-        """Mapping from chunks all PredictionEnsemble._datasets."""
+        """
+        Return chunks of ``PredictionEnsemble``.
+
+        Mapping from dimension names to chunks for all PredictionEnsemble._datasets.
+
+        See also:
+            :py:meth:`~xarray.Dataset.chunks`
+        """
         pe_chunks = dict(self.get_initialized().chunks)
         for ds in self._datasets.values():
             if isinstance(ds, xr.Dataset):
@@ -294,17 +328,29 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
 
     @property
     def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
-        """Mapping from dimension names to block lengths for this dataset's data, or
+        """Return chunksizes of ``PredictionEnsemble``.
+
+        Mapping from dimension names to block lengths for this dataset's data, or
         None if the underlying data is not a dask array.
         Cannot be modified directly, but can be modified by calling .chunk().
         Same as Dataset.chunks.
+
+        See also:
+            :py:meth:`~xarray.Dataset.chunksizes`
         """
         return self.chunks
 
     @property
     def data_vars(self) -> DataVariables:
-        """Dictionary of DataArray objects corresponding to data variables available in
-        all PredictionEnsemble._datasets."""
+        """
+        Return data variables of ``PredictionEnsemble``.
+
+        Dictionary of DataArray objects corresponding to data variables available in
+        all PredictionEnsemble._datasets.
+
+        See also:
+            :py:meth:`~xarray.Dataset.data_vars`
+        """
         varset = set(self.get_initialized().data_vars)
         for ds in self._datasets.values():
             if isinstance(ds, xr.Dataset):
@@ -316,22 +362,22 @@ def data_vars(self) -> DataVariables:
     # when you just print it interactively
     # https://stackoverflow.com/questions/1535327/how-to-print-objects-of-class-using-print
     def __repr__(self) -> str:
+        """Return for print(PredictionEnsemble)."""
         if XR_OPTIONS["display_style"] == "html":
             return _display_metadata_html(self)
         else:
             return _display_metadata(self)
 
     def __len__(self) -> int:
-        """Number of all variables in all PredictionEnsemble._datasets."""
+        """Return number of all variables in ``PredictionEnsemble``."""
         return len(self.data_vars)
 
     def __iter__(self) -> Iterator[Hashable]:
-        """Iterate over underlying xr.Datasets for initialized, uninitialized,
-        observations or initialized, uninitialized, control."""
+        """Iterate over underlying xarray.Datasets."""
         return iter(self._datasets.values())
 
     def __delitem__(self, key: Hashable) -> None:
-        """Remove a variable from this PredictionEnsemble."""
+        """Remove a variable from ``PredictionEnsemble``."""
         del self._datasets["initialized"][key]
         for ds in self._datasets.values():
             if isinstance(ds, xr.Dataset):
@@ -339,8 +385,10 @@ def __delitem__(self, key: Hashable) -> None:
                     del ds[key]
 
     def __contains__(self, key: Hashable) -> bool:
-        """The 'in' operator will return true or false depending on whether
-        'key' is an array in all PredictionEnsemble._datasets or not.
+        """Check variable in ``PredictionEnsemble``.
+
+        The ``"in"`` operator will return true or false depending on whether
+        ``"key"`` is in any PredictionEnsemble._datasets.
         """
         contained = True
         for ds in self._datasets.values():
@@ -350,12 +398,18 @@ def __contains__(self, key: Hashable) -> bool:
         return contained
 
     def equals(self, other: Union["PredictionEnsemble", Any]) -> bool:
-        """Two PredictionEnsembles are equal if they have matching variables and
+        """Check if ``PredictionEnsemble`` is equal to other ``PredictionEnsemble``.
+
+        Two ``PredictionEnsembles`` are equal if they have matching variables and
         coordinates, all of which are equal.
-        PredictionEnsembles can still be equal (like pandas objects) if they have NaN
+        ``PredictionEnsembles`` can still be equal (like pandas objects) if they have NaN
         values in the same locations.
         This method is necessary because `v1 == v2` for ``PredictionEnsembles``
-        does element-wise comparisons (like numpy.ndarrays)."""
+        does element-wise comparisons (like numpy.ndarrays).
+
+        See also:
+            :py:meth:`~xarray.Dataset.equals`
+        """
         if not isinstance(other, PredictionEnsemble):
             return False
         if other.kind != self.kind:
@@ -371,8 +425,15 @@ def equals(self, other: Union["PredictionEnsemble", Any]) -> bool:
         return equal
 
     def identical(self, other: Union["PredictionEnsemble", Any]) -> bool:
-        """Like equals, but also checks all dataset attributes and the
-        attributes on all variables and coordinates."""
+        """
+        Check if ``PredictionEnsemble`` is identical to other ``PredictionEnsemble``.
+
+        Like ``equals``, but also checks all dataset attributes and the
+        attributes on all variables and coordinates.
+
+        See also:
+            :py:meth:`~xarray.Dataset.identical`
+        """
         if not isinstance(other, PredictionEnsemble):
             return False
         if other.kind != self.kind:
@@ -394,17 +455,17 @@ def plot(
         cmap: Optional[str] = None,
         x: str = "time",
     ) -> "plt.Axes":
-        """Plot datasets from PredictionEnsemble.
+        """Plot datasets from ``PredictionEnsemble``.
 
         Args:
-            variable (str or None): `variable` to show. Defaults to first in data_vars.
-            ax (plt.axes): Axis to use in plotting. By default, creates a new axis.
-            show_members (bool): whether to display all members individually.
+            variable: `variable` to show. Defaults to first in data_vars.
+            ax: Axis to use in plotting. By default, creates a new axis.
+            show_members: whether to display all members individually.
                 Defaults to False.
-            cmap (str): Name of matplotlib-recognized colorbar. Defaults to `viridis`
+            cmap: Name of matplotlib-recognized colorbar. Defaults to `viridis`
                 for :py:class:`~climpred.classes.HindcastEnsemble`
                 and ``tab10`` for :py:class:`~climpred.classes.PerfectModelEnsemble`.
-            x (str): Name of x-axis. Use ``'time'`` to show observations and
+            x: Name of x-axis. Use ``'time'`` to show observations and
                 hindcasts in real time. Use ``'init'`` to see hindcasts as
                 initializations. For ``x='init'`` only initialized is shown and only
                 works for :py:class:`~climpred.classes.HindcastEnsemble`.
@@ -450,7 +511,7 @@ def _math(
         other: mathType,
         operator: str,
     ):
-        """Helper function for __add__, __sub__, __mul__, __truediv__.
+        """Apply the math operation behind __add__, __sub__, __mul__, __truediv__.
 
         Allows math operations with type:
             - int
@@ -529,23 +590,26 @@ def div(a, b):
             return self._apply_func(_operator, other)
 
     def __add__(self, other: mathType) -> "PredictionEnsemble":
+        """Add ``other`` to ``PredictionEnsemble``."""
         return self._math(other, operator="add")
 
     def __sub__(self, other: mathType) -> "PredictionEnsemble":
+        """Subtract ``other`` from ``PredictionEnsemble``."""
         return self._math(other, operator="sub")
 
     def __mul__(self, other: mathType) -> "PredictionEnsemble":
+        """Multiply ``PredictionEnsemble`` by ``other``."""
         return self._math(other, operator="mul")
 
     def __truediv__(self, other: mathType) -> "PredictionEnsemble":
+        """Divide ``PredictionEnsemble`` by ``other``."""
         return self._math(other, operator="div")
 
     def __getitem__(self, varlist: Union[str, List[str]]) -> "PredictionEnsemble":
-        """Allows subsetting data variable from PredictionEnsemble as from xr.Dataset.
+        """Allow subsetting variable(s) from ``PredictionEnsemble`` as from xr.Dataset.
 
         Args:
-            * varlist (list of str, str): list of names or name of data variable(s) to
-                subselect
+            * varlist: list of names or name of data variable(s) to subselect
         """
         if isinstance(varlist, str):
             varlist = [varlist]
@@ -564,22 +628,21 @@ def sel_vars(ds, varlist):
     def __getattr__(
         self, name: str
     ) -> Callable:  # -> Callable[[VarArg(Any), KwArg(Any)], Any]
-        """Allows for xarray methods to be applied to our prediction objects.
+        """Allow for ``xarray`` methods to be applied to our prediction objects.
 
         Args:
-            * name: Function, e.g., .isel() or .sum().
+            * name: str of xarray function, e.g., ``.isel()`` or ``.sum()``.
         """
 
         def wrapper(*args, **kwargs):
-            """Applies arbitrary function to all datasets in the PredictionEnsemble
-            object.
+            """Apply arbitrary function to all datasets in ``PredictionEnsemble``.
 
             Got this from: https://stackoverflow.com/questions/41919499/
             how-to-call-undefined-methods-sequentially-in-python-class
             """
 
             def _apply_xr_func(v, name, *args, **kwargs):
-                """Handles exceptions in our dictionary comprehension.
+                """Handle exceptions in our dictionary comprehension.
 
                 In other words, this will skip applying the arbitrary function
                 to a sub-dataset if a ValueError is thrown. This specifically
@@ -639,8 +702,7 @@ def _apply_xr_func(v, name, *args, **kwargs):
 
     @classmethod
     def _construct_direct(cls, datasets, kind):
-        """Shortcut around __init__ for internal use to avoid inplace
-        operations.
+        """Shortcut around __init__ for internal use to avoid inplace operations.
 
         Pulled from xarrray Dataset class.
         https://github.com/pydata/xarray/blob/master/xarray/core/dataset.py
@@ -652,9 +714,9 @@ def _construct_direct(cls, datasets, kind):
         return obj
 
     def _apply_func(
-        self, func: Callable[..., Any], *args: Any, **kwargs: Any
+        self, func: Callable[..., xr.Dataset], *args: Any, **kwargs: Any
     ) -> "PredictionEnsemble":
-        """Apply a function to all datasets in a `PredictionEnsemble`."""
+        """Apply a function to all datasets in a ``PredictionEnsemble``."""
         # Create temporary copy to modify to avoid inplace operation.
         # isnt that essentially the same as .map(func)?
         datasets = self._datasets.copy()
@@ -685,32 +747,31 @@ def _apply_func(
         return self._construct_direct(datasets, kind=self.kind)
 
     def get_initialized(self) -> xr.Dataset:
-        """Returns the xarray dataset for the initialized ensemble."""
+        """Return the xarray.Dataset for the initialized ensemble."""
         return self._datasets["initialized"]
 
     def get_uninitialized(self) -> xr.Dataset:
-        """Returns the xarray dataset for the uninitialized ensemble."""
+        """Return the xarray.Dataset for the uninitialized ensemble."""
         return self._datasets["uninitialized"]
 
     def smooth(
         self,
         smooth_kws: Optional[Union[str, Dict[str, int]]] = None,
         how: str = "mean",
-        **xesmf_kwargs: Any,
+        **xesmf_kwargs: str,
     ):
-        """Smooth all entries of PredictionEnsemble in the same manner to be
-        able to still calculate prediction skill afterwards.
+        """Smooth in space/aggregate in time ``PredictionEnsemble``.
 
         Args:
-            smooth_kws (dict or str): Dictionary to specify the dims to
+            smooth_kws: Dictionary to specify the dims to
                 smooth compatible with
                 :py:func:`~climpred.smoothing.spatial_smoothing_xesmf` or
                 :py:func:`~climpred.smoothing.temporal_smoothing`.
                 Shortcut for Goddard et al. 2013 recommendations:
                 'goddard2013'. Defaults to None.
-            how (str): how to smooth temporally. From ['mean','sum']. Defaults to
+            how: how to smooth temporally. From ['mean','sum']. Defaults to
                 'mean'.
-            **xesmf_kwargs (args): kwargs passed to
+            **xesmf_kwargs: kwargs passed to
                 :py:func:`~climpred.smoothing.spatial_smoothing_xesmf`
 
         Examples:
@@ -831,9 +892,9 @@ def remove_seasonality(
         """Remove seasonal cycle from all climpred datasets.
 
         Args:
-            initialized_dim (str): dimension name of initialized dataset to calculate
+            initialized_dim: dimension name of initialized dataset to calculate
                 climatology over. Defaults to "init".
-            seasonality (str): Seasonality to be removed. Choose from:
+            seasonality: Seasonality to be removed. Choose from:
                 ["season", "month", "dayofyear"]. Defaults to OPTIONS["seasonality"].
 
         Examples:
@@ -849,7 +910,7 @@ def remove_seasonality(
         """
 
         def _remove_seasonality(ds, initialized_dim="init", seasonality=None):
-            """Remove the seasonal cycle from the data"""
+            """Remove the seasonal cycle from the data."""
             if ds is {}:
                 return {}
             if seasonality is None:
@@ -870,9 +931,12 @@ def _remove_seasonality(ds, initialized_dim="init", seasonality=None):
         )
 
     def _warn_if_chunked_along_init_member_time(self) -> None:
-        """Warn upon instantiation when ``CLIMPRED_DIMS`` except ``lead`` are chunked
-        with more than one chunk to show how to circumvent ``xskillscore`` chunking
-        ``ValueError``."""
+        """
+        Warn when ``CLIMPRED_DIMS`` except ``lead`` are wrongly chunked.
+
+        Warn when they have more than one chunk and show how to circumvent the
+        ``xskillscore`` chunking ``ValueError``.
+        """
         suggest_one_chunk = []
         for d in self.chunks:
             if d in ["time", "init", "member"]:
@@ -919,23 +983,21 @@ def _warn_if_chunked_along_init_member_time(self) -> None:
 
 
 class PerfectModelEnsemble(PredictionEnsemble):
-    """An object for "perfect model" climate prediction ensembles.
+    """An object for "perfect model" prediction ensembles.
 
-    `PerfectModelEnsemble` is a sub-class of `PredictionEnsemble`. It tracks
+    ``PerfectModelEnsemble`` is a sub-class of ``PredictionEnsemble``. It tracks
     the control run used to initialize the ensemble for easy computations,
     bootstrapping, etc.
 
-    This object is built on `xarray` and thus requires the input object to
-    be an `xarray` Dataset or DataArray.
+    This object is built on ``xarray`` and thus requires the input object to
+    be an xarray.Dataset or xarray.DataArray.
     """
 
-    def __init__(self, xobj: Union[xr.DataArray, xr.Dataset]) -> None:
-        """Create a `PerfectModelEnsemble` object by inputting output from the
-        control run in `xarray` format.
+    def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
+        """Create a ``PerfectModelEnsemble`` object.
 
         Args:
-          xobj (xarray object):
-            decadal prediction ensemble output.
+          initialized: prediction ensemble output.
 
         Attributes:
             control: Dictionary of control run associated with the initialized
@@ -943,24 +1005,22 @@ def __init__(self, xobj: Union[xr.DataArray, xr.Dataset]) -> None:
             uninitialized: Dictionary of uninitialized run that is
                            bootstrapped from the initialized run.
         """
-
-        super().__init__(xobj)
+        super().__init__(initialized)
         # Reserve sub-dictionary for the control simulation.
         self._datasets.update({"control": {}})
         self.kind = "perfect"
 
     def _apply_climpred_function(
         self,
-        func: Callable[..., Any],
+        func: Callable[..., xr.Dataset],
         input_dict: Dict[str, Any],
         **kwargs: Any,
     ) -> Union["PerfectModelEnsemble", xr.Dataset]:
-        """Helper function to loop through observations and apply an arbitrary climpred
-        function.
+        """Loop through observations and apply an arbitrary climpred function.
 
         Args:
-            func (function): climpred function to apply to object.
-            input_dict (dict): dictionary with the following things:
+            func: climpred function to apply to object.
+            input_dict: dictionary with the following things:
                 * ensemble: initialized or uninitialized ensemble.
                 * control: control dictionary from HindcastEnsemble.
                 * init (bool): True if the initialized ensemble, False if uninitialized.
@@ -975,8 +1035,7 @@ def _apply_climpred_function(
         return func(ensemble, control, **kwargs)
 
     def _vars_to_drop(self, init: bool = True) -> Tuple[List[str], List[str]]:
-        """Returns list of variables to drop when comparing
-        initialized/uninitialized to a control.
+        """Return list of variables to drop when comparing datasets.
 
         This is useful if the two products being compared do not share the same
         variables. I.e., if the control has ['SST'] and the initialized has
@@ -1005,36 +1064,35 @@ def _vars_to_drop(self, init: bool = True) -> Tuple[List[str], List[str]]:
             init_vars_to_drop, ctrl_vars_to_drop = [], []
         return init_vars_to_drop, ctrl_vars_to_drop
 
-    @is_xarray(1)
     def add_control(
-        self, xobj: Union[xr.DataArray, xr.Dataset]
+        self, control: Union[xr.DataArray, xr.Dataset]
     ) -> "PerfectModelEnsemble":
-        """Add the control run that initialized the climate prediction
-        ensemble.
+        """Add the control run that initialized the prediction ensemble.
 
         Args:
-            xobj (xarray object): Dataset/DataArray of the control run.
+            control: control run.
         """
         # NOTE: These should all be decorators.
-        if isinstance(xobj, xr.DataArray):
-            xobj = xobj.to_dataset()
-        match_initialized_dims(self._datasets["initialized"], xobj)
-        match_initialized_vars(self._datasets["initialized"], xobj)
+        if isinstance(control, xr.DataArray):
+            control = control.to_dataset()
+        match_initialized_dims(self._datasets["initialized"], control)
+        match_initialized_vars(self._datasets["initialized"], control)
         # Check that init is int, cftime, or datetime; convert ints or cftime to
         # datetime.
-        xobj = convert_time_index(xobj, "time", "xobj[time]")
+        control = convert_time_index(control, "time", "control[time]")
         # Check that converted/original cftime calendar is the same as the
         # initialized calendar to avoid any alignment errors.
-        match_calendars(self._datasets["initialized"], xobj, kind2="control")
+        match_calendars(self._datasets["initialized"], control, kind2="control")
         datasets = self._datasets.copy()
-        datasets.update({"control": xobj})
+        datasets.update({"control": control})
         return self._construct_direct(datasets, kind="perfect")
 
     def generate_uninitialized(self) -> "PerfectModelEnsemble":
         """Generate an uninitialized ensemble by resampling from the control simulation.
 
         Returns:
-            PerfectModelEnsemble with resampled (uninitialized) ensemble from control
+            ``PerfectModelEnsemble`` with resampled (uninitialized) ensemble from
+            control
         """
         has_dataset(
             self._datasets["control"], "control", "generate an uninitialized ensemble."
@@ -1049,7 +1107,7 @@ def generate_uninitialized(self) -> "PerfectModelEnsemble":
         return self._construct_direct(datasets, kind="perfect")
 
     def get_control(self) -> xr.Dataset:
-        """Returns the control as an xarray dataset."""
+        """Return the control as an xarray.Dataset."""
         return self._datasets["control"]
 
     def verify(
@@ -1061,26 +1119,23 @@ def verify(
         groupby: groupbyType = None,
         **metric_kwargs: metric_kwargsType,
     ) -> xr.Dataset:
-        """Verify initialized predictions against a configuration of other ensemble members.
+        """Verify initialized predictions against a configuration of its members.
 
         .. note::
             The configuration of the other ensemble members is based off of the
             ``comparison`` keyword argument.
 
         Args:
-            metric (str, :py:class:`~climpred.metrics.Metric`): Metric to apply in the
-                comparison. See `metrics </metrics.html>`_.
-            comparison (str, :py:class:`~climpred.comparisons.Comparison`): How to
-                compare the initialized prediction ensemble with itself, see
-                `comparisons </comparisons.html>`_.
-            dim (str, list of str): Dimension(s) over which to apply ``metric``.
+            metric: Metric to apply for verification, see `metrics <metrics.html>`_
+            comparison: How to compare the initialized prediction ensemble with itself,
+                see `comparisons </comparisons.html>`_.
+            dim: Dimension(s) over which to apply ``metric``.
                 ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                 ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                 is probabilistic but should not contain ``member`` when
                 ``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
                 other than ``lead`` are reduced.
-            reference (str, list of str): Type of reference forecasts with which to
-                verify against.
+            reference: Type of reference forecasts with which to verify against.
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
                 Defaults to ``None`` meaning no reference.
                 For ``persistence``, choose between
@@ -1089,8 +1144,8 @@ def verify(
                 ``set_options(PerfectModel_persistence_from_initialized_lead_0)=True``
                 using
                 :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
-            groupby (str, xr.DataArray): group ``init`` before passing ``initialized`` to ``verify``.
-            **metric_kwargs (optional): Arguments passed to ``metric``.
+            groupby: group ``init`` before passing ``initialized`` to ``verify``.
+            **metric_kwargs: Arguments passed to ``metric``.
 
         Returns:
             Dataset with dimension skill reduced by dim containing initialized and
@@ -1220,18 +1275,16 @@ def _compute_uninitialized(
             ``comparison`` keyword argument.
 
         Args:
-            metric (str, :py:class:`~climpred.metrics.Metric`): Metric to apply in the
-                comparison. See `metrics </metrics.html>`_.
-            comparison (str, :py:class:`~climpred.comparisons.Comparison`): How to
-                compare the uninitialized against itself, see
+            metric: Metric to apply for verification, see `metrics <metrics.html>`_
+            comparison: How to compare the uninitialized against itself, see
                 `comparisons </comparisons.html>`_.
-            dim (str, list of str): Dimension(s) over which to apply metric.
+            dim: Dimension(s) over which to apply metric.
                 ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                 ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                 is probabilistic but should not contain ``member`` when
                 ``comparison=e2c``. Defaults to ``None``, meaning that all dimensions
                 other than ``lead`` are reduced.
-            **metric_kwargs (optional): Arguments passed to ``metric``.
+            **metric_kwargs: Arguments passed to ``metric``.
 
         Returns:
             Dataset with dimension skill containing initialized and reference skill(s).
@@ -1272,19 +1325,21 @@ def _compute_persistence(
         """Verify a simple persistence forecast of the control run against itself.
 
         Note: uses :py:func:`~climpred.reference.compute_persistence_from_first_lead`
-        if ``set_options("PerfectModel_persistence_from_initialized_lead_0"=True)`` else
+        if ``set_options(PerfectModel_persistence_from_initialized_lead_0=True)`` else
         :py:func:`~climpred.reference.compute_persistence`.
 
         Args:
-            metric (str, :py:class:`~climpred.metrics.Metric`): Metric to use when
-            verifying skill of the persistence forecast. See `metrics </metrics.html>`_.
-            dim (str, list of str): Dimension(s) over which to apply metric.
+            metric: Metric to apply for verification, see `metrics <metrics.html>`_
+            comparison: How to compare the persistence against itself, see
+                `comparisons </comparisons.html>`_. Only valid if
+                ``PerfectModel_persistence_from_initialized_lead_0=True``.
+            dim: Dimension(s) over which to apply metric.
                 ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                 ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                 is probabilistic but should not contain ``member`` when
                 ``comparison=e2c``. Defaults to ``None``, meaning that all dimensions
                 other than ``lead`` are reduced.
-            **metric_kwargs (optional): Arguments passed to ``metric``.
+            **metric_kwargs: Arguments passed to ``metric``.
 
         Returns:
             Dataset of persistence forecast results.
@@ -1312,7 +1367,7 @@ def _compute_persistence(
                 warnings.warn(
                     "You may also calculate persistence based on "
                     "``initialized.isel(lead=0)`` by changing "
-                    " ``set_options(PerfectModel_persistence_from_initialized_lead_0=True)``."
+                    " ``set_options(PerfectModel_persistence_from_initialized_lead_0=True)``."  # noqa: E501
                 )
             has_dataset(
                 self._datasets["control"], "control", "compute a persistence forecast"
@@ -1343,18 +1398,19 @@ def _compute_climatology(
         dim: dimType = None,
         **metric_kwargs: metric_kwargsType,
     ) -> xr.Dataset:
-        """Verify a climatology forecast of the control run against itself.
+        """Verify a climatology forecast.
 
         Args:
-            metric (str, :py:class:`~climpred.metrics.Metric`): Metric to use when
-            verifying skill of the persistence forecast. See `metrics </metrics.html>`_.
-            dim (str, list of str): Dimension(s) over which to apply metric.
+            metric: Metric to apply for verification, see `metrics <metrics.html>`_
+            comparison: How to compare the climatology against itself, see
+                `comparisons </comparisons.html>`_
+            dim: Dimension(s) over which to apply metric.
                 ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                 ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                 is probabilistic but should not contain ``member`` when
                 ``comparison=e2c``. Defaults to ``None``, meaning that all dimensions
                 other than ``lead`` are reduced.
-            **metric_kwargs (optional): Arguments passed to ``metric``.
+            **metric_kwargs: Arguments passed to ``metric``.
 
         Returns:
             Dataset of persistence forecast results.
@@ -1402,18 +1458,16 @@ def bootstrap(
         """Bootstrap with replacement according to Goddard et al. 2013.
 
         Args:
-            metric (str, :py:class:`~climpred.metrics.Metric`): Metric to verify
-                bootstrapped skill, see `metrics </metrics.html>`_.
-            comparison (str, :py:class:`~climpred.comparisons.Comparison`): Comparison
-                passed to verify, see `comparisons </comparisons.html>`_.
-            dim (str, list of str): Dimension(s) over which to apply metric.
+            metric: Metric to apply for verification, see `metrics <metrics.html>`_
+            comparison: How to compare the forecast against itself, see
+                `comparisons </comparisons.html>`_
+            dim: Dimension(s) over which to apply metric.
                 ``dim`` is passed on to xskillscore.{metric} and includes xskillscore's
                 ``member_dim``. ``dim`` should contain ``member`` when ``comparison``
                 is probabilistic but should not contain ``member`` when
                 ``comparison=e2c``. Defaults to ``None`` meaning that all dimensions
                 other than ``lead`` are reduced.
-            reference (str, list of str): Type of reference forecasts with which to
-                verify against.
+            reference: Type of reference forecasts with which to verify against.
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
                 Defaults to ``None`` meaning no reference.
                 If None or empty, returns no p value.
@@ -1423,21 +1477,20 @@ def bootstrap(
                 ``set_options(PerfectModel_persistence_from_initialized_lead_0)=True``
                 using
                 :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
-            iterations (int): Number of resampling iterations for bootstrapping with
+            iterations: Number of resampling iterations for bootstrapping with
                 replacement. Recommended >= 500.
-            resample_dim (str or list of str): dimension to resample from.
-                Defaults to `"member"``.
+            resample_dim: dimension to resample from. Defaults to ``"member"``.
 
                 - 'member': select a different set of members from hind
                 - 'init': select a different set of initializations from hind
 
-            sig (int, default 95): Significance level in percent for deciding whether
+            sig: Significance level in percent for deciding whether
                 uninitialized and persistence beat initialized skill.
-            pers_sig (int): If not ``None``, the separate significance level for
+            pers_sig: If not ``None``, the separate significance level for
                 persistence. Defaults to ``None``, or the same significance as ``sig``.
-            groupby (str, xr.DataArray): group ``init`` before passing ``initialized``
+            groupby: group ``init`` before passing ``initialized``
                 to ``bootstrap``.
-            **metric_kwargs (optional): arguments passed to ``metric``.
+            **metric_kwargs: arguments passed to ``metric``.
 
         Returns:
             xr.Datasets: with dimensions ``results`` (holding ``verify skill``, ``p``,
@@ -1459,11 +1512,10 @@ def bootstrap(
                     bootstrapping with replacement.
 
         Reference:
-            * Goddard, L., A. Kumar, A. Solomon, D. Smith, G. Boer, P.
-              Gonzalez, V. Kharin, et al. “A Verification Framework for
-              Interannual-to-Decadal Predictions Experiments.” Climate
-              Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
-              https://doi.org/10/f4jjvf.
+            Goddard, L., A. Kumar, A. Solomon et al.
+            “A Verification Framework for Interannual-to-Decadal Predictions Experiments.”
+            Climate Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
+            https://doi.org/10/f4jjvf.
 
         Example:
             Calculate the Pearson's Anomaly Correlation ('acc') comparing every member
@@ -1562,24 +1614,21 @@ def bootstrap(
 
 
 class HindcastEnsemble(PredictionEnsemble):
-    """An object for climate prediction ensembles initialized by a data-like
-    product.
+    """An object for initialized prediction ensembles.
 
-    `HindcastEnsemble` is a sub-class of `PredictionEnsemble`. It tracks a single
+    ``HindcastEnsemble`` is a sub-class of ``PredictionEnsemble``. It tracks a
     verification dataset (i.e., observations) associated with the hindcast ensemble
     for easy computation across multiple variables.
 
-    This object is built on `xarray` and thus requires the input object to
-    be an `xarray` Dataset or DataArray.
+    This object is built on xarray.Dataset and thus requires the input object to
+    be an xarray.Dataset or xarray.DataArray.
     """
 
-    def __init__(self, xobj: Union[xr.DataArray, xr.Dataset]) -> None:
-        """Create a `HindcastEnsemble` object by inputting output from a
-        prediction ensemble in `xarray` format.
+    def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
+        """Create ``HindcastEnsemble`` from initialized prediction ensemble output.
 
         Args:
-          xobj (xarray object):
-            decadal prediction ensemble output.
+          initialized: initialized prediction ensemble output.
 
         Attributes:
           observations: Dictionary of verification data to associate with the decadal
@@ -1587,19 +1636,18 @@ def __init__(self, xobj: Union[xr.DataArray, xr.Dataset]) -> None:
           uninitialized: Dictionary of companion (or bootstrapped)
               uninitialized ensemble run.
         """
-        super().__init__(xobj)
+        super().__init__(initialized)
         self._datasets.update({"observations": {}})
         self.kind = "hindcast"
 
     def _apply_climpred_function(
-        self, func: Callable[..., Any], init: bool, **kwargs: Any
+        self, func: Callable[..., xr.Dataset], init: bool, **kwargs: Any
     ) -> Union["HindcastEnsemble", xr.Dataset]:
-        """Helper function to loop through verification data and apply an arbitrary
-        climpred function.
+        """Loop through verification data and apply an arbitrary climpred function.
 
         Args:
-            func (function): climpred function to apply to object.
-            init (bool): Whether or not it's the initialized ensemble.
+            func: climpred function to apply to object.
+            init: Whether or not it's the initialized ensemble.
         """
         # fixme: essentially the same as map?
         hind = self._datasets["initialized"]
@@ -1608,8 +1656,9 @@ def _apply_climpred_function(
         return func(hind.drop_vars(drop_init), verif.drop_vars(drop_obs), **kwargs)
 
     def _vars_to_drop(self, init: bool = True) -> Tuple[List[str], List[str]]:
-        """Returns list of variables to drop when comparing
-        initialized/uninitialized to observations.
+        """Return list of variables to drop.
+
+        Used when comparing initialized/uninitialized to observations.
 
         This is useful if the two products being compared do not share the same
         variables. I.e., if the observations have ['SST'] and the initialized has
@@ -1617,7 +1666,7 @@ def _vars_to_drop(self, init: bool = True) -> Tuple[List[str], List[str]]:
         from the initialized.
 
         Args:
-          init (bool, default True):
+          init:
             If ``True``, check variables on the initialized.
             If ``False``, check variables on the uninitialized.
 
@@ -1636,83 +1685,81 @@ def _vars_to_drop(self, init: bool = True) -> Tuple[List[str], List[str]]:
         obs_vars_to_drop = list(set(obs_vars) - set(init_vars))
         return init_vars_to_drop, obs_vars_to_drop
 
-    @is_xarray(1)
     def add_observations(
-        self, xobj: Union[xr.DataArray, xr.Dataset]
+        self, obs: Union[xr.DataArray, xr.Dataset]
     ) -> "HindcastEnsemble":
         """Add verification data against which to verify the initialized ensemble.
 
+        Same as :py:meth:`~climpred.classes.HindcastEnsemble.add_verification`.
+
         Args:
-            xobj (xarray object): Dataset/DataArray to append to the
-                ``HindcastEnsemble`` object.
+            obs: observations added to ``HindcastEnsemble``.
         """
-        if isinstance(xobj, xr.DataArray):
-            xobj = xobj.to_dataset()
-        match_initialized_dims(self._datasets["initialized"], xobj)
-        match_initialized_vars(self._datasets["initialized"], xobj)
+        if isinstance(obs, xr.DataArray):
+            obs = obs.to_dataset()
+        match_initialized_dims(self._datasets["initialized"], obs)
+        match_initialized_vars(self._datasets["initialized"], obs)
         # Check that time is int, cftime, or datetime; convert ints or cftime to
         # datetime.
-        xobj = convert_time_index(xobj, "time", "xobj[time]")
+        obs = convert_time_index(obs, "time", "obs[time]")
         # Check that converted/original cftime calendar is the same as the
         # initialized calendar to avoid any alignment errors.
-        match_calendars(self._datasets["initialized"], xobj)
+        match_calendars(self._datasets["initialized"], obs)
         datasets = self._datasets.copy()
-        datasets.update({"observations": xobj})
+        datasets.update({"observations": obs})
         return self._construct_direct(datasets, kind="hindcast")
 
     def add_verification(
-        self, xobj: Union[xr.DataArray, xr.Dataset]
+        self, verif: Union[xr.DataArray, xr.Dataset]
     ) -> "HindcastEnsemble":
         """Add verification data against which to verify the initialized ensemble.
-        Same as add_observations()
+
+        Same as :py:meth:`~climpred.classes.HindcastEnsemble.add_observations`.
 
         Args:
-            xobj (xarray object): Dataset/DataArray to append to the
-                ``HindcastEnsemble`` object.
+            verif: verification added to ``HindcastEnsemble``.
         """
-        return self.add_observations(xobj)
+        return self.add_observations(verif)
 
-    @is_xarray(1)
     def add_uninitialized(
-        self, xobj: Union[xr.DataArray, xr.Dataset]
+        self, uninit: Union[xr.DataArray, xr.Dataset]
     ) -> "HindcastEnsemble":
         """Add a companion uninitialized ensemble for comparison to verification data.
 
         Args:
-            xobj (xarray object): Dataset/DataArray of the uninitialzed
-                                  ensemble.
+            uninit: uninitialized ensemble.
         """
-        if isinstance(xobj, xr.DataArray):
-            xobj = xobj.to_dataset()
-        match_initialized_dims(self._datasets["initialized"], xobj, uninitialized=True)
-        match_initialized_vars(self._datasets["initialized"], xobj)
+        if isinstance(uninit, xr.DataArray):
+            uninit = uninit.to_dataset()
+        match_initialized_dims(
+            self._datasets["initialized"], uninit, uninitialized=True
+        )
+        match_initialized_vars(self._datasets["initialized"], uninit)
         # Check that init is int, cftime, or datetime; convert ints or cftime to
         # datetime.
-        xobj = convert_time_index(xobj, "time", "xobj[time]")
+        uninit = convert_time_index(uninit, "time", "uninit[time]")
         # Check that converted/original cftime calendar is the same as the
         # initialized calendar to avoid any alignment errors.
-        match_calendars(self._datasets["initialized"], xobj, kind2="uninitialized")
+        match_calendars(self._datasets["initialized"], uninit, kind2="uninitialized")
         datasets = self._datasets.copy()
-        datasets.update({"uninitialized": xobj})
+        datasets.update({"uninitialized": uninit})
         return self._construct_direct(datasets, kind="hindcast")
 
     def get_observations(self) -> xr.Dataset:
-        """Returns xarray Datasets of the observations/verification data.
+        """Return xarray.Dataset of the observations/verification data.
 
         Returns:
-            ``xarray`` Dataset of observations.
+            observations.
         """
         return self._datasets["observations"]
 
     def generate_uninitialized(
         self, resample_dim: List[str] = ["init", "member"]
     ) -> "HindcastEnsemble":
-        """Generate an uninitialized ensemble by resampling from the
-        initialized prediction ensemble.
+        """Generate ``uninitialized`` by resampling from ``initialized``.
 
         Args:
-            resample_dim : list of str
-                dimension to resample from. Must contain "init".
+            resample_dim: dimension to resample from. Must contain "init".
 
         Returns:
             resampled uninitialized ensemble added to HindcastEnsemble
@@ -1754,39 +1801,33 @@ def plot_alignment(
         **plot_kwargs: Any,
     ) -> Any:
         """
-        Plot ``initialized`` ``valid_time`` where matching
-        ``verification``/``observation`` ``time`` depending on ``alignment``.
-        Plots ``days since reference date`` controlled by ``date2num_units``.
-        ``NaN`` / white space shows where no verification is done.
+        Plot ``initialized`` ``valid_time`` where matching ``verification`` ``time``.
 
-        Args:
-            alignment (str or list of str): which inits or verification times should be aligned?
+        Depends on ``alignment``. Plots ``days since reference date`` controlled by
+        ``date2num_units``. ``NaN`` / white space shows where no verification is done.
 
-                - 'maximize': maximize the degrees of freedom by slicing ``hind`` and
-                  ``verif`` to a common time frame at each lead.
+        Args:
+            alignment: which inits or verification times should be aligned?
 
-                - 'same_inits': slice to a common init frame prior to computing
-                  metric. This philosophy follows the thought that each lead should be
-                  based on the same set of initializations.
+                - ``'maximize'``: maximize the degrees of freedom by slicing
+                  ``initialized`` and ``verif`` to a common time frame at each lead.
+                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
+                  metric. This philosophy follows the thought that each lead should be
+                  based on the same set of initializations.
 
-                - 'same_verif': slice to a common/consistent verification time frame
+                - ``"same_verif"``: slice to a common/consistent verification time frame
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
 
-                - None defaults to the three above
-            reference (str, list of str): Type of reference forecasts with which to
-                verify against.
+                - ``None`` defaults to the three above.
+            reference: Type of reference forecasts with which to verify against.
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
                 Defaults to ``None`` meaning no reference.
-            date2num_units : str
-                passed to cftime.date2num as units
-            return_xr : bool
-                see return
-            cmap : str
-                color palette
-            edgecolors : str
-                color of the edges in the plot
-            **plot_kwargs (optional): arguments passed to ``plot``.
+            date2num_units: passed to cftime.date2num as units
+            return_xr: if ``True``, return ``xarray.DataArray`` instead of plotting.
+            cmap: color palette
+            edgecolors: color of the edges in the plot
+            **plot_kwargs: arguments passed to ``plot``.
 
         Return:
             xarray.DataArray if return_xr else plot
@@ -1880,42 +1921,39 @@ def verify(
         groupby: groupbyType = None,
         **metric_kwargs: metric_kwargsType,
     ) -> xr.Dataset:
-        """Verifies the initialized ensemble against observations.
+        """Verify the initialized ensemble against observations.
 
         .. note::
             This will automatically verify against all shared variables
             between the initialized ensemble and observations/verification data.
 
         Args:
-            metric (str, :py:class:`~climpred.metrics.Metric`): Metric to apply for
-                verification. see `metrics </metrics.html>`_.
-            comparison (str, :py:class:`~climpred.comparisons.Comparison`): How to
-                compare to the observations/verification data. See
-                `comparisons </comparisons.html>`_.
-            dim (str, list of str): Dimension(s) to apply metric over. ``dim`` is passed
+            metric: Metric to apply for verification, see `metrics <metrics.html>`_
+            comparison: How to compare to the observations/verification data.
+                See `comparisons </comparisons.html>`_.
+            dim: Dimension(s) to apply metric over. ``dim`` is passed
                 on to xskillscore.{metric} and includes xskillscore's ``member_dim``.
                 ``dim`` should contain ``member`` when ``comparison`` is probabilistic
                 but should not contain ``member`` when ``comparison=e2o``. Defaults to
                 ``None`` meaning that all dimensions other than ``lead`` are reduced.
-            alignment (str): which inits or verification times should be aligned?
+            alignment: which inits or verification times should be aligned?
 
-                - 'maximize': maximize the degrees of freedom by slicing ``hind`` and
+                - ``'maximize'``: maximize the degrees of freedom by slicing ``initialized`` and
                   ``verif`` to a common time frame at each lead.
 
-                - 'same_inits': slice to a common init frame prior to computing
+                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                   metric. This philosophy follows the thought that each lead should be
                   based on the same set of initializations.
 
-                - 'same_verif': slice to a common/consistent verification time frame
+                - ``"same_verif"``: slice to a common/consistent verification time frame
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
 
-            reference (str, list of str): Type of reference forecasts with which to
-                verify against.
+            reference: Type of reference forecasts with which to verify against.
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
                 Defaults to ``None`` meaning no reference.
-            groupby (str): group ``init`` before passing ``initialized`` to ``verify``.
-            **metric_kwargs (optional): arguments passed to ``metric``.
+            groupby: group ``init`` before passing ``initialized`` to ``verify``.
+            **metric_kwargs: arguments passed to ``metric``.
 
         Returns:
             Dataset with dimension skill reduced by dim containing initialized and
@@ -2158,47 +2196,43 @@ def bootstrap(
         """Bootstrap with replacement according to Goddard et al. 2013.
 
         Args:
-            metric (str, :py:class:`~climpred.metrics.Metric`): Metric to apply for
-                verification, see `metrics <metrics.html>`_.
-            comparison (str, :py:class:`~climpred.comparisons.Comparison`): How to
-                compare to the observations/verification data, see
-                `comparisons </comparisons.html>`_.
-            dim (str, list of str): dimension(s) to apply metric over. ``dim`` is passed
+            metric: Metric to apply for verification, see `metrics <metrics.html>`_
+            comparison: How to compare to the observations/verification data.
+                See `comparisons </comparisons.html>`_.
+            dim: Dimension(s) to apply metric over. ``dim`` is passed
                 on to xskillscore.{metric} and includes xskillscore's ``member_dim``.
                 ``dim`` should contain ``member`` when ``comparison`` is probabilistic
                 but should not contain ``member`` when ``comparison='e2o'``. Defaults to
                 ``None`` meaning that all dimensions other than ``lead`` are reduced.
-            reference (str, list of str): Type of reference forecasts with which to
-                verify against.
+            reference: Type of reference forecasts with which to verify against.
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
                 Defaults to ``None`` meaning no reference.
                 If None or empty, returns no p value.
-            alignment (str): which inits or verification times should be aligned?
+            alignment: which inits or verification times should be aligned?
 
                 - 'maximize': maximize the degrees of freedom by slicing ``init`` and
                   ``verif`` to a common time frame at each lead.
 
-                - 'same_inits': slice to a common init frame prior to computing
+                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                   metric. This philosophy follows the thought that each lead should be
                   based on the same set of initializations.
 
-                - 'same_verif': slice to a common/consistent verification time frame
+                - ``"same_verif"``: slice to a common/consistent verification time frame
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
 
-            iterations (int): Number of resampling iterations for bootstrapping with
+            iterations: Number of resampling iterations for bootstrapping with
                 replacement. Recommended >= 500.
-            sig (int, default 95): Significance level in percent for deciding whether
+            sig: Significance level in percent for deciding whether
                 uninitialized and persistence beat initialized skill.
             resample_dim (str or list): dimension to resample from. default: 'member'.
 
                 - 'member': select a different set of members from hind
                 - 'init': select a different set of initializations from hind
 
-            pers_sig (int, default None):
-                If not None, the separate significance level for persistence.
-            groupby (str, xr.DataArray): group ``init`` before passing ``initialized`` to ``bootstrap``.
-            **metric_kwargs (optional): arguments passed to ``metric``.
+            pers_sig: If not None, the separate significance level for persistence.
+            groupby: group ``init`` before passing ``initialized`` to ``bootstrap``.
+            **metric_kwargs: arguments passed to ``metric``.
 
         Returns:
             xr.Datasets: with dimensions ``results`` (holding ``skill``, ``p``,
@@ -2335,37 +2369,37 @@ def remove_bias(
         cv: Union[bool, str] = False,
         **metric_kwargs: metric_kwargsType,
     ) -> "HindcastEnsemble":
-        """Calculate and remove bias from
-        :py:class:`~climpred.classes.HindcastEnsemble`.
+        """Remove bias from :py:class:`~climpred.classes.HindcastEnsemble`.
+
         Bias is grouped by ``seasonality`` set via
         :py:class:`~climpred.options.set_options`. When wrapping
         :py:class:`~xclim.sdba.adjustment.TrainAdjust` use ``group`` instead.
 
         Args:
-            alignment (str): which inits or verification times should be aligned?
+            alignment: which inits or verification times should be aligned?
 
-                - 'maximize': maximize the degrees of freedom by slicing ``hind`` and
+                - ``'maximize'``: maximize the degrees of freedom by slicing ``initialized`` and
                   ``verif`` to a common time frame at each lead.
 
-                - 'same_inits': slice to a common init frame prior to computing
+                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                   metric. This philosophy follows the thought that each lead should be
                   based on the same set of initializations.
 
-                - 'same_verif': slice to a common/consistent verification time frame
+                - ``"same_verif"``: slice to a common/consistent verification time frame
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
 
-            how (str): what kind of bias removal to perform.
+            how: what kind of bias removal to perform.
                 Defaults to 'additive_mean'. Select from:
 
-                - 'additive_mean': correcting the mean forecast additively
-                - 'multiplicative_mean': correcting the mean forecast multiplicatively
-                - 'multiplicative_std': correcting the standard deviation
+                - ``"additive_mean"``: correcting the mean forecast additively
+                - ``"multiplicative_mean"``: correcting the mean forecast multiplicatively
+                - ``"multiplicative_std"``: correcting the standard deviation
                     multiplicatively
-                - 'modified_quantile': `Reference <https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub>`_
-                - 'basic_quantile': `Reference <https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168>`_
-                - 'gamma_mapping': `Reference <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
-                - 'normal_mapping': `Reference <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
+                - ``"modified_quantile"``: `Reference <https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub>`_
+                - ``"basic_quantile"``: `Reference <https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168>`_
+                - ``"gamma_mapping"``: `Reference <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
+                - ``"normal_mapping"``: `Reference <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
                 - :py:class:`~xclim.sdba.adjustment.EmpiricalQuantileMapping`
                 - :py:class:`~xclim.sdba.adjustment.DetrendedQuantileMapping`
                 - :py:class:`~xclim.sdba.adjustment.PrincipalComponents`
@@ -2373,7 +2407,7 @@ def remove_bias(
                 - :py:class:`~xclim.sdba.adjustment.Scaling`
                 - :py:class:`~xclim.sdba.adjustment.LOCI`
 
-            train_test_split (str): How to separate train period to calculate the bias
+            train_test_split: How to separate train period to calculate the bias
                 and test period to apply bias correction to? For a detailed
                 description, see `Risbey et al. 2021 <http://www.nature.com/articles/s41467-021-23771-z>`_:  # noqa: E501
 
@@ -2385,11 +2419,11 @@ def remove_bias(
                     `init`, which is `left out <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation>`_
                     (set `cv='LOO'`).
 
-            train_init (xr.DataArray, slice): Define initializations for training
+            train_init: Define initializations for training
                 when ``alignment='same_inits/maximize'``.
-            train_time (xr.DataArray, slice): Define time for training
+            train_time: Define time for training
                 when ``alignment='same_verif'``.
-            cv (bool or str): Only relevant when `train_test_split='unfair-cv'`.
+            cv: Only relevant when `train_test_split='unfair-cv'`.
                 Defaults to False.
 
                 - True/'LOO': Calculate bias by `leaving given initialization out <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation>`_
@@ -2397,7 +2431,7 @@ def remove_bias(
                     is much faster and but yields similar skill with a large N of
                     initializations.
 
-            **metric_kwargs (dict): passed to ``xclim.sdba`` (including ``group``)
+            **metric_kwargs: passed to ``xclim.sdba`` (including ``group``)
                 or ``XBias_Correction``
 
         Returns:
@@ -2554,7 +2588,7 @@ def remove_bias(
                 " status."
             )
 
-        alignment = _check_valud_alignment(alignment)
+        alignment = _check_valid_alignment(alignment)
 
         if train_test_split in ["fair"]:
             if (
diff --git a/climpred/smoothing.py b/climpred/smoothing.py
index 999e1097e..cc180b2ca 100644
--- a/climpred/smoothing.py
+++ b/climpred/smoothing.py
@@ -15,13 +15,13 @@
 
 def spatial_smoothing_xesmf(
     ds: xr.Dataset,
-    d_lon_lat_kws: dict = {"lon": 5, "lat": 5},
+    d_lon_lat_kws: Dict[str, float] = {"lon": 5, "lat": 5},
     method: str = "bilinear",
     periodic: bool = False,
-    filename: str = None,
+    filename: Optional[str] = None,
     reuse_weights: bool = False,
-    tsmooth_kws: Optional[dict] = None,
-    how: str = None,
+    tsmooth_kws: Optional[Dict[str, int]] = None,
+    how: Optional[str] = None,
 ) -> xr.Dataset:
     """Quick regridding function.
 
@@ -139,9 +139,9 @@ def _regrid_it(da, d_lon, d_lat, **kwargs):
 
 def temporal_smoothing(
     ds: xr.Dataset,
-    tsmooth_kws: Optional[dict] = None,
+    tsmooth_kws: Optional[Dict[str, int]] = None,
     how: str = "mean",
-    d_lon_lat_kws: Optional[dict] = None,
+    d_lon_lat_kws: Optional[Dict[str, float]] = None,
 ) -> xr.Dataset:
     """Apply temporal smoothing by creating rolling smooth-timestep means.
 
@@ -150,7 +150,7 @@ def temporal_smoothing(
         tsmooth_kws: length of smoothing of timesteps.
             Defaults to ``{'time': 4}`` (see Goddard et al. 2013).
         how: aggregation type for smoothing. Allowed: ``["mean", "sum"]``.
-            Default: 'mean'
+            Default: ``"mean"``.
         d_lon_lat_kws: leads nowhere but consistent with ``spatial_smoothing_xesmf``.
 
     Returns:
@@ -240,9 +240,9 @@ def _set_center_coord(ds: xr.Dataset, dim: str = "lead") -> xr.Dataset:
 def smooth_goddard_2013(
     ds: xr.Dataset,
     tsmooth_kws: Dict[str, int] = {"lead": 4},
-    d_lon_lat_kws: Dict[str, int] = {"lon": 5, "lat": 5},
+    d_lon_lat_kws: Dict[str, float] = {"lon": 5, "lat": 5},
     how: str = "mean",
-    **xesmf_kwargs: Any,
+    **xesmf_kwargs: str,
 ) -> xr.Dataset:
     """Wrap to smooth as suggested by Goddard et al. 2013.
 
@@ -272,6 +272,6 @@ def smooth_goddard_2013(
     # first temporal smoothing
     ds_smoothed = temporal_smoothing(ds, tsmooth_kws=tsmooth_kws)
     ds_smoothed_regridded = spatial_smoothing_xesmf(
-        ds_smoothed, d_lon_lat_kws=d_lon_lat_kws, **xesmf_kwargs
+        ds_smoothed, d_lon_lat_kws=d_lon_lat_kws, **xesmf_kwargs  # type: ignore
     )
     return ds_smoothed_regridded
diff --git a/climpred/tutorial.py b/climpred/tutorial.py
index 62de3da75..c4f67a623 100644
--- a/climpred/tutorial.py
+++ b/climpred/tutorial.py
@@ -1,11 +1,16 @@
+"""Implement climpred.tutorial.load_dataset to load analysis ready datasets."""
+
 import hashlib
 import os as _os
 import urllib
+from os import PathLike
+from typing import Optional, Union
 from urllib.request import urlretrieve as _urlretrieve
 
+import xarray as xr
 from xarray.backends.api import open_dataset as _open_dataset
 
-_default_cache_dir = _os.sep.join(("~", ".climpred_data"))
+_default_cache_dir: str = _os.sep.join(("~", ".climpred_data"))
 
 aliases = [
     "MPI-control-1D",
@@ -89,8 +94,7 @@ def _file_md5_checksum(fname):
 
 
 def _get_datasets():
-    """Prints out available datasets for the user to load if no args are
-    given."""
+    """Print out available datasets for the user to load if no args are given."""
     for key in FILE_DESCRIPTIONS.keys():
         print(f"'{key}': {FILE_DESCRIPTIONS[key]}")
 
@@ -102,8 +106,7 @@ def _cache_all():
 
 
 def _initialize_proxy(proxy_dict):
-    """Opens a proxy for firewalled servers so that the downloads can go
-    through.
+    """Open a proxy for firewalled servers so that the downloads can go through.
 
     Args:
         proxy_dict (dictionary): Keys are either 'http' or 'https' and
@@ -118,37 +121,33 @@ def _initialize_proxy(proxy_dict):
 
 
 def load_dataset(
-    name=None,
-    cache=True,
-    cache_dir=_default_cache_dir,
-    github_url="https://github.com/pangeo-data/climpred-data",
-    branch="master",
-    extension=None,
-    proxy_dict=None,
+    name: Optional[str] = None,
+    cache: bool = True,
+    cache_dir: Union[str, PathLike[str]] = _default_cache_dir,
+    github_url: str = "https://github.com/pangeo-data/climpred-data",
+    branch: str = "master",
+    extension: Optional[str] = None,
+    proxy_dict: Optional[dict[str, str]] = None,
     **kws,
-):
+) -> xr.Dataset:
     """Load example data or a mask from an online repository.
 
     Args:
-        name: (str, default None) Name of the netcdf file containing the
-              dataset, without the .nc extension. If None, this function
+        name: Name of the netcdf file containing the
+              dataset, without the ``.nc`` extension. If ``None``, this function
               prints out the available datasets to import.
-        cache_dir: (str, optional) The directory in which to search
-                   for and cache the data.
-        cache: (bool, optional) If True, cache data locally for use on later
-               calls.
-        github_url: (str, optional) Github repository where the data is stored.
-        branch: (str, optional) The git branch to download from.
-        extension: (str, optional) Subfolder within the repository where the
-                   data is stored.
-        proxy_dict: (dict, optional) Dictionary with keys as either 'http' or
-                    'https' and values as the proxy server. This is useful
-                    if you are on a work computer behind a firewall and need
-                    to use a proxy out to download data.
-        kws: (dict, optional) Keywords passed to xarray.open_dataset
+        cache: If ``True``, cache data locally for use on later calls.
+        cache_dir: The directory in which to search for and cache the data.
+        github_url: Github repository where the data is stored.
+        branch: The git branch to download from.
+        extension: Subfolder within the repository where the data is stored.
+        proxy_dict: Dictionary with keys as either "http" or "https" and values as the
+            proxy server. This is useful if you are on a work computer behind a
+            firewall and need to use a proxy out to download data.
+        kws: Keywords passed to :py:func:`~xarray.open_dataset`.
 
     Returns:
-        The desired xarray dataset.
+        The desired xarray.Dataset.
 
     Examples:
         >>> from climpred.tutorial import load_dataset
diff --git a/docs/source/conf.py b/docs/source/conf.py
index f5860f46b..ded83fe60 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,8 +1,9 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# u This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# http://www.sphinx-doc.org/en/master/config
+"""Configuration file for the Sphinx documentation builder.
+
+This file only contains a selection of the most common options. For a full
+list see the documentation:
+http://www.sphinx-doc.org/en/master/config
+"""
 
 # -- Path setup --------------------------------------------------------------
 
@@ -10,8 +11,13 @@
 import os
 import sys
 
+import xarray
+
 import climpred
 
+xarray.DataArray.__module__ = "xarray"
+
+
 sys.path.insert(0, os.path.abspath("../.."))
 
 

From e2b86b8bf5f379e3d6d0af718bbd7bf1a5f7ec79 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Thu, 9 Dec 2021 18:57:21 +0100
Subject: [PATCH 07/56] stats checks reference

---
 climpred/bootstrap.py  |   2 +-
 climpred/checks.py     |  75 ++-----------
 climpred/classes.py    |   1 +
 climpred/prediction.py |  17 ++-
 climpred/reference.py  | 249 ++++++++++++++++++++++++-----------------
 climpred/smoothing.py  |   2 -
 climpred/stats.py      |  85 ++++++++------
 climpred/utils.py      | 103 ++++++++++-------
 8 files changed, 281 insertions(+), 253 deletions(-)

diff --git a/climpred/bootstrap.py b/climpred/bootstrap.py
index 6babab7c8..35020ea4e 100644
--- a/climpred/bootstrap.py
+++ b/climpred/bootstrap.py
@@ -31,7 +31,7 @@
 try:
     from .stats import varweighted_mean_period
 except ImportError:
-    varweighted_mean_period = None
+    varweighted_mean_period = None  # type: ignore
 from .utils import (
     _transpose_and_rechunk_to,
     convert_time_index,
diff --git a/climpred/checks.py b/climpred/checks.py
index 6152218e4..40456c073 100644
--- a/climpred/checks.py
+++ b/climpred/checks.py
@@ -1,3 +1,5 @@
+"""Common checks for climpred operations."""
+
 import warnings
 from functools import wraps
 from typing import List, Optional, Union
@@ -19,19 +21,11 @@
 NCPU = dask.system.CPU_COUNT
 
 
-def dec_args_kwargs(wrapper):
-    # https://stackoverflow.com/questions/10610824/
-    # python-shortcut-for-writing-decorators-which-accept-arguments
-    return lambda *dec_args, **dec_kwargs: lambda func: wrapper(
-        func, *dec_args, **dec_kwargs
-    )
-
-
 # --------------------------------------#
 # CHECKS
 # --------------------------------------#
 def has_dataset(obj, kind, what):
-    """Checks that the PredictionEnsemble has a specific dataset in it."""
+    """Check that the PredictionEnsemble has a specific dataset in it."""
     if len(obj) == 0:
         raise DatasetError(
             f"You need to add at least one {kind} dataset before "
@@ -41,9 +35,7 @@ def has_dataset(obj, kind, what):
 
 
 def has_dims(xobj, dims, kind):
-    """
-    Checks that at the minimum, the object has provided dimensions.
-    """
+    """Check that at the minimum, the object has provided dimensions."""
     if isinstance(dims, str):
         dims = [dims]
 
@@ -57,9 +49,7 @@ def has_dims(xobj, dims, kind):
 
 
 def has_min_len(arr, len_, kind):
-    """
-    Checks that the array is at least the specified length.
-    """
+    """Check that the array is at least the specified length."""
     arr_len = len(arr)
     if arr_len < len_:
         raise DimensionError(
@@ -70,9 +60,7 @@ def has_min_len(arr, len_, kind):
 
 
 def has_valid_lead_units(xobj):
-    """
-    Checks that the object has valid units for the lead dimension.
-    """
+    """Check that the object has valid units for the lead dimension."""
     LEAD_UNIT_ERROR = (
         "The lead dimension must must have a valid "
         f"units attribute. Valid options are: {VALID_LEAD_UNITS}"
@@ -106,56 +94,15 @@ def is_in_list(item, list_, kind):
     return True
 
 
-@dec_args_kwargs
-def is_xarray(func, *dec_args):
-    """
-    Decorate a function to ensure the first arg being submitted is
-    either a Dataset or DataArray.
-    """
-
-    @wraps(func)
-    def wrapper(*args, **kwargs):
-        try:
-            ds_da_locs = dec_args[0]
-            if not isinstance(ds_da_locs, list):
-                ds_da_locs = [ds_da_locs]
-
-            for loc in ds_da_locs:
-                if isinstance(loc, int):
-                    ds_da = args[loc]
-                elif isinstance(loc, str):
-                    ds_da = kwargs[loc]
-
-                is_ds_da = isinstance(ds_da, (xr.Dataset, xr.DataArray))
-                if not is_ds_da:
-                    typecheck = type(ds_da)
-                    raise IOError(
-                        f"""The input data is not an xarray DataArray or
-                        Dataset. climpred is built to wrap xarray to make
-                        use of its awesome features. Please input an xarray
-                        object and retry the function.
-
-                        Your input was of type: {typecheck}"""
-                    )
-        except IndexError:
-            pass
-        # this is outside of the try/except so that the traceback is relevant
-        # to the actual function call rather than showing a simple Exception
-        # (probably IndexError from trying to subselect an empty dec_args list)
-        return func(*args, **kwargs)
-
-    return wrapper
-
-
 def match_calendars(
     ds1, ds2, ds1_time="init", ds2_time="time", kind1="initialized", kind2="observation"
 ):
-    """Checks that calendars match between two xarray Datasets.
+    """Check that calendars match between two xarray Datasets.
 
     This assumes that the two datasets coming in have cftime time axes.
 
     Args:
-        ds1, ds2 (xarray object): Datasets/DataArrays to compare calendars on. For
+        ds1, ds2 (xarray.Dataset, xarray.DataArray): to compare calendars on. For
             classes, ds1 can be thought of Dataset already existing in the object,
             and ds2 the one being added.
         ds1_time, ds2_time (str, default 'time'): Name of time dimension to look
@@ -178,8 +125,7 @@ def match_calendars(
 
 
 def match_initialized_dims(init, verif, uninitialized=False):
-    """Checks that the verification data dimensions match appropriate initialized
-    dimensions.
+    """Check that the verification dimensions match initialized dimensions.
 
     If uninitialized, ignore ``member``. Otherwise, ignore ``lead`` and ``member``.
     """
@@ -206,8 +152,7 @@ def match_initialized_dims(init, verif, uninitialized=False):
 
 
 def match_initialized_vars(init, verif):
-    """Checks that a new verification dataset has at least one variable
-    in common with the initialized dataset.
+    """Check that verification has at least one variable in common with the initialized.
 
     This ensures that they can be compared pairwise.
 
diff --git a/climpred/classes.py b/climpred/classes.py
index 1b28e72ff..bd2ec8957 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -1351,6 +1351,7 @@ def _compute_persistence(
         """
         if dim is None:
             dim = list(self._datasets["initialized"].isel(lead=0).dims)
+        compute_persistence_func: Callable[..., xr.Dataset]
         compute_persistence_func = compute_persistence_from_first_lead
         if OPTIONS["PerfectModel_persistence_from_initialized_lead_0"]:
             compute_persistence_func = compute_persistence_from_first_lead
diff --git a/climpred/prediction.py b/climpred/prediction.py
index 98dcc5db8..c4023a17b 100644
--- a/climpred/prediction.py
+++ b/climpred/prediction.py
@@ -1,7 +1,9 @@
+"""Prediction module: _apply_metric_at_given_lead and compute functions."""
+
 import xarray as xr
 
 from .alignment import return_inits_and_verif_dates
-from .checks import has_valid_lead_units, is_in_list, is_xarray
+from .checks import has_valid_lead_units, is_in_list
 from .comparisons import (
     COMPARISON_ALIASES,
     HINDCAST_COMPARISONS,
@@ -43,7 +45,7 @@ def _apply_metric_at_given_lead(
     dim=None,
     **metric_kwargs,
 ):
-    """Applies a metric between two time series at a given lead.
+    """Apply a metric between two time series at a given lead.
 
     Args:
         verif (xr object): Verification data.
@@ -101,7 +103,7 @@ def _apply_metric_at_given_lead(
 
 
 def _rename_dim(dim, forecast, verif):
-    """rename `dim` to `time` or `init` if forecast and verif dims require to do so."""
+    """Rename `dim` to `time` or `init` if forecast and verif dims require it."""
     if "init" in dim and "time" in forecast.dims and "time" in verif.dims:
         dim = dim.copy()
         dim.remove("init")
@@ -118,7 +120,7 @@ def _rename_dim(dim, forecast, verif):
 
 
 def _sanitize_to_list(dim):
-    """Make dim to list if string, tuple or set, pass if None else raise ValueError."""
+    """Ensure dim is List, raises ValueError if not str, set, tuple or None."""
     if isinstance(dim, str):
         dim = [dim]
     elif isinstance(dim, set):
@@ -135,7 +137,7 @@ def _sanitize_to_list(dim):
 
 
 def _get_metric_comparison_dim(initialized, metric, comparison, dim, kind):
-    """Returns `metric`, `comparison` and `dim` for compute functions.
+    """Return `metric`, `comparison` and `dim` for compute functions.
 
     Args:
         initialized (xr.object): initialized dataset: init_pm or hind
@@ -203,7 +205,6 @@ def _get_metric_comparison_dim(initialized, metric, comparison, dim, kind):
     return metric, comparison, dim
 
 
-@is_xarray([0])
 def compute_perfect_model(
     init_pm,
     control=None,
@@ -213,8 +214,7 @@ def compute_perfect_model(
     **metric_kwargs,
 ):
     """
-    Compute a predictability skill score for a perfect-model framework
-    simulation dataset.
+    Compute a predictability skill score in a perfect-model framework.
 
     Args:
         init_pm (xarray object): ensemble with dims ``lead``, ``init``, ``member``.
@@ -255,7 +255,6 @@ def compute_perfect_model(
     return skill
 
 
-@is_xarray([0, 1])
 def compute_hindcast(
     hind,
     verif,
diff --git a/climpred/reference.py b/climpred/reference.py
index 62bc50c0e..55ce33795 100644
--- a/climpred/reference.py
+++ b/climpred/reference.py
@@ -1,11 +1,13 @@
-import inspect
+"""Reference forecasts: climatology, persistence, uninitialized."""
 import warnings
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import pandas as pd
 import xarray as xr
 
 from .alignment import return_inits_and_verif_dates
-from .checks import has_valid_lead_units, is_xarray
+from .checks import has_valid_lead_units
+from .comparison import Comparison
 from .comparisons import (
     ALL_COMPARISONS,
     COMPARISON_ALIASES,
@@ -19,10 +21,12 @@
     DETERMINISTIC_HINDCAST_METRICS,
     METRIC_ALIASES,
     PM_METRICS,
+    Metric,
     _rename_dim,
 )
 from .options import OPTIONS
 from .utils import (
+    convert_cftime_to_datetime_coords,
     convert_time_index,
     get_comparison_class,
     get_lead_cftime_shift_args,
@@ -30,9 +34,17 @@
     shift_cftime_index,
 )
 
+metricType = Union[str, Metric]
+comparisonType = Union[str, Comparison]
+dimType = Optional[Union[str, List[str]]]
+alignmentType = str
+metric_kwargsType = Optional[Any]
 
-def _maybe_seasons_to_int(ds):
-    """set season str values or coords to int"""
+
+def _maybe_seasons_to_int(
+    ds: Union[xr.Dataset, xr.DataArray]
+) -> Union[xr.Dataset, xr.DataArray]:
+    """Set season str values or coords to int."""
     seasonal = False
     for season in ["DJF", "MAM", "JJA", "SON"]:
         if season in ds:
@@ -62,19 +74,29 @@ def _maybe_seasons_to_int(ds):
     return ds
 
 
-def persistence(verif, inits, verif_dates, lead):
+def persistence(
+    verif: xr.Dataset,
+    inits: Dict[float, xr.DataArray],
+    verif_dates: Dict[float, xr.DataArray],
+    lead: float,
+) -> Tuple[xr.Dataset, xr.Dataset]:
+    """Create forecast, verification tuple at lead for persistence forecast."""
     lforecast = verif.where(verif.time.isin(inits[lead]), drop=True)
     lverif = verif.sel(time=verif_dates[lead])
     return lforecast, lverif
 
 
-def climatology(verif, inits, verif_dates, lead):
+def climatology(
+    verif: xr.Dataset,
+    inits: Dict[float, xr.DataArray],
+    verif_dates: Dict[float, xr.DataArray],
+    lead: float,
+) -> Tuple[xr.Dataset, xr.Dataset]:
+    """Create forecast, verification tuple at lead for climatology forecast."""
     init_lead = inits[lead].copy()
     seasonality_str = OPTIONS["seasonality"]
     if seasonality_str == "weekofyear":
         # convert to datetime for weekofyear operations
-        from .utils import convert_cftime_to_datetime_coords
-
         verif = convert_cftime_to_datetime_coords(verif, "time")
         init_lead["time"] = init_lead["time"].to_index().to_datetimeindex()
         init_lead = init_lead["time"]
@@ -90,7 +112,7 @@ def climatology(verif, inits, verif_dates, lead):
         .sel(
             {
                 seasonality_str: _maybe_seasons_to_int(
-                    getattr(verif_hind_union.time.dt, seasonality_str)
+                    getattr(verif_hind_union.time.dt, seasonality_str)  # type: ignore
                 )
             },
             method="nearest",  # nearest may be a bit incorrect but doesnt error
@@ -109,9 +131,18 @@ def climatology(verif, inits, verif_dates, lead):
     return lforecast, lverif
 
 
-def uninitialized(hist, verif, verif_dates, lead):
-    """Uninitialized forecast uses a simulation without any initialization
-    (assimilation/nudging). Also called historical in some communities."""
+def uninitialized(
+    hist: xr.Dataset,
+    verif: xr.Dataset,
+    verif_dates: Dict[float, xr.DataArray],
+    lead: float,
+) -> Tuple[xr.Dataset, xr.Dataset]:
+    """
+    Create forecast, verification tuple at lead for uninitialized forecast.
+
+    Uninitialized forecast uses a simulation without any initialization
+    (assimilation/nudging). Also called historical in some communities.
+    """
     lforecast = hist.sel(time=verif_dates[lead])
     lverif = verif.sel(time=verif_dates[lead])
     return lforecast, lverif
@@ -122,8 +153,11 @@ def uninitialized(hist, verif, verif_dates, lead):
 
 
 def _adapt_member_for_reference_forecast(lforecast, lverif, metric, comparison, dim):
-    """Maybe drop member from dim or add single-member dimension. Used in
-    reference forecasts: climatology, uninitialized, persistence."""
+    """
+    Maybe drop member from dim or add single-member dimension.
+
+    Used in reference forecasts: climatology, uninitialized, persistence.
+    """
     # persistence or climatology forecasts wont have member dimension, create if
     # required
     # some metrics dont allow member dimension, remove and try mean
@@ -170,19 +204,19 @@ def compute_climatology(
     dim="init",
     **metric_kwargs,
 ):
-    """Computes the skill of a climatology forecast.
+    """Compute the skill of a climatology forecast.
 
     Args:
-        initialized (xarray.Dataset): The initialized ensemble.
-        verif (xarray.Dataset): control data, not needed
-        metric (str): Metric name to apply at each lag for the persistence computation.
+        initialized: The initialized ensemble.
+        verif: control data, not needed
+        metric: Metric name to apply at each lag for the persistence computation.
             Default: 'pearson_r'
-        dim (str or list of str): dimension to apply metric over.
-        ** metric_kwargs (dict): additional keywords to be passed to metric
+        dim: dimension to apply metric over.
+        ** metric_kwargs: additional keywords to be passed to metric
             (see the arguments required for a given metric in :ref:`Metrics`).
 
     Returns:
-        clim (xarray.Dataset): Results of climatology forecast with the input metric
+        clim: Results of climatology forecast with the input metric
             applied.
     """
     seasonality_str = OPTIONS["seasonality"]
@@ -192,11 +226,13 @@ def compute_climatology(
     has_valid_lead_units(initialized)
 
     # get metric/comparison function name, not the alias
-    metric = METRIC_ALIASES.get(metric, metric)
-    comparison = COMPARISON_ALIASES.get(comparison, comparison)
+    if isinstance(metric, str):
+        metric = METRIC_ALIASES.get(metric, metric)
+        metric = get_metric_class(metric, ALL_METRICS)
 
-    comparison = get_comparison_class(comparison, ALL_COMPARISONS)
-    metric = get_metric_class(metric, ALL_METRICS)
+    if isinstance(comparison, str):
+        comparison = COMPARISON_ALIASES.get(comparison, comparison)
+        comparison = get_comparison_class(comparison, ALL_COMPARISONS)
 
     if "iteration" in initialized.dims:
         initialized = initialized.isel(iteration=0, drop=True)
@@ -235,24 +271,27 @@ def compute_climatology(
     return clim_skill
 
 
-@is_xarray([0, 1])
 def compute_persistence(
-    initialized,
-    verif,
-    metric="pearson_r",
-    alignment="same_verifs",
-    dim="init",
-    comparison="m2o",
-    **metric_kwargs,
-):
-    """Computes the skill of a persistence forecast from a simulation.
+    initialized: xr.Dataset,
+    verif: xr.Dataset,
+    metric: metricType = "acc",
+    comparison: comparisonType = "m2o",
+    dim: dimType = "init",
+    alignment: alignmentType = "same_verifs",
+    **metric_kwargs: Any,
+) -> xr.Dataset:
+    """Compute the skill of a persistence forecast from a simulation.
+
+    This function unlike
+    :py:func:`~climpred.reference.compute_persistence_from_first_lead` is
+    not sensitive to ``comparison``. Requires
+    ``climpred.set_options(PerfectModel_persistence_from_initialized_lead_0=False)``.
 
     Args:
-        initialized (xarray.Dataset): The initialized ensemble.
-        verif (xarray.Dataset): Verification data.
-        metric (str): Metric name to apply at each lag for the persistence computation.
-            Default: 'pearson_r'
-        alignment (str): which inits or verification times should be aligned?
+        initialized: The initialized ensemble.
+        verif: Verification data.
+        metric: Metric name to apply at each lag for persistence. Default: ``"acc"``.
+        alignment: which inits or verification times should be aligned?
 
             - ``maximize``: maximize the degrees of freedom by slicing
                 ``initialized`` and ``verif`` to a common time frame at each lead.
@@ -263,12 +302,12 @@ def compute_persistence(
                 prior to computing metric. This philosophy follows the thought that
                 each lead should be based on the same set of verification dates.
 
-        dim (str or list of str): dimension to apply metric over.
-        ** metric_kwargs (dict): additional keywords to be passed to metric
+        dim: dimension to apply metric over.
+        ** metric_kwargs: additional keywords to be passed to metric
             (see the arguments required for a given metric in :ref:`Metrics`).
 
     Returns:
-        pers (xarray.Dataset): Results of persistence forecast with the input metric
+        pers: Results of persistence forecast with the input metric
             applied.
 
     Reference:
@@ -277,7 +316,7 @@ def compute_persistence(
           Oxford University Press, 2007.
 
     See also:
-        * :py:func:`~climpred.reference.compute_persistence`
+        * :py:func:`~climpred.reference.compute_persistence_from_first_lead`
     """
     if isinstance(dim, str):
         dim = [dim]
@@ -289,13 +328,14 @@ def compute_persistence(
     has_valid_lead_units(initialized)
 
     # get metric/comparison function name, not the alias
-    metric = METRIC_ALIASES.get(metric, metric)
-    comparison = COMPARISON_ALIASES.get(comparison, comparison)
+    if isinstance(metric, str):
+        metric = METRIC_ALIASES.get(metric, metric)
+        metric = get_metric_class(metric, ALL_METRICS)
 
-    comparison = get_comparison_class(comparison, ALL_COMPARISONS)
+    if isinstance(comparison, str):
+        comparison = COMPARISON_ALIASES.get(comparison, comparison)
+        comparison = get_comparison_class(comparison, ALL_COMPARISONS)
 
-    # get class metric(Metric)
-    metric = get_metric_class(metric, ALL_METRICS)
     # If lead 0, need to make modifications to get proper persistence, since persistence
     # at lead 0 is == 1.
     if [0] in initialized.lead.values:
@@ -334,25 +374,26 @@ def compute_persistence(
 
 
 def compute_persistence_from_first_lead(
-    initialized,
-    verif,
-    metric="pearson_r",
-    alignment="same_inits",
-    dim="init",
-    comparison="m2e",
-    **metric_kwargs,
+    initialized: xr.Dataset,
+    verif: Optional[xr.Dataset] = None,
+    metric: metricType = "pearson_r",
+    alignment: alignmentType = "same_inits",
+    dim: dimType = "init",
+    comparison: comparisonType = "m2e",
+    **metric_kwargs: metric_kwargsType,
 ):
-    """Computes the skill of a persistence forecast based on the first lead available
-    in the initialized dataset. This function unlike ``compute_persistence`` is
+    """Compute persistence skill based on first ``lead`` in ``initialized``.
+
+    This function unlike :py:func:`~climpred.reference.compute_persistence` is
     sensitive to ``comparison``. Requires
     ``climpred.set_options(PerfectModel_persistence_from_initialized_lead_0=True)``.
 
     Args:
-        initialized (xarray.Dataset): The initialized ensemble.
-        verif (xarray.Dataset): Verification data. Not used.
-        metric (str): Metric name to apply at each lag for the persistence computation.
+        initialized: The initialized ensemble.
+        verif: Verification data. Not used.
+        metric: Metric name to apply at each lag for the persistence computation.
             Default: 'pearson_r'
-        alignment (str): which inits or verification times should be aligned?
+        alignment: which inits or verification times should be aligned?
 
             - ``maximize``: maximize the degrees of freedom by slicing ``initialized``
                 and ``verif`` to a common time frame at each lead.
@@ -363,12 +404,12 @@ def compute_persistence_from_first_lead(
                 prior to computing metric. This philosophy follows the thought that
                 each lead should be based on the same set of verification dates.
 
-        dim (str or list of str): dimension to apply metric over.
-        ** metric_kwargs (dict): additional keywords to be passed to metric
+        dim: dimension to apply metric over.
+        ** metric_kwargs: additional keywords to be passed to metric
             (see the arguments required for a given metric in :ref:`Metrics`).
 
     Returns:
-        pers (xarray.Dataset): Results of persistence forecast with the input metric
+        pers: Results of persistence forecast with the input metric
             applied.
 
     Example:
@@ -447,11 +488,12 @@ def compute_persistence_from_first_lead(
     has_valid_lead_units(initialized)
 
     # get metric/comparison function name, not the alias
-    metric = METRIC_ALIASES.get(metric, metric)
-    comparison = COMPARISON_ALIASES.get(comparison, comparison)
-
-    comparison = get_comparison_class(comparison, ALL_COMPARISONS)
-    metric = get_metric_class(metric, ALL_METRICS)
+    if isinstance(metric, str):
+        metric = METRIC_ALIASES.get(metric, metric)
+        metric = get_metric_class(metric, ALL_METRICS)
+    if isinstance(comparison, str):
+        comparison = COMPARISON_ALIASES.get(comparison, comparison)
+        comparison = get_comparison_class(comparison, ALL_COMPARISONS)
 
     forecast, observations = comparison.function(initialized, metric=metric)
     forecast, dim = _adapt_member_for_reference_forecast(
@@ -465,16 +507,15 @@ def compute_persistence_from_first_lead(
     return persistence_skill
 
 
-@is_xarray([0, 1])
 def compute_uninitialized(
-    initialized,
-    uninit,
-    verif,
-    metric="pearson_r",
-    comparison="e2o",
-    dim="time",
-    alignment="same_verifs",
-    **metric_kwargs,
+    initialized: xr.Dataset,
+    uninit: xr.Dataset,
+    verif: xr.Dataset,
+    metric: metricType = "pearson_r",
+    comparison: comparisonType = "e2o",
+    dim: dimType = "time",
+    alignment: alignmentType = "same_verifs",
+    **metric_kwargs: metric_kwargsType,
 ):
     """Verify an uninitialized ensemble against verification data.
 
@@ -483,33 +524,35 @@ def compute_uninitialized(
         first lag and then projected out to any further lags being analyzed.
 
     Args:
-        initialized (xarray.Dataset): Initialized ensemble.
-        uninit (xarray.Dataset): Uninitialized ensemble.
-        verif (xarray.Dataset): Verification data with some temporal overlap with the
+        initialized: Initialized ensemble.
+        uninit: Uninitialized ensemble.
+        verif: Verification data with some temporal overlap with the
             uninitialized ensemble.
-        metric (str):
+        metric:
             Metric used in comparing the uninitialized ensemble with the verification
             data.
-        comparison (str):
+        comparison:
             How to compare the uninitialized ensemble to the verification data:
                 * `"e2o"` : ensemble mean to verification data (Default)
                 * `"m2o"` : each member to the verification data
-        dim (str or list of str): dimension to apply metric over.
-        alignment (str): which inits or verification times should be aligned?
-
-            - ``maximize``: maximize the degrees of freedom by slicing ``initialized``
-                and ``verif`` to a common time frame at each lead.
-            - ``same_inits``: slice to a common init frame prior to computing
-            metric. This philosophy follows the thought that each lead should be based
-            on the same set of initializations.
-            - ``same_verif``: slice to a common/consistent verification time frame
-                prior to computing metric. This philosophy follows the thought that
-                each lead should be based on the same set of verification dates.
-
-        ** metric_kwargs (dict): additional keywords to be passed to metric
+        dim: dimension to apply metric over.
+        alignment: which inits or verification times should be aligned?
+
+            - ``"maximize"``: maximize the degrees of freedom by slicing
+                ``initialized`` and ``verif`` to a common time frame at each
+                lead.
+            - ``"same_inits"``: slice to a common init frame prior to computing
+                metric. This philosophy follows the thought that each lead
+                should be based on the same set of initializations.
+            - ``"same_verifs"``: slice to a common/consistent verification time
+                frame prior to computing metric. This philosophy follows the
+                thought that each lead should be based on the same set of
+                verification dates.
+
+        ** metric_kwargs: additional keywords to be passed to metric
 
     Returns:
-        uninit_skill (xarray.Dataset): Results from comparison at the first lag.
+        uninit_skill: Results from comparison at the first lag.
 
     """
     if isinstance(dim, str):
@@ -521,11 +564,13 @@ def compute_uninitialized(
     has_valid_lead_units(initialized)
 
     # get metric/comparison function name, not the alias
-    metric = METRIC_ALIASES.get(metric, metric)
-    comparison = COMPARISON_ALIASES.get(comparison, comparison)
+    if isinstance(metric, str):
+        metric = METRIC_ALIASES.get(metric, metric)
+        metric = get_metric_class(metric, DETERMINISTIC_HINDCAST_METRICS)
+    if isinstance(comparison, str):
+        comparison = COMPARISON_ALIASES.get(comparison, comparison)
+        comparison = get_comparison_class(comparison, HINDCAST_COMPARISONS)
 
-    comparison = get_comparison_class(comparison, HINDCAST_COMPARISONS)
-    metric = get_metric_class(metric, DETERMINISTIC_HINDCAST_METRICS)
     forecast, verif = comparison.function(uninit, verif, metric=metric)
 
     initialized = initialized.rename({"init": "time"})
diff --git a/climpred/smoothing.py b/climpred/smoothing.py
index cc180b2ca..efe35db57 100644
--- a/climpred/smoothing.py
+++ b/climpred/smoothing.py
@@ -5,8 +5,6 @@
 import numpy as np
 import xarray as xr
 
-from .checks import is_xarray
-
 try:
     import xesmf as xe
 except ImportError:
diff --git a/climpred/stats.py b/climpred/stats.py
index ea5c62658..654f92516 100644
--- a/climpred/stats.py
+++ b/climpred/stats.py
@@ -1,4 +1,7 @@
+"""Statistical functions to diagnose potential predictability due to variability."""
+
 import warnings
+from typing import Any, Dict, List, Union
 
 import numpy as np
 import xarray as xr
@@ -8,11 +11,12 @@
     from xrft import power_spectrum
 except ImportError:
     power_spectrum = None
-from .checks import is_xarray
 
 
-def rm_poly(ds, dim="time", deg=2, **kwargs):
-    """Remove degree polynomial along dimension dim from ds."""
+def rm_poly(
+    ds: Union[xr.Dataset, xr.DataArray], dim: str = "time", deg: int = 2, **kwargs: Any
+) -> Union[xr.Dataset, xr.DataArray]:
+    """Remove polynomial of degree ``deg`` along dimension ``dim``."""
     coefficients = ds.polyfit(dim, deg=deg, **kwargs)
     coord = ds[dim]
     fits = []
@@ -30,22 +34,25 @@ def rm_poly(ds, dim="time", deg=2, **kwargs):
     return ds_rm_poly
 
 
-def rm_trend(ds, dim="time", **kwargs):
-    """Remove degree polynomial along dimension dim from ds."""
+def rm_trend(
+    ds: Union[xr.Dataset, xr.DataArray], dim: str = "time", **kwargs: Any
+) -> Union[xr.Dataset, xr.DataArray]:
+    """Remove linear trend (polynomial of degree 1) along dimension ``dim``."""
     return rm_poly(ds, dim=dim, deg=1, **kwargs)
 
 
-@is_xarray(0)
-def decorrelation_time(da, iterations=20, dim="time"):
-    """Calculate the decorrelaton time of a time series.
+def decorrelation_time(
+    da: Union[xr.Dataset, xr.DataArray], iterations: int = 20, dim: str = "time"
+) -> Union[xr.Dataset, xr.DataArray]:
+    r"""Calculate the decorrelation time of a time series.
 
     .. math::
-        \\tau_{d} = 1 + 2 * \\sum_{k=1}^{r}(\\alpha_{k})^{k}
+        \tau_{d} = 1 + 2 * \sum_{k=1}^{r}(\alpha_{k})^{k}
 
     Args:
-        da (xarray object): input.
-        iterations (optional int): Number of iterations to run the above formula.
-        dim (optional str): Time dimension for xarray object.
+        da: input.
+        iterations: Number of iterations to run the above formula.
+        dim: Time dimension for xarray object.
 
     Returns:
         Decorrelation time of time series.
@@ -58,7 +65,7 @@ def decorrelation_time(da, iterations=20, dim="time"):
     """
 
     def _lag_corr(x, y, dim, lead):
-        """Helper function to shift the two time series and correlate."""
+        """Shift the two time series by ``lead`` and correlate them."""
         N = x[dim].size
         normal = x.isel({dim: slice(0, N - lead)})
         shifted = y.isel({dim: slice(0 + lead, N)})
@@ -73,8 +80,13 @@ def _lag_corr(x, y, dim, lead):
     ).sum("it")
 
 
-def dpp(ds, dim="time", m=10, chunk=True):
-    """Calculates the Diagnostic Potential Predictability (dpp)
+def dpp(
+    ds: Union[xr.Dataset, xr.DataArray],
+    dim: str = "time",
+    m: int = 10,
+    chunk: bool = True,
+) -> Union[xr.Dataset, xr.DataArray]:
+    r"""Calculate the Diagnostic Potential Predictability (DPP).
 
     .. math::
 
@@ -86,15 +98,15 @@ def dpp(ds, dim="time", m=10, chunk=True):
         in a slightly different way: chunk=False.
 
     Args:
-        ds (xr.DataArray): control simulation with time dimension as years.
-        dim (str): dimension to apply DPP on. Default: time.
-        m (optional int): separation time scale in years between predictable
-                          low-freq component and high-freq noise.
-        chunk (optional boolean): Whether chunking is applied. Default: True.
-                    If False, then uses Resplandy 2015 / Seferian 2018 method.
+        ds: control simulation with time dimension as years.
+        dim: dimension to apply DPP on. Default: ``"time"``.
+        m: separation time scale in years between predictable
+            low-freq component and high-freq noise.
+        chunk: Whether chunking is applied. Default: True.
+            If False, then uses Resplandy 2015 / Seferian 2018 method.
 
     Returns:
-        dpp (xr.DataArray): ds without time dimension.
+        dpp: ds without time dimension.
 
     References:
         * Boer, G. J. “Long Time-Scale Potential Predictability in an Ensemble of
@@ -110,7 +122,12 @@ def dpp(ds, dim="time", m=10, chunk=True):
 
     """
 
-    def _chunking(ds, dim="time", number_chunks=False, chunk_length=False):
+    def _chunking(
+        ds: Union[xr.Dataset, xr.DataArray],
+        dim: str = "time",
+        number_chunks: Union[bool, int] = False,
+        chunk_length: Union[bool, int] = False,
+    ) -> Union[xr.Dataset, xr.DataArray]:
         """
         Separate data into chunks and reshapes chunks in a c dimension.
 
@@ -118,13 +135,13 @@ def _chunking(ds, dim="time", number_chunks=False, chunk_length=False):
         Needed for dpp.
 
         Args:
-            ds (xr.DataArray): control simulation with time dimension as years.
-            dim (str): dimension to apply chunking to. Default: time
-            chunk_length (int): see dpp(m)
-            number_chunks (int): number of chunks in the return data.
+            ds: control simulation with time dimension as years.
+            dim: dimension to apply chunking to. Default: time
+            chunk_length: see dpp(m)
+            number_chunks: number of chunks in the return data.
 
         Returns:
-            c (xr.DataArray): chunked ds, but with additional dimension c.
+            c: chunked ds, but with additional dimension c.
 
         """
         if number_chunks and not chunk_length:
@@ -164,17 +181,19 @@ def _chunking(ds, dim="time", number_chunks=False, chunk_length=False):
     return dpp
 
 
-@is_xarray(0)
-def varweighted_mean_period(da, dim="time", **kwargs):
-    """Calculate the variance weighted mean period of time series based on
-    xrft.power_spectrum.
+def varweighted_mean_period(
+    da: Union[xr.Dataset, xr.DataArray],
+    dim: Union[str, List[str]] = "time",
+    **kwargs: Any,
+) -> Union[xr.Dataset, xr.DataArray]:
+    r"""Calculate the variance weighted mean period of time series.
 
     .. math::
-        P_{x} = \\frac{\\sum_k V(f_k,x)}{\\sum_k f_k  \\cdot V(f_k,x)}
+        P_{x} = \frac{\sum_k V(f_k,x)}{\sum_k f_k \cdot V(f_k,x)}
 
     Args:
-        da (xarray object): input data including dim.
-        dim (optional str): Name of time dimension.
+        da: input data including dim.
+        dim: Name of time dimension.
         for **kwargs see xrft.power_spectrum
 
     Reference:
diff --git a/climpred/utils.py b/climpred/utils.py
index 03fdd51d2..6b96888af 100644
--- a/climpred/utils.py
+++ b/climpred/utils.py
@@ -1,6 +1,9 @@
+"""Utility functions used by other modules."""
+
 import datetime
 import logging
 import warnings
+from typing import List, Union
 
 import cftime
 import dask
@@ -11,14 +14,16 @@
 
 from . import comparisons, metrics
 from .checks import is_in_list
+from .comparison import Comparison
 from .comparisons import COMPARISON_ALIASES
 from .constants import FREQ_LIST_TO_INFER_STRIDE, HINDCAST_CALENDAR_STR
 from .exceptions import CoordinateError
-from .metrics import ALL_METRICS, METRIC_ALIASES
+from .metrics import ALL_METRICS, METRIC_ALIASES, Metric
 from .options import OPTIONS
 
 
 def add_attrs_to_climpred_coords(results):
+    """Write attrs for coords added by climpred."""
     from . import __version__ as version
 
     if "results" in results.coords:
@@ -149,10 +154,10 @@ def assign_attrs(
 def convert_time_index(
     xobj, time_string, kind="object", calendar=HINDCAST_CALENDAR_STR
 ):
-    """Converts incoming time index to a standard xr.CFTimeIndex.
+    """Convert incoming time index to a :py:class:`~xarray.CFTimeIndex`.
 
     Args:
-        xobj (xarray object): Dataset or DataArray with a time dimension to convert.
+        xobj (xarray.Dataset): with a time dimension to convert.
         time_string (str): Name of time dimension.
         kind (str): Kind of object for error message.
         calendar (str): calendar to set time dimension to.
@@ -227,9 +232,12 @@ def convert_cftime_to_datetime_coords(ds, dim):
 
 
 def find_start_dates_for_given_init(control, single_init):
-    """Find the same start dates for cftime single_init across different years in
-    control. Return control.time. Requires calendar=Datetime(No)Leap for consistent
-    `dayofyear`."""
+    """
+    Find same start dates for cftime single_init in different years in control.
+
+    Return control.time. Requires calendar=Datetime(No)Leap for consistent
+    `dayofyear`.
+    """
     # check that Leap or NoLeap calendar
     for dim in [single_init.init, control.time]:
         # dirty workaround .values requires a dimension but single_init is only a
@@ -251,11 +259,13 @@ def find_start_dates_for_given_init(control, single_init):
 
 
 def return_time_series_freq(ds, dim):
-    """Return the temporal frequency of the input time series. Finds the frequency
-    starting from high frequencies at which all ds.dim are not equal.
+    """Return the temporal frequency of the input time series.
+
+    Finds the frequency starting from high frequencies at which all ds.dim are
+    not equal.
 
     Args:
-        ds (xr.object): input with dimension `dim`.
+        ds (xr.Dataset): input with dimension `dim`.
         dim (str): name of dimension.
 
     Returns:
@@ -270,10 +280,9 @@ def return_time_series_freq(ds, dim):
             return freq
 
 
-def get_metric_class(metric, list_):
+def get_metric_class(metric: Union[str, Metric], list_: List) -> Metric:
     """
-    This allows the user to submit a string representing the desired metric
-    to the corresponding metric class.
+    Convert string representing the desired metric to corresponding metric class.
 
     Currently compatable with functions:
     * compute_persistence()
@@ -281,11 +290,11 @@ def get_metric_class(metric, list_):
     * compute_hindcast()
 
     Args:
-        metric (str): name of metric.
-        list_ (list): check whether metric in list
+        metric: name of metric.
+        list_: check whether metric in list
 
     Returns:
-        metric (Metric): class object of the metric.
+        class object of the metric.
     """
     if isinstance(metric, metrics.Metric):
         return metric
@@ -293,17 +302,16 @@ def get_metric_class(metric, list_):
         # check if metric allowed
         is_in_list(metric, list_, "metric")
         metric = METRIC_ALIASES.get(metric, metric)
-        return getattr(metrics, "__" + metric)
+        return getattr(metrics, f"__{metric}")
     else:
         raise ValueError(
             f"Please provide metric as str or Metric class, found {type(metric)}"
         )
 
 
-def get_comparison_class(comparison, list_):
+def get_comparison_class(comparison: Union[str, Comparison], list_: List) -> Comparison:
     """
-    Converts a string comparison entry from the user into a Comparison class
-     for the package to interpret.
+    Convert string comparison entry into a Comparison class.
 
     Perfect Model:
 
@@ -318,10 +326,10 @@ def get_comparison_class(comparison, list_):
         * m2o: Compare each ensemble member to the verification data.
 
     Args:
-        comparison (str): name of comparison.
+        comparison: name of comparison.
 
     Returns:
-        comparison (Comparison): comparison class.
+        comparison: comparison class.
 
     """
     if isinstance(comparison, comparisons.Comparison):
@@ -337,8 +345,8 @@ def get_comparison_class(comparison, list_):
 
 
 def get_lead_cftime_shift_args(units, lead):
-    """Determines the date increment to use when adding the lead time to init time based
-    on the units attribute.
+    """
+    Determine date increment when adding lead to init based on units attribute.
 
     Args:
         units (str): Units associated with the lead dimension. Must be
@@ -376,8 +384,7 @@ def get_lead_cftime_shift_args(units, lead):
 
 
 def get_multiple_lead_cftime_shift_args(units, leads):
-    """Returns ``CFTimeIndex.shift()`` offset increment for an arbitrary number of
-    leads.
+    """Return ``CFTimeIndex.shift()`` offset for arbitrary number of leads.
 
     Args:
         units (str): Units associated with the lead dimension. Must be one of
@@ -396,20 +403,18 @@ def get_multiple_lead_cftime_shift_args(units, leads):
 
 
 def intersect(lst1, lst2):
-    """
-    Custom intersection, since `set.intersection()` changes type of list.
-    """
+    """Return custom intersection as `set.intersection()` changes type."""
     lst3 = [value for value in lst1 if value in lst2]
     return np.array(lst3)
 
 
 def lead_units_equal_control_time_stride(init, verif):
-    """Check that the lead units of the initialized ensemble have the same frequency as
-    the control stride.
+    """
+    Check that lead units of initialized have same frequency as control stride.
 
     Args:
-        init (xr.object): initialized ensemble with lead units.
-        verif (xr.object): control, uninitialized historical simulation / observations.
+        init (xr.Dataset): initialized ensemble with lead units.
+        verif (xr.Dataset): control, uninitialized historical simulation / observations.
 
     Returns:
         bool: Possible to continue or raise warning.
@@ -447,8 +452,8 @@ def rechunk_to_single_chunk_if_more_than_one_chunk_along_dim(ds, dim):
 
 
 def shift_cftime_index(xobj, time_string, n, freq):
-    """Shifts a ``CFTimeIndex`` over a specified number of time steps at a given
-    temporal frequency.
+    """
+    Shift a ``CFTimeIndex`` over n time steps at a given temporal frequency.
 
     This leverages the handy ``.shift()`` method from ``xarray.CFTimeIndex``. It's a
     simple call, but is used throughout ``climpred`` so it is documented here clearly
@@ -492,9 +497,12 @@ def shift_cftime_singular(cftime, n, freq):
 
 
 def _transpose_and_rechunk_to(new_chunk_ds, ori_chunk_ds):
-    """Chunk xr.object `new_chunk_ds` as another xr.object `ori_chunk_ds`.
+    """
+    Transpose and rechunk `new_chunk_ds` like `ori_chunk_ds`.
+
     This is needed after some operations which reduce chunks to size 1.
-    First transpose a to ds.dims then apply ds chunking to a."""
+    First transpose a to ds.dims then apply ds chunking to a.
+    """
     transpose_kwargs = (
         {"transpose_coords": False} if isinstance(new_chunk_ds, xr.DataArray) else {}
     )
@@ -504,7 +512,11 @@ def _transpose_and_rechunk_to(new_chunk_ds, ori_chunk_ds):
 
 
 def convert_Timedelta_to_lead_units(ds):
-    """Convert lead as pd.Timedelta to lead as int and corresponding lead.attrs['units'] and convert to longest integer lead unit possible."""
+    """
+    Convert lead as pd.Timedelta to int and corresponding lead.attrs['units'].
+
+    Converts to longest integer lead unit possible.
+    """
     if ds["lead"].dtype == "<m8[ns]":
         ds["lead"] = (ds.lead * 1e-9).astype(int)
         ds["lead"].attrs["units"] = "seconds"
@@ -528,8 +540,12 @@ def convert_Timedelta_to_lead_units(ds):
 
 
 def broadcast_time_grouped_to_time(forecast, category_edges, dim):
-    """Broadcast time.groupby('time.month/dayofyear/weekofyear').mean() from
-    category_edges back to dim matching forecast."""
+    """
+    Helper function for rps metric.
+
+    Broadcast time.groupby('time.month/dayofyear/weekofyear').mean() from
+    category_edges back to dim matching forecast.
+    """
     category_edges_time_dim = [
         d
         for d in category_edges.dims
@@ -550,7 +566,11 @@ def broadcast_time_grouped_to_time(forecast, category_edges, dim):
 
 
 def broadcast_metric_kwargs_for_rps(forecast, verif, metric_kwargs):
-    """Apply broadcast_time_grouped_to_time to category_edges in metric_kwargs."""
+    """
+    Helper function for rps metric.
+
+    Apply broadcast_time_grouped_to_time to category_edges in metric_kwargs.
+    """
     category_edges = metric_kwargs.get("category_edges", None)
     logging.debug("enter climpred.utils.broadcast_metric_kwargs_for_rps")
     if category_edges is not None:
@@ -572,7 +592,8 @@ def broadcast_metric_kwargs_for_rps(forecast, verif, metric_kwargs):
             logging.debug("category_edges is np.array")
         else:
             raise ValueError(
-                f"excepted category edges as tuple, xr.Dataset or np.array, found {type(category_edges)}"
+                "excepted category edges as tuple, xr.Dataset or np.array, "
+                f"found {type(category_edges)}"
             )
         return metric_kwargs
 

From 47aa5eac1f773940c898bf88240eba0340b63c23 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Thu, 9 Dec 2021 22:11:56 +0100
Subject: [PATCH 08/56] fix comparison

---
 climpred/reference.py         |   8 +--
 climpred/tests/test_checks.py | 108 +---------------------------------
 climpred/utils.py             |   7 +--
 3 files changed, 9 insertions(+), 114 deletions(-)

diff --git a/climpred/reference.py b/climpred/reference.py
index 55ce33795..3225f707a 100644
--- a/climpred/reference.py
+++ b/climpred/reference.py
@@ -7,12 +7,12 @@
 
 from .alignment import return_inits_and_verif_dates
 from .checks import has_valid_lead_units
-from .comparison import Comparison
 from .comparisons import (
     ALL_COMPARISONS,
     COMPARISON_ALIASES,
     HINDCAST_COMPARISONS,
     PM_COMPARISONS,
+    Comparison,
     __e2c,
 )
 from .constants import CLIMPRED_DIMS, M2M_MEMBER_DIM
@@ -332,7 +332,7 @@ def compute_persistence(
         metric = METRIC_ALIASES.get(metric, metric)
         metric = get_metric_class(metric, ALL_METRICS)
 
-    if isinstance(metric, str):
+    if isinstance(comparison, str):
         comparison = COMPARISON_ALIASES.get(comparison, comparison)
         comparison = get_comparison_class(comparison, ALL_COMPARISONS)
 
@@ -495,7 +495,7 @@ def compute_persistence_from_first_lead(
         comparison = COMPARISON_ALIASES.get(comparison, comparison)
         comparison = get_comparison_class(comparison, ALL_COMPARISONS)
 
-    forecast, observations = comparison.function(initialized, metric=metric)
+    forecast, observations = comparison.function(initialized, metric=metric)  # type: ignore
     forecast, dim = _adapt_member_for_reference_forecast(
         forecast, observations, metric, comparison, dim
     )
@@ -571,7 +571,7 @@ def compute_uninitialized(
         comparison = COMPARISON_ALIASES.get(comparison, comparison)
         comparison = get_comparison_class(comparison, HINDCAST_COMPARISONS)
 
-    forecast, verif = comparison.function(uninit, verif, metric=metric)
+    forecast, verif = comparison.function(uninit, verif)
 
     initialized = initialized.rename({"init": "time"})
 
diff --git a/climpred/tests/test_checks.py b/climpred/tests/test_checks.py
index ef1162882..0d4b32822 100644
--- a/climpred/tests/test_checks.py
+++ b/climpred/tests/test_checks.py
@@ -1,3 +1,5 @@
+"""Testing checks.py."""
+
 import numpy as np
 import pytest
 
@@ -7,7 +9,6 @@
     has_min_len,
     has_valid_lead_units,
     is_in_list,
-    is_xarray,
     match_initialized_dims,
     match_initialized_vars,
 )
@@ -15,111 +16,6 @@
 from climpred.exceptions import DatasetError, DimensionError, VariableError
 
 
-@is_xarray(0)
-def _arbitrary_ds_da_func(ds_da, *args, **kwargs):
-    """Function for testing if checking the first item in arg list is ds/da."""
-    return ds_da, args, kwargs
-
-
-@is_xarray([0, 1])
-def _arbitrary_two_xr_func(ds_da1, ds_da2, *args, **kwargs):
-    """Function for testing if checking the first two items in arg list is ds/da."""
-    return ds_da1, ds_da2, args, kwargs
-
-
-@is_xarray([0, 2])
-def _arbitrary_two_xr_func_random_loc(ds_da1, some_arg, ds_da2, **kwargs):
-    """Function for testing if checking the 1st and 3rd item in arg list is ds/da."""
-    return ds_da1, some_arg, ds_da2, kwargs
-
-
-@is_xarray([0, "da", "other_da"])
-def _arbitrary_three_xr_func_args_keys(ds, da=None, other_da=None, **kwargs):
-    """Function for testing if checking the first in arg list and the
-    keywords da/other_da is ds/da."""
-    return ds, da, other_da, kwargs
-
-
-def test_is_xarray_ds(ds1):
-    """Test if checking the first item in arg list is ds."""
-    ds, args, kwargs = _arbitrary_ds_da_func(ds1, "arg1", "arg2", kwarg1="kwarg1")
-    assert (ds1 == ds).all()
-    assert args == ("arg1", "arg2")
-    assert kwargs == {"kwarg1": "kwarg1"}
-
-
-def test_is_xarray_not_ds():
-    """Test if checking the first item in arg list is not a ds/da, raise an error."""
-    not_a_ds = "not_a_ds"
-    with pytest.raises(IOError) as e:
-        _arbitrary_ds_da_func(not_a_ds, "arg1", "arg2", kwarg1="kwarg1")
-    assert "The input data is not an xarray" in str(e.value)
-
-
-def test_is_xarray_da(da1):
-    """Test if checking the first item in arg list is da."""
-    da, args, kwargs = _arbitrary_ds_da_func(da1, "arg1", "arg2", kwarg1="kwarg1")
-    assert (da1 == da).all()
-    assert args == ("arg1", "arg2")
-    assert kwargs == {"kwarg1": "kwarg1"}
-
-
-def test_is_xarray_ds_da(ds1, da1):
-    """Test if checking the first two items in arg list is ds/da."""
-    ds, da, args, kwargs = _arbitrary_two_xr_func(
-        ds1, da1, "arg1", kwarg1="kwarg1", kwarg2="kwarg2"
-    )
-    assert (ds1 == ds).all()
-    assert (da1 == da).all()
-    assert args == ("arg1",)
-    assert kwargs == {"kwarg1": "kwarg1", "kwarg2": "kwarg2"}
-
-
-def test_is_xarray_ds_da_random_loc(ds1, da1):
-    """Test if checking the first and third items in arg list is ds/da."""
-    ds, arg, da, kwargs = _arbitrary_two_xr_func_random_loc(
-        ds1, "arg1", da1, kwarg1="kwarg1", kwarg2="kwarg2"
-    )
-    assert (ds1 == ds).all()
-    assert (da1 == da).all()
-    assert arg == "arg1"
-    assert kwargs == {"kwarg1": "kwarg1", "kwarg2": "kwarg2"}
-
-
-def test_is_xarray_ds_da_args_keys(ds1, da1, da2):
-    """Test if checking the args and kwargs are ds/da."""
-    ds, da, other_da, kwargs = _arbitrary_three_xr_func_args_keys(
-        ds1, da=da1, other_da=da2, kwarg1="kwarg1"
-    )
-    assert (ds1 == ds).all()
-    assert (da1 == da).all()
-    assert (da2 == other_da).all()
-    assert kwargs == {"kwarg1": "kwarg1"}
-
-
-def test_is_xarray_ds_da_args_keys_not(ds1, da2):
-    """Test if checking the args and kwargs are not ds/da, it raises an error."""
-    not_a_da = np.array([0, 1, 2])
-    with pytest.raises(IOError) as e:
-        _arbitrary_three_xr_func_args_keys(
-            ds1, da=not_a_da, other_da=da2, kwarg1="kwarg1"
-        )
-    assert "The input data is not an xarray" in str(e.value)
-
-
-class _ArbitraryClass:
-    @is_xarray(1)
-    def __init__(self, xobj):
-        pass
-
-
-def test_is_xarray_class_not():
-    """Function for testing if checking class init is ds/da, it raises an error."""
-    with pytest.raises(IOError) as e:
-        _ArbitraryClass("totally not a ds")
-    assert "The input data is not an xarray" in str(e.value)
-
-
 def test_has_dims_str(da1):
     """Test if check works for a string."""
     assert has_dims(da1, "x", "arbitrary")
diff --git a/climpred/utils.py b/climpred/utils.py
index 6b96888af..84a24ec62 100644
--- a/climpred/utils.py
+++ b/climpred/utils.py
@@ -14,8 +14,7 @@
 
 from . import comparisons, metrics
 from .checks import is_in_list
-from .comparison import Comparison
-from .comparisons import COMPARISON_ALIASES
+from .comparisons import COMPARISON_ALIASES, Comparison
 from .constants import FREQ_LIST_TO_INFER_STRIDE, HINDCAST_CALENDAR_STR
 from .exceptions import CoordinateError
 from .metrics import ALL_METRICS, METRIC_ALIASES, Metric
@@ -338,10 +337,10 @@ def get_comparison_class(comparison: Union[str, Comparison], list_: List) -> Com
         # check if comparison allowed
         is_in_list(comparison, list_, "comparison")
         comparison = COMPARISON_ALIASES.get(comparison, comparison)
-        return getattr(comparisons, "__" + comparison)
+        return getattr(comparisons, f"__{comparison}")
     else:
         is_in_list(comparison, list_, "comparison")
-        return getattr(comparisons, "__" + comparison)
+        return getattr(comparisons, f"__{comparison}")
 
 
 def get_lead_cftime_shift_args(units, lead):

From a5ae204665a6c3dad959dbaeb2c0bd2a2af234dd Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Thu, 9 Dec 2021 22:17:54 +0100
Subject: [PATCH 09/56] fix tutorial

---
 climpred/tutorial.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/climpred/tutorial.py b/climpred/tutorial.py
index c4f67a623..af3f11311 100644
--- a/climpred/tutorial.py
+++ b/climpred/tutorial.py
@@ -3,7 +3,6 @@
 import hashlib
 import os as _os
 import urllib
-from os import PathLike
 from typing import Optional, Union
 from urllib.request import urlretrieve as _urlretrieve
 
@@ -123,7 +122,7 @@ def _initialize_proxy(proxy_dict):
 def load_dataset(
     name: Optional[str] = None,
     cache: bool = True,
-    cache_dir: Union[str, PathLike[str]] = _default_cache_dir,
+    cache_dir: str = _default_cache_dir,
     github_url: str = "https://github.com/pangeo-data/climpred-data",
     branch: str = "master",
     extension: Optional[str] = None,

From 64182c04c47490e5e62ddba0151c9152444e982f Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Thu, 9 Dec 2021 23:32:44 +0100
Subject: [PATCH 10/56] add PredictionEnsemble to API

---
 .pre-commit-config.yaml                       |   2 +-
 climpred/classes.py                           | 126 ++++++++----------
 climpred/reference.py                         |  42 +++---
 climpred/stats.py                             |  10 +-
 docs/source/api.rst                           |  93 ++++++++++---
 ...red.classes.PredictionEnsemble.__add__.rst |   6 +
 ...lasses.PredictionEnsemble.__contains__.rst |   6 +
 ...classes.PredictionEnsemble.__delitem__.rst |   6 +
 ...classes.PredictionEnsemble.__getattr__.rst |   6 +
 ...classes.PredictionEnsemble.__getitem__.rst |   6 +
 ...ed.classes.PredictionEnsemble.__init__.rst |   6 +
 ...ed.classes.PredictionEnsemble.__iter__.rst |   6 +
 ...red.classes.PredictionEnsemble.__len__.rst |   6 +
 ...red.classes.PredictionEnsemble.__mul__.rst |   6 +
 ...red.classes.PredictionEnsemble.__sub__.rst |   6 +
 ...classes.PredictionEnsemble.__truediv__.rst |   6 +
 ...pred.classes.PredictionEnsemble.chunks.rst |   6 +
 ....classes.PredictionEnsemble.chunksizes.rst |   6 +
 ...pred.classes.PredictionEnsemble.coords.rst |   6 +
 ...d.classes.PredictionEnsemble.data_vars.rst |   6 +
 ...impred.classes.PredictionEnsemble.dims.rst |   6 +
 ...pred.classes.PredictionEnsemble.equals.rst |   6 +
 ...d.classes.PredictionEnsemble.identical.rst |   6 +
 ...pred.classes.PredictionEnsemble.nbytes.rst |   6 +
 ...mpred.classes.PredictionEnsemble.sizes.rst |   6 +
 25 files changed, 279 insertions(+), 114 deletions(-)
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__add__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__contains__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__delitem__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__getattr__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__getitem__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__init__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__iter__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__len__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__mul__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__sub__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.__truediv__.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.chunks.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.chunksizes.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.coords.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.data_vars.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.dims.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.equals.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.identical.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.nbytes.rst
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.sizes.rst

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0bac364c5..e2936649d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,7 +50,7 @@ repos:
       hooks:
       -   id: pydocstyle
       args:
-      #- --ignore=W503
+      - --ignore=D301
       - --max-line-length=93
       - --convention google # https://google.github.io/styleguide/pyguide.html#Comments
 
diff --git a/climpred/classes.py b/climpred/classes.py
index bd2ec8957..400aa1c84 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -760,7 +760,7 @@ def smooth(
         how: str = "mean",
         **xesmf_kwargs: str,
     ):
-        """Smooth in space/aggregate in time ``PredictionEnsemble``.
+        """Smooth in space and/or aggregate in time ``PredictionEnsemble``.
 
         Args:
             smooth_kws: Dictionary to specify the dims to
@@ -769,8 +769,8 @@ def smooth(
                 :py:func:`~climpred.smoothing.temporal_smoothing`.
                 Shortcut for Goddard et al. 2013 recommendations:
                 'goddard2013'. Defaults to None.
-            how: how to smooth temporally. From ['mean','sum']. Defaults to
-                'mean'.
+            how: how to smooth temporally. Choose from ``["mean", "sum"]``.
+                Defaults to ``"mean"``.
             **xesmf_kwargs: kwargs passed to
                 :py:func:`~climpred.smoothing.spatial_smoothing_xesmf`
 
@@ -887,15 +887,14 @@ def smooth(
         return self
 
     def remove_seasonality(
-        self, initialized_dim: str = "init", seasonality: Union[None, str] = None
+        self, seasonality: Union[None, str] = None
     ) -> "PredictionEnsemble":
         """Remove seasonal cycle from all climpred datasets.
 
         Args:
-            initialized_dim: dimension name of initialized dataset to calculate
-                climatology over. Defaults to "init".
             seasonality: Seasonality to be removed. Choose from:
-                ["season", "month", "dayofyear"]. Defaults to OPTIONS["seasonality"].
+                ``["season", "month", "dayofyear"]``.
+                Defaults to ``OPTIONS["seasonality"]``.
 
         Examples:
             >>> # example already without seasonal cycle
@@ -926,7 +925,6 @@ def _remove_seasonality(ds, initialized_dim="init", seasonality=None):
 
         return self.map(
             _remove_seasonality,
-            initialized_dim=initialized_dim,
             seasonality=seasonality,
         )
 
@@ -1000,10 +998,9 @@ def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
           initialized: prediction ensemble output.
 
         Attributes:
-            control: Dictionary of control run associated with the initialized
-                     ensemble.
-            uninitialized: Dictionary of uninitialized run that is
-                           bootstrapped from the initialized run.
+            control: datasets dictionary item of control simulation associated with the
+                initialized ensemble.
+            uninitialized: datasets dictionary item of uninitialized forecast.
         """
         super().__init__(initialized)
         # Reserve sub-dictionary for the control simulation.
@@ -1176,8 +1173,8 @@ def verify(
                 reference:                     []
 
 
-            Pearson's Anomaly Correlation ('acc') comparing every member to every
-            other member (``m2m``) reducing dimensions ``member`` and ``init`` while
+            Pearson's Anomaly Correlation (``"acc"``) comparing every member to every
+            other member (``"m2m"``) reducing dimensions ``member`` and ``init`` while
             also calculating reference skill for the ``persistence``, ``climatology``
             and ``uninitialized`` forecast.
 
@@ -1471,7 +1468,7 @@ def bootstrap(
             reference: Type of reference forecasts with which to verify against.
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
                 Defaults to ``None`` meaning no reference.
-                If None or empty, returns no p value.
+                If ``None`` or ``[]``, returns no p value.
                 For ``persistence``, choose between
                 ``set_options(PerfectModel_persistence_from_initialized_lead_0)=False``
                 (default) using :py:func:`~climpred.reference.compute_persistence` or
@@ -1519,11 +1516,11 @@ def bootstrap(
             https://doi.org/10/f4jjvf.
 
         Example:
-            Calculate the Pearson's Anomaly Correlation ('acc') comparing every member
-            to every other member (``m2m``) reducing dimensions ``member`` and
+            Calculate the Pearson's Anomaly Correlation (``"acc"``) comparing every
+            member to every other member (``"m2m"``) reducing dimensions ``member`` and
             ``init`` 50 times after resampling ``member`` dimension with replacement.
-            Also calculate reference skill for the ``persistence``, ``climatology``
-            and ``uninitialized`` forecast and compare whether initialized skill is
+            Also calculate reference skill for the ``"persistence"``, ``"climatology"``
+            and ``"uninitialized"`` forecast and compare whether initialized skill is
             better than reference skill: Returns verify skill, probability that
             reference forecast performs better than initialized and the lower and
             upper bound of the resample.
@@ -1632,10 +1629,9 @@ def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
           initialized: initialized prediction ensemble output.
 
         Attributes:
-          observations: Dictionary of verification data to associate with the decadal
-              prediction ensemble.
-          uninitialized: Dictionary of companion (or bootstrapped)
-              uninitialized ensemble run.
+          observations: datasets dictionary item of verification data to associate with
+            the prediction ensemble.
+          uninitialized: datasets dictionary item of uninitialized forecast.
         """
         super().__init__(initialized)
         self._datasets.update({"observations": {}})
@@ -1760,7 +1756,7 @@ def generate_uninitialized(
         """Generate ``uninitialized`` by resampling from ``initialized``.
 
         Args:
-            resample_dim: dimension to resample from. Must contain "init".
+            resample_dim: dimension to resample from. Must contain ``"init"``.
 
         Returns:
             resampled uninitialized ensemble added to HindcastEnsemble
@@ -1810,22 +1806,21 @@ def plot_alignment(
         Args:
             alignment: which inits or verification times should be aligned?
 
-                - ``'maximize'``: maximize the degrees of freedom by slicing
-                    ``initialized`` and ``verif`` to a common time frame at each lead.
-                - ``"same_inits"`: slice to a common ``init`` frame prior to computing
-                    metric. This philosophy follows the thought that each lead should be
-                    based on the same set of initializations.
-
+                - ``"maximize"``: maximize the degrees of freedom by slicing
+                  ``initialized`` and ``verif`` to a common time frame at each lead.
+                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
+                  metric. This philosophy follows the thought that each lead should be
+                  based on the same set of initializations.
                 - ``"same_verif"``: slice to a common/consistent verification time frame
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
-
                 - ``None`` defaults to the three above.
+
             reference: Type of reference forecasts with which to verify against.
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
                 Defaults to ``None`` meaning no reference.
-            date2num_units: passed to cftime.date2num as units
-            return_xr:  return
+            date2num_units: passed to ``cftime.date2num`` as units
+            return_xr: if ``True`` return xarray.DataArray else plot
             cmap: color palette
             edgecolors: color of the edges in the plot
             **plot_kwargs: arguments passed to ``plot``.
@@ -1862,7 +1857,7 @@ def plot_alignment(
             Coordinates:
               * init       (init) object 1954-01-01 00:00:00 ... 2014-01-01 00:00:00
               * lead       (lead) int32 1 2 3 4 5 6 7 8 9 10
-              * alignment  (alignment) <U10 'same_init' 'same_verif' 'maximize'
+              * alignment  (alignment) <U10 'same_init' 'same_verif' 'maximize'
             Attributes:
                 units:    days since 1960-01-01
 
@@ -1939,13 +1934,11 @@ def verify(
                 ``None`` meaning that all dimensions other than ``lead`` are reduced.
             alignment: which inits or verification times should be aligned?
 
-                - ``'maximize'``: maximize the degrees of freedom by slicing ``initialized`` and
-                  ``verif`` to a common time frame at each lead.
-
-                - ``"same_inits"`: slice to a common ``init`` frame prior to computing
+                - ``"maximize"``: maximize the degrees of freedom by slicing
+                  ``initialized`` and ``verif`` to a common time frame at each lead.
+                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                   metric. This philosophy follows the thought that each lead should be
                   based on the same set of initializations.
-
                 - ``"same_verif"``: slice to a common/consistent verification time frame
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
@@ -1989,11 +1982,11 @@ def verify(
                 dim:                           ['init', 'member']
                 reference:                     []
 
-            Pearson's Anomaly Correlation ('acc') comparing the ensemble mean with the
-            verification (``e2o``) over the same initializations (``same_inits``) for
-            all leads reducing dimension ``init`` while also calculating reference
-            skill for the ``persistence``, ``climatology`` and ``uninitialized``
-            forecast.
+            Pearson's Anomaly Correlation (``"acc"``) comparing the ensemble mean with
+            the verification (``"e2o"``) over the same initializations
+            (``"same_inits"``) for all leads reducing dimension ``init`` while also
+            calculating reference skill for the ``"persistence"``, ``"climatology"``
+            and ``"uninitialized"`` forecast.
 
             >>> HindcastEnsemble.verify(
             ...     metric="acc",
@@ -2208,16 +2201,14 @@ def bootstrap(
             reference: Type of reference forecasts with which to verify against.
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
                 Defaults to ``None`` meaning no reference.
-                If None or empty, returns no p value.
+                If ``None`` or ``[]``, returns no p value.
             alignment: which inits or verification times should be aligned?
 
-                - 'maximize': maximize the degrees of freedom by slicing ``init`` and
+                - ``"maximize"``: maximize the degrees of freedom by slicing ``init`` and
                   ``verif`` to a common time frame at each lead.
-
-                - ``"same_inits"`: slice to a common ``init`` frame prior to computing
+                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                   metric. This philosophy follows the thought that each lead should be
                   based on the same set of initializations.
-
                 - ``"same_verif"``: slice to a common/consistent verification time frame
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
@@ -2255,12 +2246,12 @@ def bootstrap(
                     bootstrapping with replacement.
 
         Example:
-            Calculate the Pearson's Anomaly Correlation ('acc') comparing the ensemble
-            mean forecast to the verification (``e2o``) over the same verification
-            times (``same_verifs``) for all leads reducing dimensions ``init`` 50
-            times after resampling ``member`` dimension with replacement. Also
-            calculate reference skill for the ``persistence``, ``climatology``
-            and ``uninitialized`` forecast and compare whether initialized skill is
+            Calculate the Pearson's Anomaly Correlation (``"acc"``) comparing the
+            ensemble mean forecast to the verification (``"e2o"``) over the same
+            verification times (``"same_verifs"``) for all leads reducing dimensions
+            ``init`` 50 times after resampling ``member`` dimension with replacement.
+            Also calculate reference skill for the ``"persistence"``, ``"climatology"``
+            and ``"uninitialized"`` forecast and compare whether initialized skill is
             better than reference skill: Returns verify skill, probability that
             reference forecast performs better than initialized and the lower and
             upper bound of the resample.
@@ -2379,13 +2370,11 @@ def remove_bias(
         Args:
             alignment: which inits or verification times should be aligned?
 
-                - ``'maximize'``: maximize the degrees of freedom by slicing ``initialized`` and
-                  ``verif`` to a common time frame at each lead.
-
-                - ``"same_inits"`: slice to a common ``init`` frame prior to computing
+                - ``"maximize"``: maximize the degrees of freedom by slicing
+                  ``initialized`` and ``verif`` to a common time frame at each lead.
+                - ``"same_inits"``: slice to a common ``init`` frame prior to computing
                   metric. This philosophy follows the thought that each lead should be
                   based on the same set of initializations.
-
                 - ``"same_verif"``: slice to a common/consistent verification time frame
                   prior to computing metric. This philosophy follows the thought that
                   each lead should be based on the same set of verification dates.
@@ -2394,9 +2383,10 @@ def remove_bias(
                 Defaults to 'additive_mean'. Select from:
 
                 - ``"additive_mean"``: correcting the mean forecast additively
-                - ``"multiplicative_mean"``: correcting the mean forecast multiplicatively
+                - ``"multiplicative_mean"``: correcting the mean forecast
+                  multiplicatively
                 - ``"multiplicative_std"``: correcting the standard deviation
-                    multiplicatively
+                  multiplicatively
                 - ``"modified_quantile"``: `Reference <https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub>`_
                 - ``"basic_quantile"``: `Reference <https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168>`_
                 - ``"gamma_mapping"``: `Reference <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
@@ -2412,11 +2402,11 @@ def remove_bias(
                 and test period to apply bias correction to? For a detailed
                 description, see `Risbey et al. 2021 <http://www.nature.com/articles/s41467-021-23771-z>`_:  # noqa: E501
 
-                - `fair`: no overlap between `train` and `test` (recommended).
+                - ``"fair"``: no overlap between `train` and `test` (recommended).
                     Set either `train_init` or `train_time`.
-                - `unfair`: completely overlapping `train` and `test`
+                - ``"unfair"``: completely overlapping `train` and `test`
                     (climpred default).
-                - `unfair-cv`: overlapping `train` and `test` except for current
+                - ``"unfair-cv"``: overlapping `train` and `test` except for current
                     `init`, which is `left out <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation>`_
                     (set `cv='LOO'`).
 
@@ -2464,7 +2454,7 @@ def remove_bias(
                 reference:                     []
 
             Note that this HindcastEnsemble is already bias reduced, therefore
-            ``train_test_split='unfair'`` has hardly any effect. Use all
+            ``train_test_split="unfair"`` has hardly any effect. Use all
             initializations to calculate bias and verify skill:
 
             >>> HindcastEnsemble.remove_bias(
@@ -2492,8 +2482,8 @@ def remove_bias(
 
             Separate initializations 1954 - 1980 to calculate bias. Note that
             this HindcastEnsemble is already bias reduced, therefore
-            ``train_test_split='fair'`` worsens skill here. Generally,
-            ``train_test_split='fair'`` is recommended to use for a fair
+            ``train_test_split="fair"`` worsens skill here. Generally,
+            ``train_test_split="fair"`` is recommended to use for a fair
             comparison against real-time forecasts.
 
             >>> HindcastEnsemble.remove_bias(
diff --git a/climpred/reference.py b/climpred/reference.py
index 3225f707a..62ba9459d 100644
--- a/climpred/reference.py
+++ b/climpred/reference.py
@@ -290,17 +290,18 @@ def compute_persistence(
     Args:
         initialized: The initialized ensemble.
         verif: Verification data.
-        metric: Metric name to apply at each lag for the persistence computation. Default: 'pearson_r'
+        metric: Metric name to apply at each lag for the persistence computation.
+            Default: ``"pearson_r"``.
         alignment: which inits or verification times should be aligned?
 
-            - ``maximize``: maximize the degrees of freedom by slicing
-                ``initialized`` and ``verif`` to a common time frame at each lead.
-            - ``same_inits``: slice to a common init frame prior to computing
-                metric. This philosophy follows the thought that each lead should be
-                based on the same set of initializations.
-            - ``same_verif``: slice to a common/consistent verification time frame
-                prior to computing metric. This philosophy follows the thought that
-                each lead should be based on the same set of verification dates.
+            - ``"maximize"``: maximize the degrees of freedom by slicing
+              ``initialized`` and ``verif`` to a common time frame at each lead.
+            - ``"same_inits"``: slice to a common init frame prior to computing
+              metric. This philosophy follows the thought that each lead should be
+              based on the same set of initializations.
+            - ``"same_verif"``: slice to a common/consistent verification time frame
+              prior to computing metric. This philosophy follows the thought that
+              each lead should be based on the same set of verification dates.
 
         dim: dimension to apply metric over.
         ** metric_kwargs: additional keywords to be passed to metric
@@ -396,13 +397,13 @@ def compute_persistence_from_first_lead(
         alignment: which inits or verification times should be aligned?
 
             - ``maximize``: maximize the degrees of freedom by slicing ``initialized``
-                and ``verif`` to a common time frame at each lead.
+              and ``verif`` to a common time frame at each lead.
             - ``same_inits``: slice to a common ``init`` frame prior to computing
-                metric. This philosophy follows the thought that each lead should be
-                based on the same set of initializations.
+              metric. This philosophy follows the thought that each lead should be
+              based on the same set of initializations.
             - ``same_verif``: slice to a common/consistent verification time frame
-                prior to computing metric. This philosophy follows the thought that
-                each lead should be based on the same set of verification dates.
+              prior to computing metric. This philosophy follows the thought that
+              each lead should be based on the same set of verification dates.
 
         dim: dimension to apply metric over.
         ** metric_kwargs: additional keywords to be passed to metric
@@ -539,15 +540,14 @@ def compute_uninitialized(
         alignment: which inits or verification times should be aligned?
 
             - ``"maximize"``: maximize the degrees of freedom by slicing
-                ``initialized`` and ``verif`` to a common time frame at each
-                lead.
+              ``initialized`` and ``verif`` to a common time frame at each lead.
             - ``"same_inits"``: slice to a common init frame prior to computing
-                metric. This philosophy follows the thought that each lead
-                should be based on the same set of initializations.
+              metric. This philosophy follows the thought that each lead
+              should be based on the same set of initializations.
             - ``same_verif``: slice to a common/consistent verification time
-                frame prior to computing metric. This philosophy follows the
-                thought that each lead should be based on the same set of
-                verification dates.
+              frame prior to computing metric. This philosophy follows the
+              thought that each lead should be based on the same set of
+              verification dates.
 
         ** metric_kwargs: additional keywords to be passed to metric
 
diff --git a/climpred/stats.py b/climpred/stats.py
index 654f92516..7cc6dcda4 100644
--- a/climpred/stats.py
+++ b/climpred/stats.py
@@ -47,7 +47,7 @@ def decorrelation_time(
     r"""Calculate the decorrelaton time of a time series.
 
     .. math::
-        \\tau_{d} = 1 + 2 * \\sum_{k=1}^{r}(\\alpha_{k})^{k}
+        \tau_{d} = 1 + 2 * \sum_{k=1}^{r}(\alpha_{k})^{k}
 
     Args:
         da: input.
@@ -59,7 +59,7 @@ def decorrelation_time(
 
     Reference:
         * Storch, H. v, and Francis W. Zwiers. Statistical Analysis in Climate
-          Research. Cambridge ; New York: Cambridge University Press, 1999.,
+          Research. Cambridge; New York: Cambridge University Press, 1999.,
           p.373
 
     """
@@ -90,8 +90,8 @@ def dpp(
 
     .. math::
 
-        DPP_{\\mathrm{unbiased}}(m) = \\frac{\\sigma^{2}_{m} -
-        \\frac{1}{m}\\cdot\\sigma^{2}}{\\sigma^{2}}
+        DPP_{\mathrm{unbiased}}(m) = \frac{\sigma^{2}_{m} -
+        \frac{1}{m}\cdot\sigma^{2}}{\sigma^{2}}
 
     Note:
         Resplandy et al. 2015 and Seferian et al. 2018 calculate unbiased DPP
@@ -189,7 +189,7 @@ def varweighted_mean_period(
     r"""Calculate the variance weighted mean period of time series.
 
     .. math::
-        P_{x} = \\frac{\\sum_k V(f_k,x)}{\\sum_k f_k  \\cdot V(f_k,x)}
+        P_{x} = \frac{\sum_k V(f_k,x)}{\sum_k f_k  \cdot V(f_k,x)}
 
     Args:
         da: input data including dim.
diff --git a/docs/source/api.rst b/docs/source/api.rst
index d4ebf8346..7c2a861d3 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -1,8 +1,9 @@
 API Reference
 =============
 
-This page provides an auto-generated summary of climpred's API.
-For more details and examples, refer to the relevant chapters in the main part of the documentation.
+This page provides an auto-generated summary of ``climpred``'s API.
+For more details and examples, refer to the relevant chapters in the main part of the
+documentation.
 
 High-Level Classes
 ------------------
@@ -13,21 +14,76 @@ High-Level Classes
 A primary feature of ``climpred`` is our prediction ensemble objects,
 :py:class:`~climpred.classes.HindcastEnsemble` and
 :py:class:`~climpred.classes.PerfectModelEnsemble`. Users can append their initialized
-ensemble to these classes, as well as an arbitrary number of verification products (assimilations,
-reconstructions, observations), control runs, and uninitialized ensembles.
+ensemble to these classes, as well as an arbitrary number of verification products
+(assimilations, reconstructions, observations), control runs, and uninitialized
+ensembles.
+
+PredictionEnsemble
+~~~~~~~~~~~~~~~~~~
+
+:py:class:`~climpred.classes.PredictionEnsemble` is the base class for
+:py:class:`~climpred.classes.HindcastEnsemble` and
+:py:class:`~climpred.classes.PerfectModelEnsemble`.
+:py:class:`~climpred.classes.PredictionEnsemble` cannot be called directly, but
+:py:class:`~climpred.classes.HindcastEnsemble` and
+:py:class:`~climpred.classes.PerfectModelEnsemble` inherit the common base
+functionality.
+
+.. autosummary::
+    :toctree: api/
+
+    PredictionEnsemble
+    PredictionEnsemble.__init__
+
+-------
+Builtin
+-------
+
+.. autosummary::
+    :toctree: api/
+
+    PredictionEnsemble.__len__
+    PredictionEnsemble.__iter__
+    PredictionEnsemble.__delitem__
+    PredictionEnsemble.__contains__
+    PredictionEnsemble.__add__
+    PredictionEnsemble.__sub__
+    PredictionEnsemble.__mul__
+    PredictionEnsemble.__truediv__
+    PredictionEnsemble.__getitem__
+    PredictionEnsemble.__getattr__
+
+----------
+Properties
+----------
+
+.. autosummary::
+    :toctree: api/
+
+    PredictionEnsemble.coords
+    PredictionEnsemble.nbytes
+    PredictionEnsemble.sizes
+    PredictionEnsemble.dims
+    PredictionEnsemble.chunks
+    PredictionEnsemble.chunksizes
+    PredictionEnsemble.data_vars
+    PredictionEnsemble.equals
+    PredictionEnsemble.identical
+
 
 HindcastEnsemble
 ~~~~~~~~~~~~~~~~
 
 A ``HindcastEnsemble`` is a prediction ensemble that is initialized off of some form of
-observations (an assimilation, renanalysis, etc.). Thus, it is anticipated that forecasts are
-verified against observation-like products. Read more about the terminology
-`here <terminology.html>`_.
+observations (an assimilation, reanalysis, etc.). Thus, it is anticipated that
+forecasts are verified against observation-like products. Read more about the
+terminology `here <terminology.html>`_.
 
 .. autosummary::
     :toctree: api/
 
     HindcastEnsemble
+    HindcastEnsemble.__init__
 
 -------------------------
 Add and Retrieve Datasets
@@ -36,7 +92,6 @@ Add and Retrieve Datasets
 .. autosummary::
     :toctree: api/
 
-    HindcastEnsemble.__init__
     HindcastEnsemble.add_observations
     HindcastEnsemble.add_uninitialized
     HindcastEnsemble.get_initialized
@@ -87,15 +142,17 @@ Visualization
 PerfectModelEnsemble
 ~~~~~~~~~~~~~~~~~~~~
 
-A ``PerfectModelEnsemble`` is a prediction ensemble that is initialized off of a control simulation
-for a number of randomly chosen initialization dates. Thus, forecasts cannot be verified against
-real-world observations. Instead, they are `compared <comparisons.html>`_ to one another and to the
+A ``PerfectModelEnsemble`` is a prediction ensemble that is initialized off of a
+control simulation for a number of randomly chosen initialization dates. Thus,
+forecasts cannot be verified against real-world observations.
+Instead, they are `compared <comparisons.html>`_ to one another and to the
 original control run. Read more about the terminology `here <terminology.html>`_.
 
 .. autosummary::
     :toctree: api/
 
     PerfectModelEnsemble
+    PerfectModelEnsemble.__init__
 
 -------------------------
 Add and Retrieve Datasets
@@ -104,7 +161,6 @@ Add and Retrieve Datasets
 .. autosummary::
     :toctree: api/
 
-    PerfectModelEnsemble.__init__
     PerfectModelEnsemble.add_control
     PerfectModelEnsemble.get_initialized
     PerfectModelEnsemble.get_control
@@ -152,14 +208,12 @@ Visualization
 Direct Function Calls
 ---------------------
 
-A user can directly call functions in ``climpred``. This requires entering more arguments, e.g.
-the initialized ensemble
-:py:class:`~xarray.core.dataset.Dataset`/:py:class:`xarray.core.dataarray.DataArray` directly as
+While not encouraged anymore, a user can directly call functions in ``climpred``.
+This requires entering more arguments, e.g. the initialized ensemble directly as
 well as a verification product. Our object
 :py:class:`~climpred.classes.HindcastEnsemble` and
-:py:class:`~climpred.classes.PerfectModelEnsemble` wrap most of these functions, making the
-analysis process much simpler. Once we have wrapped all of the functions in their entirety, we will
-likely deprecate the ability to call them directly.
+:py:class:`~climpred.classes.PerfectModelEnsemble` wrap most of these functions, making
+the analysis process much simpler.
 
 Bootstrap
 ~~~~~~~~~
@@ -329,7 +383,8 @@ For a thorough look at our metrics library, please see the
 
 Config
 ------
-Set options analogous to `xarray <http://xarray.pydata.org/en/stable/generated/xarray.set_options.html>`_.
+Set options analogous to
+`xarray <http://xarray.pydata.org/en/stable/generated/xarray.set_options.html>`_.
 
 .. currentmodule:: climpred.options
 
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__add__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__add__.rst
new file mode 100644
index 000000000..dd27e6fb9
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__add__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_add\_\_
+===============================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__add__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__contains__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__contains__.rst
new file mode 100644
index 000000000..95217fae0
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__contains__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_contains\_\_
+====================================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__contains__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__delitem__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__delitem__.rst
new file mode 100644
index 000000000..9f9fac932
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__delitem__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_delitem\_\_
+===================================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__delitem__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__getattr__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__getattr__.rst
new file mode 100644
index 000000000..6296eb1e7
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__getattr__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_getattr\_\_
+===================================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__getattr__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__getitem__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__getitem__.rst
new file mode 100644
index 000000000..330d62149
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__getitem__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_getitem\_\_
+===================================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__getitem__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__init__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__init__.rst
new file mode 100644
index 000000000..3cbdc04a7
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__init__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_init\_\_
+================================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__init__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__iter__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__iter__.rst
new file mode 100644
index 000000000..2c60da8ff
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__iter__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_iter\_\_
+================================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__iter__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__len__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__len__.rst
new file mode 100644
index 000000000..460b72a7e
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__len__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_len\_\_
+===============================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__len__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__mul__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__mul__.rst
new file mode 100644
index 000000000..a300969dd
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__mul__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_mul\_\_
+===============================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__mul__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__sub__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__sub__.rst
new file mode 100644
index 000000000..ac701d2bd
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__sub__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_sub\_\_
+===============================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__sub__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.__truediv__.rst b/docs/source/api/climpred.classes.PredictionEnsemble.__truediv__.rst
new file mode 100644
index 000000000..abeeaa6e6
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.__truediv__.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.\_\_truediv\_\_
+===================================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.__truediv__
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.chunks.rst b/docs/source/api/climpred.classes.PredictionEnsemble.chunks.rst
new file mode 100644
index 000000000..332963776
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.chunks.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.chunks
+==========================================
+
+.. currentmodule:: climpred.classes
+
+.. autoproperty:: PredictionEnsemble.chunks
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.chunksizes.rst b/docs/source/api/climpred.classes.PredictionEnsemble.chunksizes.rst
new file mode 100644
index 000000000..fc41e9d0f
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.chunksizes.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.chunksizes
+==============================================
+
+.. currentmodule:: climpred.classes
+
+.. autoproperty:: PredictionEnsemble.chunksizes
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.coords.rst b/docs/source/api/climpred.classes.PredictionEnsemble.coords.rst
new file mode 100644
index 000000000..fed9a3950
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.coords.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.coords
+==========================================
+
+.. currentmodule:: climpred.classes
+
+.. autoproperty:: PredictionEnsemble.coords
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.data_vars.rst b/docs/source/api/climpred.classes.PredictionEnsemble.data_vars.rst
new file mode 100644
index 000000000..dfaebd0e3
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.data_vars.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.data\_vars
+==============================================
+
+.. currentmodule:: climpred.classes
+
+.. autoproperty:: PredictionEnsemble.data_vars
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.dims.rst b/docs/source/api/climpred.classes.PredictionEnsemble.dims.rst
new file mode 100644
index 000000000..18bc52703
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.dims.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.dims
+========================================
+
+.. currentmodule:: climpred.classes
+
+.. autoproperty:: PredictionEnsemble.dims
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.equals.rst b/docs/source/api/climpred.classes.PredictionEnsemble.equals.rst
new file mode 100644
index 000000000..f692dff85
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.equals.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.equals
+==========================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.equals
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.identical.rst b/docs/source/api/climpred.classes.PredictionEnsemble.identical.rst
new file mode 100644
index 000000000..c63429c0d
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.identical.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.identical
+=============================================
+
+.. currentmodule:: climpred.classes
+
+.. automethod:: PredictionEnsemble.identical
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.nbytes.rst b/docs/source/api/climpred.classes.PredictionEnsemble.nbytes.rst
new file mode 100644
index 000000000..bcfa107a6
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.nbytes.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.nbytes
+==========================================
+
+.. currentmodule:: climpred.classes
+
+.. autoproperty:: PredictionEnsemble.nbytes
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.sizes.rst b/docs/source/api/climpred.classes.PredictionEnsemble.sizes.rst
new file mode 100644
index 000000000..e280debc0
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.sizes.rst
@@ -0,0 +1,6 @@
+climpred.classes.PredictionEnsemble.sizes
+=========================================
+
+.. currentmodule:: climpred.classes
+
+.. autoproperty:: PredictionEnsemble.sizes

From b68c639988485f809a0fb0b736a0eef0b8156960 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Thu, 9 Dec 2021 23:37:36 +0100
Subject: [PATCH 11/56] fix test

---
 CHANGELOG.rst                       | 7 +++++++
 climpred/classes.py                 | 4 ++--
 climpred/tests/test_bias_removal.py | 4 +++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 6cb27c4b1..587601826 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -119,6 +119,13 @@ Internals/Minor Fixes
   :py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`. Now all keywords are
   captured in the skill dataset attributes ``.attrs``.
   (:issue:`475`, :pr:`694`) `Aaron Spring`_.
+- docstrings formatting with `blackdocs <https://github.com/keewis/blackdoc>`_. (:pr:`708`) `Aaron Spring`_.
+- documentation linting with `doc8 <https://github.com/PyCQA/doc8>`_ and `pydocstyle <http://www.pydocstyle.org/en/stable/usage.html>`_. (:pr:`708`) `Aaron Spring`_.
+
+Documentation
+-------------
+- Refresh all docs. (:issue:`707`, :pr:`708`) `Aaron Spring`_.
+- (:issue:`707`, :pr:`708`) `Aaron Spring`_.
 
 
 climpred v2.1.6 (2021-08-31)
diff --git a/climpred/classes.py b/climpred/classes.py
index 400aa1c84..3aa0ca51a 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -2572,11 +2572,11 @@ def remove_bias(
         if train_test_split not in BIAS_CORRECTION_TRAIN_TEST_SPLIT_METHODS:
             raise NotImplementedError(
                 f"train_test_split='{train_test_split}' not implemented. Please choose "
-                f" `train_test_split` from {BIAS_CORRECTION_TRAIN_TEST_SPLIT_METHODS}, "
+                f"`train_test_split` from {BIAS_CORRECTION_TRAIN_TEST_SPLIT_METHODS}, "
                 "see Risbey et al. 2021 "
                 "http://www.nature.com/articles/s41467-021-23771-z for description and "
                 "https://github.com/pangeo-data/climpred/issues/648 for implementation "
-                " status."
+                "status."
             )
 
         alignment = _check_valid_alignment(alignment)
diff --git a/climpred/tests/test_bias_removal.py b/climpred/tests/test_bias_removal.py
index 758728cad..fd5bfd010 100644
--- a/climpred/tests/test_bias_removal.py
+++ b/climpred/tests/test_bias_removal.py
@@ -538,7 +538,9 @@ def test_remove_bias_errors(hindcast_NMME_Nino34):
             how=how, alignment="same_verif", train_test_split="fair", train_time=2000
         )
 
-    with pytest.raises(ValueError, match="Please provide `cv="):
+    with pytest.raises(
+        ValueError, match="Please provide cross-validation keyword `cv="
+    ):
         he.remove_bias(how=how, alignment="same_verif", train_test_split="unfair-cv")
 
     with pytest.raises(NotImplementedError, match="please choose from"):

From 2c42418c697e804b826f836c6afb8536514120ef Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 00:22:35 +0100
Subject: [PATCH 12/56] alignment.py

---
 .pre-commit-config.yaml             |   3 +
 CHANGELOG.rst                       | 135 ++++++++++++++++------------
 climpred/alignment.py               |  62 +++++++++----
 climpred/tests/test_bias_removal.py |   8 +-
 4 files changed, 132 insertions(+), 76 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e2936649d..f23a56627 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -63,3 +63,6 @@ repos:
     rev: v0.3.4
     hooks:
     -   id: blackdoc
+        args:
+        - --include=*.py
+        - --exclude=CHANGELOG.rst
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 587601826..a7dd1dd7c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -33,7 +33,7 @@ New Features
 
   .. code-block:: python
 
-      >>> hind = climpred.tutorial.load_dataset('CESM-DP-SST')
+      >>> hind = climpred.tutorial.load_dataset("CESM-DP-SST")
       >>> hind.lead.attrs["units"] = "years"
       >>> climpred.HindcastEnsemble(hind).get_initialized()
       <xarray.Dataset>
@@ -85,10 +85,10 @@ New Features
       >>> skill.sst.plot(hue="model", col="month", col_wrap=3)
 
   (:issue:`635`, :pr:`690`) `Aaron Spring`_.
-- :py:meth:`~climpred.classes.HindcastEnsemble.plot_alignment` shows how forecast and observations are
-  aligned based on the `alignment <alignment.html>`_ keyword. This may help
-  understanding which dates are matched for the different ``alignment`` approaches.
-  (:issue:`701`, :pr:`702`) `Aaron Spring`_.
+- :py:meth:`~climpred.classes.HindcastEnsemble.plot_alignment` shows how forecast and
+  observations are aligned based on the `alignment <alignment.html>`_ keyword.
+  This may help to understand which dates are matched for the different ``alignment``
+  approaches. (:issue:`701`, :pr:`702`) `Aaron Spring`_.
 
   .. ipython:: python
       :okwarning:
@@ -119,8 +119,11 @@ Internals/Minor Fixes
   :py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`. Now all keywords are
   captured in the skill dataset attributes ``.attrs``.
   (:issue:`475`, :pr:`694`) `Aaron Spring`_.
-- docstrings formatting with `blackdocs <https://github.com/keewis/blackdoc>`_. (:pr:`708`) `Aaron Spring`_.
-- documentation linting with `doc8 <https://github.com/PyCQA/doc8>`_ and `pydocstyle <http://www.pydocstyle.org/en/stable/usage.html>`_. (:pr:`708`) `Aaron Spring`_.
+- docstrings formatting with `blackdocs <https://github.com/keewis/blackdoc>`_.
+  (:pr:`708`) `Aaron Spring`_.
+- documentation linting with `doc8 <https://github.com/PyCQA/doc8>`_ and
+  `pydocstyle <http://www.pydocstyle.org/en/stable/usage.html>`_.
+  (:pr:`708`) `Aaron Spring`_.
 
 Documentation
 -------------
@@ -137,11 +140,12 @@ are implemented.
 
 Bug Fixes
 ---------
-- Fix ``results='p'`` in :py:meth:`~climpred.classes.HindcastEnsemble.bootstrap` and
-  :py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap` when ``reference='climatology'``.
+- Fix ``results="p"`` in :py:meth:`~climpred.classes.HindcastEnsemble.bootstrap` and
+  :py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap` when
+  ``reference='climatology'``.
   (:issue:`668`, :pr:`670`) `Aaron Spring`_.
 - :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias` for ``how`` in
-  [``modified_quantile``, ``basic_quantile``, ``gamma_mapping``, ``normal_mapping``]
+  ``["modified_quantile", "basic_quantile", "gamma_mapping", "normal_mapping"]``
   from `bias_correction <https://github.com/pankajkarman/bias_correction>`_
   takes all ``member``s to create model distribution. (:pr:`667`) `Aaron Spring`_.
 
@@ -151,12 +155,16 @@ New Features
   `xclim <https://xclim.readthedocs.io/en/stable/sdba_api.html>`_ in
   :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`:
 
-    * ``how="EmpiricalQuantileMapping"``: `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.EmpiricalQuantileMapping>`_
-    * ``how="DetrendedQuantileMapping"``: `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.DetrendedQuantileMapping>`_
-    * ``how="PrincipalComponents"``: `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.PrincipalComponents>`_
-    * ``how="QuantileDeltaMapping"``: `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.QuantileDeltaMapping>`_
-    * ``how="Scaling"``: `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.Scaling>`_
-    * ``how="LOCI"``: `Reference <https://xclim.readthedocs.io/en/stable/sdba_api.html#xclim.sdba.adjustment.LOCI>`_
+    * ``how="EmpiricalQuantileMapping"``:
+      :py:class:`~xclim.sdba.adjustment.EmpiricalQuantileMapping`
+    * ``how="DetrendedQuantileMapping"``:
+      :py:class:`~xclim.sdba.adjustment.DetrendedQuantileMapping`
+    * ``how="PrincipalComponents"``:
+      :py:class:`~xclim.sdba.adjustment.PrincipalComponents`
+    * ``how="QuantileDeltaMapping"``:
+      :py:class:`~xclim.sdba.adjustment.QuantileDeltaMapping`
+    * ``how="Scaling"``: :py:class:`~xclim.sdba.adjustment.Scaling`
+    * ``how="LOCI"``: :py:class:`~xclim.sdba.adjustment.LOCI`
 
   These methods do not respond to ``OPTIONS['seasonality']`` like the other methods.
   Provide ``group="init.month"`` to group by month or ``group='init'`` to skip grouping.
@@ -167,8 +175,10 @@ New Features
 climpred v2.1.5 (2021-08-12)
 ============================
 
-While ``climpred`` has used in the `ASP summer colloquium 2021 <https://asp.ucar.edu/asp-colloquia>`_,
-many new features in :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias` were implemented.
+While ``climpred`` was used in the
+`ASP summer colloquium 2021 <https://asp.ucar.edu/asp-colloquia>`_,
+many new features in :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias` were
+implemented.
 
 Breaking changes
 ----------------
@@ -189,9 +199,11 @@ New Features
   ``train_test_split='fair/unfair/unfair-cv'`` (default ``unfair``) following
   `Risbey et al. 2021 <http://www.nature.com/articles/s41467-021-23771-z>`_.
   (:issue:`648`, :pr:`655`) `Aaron Spring`_.
-- allow more `bias reduction <bias_removal.html>`_ methods in :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`:
+- allow more `bias reduction <bias_removal.html>`_ methods in
+  :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`:
 
-    * ``how="additive_mean"``: correcting the mean forecast additively (already implemented)
+    * ``how="additive_mean"``: correcting the mean forecast additively
+      (already implemented)
     * ``how="multiplicative_mean"``: correcting the mean forecast multiplicatively
     * ``how="multiplicative_std"``: correcting the standard deviation multiplicatively
 
@@ -206,7 +218,8 @@ New Features
   `leave-one-out cross validation <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.LeaveOneOut.html>`_
   when passing ``cv='LOO'`` and ``train_test_split='unfair-cv'``.
   ``cv=True`` falls  back to ``cv='LOO'``. (:issue:`643`, :pr:`646`) `Aaron Spring`_.
-- Add new metrics :py:func:`~climpred.metrics._spread` and :py:func:`~climpred.metrics._mul_bias` (:pr:`638`) `Aaron Spring`_.
+- Add new metrics :py:func:`~climpred.metrics._spread` and
+  :py:func:`~climpred.metrics._mul_bias` (:pr:`638`) `Aaron Spring`_.
 - Add new tutorial datasets: (:pr:`651`) `Aaron Spring`_.
 
     * ``NMME_OIv2_Nino34_sst`` and ``NMME_hindcast_Nino34_sst`` with monthly leads
@@ -231,7 +244,8 @@ Documentation
   (see `context <https://twitter.com/realaaronspring/status/1406980080883150848?s=21>`_)
   (:issue:`594`, :pr:`633`) `Aaron Spring`_.
 - Add `CITATION.cff <https://github.com/pangeo-data/climpred/blob/main/CITATION.cff>`_.
-  Please cite `Brady and Spring, 2020 <https://joss.theoj.org/papers/10.21105/joss.02781>`_.
+  Please cite
+  `Brady and Spring, 2020 <https://joss.theoj.org/papers/10.21105/joss.02781>`_.
   (`GH <https://github.com/pangeo-data/climpred/commit/eceb3f46d78c7dd8eb25243b2e0b673ddd78a4b2>`_) `Aaron Spring`_.
 - Use ``NMME_OIv2_Nino34_sst`` and ``NMME_hindcast_Nino34_sst`` with monthly leads for
   `bias reduction <bias_removal.html>`_ demonstrating
@@ -290,11 +304,11 @@ Documentation
   (:issue:`510`, :issue:`561`, :pr:`600`) `Aaron Spring`_.
 - Add `GEFS example <examples/NWP/NWP_GEFS_6h_forecasts.html>`_ for numerical weather
   prediction. (:issue:`602`, :pr:`603`) `Aaron Spring`_.
-- Add subseasonal `daily ECMWF example <examples/subseasonal/daily-S2S-ECMWF.html>`_ using
-  `climetlab <https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge>`_ to access
+- Add subseasonal `daily ECMWF example <examples/subseasonal/daily-S2S-ECMWF.html>`_
+  using `climetlab <https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge>`_ to access
   hindcasts from ECMWF cloud.  (:issue:`587`, :pr:`603`) `Aaron Spring`_.
-- Add subseasonal `daily S2S example <examples/subseasonal/daily-S2S-IRIDL.html>`_ accessing
-  `S2S <http://s2sprediction.net/>`_ output on
+- Add subseasonal `daily S2S example <examples/subseasonal/daily-S2S-IRIDL.html>`_
+  accessing `S2S <http://s2sprediction.net/>`_ output on
   `IRIDL <https://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/>`_ with a cookie and
   working with "on-the-fly" reforecasts with ``hdate`` dimension.
   (:issue:`588`, :pr:`593`) `Aaron Spring`_.
@@ -438,7 +452,8 @@ the number of ``iterations`` for bootstrapping.
   ``persistence`` is the evaluation of the persistence forecast
   (:issue:`460`, :pr:`478`, :issue:`476`, :pr:`480`) `Aaron Spring`_.
 - ``reference`` keyword in :py:meth:`~climpred.classes.HindcastEnsemble.verify` should
-  be choosen from [``uninitialized``, ``persistence``]. ``historical`` no longer works (:issue:`460`, :pr:`478`, :issue:`476`, :pr:`480`) `Aaron Spring`_.
+  be chosen from [``uninitialized``, ``persistence``]. ``historical`` no longer works.
+  (:issue:`460`, :pr:`478`, :issue:`476`, :pr:`480`) `Aaron Spring`_.
 - :py:meth:`~climpred.classes.HindcastEnsemble.verify` returns no ``skill`` dimension
   if ``reference=None``  (:pr:`480`) `Aaron Spring`_.
 - ``comparison`` is not applied to uninitialized skill in
@@ -468,14 +483,17 @@ of bias removal for :py:class:`~climpred.classes.HindcastEnsemble`.
   are present. (:pr:`383`) `Aaron Spring`_.
 - Bootstrapping now available for :py:class:`~climpred.classes.HindcastEnsemble` as
   :py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`, which is analogous to
-  the :py:class:`~climpred.classes.PerfectModelEnsemble` method (:issue:`257`, :pr:`418`) `Aaron Spring`_.
+  the :py:class:`~climpred.classes.PerfectModelEnsemble` method.
+  (:issue:`257`, :pr:`418`) `Aaron Spring`_.
 - :py:meth:`~climpred.classes.HindcastEnsemble.verify` allows all dimensions from
   ``initialized`` ensemble as ``dim``. This allows e.g. spatial dimensions to be used
-  for pattern correlation. Make sure to use ``skipna=True`` when using spatial dimensions
-  and output has nans (in the case of land, for instance) (:issue:`282`, :pr:`407`) `Aaron Spring`_.
-- Allow binary forecasts at when calling :py:meth:`~climpred.classes.HindcastEnsemble.verify`,
+  for pattern correlation. Make sure to use ``skipna=True`` when using spatial
+  dimensions and output has NaNs (in the case of land, for instance).
+  (:issue:`282`, :pr:`407`) `Aaron Spring`_.
+- Allow binary forecasts when calling
+  :py:meth:`~climpred.classes.HindcastEnsemble.verify`,
   rather than needing to supply binary results beforehand. In other words,
-  ``hindcast.verify(metric='brier_score', comparison='m2o', dim='member', logical=logical)``
+  ``hindcast.verify(metric='bs', comparison='m2o', dim='member', logical=logical)``
   is now the same as
   ``hindcast.map(logical).verify(metric='brier_score', comparison='m2o', dim='member'``.
   (:pr:`431`) `Aaron Spring`_.
@@ -513,7 +531,8 @@ Deprecated
 - ``compute_metric``, ``compute_uninitialized`` and ``compute_persistence`` no longer
   in use for :py:class:`~climpred.classes.PerfectModelEnsemble` in favor of
   :py:meth:`~climpred.classes.PerfectModelEnsemble.verify` with the ``reference``
-  keyword instead. (:pr:`436`, :issue:`468`, :pr:`472`) `Aaron Spring`_ and `Riley X. Brady`_.
+  keyword instead.
+  (:pr:`436`, :issue:`468`, :pr:`472`) `Aaron Spring`_ and `Riley X. Brady`_.
 - ``'historical'`` no longer a valid choice for ``reference``. Use ``'uninitialized'``
   instead. (:pr:`478`) `Aaron Spring`_.
 
@@ -521,13 +540,14 @@ Bug Fixes
 ---------
 
 - :py:meth:`~climpred.classes.PredictionEnsemble.verify` and
-  :py:meth:`~climpred.classes.PredictionEnsemble.bootstrap` now accept ``metric_kwargs``.
-  (:pr:`387`) `Aaron Spring`_.
-- :py:meth:`~climpred.classes.PerfectModelEnsemble.verify` now accepts ``'uninitialized'``
-  as a reference. (:pr:`395`) `Riley X. Brady`_.
-- Spatial and temporal smoothing :py:meth:`~climpred.classes.PredictionEnsemble.smooth` now
-  work as expected and rename time dimensions after
-  :py:meth:`~climpred.classes.PredictionEnsembleEnsemble.verify`. (:pr:`391`) `Aaron Spring`_.
+  :py:meth:`~climpred.classes.PredictionEnsemble.bootstrap` now accept
+  ``metric_kwargs``. (:pr:`387`) `Aaron Spring`_.
+- :py:meth:`~climpred.classes.PerfectModelEnsemble.verify` now accepts
+  ``'uninitialized'`` as a reference. (:pr:`395`) `Riley X. Brady`_.
+- Spatial and temporal smoothing :py:meth:`~climpred.classes.PredictionEnsemble.smooth`
+  now work as expected and rename time dimensions after
+  :py:meth:`~climpred.classes.PredictionEnsemble.verify`.
+  (:pr:`391`) `Aaron Spring`_.
 - ``PredictionEnsemble.verify(comparison='m2o', references=['uninitialized',
   'persistence']`` does not fail anymore. (:issue:`385`, :pr:`400`) `Aaron Spring`_.
 - Remove bias using ``dayofyear`` in
@@ -539,7 +559,8 @@ Documentation
 -------------
 - Updates ``climpred`` tagline to "Verification of weather and climate forecasts."
   (:pr:`420`) `Riley X. Brady`_.
-- Adds section on how to use arithmetic with :py:class:`~climpred.classes.HindcastEnsemble`.
+- Adds section on how to use arithmetic with
+  :py:class:`~climpred.classes.HindcastEnsemble`.
   (:pr:`378`) `Riley X. Brady`_.
 - Add docs section for similar open-source forecasting packages.
   (:pr:`432`) `Riley X. Brady`_.
@@ -577,27 +598,28 @@ climpred v2.1.0 (2020-06-08)
 Breaking Changes
 ----------------
 
-- Keyword ``bootstrap`` has been replaced with ``iterations``. We feel that this more accurately
-  describes the argument, since "bootstrap" is really the process as a whole.
+- Keyword ``bootstrap`` has been replaced with ``iterations``. We feel that this more
+  accurately describes the argument, since "bootstrap" is really the process as a whole.
   (:pr:`354`) `Aaron Spring`_.
 
 New Features
 ------------
 
 - :py:class:`~climpred.classes.HindcastEnsemble` and
-  :py:class:`~climpred.classes.PerfectModelEnsemble` now use an HTML representation, following the
-  more recent versions of ``xarray``. (:pr:`371`) `Aaron Spring`_.
+  :py:class:`~climpred.classes.PerfectModelEnsemble` now use an HTML representation,
+  following the more recent versions of ``xarray``. (:pr:`371`) `Aaron Spring`_.
 - ``HindcastEnsemble.verify()`` now takes ``reference=...`` keyword. Current options are
   ``'persistence'`` for a persistence forecast of the observations and
   ``'uninitialized'`` for an uninitialized/historical reference, such as an
   uninitialized/forced run. (:pr:`341`) `Riley X. Brady`_.
 - We now only enforce a union of the initialization dates with observations if
-  ``reference='persistence'`` for :py:class:`~climpred.classes.HindcastEnsemble`. This is to ensure
-  that the same set of initializations is used
-  by the observations to construct a persistence forecast. (:pr:`341`) `Riley X. Brady`_.
-- :py:func:`~climpred.prediction.compute_perfect_model` now accepts initialization (``init``) as
-  ``cftime`` and ``int``. ``cftime`` is now implemented into the bootstrap uninitialized functions
-  for the perfect model configuration. (:pr:`332`) `Aaron Spring`_.
+  ``reference='persistence'`` for :py:class:`~climpred.classes.HindcastEnsemble`.
+  This is to ensure that the same set of initializations is used by the observations to
+  construct a persistence forecast. (:pr:`341`) `Riley X. Brady`_.
+- :py:func:`~climpred.prediction.compute_perfect_model` now accepts initialization
+  (``init``) as ``cftime`` and ``int``. ``cftime`` is now implemented into the
+  bootstrap uninitialized functions for the perfect model configuration.
+  (:pr:`332`) `Aaron Spring`_.
 - New explicit keywords in bootstrap functions for ``resampling_dim`` and
   ``reference_compute`` (:pr:`320`) `Aaron Spring`_.
 - Logging now included for ``compute_hindcast`` which displays the ``inits`` and
@@ -619,12 +641,13 @@ New Features
 Performance
 -----------
 
-The major change for this release is a dramatic speedup in bootstrapping functions, led by
-`Aaron Spring`_. We focused on scalability with ``dask`` and found many places we could compute
-skill simultaneously over all bootstrapped ensemble members rather than at each iteration.
+The major change for this release is a dramatic speedup in bootstrapping functions, led
+by `Aaron Spring`_. We focused on scalability with ``dask`` and found many places we
+could compute skill simultaneously over all bootstrapped ensemble members rather than
+at each iteration.
 
-- Bootstrapping uninitialized skill in the perfect model framework is now sped up significantly for
-  annual lead resolution. (:pr:`332`) `Aaron Spring`_.
+- Bootstrapping uninitialized skill in the perfect model framework is now sped up
+  significantly for annual lead resolution. (:pr:`332`) `Aaron Spring`_.
 - General speedup in :py:func:`~climpred.bootstrap.bootstrap_hindcast` and
   :py:func:`~climpred.bootstrap.bootstrap_perfect_model`: (:pr:`285`) `Aaron Spring`_.
 
diff --git a/climpred/alignment.py b/climpred/alignment.py
index a699b1ae7..ddb28d2d0 100644
--- a/climpred/alignment.py
+++ b/climpred/alignment.py
@@ -1,3 +1,7 @@
+"""Align ``initialized`` ``valid_time=init+lead`` with ``observations`` ``time``."""
+
+from typing import Dict, List, Optional, Tuple, Union
+
 import dask
 import numpy as np
 import xarray as xr
@@ -7,10 +11,17 @@
 from .exceptions import CoordinateError
 from .utils import get_multiple_lead_cftime_shift_args, shift_cftime_index
 
+returnType = Tuple[Dict[float, xr.DataArray], Dict[float, xr.CFTimeIndex]]
+
 
-def return_inits_and_verif_dates(forecast, verif, alignment, reference=None, hist=None):
-    """Returns initializations and verification dates for an arbitrary number of leads
-    per a given alignment strategy.
+def return_inits_and_verif_dates(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    alignment: str,
+    reference: Optional[Union[str, List[str]]] = None,
+    hist: Optional[xr.Dataset] = None,
+) -> returnType:
+    """Return initializations and verification dates per a given alignment strategy.
 
     Args:
         forecast (``xarray`` object): Prediction ensemble with ``init`` dim renamed to
@@ -27,7 +38,7 @@ def return_inits_and_verif_dates(forecast, verif, alignment, reference=None, his
                against the observations provided. This changes both the set of
                initializations and the verification window used at each lead.
 
-    Returns:
+    Return:
         inits (dict): Keys are the lead time integer, values are an ``xr.DataArray`` of
             initialization dates.
         verif_dates (dict): Keys are the lead time integer, values are an
@@ -81,11 +92,14 @@ def return_inits_and_verif_dates(forecast, verif, alignment, reference=None, his
         )
     elif alignment == "maximize":
         return _maximize_alignment(init_lead_matrix, all_verifs, leads)
+    else:
+        raise ValueError
 
 
-def _maximize_alignment(init_lead_matrix, all_verifs, leads):
-    """Returns initializations and verification dates, maximizing the degrees of freedom
-    at each lead individually.
+def _maximize_alignment(
+    init_lead_matrix: xr.DataArray, all_verifs: xr.DataArray, leads: xr.DataArray
+) -> returnType:
+    """Return inits and verif dates, maximizing the samples at each lead individually.
 
     See ``return_inits_and_verif_dates`` for descriptions of expected variables.
     """
@@ -106,9 +120,15 @@ def _maximize_alignment(init_lead_matrix, all_verifs, leads):
     return inits, verif_dates
 
 
-def _same_inits_alignment(init_lead_matrix, valid_inits, all_verifs, leads, n, freq):
-    """Returns initializations and verification dates, maintaining a common set of inits
-    at all leads.
+def _same_inits_alignment(
+    init_lead_matrix: xr.DataArray,
+    valid_inits: xr.DataArray,
+    all_verifs: xr.DataArray,
+    leads: xr.DataArray,
+    n: int,
+    freq: str,
+) -> returnType:
+    """Return inits and verif dates, maintaining a common set of inits at all leads.
 
     See ``return_inits_and_verif_dates`` for descriptions of expected variables.
     """
@@ -117,14 +137,20 @@ def _same_inits_alignment(init_lead_matrix, valid_inits, all_verifs, leads, n, f
     inits = {lead: inits for lead in leads}
     verif_dates = {
         lead: shift_cftime_index(inits[lead], "time", n, freq)
-        for (lead, n) in zip(leads, n)
+        for (lead, n) in zip(leads, n)  # type: ignore
     }
     return inits, verif_dates
 
 
-def _same_verifs_alignment(init_lead_matrix, valid_inits, all_verifs, leads, n, freq):
-    """Returns initializations and verification dates, maintaining a common verification
-    window at all leads.
+def _same_verifs_alignment(
+    init_lead_matrix: xr.DataArray,
+    valid_inits: xr.DataArray,
+    all_verifs: xr.DataArray,
+    leads: xr.DataArray,
+    n: int,
+    freq: str,
+) -> returnType:
+    """Return inits and verifs, maintaining a common verification window at all leads.
 
     See ``return_inits_and_verif_dates`` for descriptions of expected variables.
     """
@@ -150,8 +176,10 @@ def _same_verifs_alignment(init_lead_matrix, valid_inits, all_verifs, leads, n,
     return inits, verif_dates
 
 
-def _construct_init_lead_matrix(forecast, n, freq, leads):
-    """Returns xr.DataArray of "real time" (init + lead) over all inits and leads.
+def _construct_init_lead_matrix(
+    forecast: xr.Dataset, n: Tuple[int], freq: str, leads: xr.DataArray
+) -> xr.DataArray:
+    """Return xr.DataArray of "valid time" (init + lead) over all inits and leads.
 
     Arguments:
         forecast (``xarray object``): Prediction ensemble with ``init`` dim renamed to
@@ -162,7 +190,7 @@ def _construct_init_lead_matrix(forecast, n, freq, leads):
             ``CFTimeIndex.shift(value, str)``.
         leads (list, array, xr.DataArray of ints): Leads to return offset for.
 
-    Returns:
+    Return:
         init_lead_matrix (``xr.DataArray``): DataArray with x=inits and y=lead with
             values corresponding to "real time", or ``init + lead`` over all inits and
             leads.
diff --git a/climpred/tests/test_bias_removal.py b/climpred/tests/test_bias_removal.py
index fd5bfd010..4d22a59fe 100644
--- a/climpred/tests/test_bias_removal.py
+++ b/climpred/tests/test_bias_removal.py
@@ -1,3 +1,5 @@
+"""Test bias_removal.py."""
+
 import numpy as np
 import pytest
 import xarray as xr
@@ -374,7 +376,7 @@ def test_remove_bias_unfair_artificial_skill_over_fair_xclim(
 def test_remove_bias_xclim_grouper_diff(
     hindcast_NMME_Nino34,
 ):
-    """Test remove_bias(how='xclim_method') is sensitive to grouper"""
+    """Test remove_bias(how='xclim_method') is sensitive to grouper."""
     alignment = "same_init"
     how = "DetrendedQuantileMapping"
     he = (
@@ -413,7 +415,7 @@ def test_remove_bias_xclim_grouper_diff(
 def test_remove_bias_xclim_adjust_kwargs_diff(
     hindcast_NMME_Nino34,
 ):
-    """Test remove_bias(how='xclim_method') is sensitive to adjust_kwargs"""
+    """Test remove_bias(how='xclim_method') is sensitive to adjust_kwargs."""
     alignment = "same_init"
     how = "EmpiricalQuantileMapping"
     he = (
@@ -492,7 +494,7 @@ def test_remove_bias_group(hindcast_NMME_Nino34):
 
 @requires_xclim
 def test_remove_bias_compare_scaling_and_mean(hindcast_recon_1d_mm):
-    """Compare Scaling and additive_mean to be similar"""
+    """Compare Scaling and additive_mean to be similar."""
     he = hindcast_recon_1d_mm.isel(lead=[0, 1])
     hind_scaling = he.remove_bias(
         how="Scaling",

From 998230da674daf25b1035e4dc96d9c372b4103c3 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 00:41:10 +0100
Subject: [PATCH 13/56] started with metrics.py

---
 climpred/metrics.py | 729 +++++++++++++++++++++++++++-----------------
 1 file changed, 456 insertions(+), 273 deletions(-)

diff --git a/climpred/metrics.py b/climpred/metrics.py
index 7ed35cd04..6226a13c7 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -1,5 +1,5 @@
 import warnings
-from typing import Callable, List, Optional
+from typing import Any, Callable, List, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -34,6 +34,9 @@
 
 from .constants import CLIMPRED_DIMS
 
+dimType = Optional[Union[str, List[str]]]
+metric_kwargsType = Any
+
 
 def _get_norm_factor(comparison):
     """Get normalization factor for normalizing distance metrics.
@@ -263,17 +266,22 @@ def __repr__(self):
 #####################
 # CORRELATION METRICS
 #####################
-def _pearson_r(forecast, verif, dim=None, **metric_kwargs):
-    """Pearson product-moment correlation coefficient.
+def _pearson_r(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Pearson product-moment correlation coefficient.
 
     A measure of the linear association between the forecast and verification data that
     is independent of the mean and variance of the individual distributions. This is
     also known as the Anomaly Correlation Coefficient (ACC) when correlating anomalies.
 
     .. math::
-        corr = \\frac{cov(f, o)}{\\sigma_{f}\\cdot\\sigma_{o}},
+        corr = \frac{cov(f, o)}{\sigma_{f}\cdot\sigma_{o}},
 
-    where :math:`\\sigma_{f}` and :math:`\\sigma_{o}` represent the standard deviation
+    where :math:`\sigma_{f}` and :math:`\sigma_{o}` represent the standard deviation
     of the forecast and verification data over the experimental period, respectively.
 
     .. note::
@@ -281,10 +289,10 @@ def _pearson_r(forecast, verif, dim=None, **metric_kwargs):
         corresponding p value.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.pearson_r`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.pearson_r`
 
     Details:
         +-----------------+-----------+
@@ -347,7 +355,12 @@ def _pearson_r(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _pearson_r_p_value(forecast, verif, dim=None, **metric_kwargs):
+def _pearson_r_p_value(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
     """Probability that forecast and verification data are linearly uncorrelated.
 
     Two-tailed p value associated with the Pearson product-moment correlation
@@ -356,10 +369,10 @@ def _pearson_r_p_value(forecast, verif, dim=None, **metric_kwargs):
     and verification data.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see xskillscore.pearson_r_p_value
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see xskillscore.pearson_r_p_value
 
     Details:
         +-----------------+-----------+
@@ -425,8 +438,13 @@ def _pearson_r_p_value(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _effective_sample_size(forecast, verif, dim=None, **metric_kwargs):
-    """Effective sample size for temporally correlated data.
+def _effective_sample_size(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Effective sample size for temporally correlated data.
 
     .. note::
         Weights are not included here due to the dependence on temporal autocorrelation.
@@ -444,17 +462,17 @@ def _effective_sample_size(forecast, verif, dim=None, **metric_kwargs):
     ``pearson_r_eff_p_value`` and ``spearman_r_eff_p_value``.
 
     .. math::
-        N_{eff} = N\\left( \\frac{1 -
-                   \\rho_{f}\\rho_{o}}{1 + \\rho_{f}\\rho_{o}} \\right),
+        N_{eff} = N\left( \frac{1 -
+                   \rho_{f}\rho_{o}}{1 + \rho_{f}\rho_{o}} \right),
 
-    where :math:`\\rho_{f}` and :math:`\\rho_{o}` are the lag-1 autocorrelation
+    where :math:`\rho_{f}` and :math:`\rho_{o}` are the lag-1 autocorrelation
     coefficients for the forecast and verification data.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.effective_sample_size`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.effective_sample_size`
 
     Details:
         +-----------------+-----------------+
@@ -514,9 +532,13 @@ def _effective_sample_size(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _pearson_r_eff_p_value(forecast, verif, dim=None, **metric_kwargs):
-    """Probability that forecast and verification data are linearly uncorrelated,
-    accounting for autocorrelation.
+def _pearson_r_eff_p_value(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""pearson_r_p_value accounting for autocorrelation.
 
     .. note::
         Weights are not included here due to the dependence on temporal autocorrelation.
@@ -531,24 +553,24 @@ def _pearson_r_eff_p_value(forecast, verif, dim=None, **metric_kwargs):
 
     .. math::
 
-        t = r\\sqrt{ \\frac{N_{eff} - 2}{1 - r^{2}} },
+        t = r\sqrt{ \frac{N_{eff} - 2}{1 - r^{2}} },
 
     where :math:`N_{eff}` is computed via the autocorrelation in the forecast and
     verification data.
 
     .. math::
 
-        N_{eff} = N\\left( \\frac{1 -
-                   \\rho_{f}\\rho_{o}}{1 + \\rho_{f}\\rho_{o}} \\right),
+        N_{eff} = N\left( \frac{1 -
+                   \rho_{f}\rho_{o}}{1 + \rho_{f}\rho_{o}} \right),
 
-    where :math:`\\rho_{f}` and :math:`\\rho_{o}` are the lag-1 autocorrelation
+    where :math:`\rho_{f}` and :math:`\rho_{o}` are the lag-1 autocorrelation
     coefficients for the forecast and verification data.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.pearson_r_eff_p_value`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.pearson_r_eff_p_value`
 
     Details:
         +-----------------+-----------+
@@ -619,11 +641,16 @@ def _pearson_r_eff_p_value(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _spearman_r(forecast, verif, dim=None, **metric_kwargs):
-    """Spearman's rank correlation coefficient.
+def _spearman_r(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Spearman's rank correlation coefficient.
 
     .. math::
-        corr = \\mathrm{pearsonr}(ranked(f), ranked(o))
+        corr = \mathrm{pearsonr}(ranked(f), ranked(o))
 
     This correlation coefficient is nonparametric and assesses how well the relationship
     between the forecast and verification data can be described using a monotonic
@@ -639,10 +666,10 @@ def _spearman_r(forecast, verif, dim=None, **metric_kwargs):
         corresponding p value.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.spearman_r`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.spearman_r`
 
     Details:
         +-----------------+-----------+
@@ -705,8 +732,13 @@ def _spearman_r(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _spearman_r_p_value(forecast, verif, dim=None, **metric_kwargs):
-    """Probability that forecast and verification data are monotonically uncorrelated.
+def _spearman_r_p_value(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Probability that forecast and verification data are monotonically uncorrelated.
 
     Two-tailed p value associated with the Spearman's rank correlation
     coefficient (``spearman_r``), assuming that all samples are independent. Use
@@ -714,10 +746,10 @@ def _spearman_r_p_value(forecast, verif, dim=None, **metric_kwargs):
     and verification data.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.spearman_r_p_value`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.spearman_r_p_value`
 
     Details:
         +-----------------+-----------+
@@ -783,9 +815,13 @@ def _spearman_r_p_value(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _spearman_r_eff_p_value(forecast, verif, dim=None, **metric_kwargs):
-    """Probability that forecast and verification data are monotonically uncorrelated,
-    accounting for autocorrelation.
+def _spearman_r_eff_p_value(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Spearman's rank correlation p value accounting for autocorrelation.
 
     .. note::
         Weights are not included here due to the dependence on temporal autocorrelation.
@@ -800,24 +836,24 @@ def _spearman_r_eff_p_value(forecast, verif, dim=None, **metric_kwargs):
 
     .. math::
 
-        t = r\\sqrt{ \\frac{N_{eff} - 2}{1 - r^{2}} },
+        t = r\sqrt{ \frac{N_{eff} - 2}{1 - r^{2}} },
 
     where :math:`N_{eff}` is computed via the autocorrelation in the forecast and
     verification data.
 
     .. math::
 
-        N_{eff} = N\\left( \\frac{1 -
-                   \\rho_{f}\\rho_{o}}{1 + \\rho_{f}\\rho_{o}} \\right),
+        N_{eff} = N\left( \frac{1 -
+                   \rho_{f}\rho_{o}}{1 + \rho_{f}\rho_{o}} \right),
 
-    where :math:`\\rho_{f}` and :math:`\\rho_{o}` are the lag-1 autocorrelation
+    where :math:`\rho_{f}` and :math:`\rho_{o}` are the lag-1 autocorrelation
     coefficients for the forecast and verification data.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.spearman_r_eff_p_value`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.spearman_r_eff_p_value`
 
     Details:
         +-----------------+-----------+
@@ -891,11 +927,16 @@ def _spearman_r_eff_p_value(forecast, verif, dim=None, **metric_kwargs):
 ##################
 # DISTANCE METRICS
 ##################
-def _mse(forecast, verif, dim=None, **metric_kwargs):
-    """Mean Sqaure Error (MSE).
+def _mse(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Mean Square Error (MSE).
 
     .. math::
-        MSE = \\overline{(f - o)^{2}}
+        MSE = \overline{(f - o)^{2}}
 
     The average of the squared difference between forecasts and verification data. This
     incorporates both the variance and bias of the estimator. Because the error is
@@ -905,10 +946,10 @@ def _mse(forecast, verif, dim=None, **metric_kwargs):
     for ``mse``. See Jolliffe and Stephenson, 2011.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.mse`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.mse`
 
     Details:
         +-----------------+-----------+
@@ -968,17 +1009,22 @@ def _mse(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _spread(forecast, verif, dim=None, **metric_kwargs):
-    """Ensemble spread taking the standard deviation over the member dimension.
+def _spread(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Ensemble spread taking the standard deviation over the member dimension.
 
     .. math::
-        spread = std(f) = \\sigma^2(f) = \\sqrt\\frac{\\sum{(f-\\overline{f})^2}}{N}
+        spread = std(f) = \sigma(f) = \sqrt{\frac{\sum{(f-\overline{f})^2}}{N}}
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data (not used).
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xarray.std`
+        forecast: Forecast.
+        verif: Verification data (not used).
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xarray.std`
 
     Details:
         +-----------------+-----------+
@@ -1033,20 +1079,25 @@ def _spread(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _rmse(forecast, verif, dim=None, **metric_kwargs):
-    """Root Mean Sqaure Error (RMSE).
+def _rmse(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Root Mean Square Error (RMSE).
 
     .. math::
-        RMSE = \\sqrt{\\overline{(f - o)^{2}}}
+        RMSE = \sqrt{\overline{(f - o)^{2}}}
 
     The square root of the average of the squared differences between forecasts and
     verification data.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.rmse`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.rmse`
 
     Details:
         +-----------------+-----------+
@@ -1100,21 +1151,26 @@ def _rmse(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _mae(forecast, verif, dim=None, **metric_kwargs):
-    """Mean Absolute Error (MAE).
+def _mae(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Mean Absolute Error (MAE).
 
     .. math::
-        MAE = \\overline{|f - o|}
+        MAE = \overline{|f - o|}
 
     The average of the absolute differences between forecasts and verification data.
     A more robust measure of forecast accuracy than ``mse`` which is sensitive to large
     outlier forecast errors.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.mae`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.mae`
 
     Details:
         +-----------------+-----------+
@@ -1175,7 +1231,12 @@ def _mae(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _median_absolute_error(forecast, verif, dim=None, **metric_kwargs):
+def _median_absolute_error(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
     """Median Absolute Error.
 
     .. math::
@@ -1185,10 +1246,10 @@ def _median_absolute_error(forecast, verif, dim=None, **metric_kwargs):
     Applying the median function to absolute error makes it more robust to outliers.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.median_absolute_error`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.median_absolute_error`
 
     Details:
         +-----------------+-----------+
@@ -1248,14 +1309,19 @@ def _median_absolute_error(forecast, verif, dim=None, **metric_kwargs):
 #############################
 # NORMALIZED DISTANCE METRICS
 #############################
-def _nmse(forecast, verif, dim=None, **metric_kwargs):
-    """Normalized MSE (NMSE), also known as Normalized Ensemble Variance (NEV).
+def _nmse(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Normalized MSE (NMSE), also known as Normalized Ensemble Variance (NEV).
 
     Mean Square Error (``mse``) normalized by the variance of the verification data.
 
     .. math::
-        NMSE = NEV = \\frac{MSE}{\\sigma^2_{o}\\cdot fac}
-             = \\frac{\\overline{(f - o)^{2}}}{\\sigma^2_{o} \\cdot fac},
+        NMSE = NEV = \frac{MSE}{\sigma^2_{o}\cdot fac}
+             = \frac{\overline{(f - o)^{2}}}{\sigma^2_{o} \cdot fac},
 
     where :math:`fac` is 1 when using comparisons involving the ensemble mean (``m2e``,
     ``e2c``, ``e2o``) and 2 when using comparisons involving individual ensemble
@@ -1269,13 +1335,13 @@ def _nmse(forecast, verif, dim=None, **metric_kwargs):
         window for normalizing MSE.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        comparison (str): Name comparison needed for normalization factor `fac`, see
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        comparison: Name comparison needed for normalization factor `fac`, see
             :py:func:`~climpred.metrics._get_norm_factor`
             (Handled internally by the compute functions)
-        metric_kwargs (dict): see :py:func:`~xskillscore.mse`
+        metric_kwargs: see :py:func:`~xskillscore.mse`
 
     Details:
         +----------------------------+-----------+
@@ -1351,15 +1417,20 @@ def _nmse(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _nmae(forecast, verif, dim=None, **metric_kwargs):
-    """Normalized Mean Absolute Error (NMAE).
+def _nmae(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Normalized Mean Absolute Error (NMAE).
 
     Mean Absolute Error (``mae``) normalized by the standard deviation of the
     verification data.
 
     .. math::
-        NMAE = \\frac{MAE}{\\sigma_{o} \\cdot fac}
-             = \\frac{\\overline{|f - o|}}{\\sigma_{o} \\cdot fac},
+        NMAE = \frac{MAE}{\sigma_{o} \cdot fac}
+             = \frac{\overline{|f - o|}}{\sigma_{o} \cdot fac},
 
     where :math:`fac` is 1 when using comparisons involving the ensemble mean (``m2e``,
     ``e2c``, ``e2o``) and 2 when using comparisons involving individual ensemble
@@ -1373,13 +1444,13 @@ def _nmae(forecast, verif, dim=None, **metric_kwargs):
         experimental window for normalizing MAE.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        comparison (str): Name comparison needed for normalization factor `fac`, see
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        comparison: Name comparison needed for normalization factor `fac`, see
             :py:func:`~climpred.metrics._get_norm_factor`
             (Handled internally by the compute functions)
-        metric_kwargs (dict): see :py:func:`~xskillscore.mae`
+        metric_kwargs: see :py:func:`~xskillscore.mae`
 
     Details:
         +----------------------------+-----------+
@@ -1454,17 +1525,22 @@ def _nmae(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _nrmse(forecast, verif, dim=None, **metric_kwargs):
-    """Normalized Root Mean Square Error (NRMSE).
+def _nrmse(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Normalized Root Mean Square Error (NRMSE).
 
     Root Mean Square Error (``rmse``) normalized by the standard deviation of the
     verification data.
 
     .. math::
 
-        NRMSE = \\frac{RMSE}{\\sigma_{o}\\cdot\\sqrt{fac}}
-              = \\sqrt{\\frac{MSE}{\\sigma^{2}_{o}\\cdot fac}}
-              = \\sqrt{ \\frac{\\overline{(f - o)^{2}}}{ \\sigma^2_{o}\\cdot fac}},
+        NRMSE = \frac{RMSE}{\sigma_{o}\cdot\sqrt{fac}}
+              = \sqrt{\frac{MSE}{\sigma^{2}_{o}\cdot fac}}
+              = \sqrt{ \frac{\overline{(f - o)^{2}}}{ \sigma^2_{o}\cdot fac}},
 
     where :math:`fac` is 1 when using comparisons involving the ensemble mean (``m2e``,
     ``e2c``, ``e2o``) and 2 when using comparisons involving individual ensemble
@@ -1478,13 +1554,13 @@ def _nrmse(forecast, verif, dim=None, **metric_kwargs):
         window for normalizing RMSE.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        comparison (str): Name comparison needed for normalization factor `fac`, see
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        comparison: Name comparison needed for normalization factor `fac`, see
             :py:func:`~climpred.metrics._get_norm_factor`
             (Handled internally by the compute functions)
-        metric_kwargs (dict): see :py:func:`~xskillscore.rmse`
+        metric_kwargs: see :py:func:`~xskillscore.rmse`
 
     Details:
         +----------------------------+-----------+
@@ -1565,12 +1641,17 @@ def _nrmse(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _msess(forecast, verif, dim=None, **metric_kwargs):
-    """Mean Squared Error Skill Score (MSESS).
+def _msess(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Mean Squared Error Skill Score (MSESS).
 
     .. math::
-        MSESS = 1 - \\frac{MSE}{\\sigma^2_{ref} \\cdot fac} =
-               1 - \\frac{\\overline{(f - o)^{2}}}{\\sigma^2_{ref} \\cdot fac},
+        MSESS = 1 - \frac{MSE}{\sigma^2_{ref} \cdot fac} =
+               1 - \frac{\overline{(f - o)^{2}}}{\sigma^2_{ref} \cdot fac},
 
     where :math:`fac` is 1 when using comparisons involving the ensemble mean (``m2e``,
     ``e2c``, ``e2o``) and 2 when using comparisons involving individual ensemble
@@ -1587,13 +1668,13 @@ def _msess(forecast, verif, dim=None, **metric_kwargs):
         window for normalizing MSE.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        comparison (str): Name comparison needed for normalization factor `fac`, see
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        comparison: Name comparison needed for normalization factor `fac`, see
             :py:func:`~climpred.metrics._get_norm_factor`
             (Handled internally by the compute functions)
-        metric_kwargs (dict): see :py:func:`~xskillscore.mse`
+        metric_kwargs: see :py:func:`~xskillscore.mse`
 
     Details:
         +----------------------------+-----------+
@@ -1680,20 +1761,25 @@ def _msess(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _mape(forecast, verif, dim=None, **metric_kwargs):
-    """Mean Absolute Percentage Error (MAPE).
+def _mape(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Mean Absolute Percentage Error (MAPE).
 
     Mean absolute error (``mae``) expressed as the fractional error relative to the
     verification data.
 
     .. math::
-        MAPE = \\frac{1}{n} \\sum \\frac{|f-o|}{|o|}
+        MAPE = \frac{1}{n} \sum \frac{|f-o|}{|o|}
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.mape`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.mape`
 
     Details:
         +-----------------+-----------+
@@ -1747,20 +1833,25 @@ def _mape(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _smape(forecast, verif, dim=None, **metric_kwargs):
-    """Symmetric Mean Absolute Percentage Error (sMAPE).
+def _smape(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Symmetric Mean Absolute Percentage Error (sMAPE).
 
     Similar to the Mean Absolute Percentage Error (``mape``), but sums the forecast and
     observation mean in the denominator.
 
     .. math::
-        sMAPE = \\frac{1}{n} \\sum \\frac{|f-o|}{|f|+|o|}
+        sMAPE = \frac{1}{n} \sum \frac{|f-o|}{|f|+|o|}
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.smape`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.smape`
 
     Details:
         +-----------------+-----------+
@@ -1814,8 +1905,13 @@ def _smape(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _uacc(forecast, verif, dim=None, **metric_kwargs):
-    """Bushuk's unbiased Anomaly Correlation Coefficient (uACC).
+def _uacc(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Bushuk's unbiased Anomaly Correlation Coefficient (uACC).
 
     This is typically used in perfect model studies. Because the perfect model Anomaly
     Correlation Coefficient (ACC) is strongly state dependent, a standard ACC (e.g. one
@@ -1825,8 +1921,8 @@ def _uacc(forecast, verif, dim=None, **metric_kwargs):
     Bushuk et al. 2019), so the unbiased ACC can be derived as ``uACC = sqrt(MESSS)``.
 
     .. math::
-        uACC = \\sqrt{MSESS}
-             = \\sqrt{1 - \\frac{\\overline{(f - o)^{2}}}{\\sigma^2_{ref} \\cdot fac}},
+        uACC = \sqrt{MSESS}
+             = \sqrt{1 - \frac{\overline{(f - o)^{2}}}{\sigma^2_{ref} \cdot fac}},
 
     where :math:`fac` is 1 when using comparisons involving the ensemble mean (``m2e``,
     ``e2c``, ``e2o``) and 2 when using comparisons involving individual ensemble
@@ -1838,13 +1934,13 @@ def _uacc(forecast, verif, dim=None, **metric_kwargs):
         automatically converted to NaNs.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        comparison (str): Name comparison needed for normalization factor ``fac``, see
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        comparison: Name comparison needed for normalization factor ``fac``, see
             :py:func:`~climpred.metrics._get_norm_factor`
             (Handled internally by the compute functions)
-        metric_kwargs (dict): see :py:func:`~xskillscore.mse`
+        metric_kwargs: see :py:func:`~xskillscore.mse`
 
     Details:
         +----------------------------+-----------+
@@ -1916,19 +2012,24 @@ def _uacc(forecast, verif, dim=None, **metric_kwargs):
 ##############################
 # MURPHY DECOMPOSITION METRICS
 ##############################
-def _std_ratio(forecast, verif, dim=None, **metric_kwargs):
-    """Ratio of standard deviations of the forecast over the verification data.
+def _std_ratio(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Ratio of standard deviations of the forecast over the verification data.
 
-    .. math:: \\text{std ratio} = \\frac{\\sigma_f}{\\sigma_o},
+    .. math:: \text{std ratio} = \frac{\sigma_f}{\sigma_o},
 
-    where :math:`\\sigma_{f}` and :math:`\\sigma_{o}` are the standard deviations of the
+    where :math:`\sigma_{f}` and :math:`\sigma_{o}` are the standard deviations of the
     forecast and the verification data over the experimental period, respectively.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see xarray.std
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see xarray.std
 
     Details:
         +-----------------+-----------+
@@ -1985,17 +2086,22 @@ def _std_ratio(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _unconditional_bias(forecast, verif, dim=None, **metric_kwargs):
-    """Unconditional additive bias.
+def _unconditional_bias(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Unconditional additive bias.
 
     .. math::
-        bias = f - o
+        \text{bias} = f - o
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over
-        metric_kwargs (dict): see xarray.mean
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see xarray.mean
 
     Details:
         +-----------------+-----------+
@@ -2082,17 +2188,22 @@ def _unconditional_bias(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _mul_bias(forecast, verif, dim=None, **metric_kwargs):
-    """Multiplicative bias.
+def _mul_bias(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Multiplicative bias.
 
     .. math::
-        multiplicative bias = f / o
+        \text{multiplicative bias} = f / o
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over
-        metric_kwargs (dict): see xarray.mean
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see xarray.mean
 
     Details:
         +-----------------+-----------+
@@ -2145,24 +2256,29 @@ def _mul_bias(forecast, verif, dim=None, **metric_kwargs):
     aliases=["m_b", "multiplicative_bias"],
     minimum=-np.inf,
     maximum=np.inf,
-    perfect=False,  # 1.0
+    perfect=False,
 )
 
 
-def _conditional_bias(forecast, verif, dim=None, **metric_kwargs):
-    """Conditional bias between forecast and verification data.
+def _conditional_bias(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Conditional bias between forecast and verification data.
 
     .. math::
-        \\text{conditional bias} = r_{fo} - \\frac{\\sigma_f}{\\sigma_o},
+        \text{conditional bias} = r_{fo} - \frac{\sigma_f}{\sigma_o},
 
-    where :math:`\\sigma_{f}` and :math:`\\sigma_{o}` are the standard deviations of the
+    where :math:`\sigma_{f}` and :math:`\sigma_{o}` are the standard deviations of the
     forecast and verification data over the experimental period, respectively.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.pearson_r`
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.pearson_r`
         and :py:meth:`~xarray.Datasetstd`
 
     Details:
@@ -2222,11 +2338,16 @@ def _conditional_bias(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _bias_slope(forecast, verif, dim=None, **metric_kwargs):
-    """Bias slope between verification data and forecast standard deviations.
+def _bias_slope(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Bias slope between verification data and forecast standard deviations.
 
     .. math::
-        \\text{bias slope} = \\frac{s_{o}}{s_{f}} \\cdot r_{fo},
+        \text{bias slope} = \frac{s_{o}}{s_{f}} \cdot r_{fo},
 
     where :math:`r_{fo}` is the Pearson product-moment correlation between the forecast
     and the verification data and :math:`s_{o}` and :math:`s_{f}` are the standard
@@ -2234,10 +2355,10 @@ def _bias_slope(forecast, verif, dim=None, **metric_kwargs):
     respectively.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.pearson_r` and
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.pearson_r` and
         :py:meth:`~xarray.Dataset.std`
 
     Details:
@@ -2297,24 +2418,29 @@ def _bias_slope(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _msess_murphy(forecast, verif, dim=None, **metric_kwargs):
-    """Murphy's Mean Square Error Skill Score (MSESS).
+def _msess_murphy(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Murphy's Mean Square Error Skill Score (MSESS).
 
     .. math::
-        MSESS_{Murphy} = r_{fo}^2 - [\\text{conditional bias}]^2 -\
-         [\\frac{\\text{(unconditional) bias}}{\\sigma_o}]^2,
+        MSESS_{Murphy} = r_{fo}^2 - [\text{conditional bias}]^2 -
+         [\frac{\text{(unconditional) bias}}{\sigma_o}]^2,
 
     where :math:`r_{fo}^{2}` represents the Pearson product-moment correlation
-    coefficient between the forecast and verification data and :math:`\\sigma_{o}`
+    coefficient between the forecast and verification data and :math:`\sigma_{o}`
     represents the standard deviation of the verification data over the experimental
     period. See ``conditional_bias`` and ``unconditional_bias`` for their respective
     formulations.
 
     Args:
-        forecast (xarray object): Forecast.
-        verif (xarray object): Verification data.
-        dim (str): Dimension(s) to perform metric over.
-        metric_kwargs (dict): see :py:func:`~xskillscore.pearson_r`,
+        forecast: Forecast.
+        verif: Verification data.
+        dim: Dimension(s) to perform metric over.
+        metric_kwargs: see :py:func:`~xskillscore.pearson_r`,
         :py:meth:`~xarray.Dataset.mean` and :py:meth:`~xarray.Dataset.std`
 
     Details:
@@ -2393,7 +2519,12 @@ def _msess_murphy(forecast, verif, dim=None, **metric_kwargs):
 #######################
 
 
-def _brier_score(forecast, verif, dim=None, **metric_kwargs):
+def _brier_score(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
     """Brier Score for binary events.
 
     The Mean Square Error (``mse``) of probabilistic two-category forecasts where the
@@ -2416,15 +2547,15 @@ def _brier_score(forecast, verif, dim=None, **metric_kwargs):
         This Brier Score is not the original formula given in Brier's 1950 paper.
 
     Args:
-        forecast (xr.object): Raw forecasts with ``member`` dimension if `logical`
+        forecast: Raw forecasts with ``member`` dimension if `logical`
             provided in `metric_kwargs`. Probability forecasts in [0,1] if `logical` is
             not provided.
-        verif (xr.object): Verification data without ``member`` dim. Raw verification if
+        verif: Verification data without ``member`` dim. Raw verification if
             `logical` provided, else binary verification.
-        dim (list or str): Dimensions to aggregate. Requires `member` if `logical`
+        dim: Dimensions to aggregate. Requires `member` if `logical`
             provided in `metric_kwargs` to create probability forecasts. If `logical`
             not provided in `metric_kwargs`, should not include `member`.
-        metric_kwargs (dict): optional
+        metric_kwargs: optional
             logical (callable): Function with bool result to be applied to verification
                 data and forecasts and then ``mean('member')`` to get forecasts and
                 verification data in interval [0,1].
@@ -2566,11 +2697,16 @@ def _brier_score(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _threshold_brier_score(forecast, verif, dim=None, **metric_kwargs):
-    """Brier score of an ensemble for exceeding given thresholds.
+def _threshold_brier_score(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Brier score of an ensemble for exceeding given thresholds.
 
     .. math::
-        CRPS = \\int_f BS(F(f), H(f - o)) df
+        CRPS = \int_f BS(F(f), H(f - o)) df
 
-    where :math:`F(o) = \\int_{f \\leq o} p(f) df` is the cumulative distribution
+    where :math:`F(o) = \int_{f \leq o} p(f) df` is the cumulative distribution
     function (CDF) of the forecast distribution :math:`F`, :math:`o` is a point estimate
@@ -2579,13 +2715,13 @@ def _threshold_brier_score(forecast, verif, dim=None, **metric_kwargs):
-    here as equal to 1 for :math:`x \\geq 0` and 0 otherwise.
+    here as equal to 1 for :math:`x \geq 0` and 0 otherwise.
 
     Args:
-        forecast (xr.object): Forecast with ``member`` dim.
-        verif (xr.object): Verification data without ``member`` dim.
-        dim (list of str): Dimension to apply metric over. Expects at least
+        forecast: Forecast with ``member`` dim.
+        verif: Verification data without ``member`` dim.
+        dim: Dimension to apply metric over. Expects at least
             `member`. Other dimensions are passed to `xskillscore` and averaged.
         threshold (int, float, xr.object): Threshold to check exceedance, see
             properscoring.threshold_brier_score.
-        metric_kwargs (dict): optional, see
+        metric_kwargs: optional, see
             :py:func:`~xskillscore.threshold_brier_score`
 
     Details:
@@ -2692,15 +2828,20 @@ def _threshold_brier_score(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _crps(forecast, verif, dim=None, **metric_kwargs):
-    """Continuous Ranked Probability Score (CRPS).
+def _crps(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Continuous Ranked Probability Score (CRPS).
 
     The CRPS can also be considered as the probabilistic Mean Absolute Error (``mae``).
     It compares the empirical distribution of an ensemble forecast to a scalar
     observation. Smaller scores indicate better skill.
 
     .. math::
-        CRPS = \\int_{-\\infty}^{\\infty} (F(f) - H(f - o))^{2} df,
+        CRPS = \int_{-\infty}^{\infty} (F(f) - H(f - o))^{2} df,
 
     where :math:`F(f)` is the cumulative distribution function (CDF) of the forecast
     (since the verification data are not assigned a probability), and H() is the
@@ -2715,11 +2856,11 @@ def _crps(forecast, verif, dim=None, **metric_kwargs):
         determinstic.
 
     Args:
-        forecast (xr.object): Forecast with `member` dim.
-        verif (xr.object): Verification data without `member` dim.
-        dim (list of str): Dimension to apply metric over. Expects at least
+        forecast: Forecast with `member` dim.
+        verif: Verification data without `member` dim.
+        dim: Dimension to apply metric over. Expects at least
             `member`. Other dimensions are passed to `xskillscore` and averaged.
-        metric_kwargs (dict): optional, see :py:func:`~xskillscore.crps_ensemble`
+        metric_kwargs: optional, see :py:func:`~xskillscore.crps_ensemble`
 
     Details:
         +-----------------+-----------+
@@ -2785,8 +2926,10 @@ def _crps(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _crps_quadrature(verification, cdf_or_dist, dim=None, **metric_kwargs):
-    """Compute the continuously ranked probability score (CPRS) for a given
+def _crps_quadrature(verification: xr.Dataset, cdf_or_dist:Callable, dim:dimType=None, **metric_kwargs:metric_kwargsType)-> xr.Dataset:
+    """Compute the continuously ranked probability score (CPRS).
+
+    For a given
     forecast distribution (``cdf``) and observation (``o``) using numerical quadrature.
 
     This implementation allows the computation of CRPSS for arbitrary forecast
@@ -2796,10 +2939,10 @@ def _crps_quadrature(verification, cdf_or_dist, dim=None, **metric_kwargs):
         This is a helper function for CRPS and cannot be called directly by a user.
 
     Args:
-        forecast (xr.object): Forecast with ``member`` dim.
+        verification: Verification data.
         cdf_or_dist (callable or scipy.stats.distribution): Function which returns the
             cumulative density of the forecast distribution at value x.
-        metric_kwargs (dict): see :py:func:`~xskillscore.crps_quadrature`
+        metric_kwargs: see :py:func:`~xskillscore.crps_quadrature`
 
     See also:
         * :py:func:`~properscoring.crps_quadrature`
@@ -2808,15 +2951,20 @@ def _crps_quadrature(verification, cdf_or_dist, dim=None, **metric_kwargs):
     return crps_quadrature(verification, cdf_or_dist, dim=dim, **metric_kwargs)
 
 
-def _crpss(forecast, verif, dim=None, **metric_kwargs):
-    """Continuous Ranked Probability Skill Score.
+def _crpss(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Continuous Ranked Probability Skill Score.
 
     This can be used to assess whether the ensemble spread is a useful measure for the
     forecast uncertainty by comparing the CRPS of the ensemble forecast to that of a
     reference forecast with the desired spread.
 
     .. math::
-        CRPSS = 1 - \\frac{CRPS_{initialized}}{CRPS_{clim}}
+        CRPSS = 1 - \frac{CRPS_{initialized}}{CRPS_{clim}}
 
     .. note::
         When assuming a Gaussian distribution of forecasts, use default
@@ -2825,11 +2973,11 @@ def _crpss(forecast, verif, dim=None, **metric_kwargs):
         (see :py:func:`~xskillscore.crps_quadrature`).
 
     Args:
-        forecast (xr.object): Forecast with ``member`` dim.
-        verif (xr.object): Verification data without ``member`` dim.
-        dim (list of str): Dimension to apply metric over. Expects at least
+        forecast: Forecast with ``member`` dim.
+        verif: Verification data without ``member`` dim.
+        dim: Dimension to apply metric over. Expects at least
             `member`. Other dimensions are passed to `xskillscore` and averaged.
-        metric_kwargs (dict): optional
+        metric_kwargs: optional
             gaussian (bool, optional): If ``True``, assume Gaussian distribution for
                 baseline skill. Defaults to ``True``.
             see :py:func:`~xskillscore.crps_ensemble`,
@@ -2953,22 +3101,27 @@ def _crpss(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _crpss_es(forecast, verif, dim=None, **metric_kwargs):
-    """Continuous Ranked Probability Skill Score Ensemble Spread.
+def _crpss_es(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Continuous Ranked Probability Skill Score Ensemble Spread.
 
     If the ensemble variance is smaller than the observed ``mse``, the ensemble is
     said to be under-dispersive (or overconfident). An ensemble with variance larger
     than the verification data indicates one that is over-dispersive (underconfident).
 
     .. math::
-        CRPSS = 1 - \\frac{CRPS(\\sigma^2_f)}{CRPS(\\sigma^2_o)}
+        CRPSS = 1 - \frac{CRPS(\sigma^2_f)}{CRPS(\sigma^2_o)}
 
     Args:
-        forecast (xr.object): Forecast with ``member`` dim.
-        verif (xr.object): Verification data without ``member`` dim.
-        dim (list of str): Dimension to apply metric over. Expects at least
+        forecast: Forecast with ``member`` dim.
+        verif: Verification data without ``member`` dim.
+        dim: Dimension to apply metric over. Expects at least
             `member`. Other dimensions are passed to `xskillscore` and averaged.
-        metric_kwargs (dict): see :py:func:`~xskillscore.crps_ensemble`
+        metric_kwargs: see :py:func:`~xskillscore.crps_ensemble`
         and :py:func:`~xskillscore.mse`
 
     Details:
@@ -3058,19 +3211,26 @@ def _crpss_es(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _discrimination(forecast, verif, dim=None, **metric_kwargs):
+def _discrimination(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
     """
-    Returns the data required to construct the discrimination diagram for an event. The
+    Discrimination.
+
+    Return the data required to construct the discrimination diagram for an event. The
     histogram of forecasts likelihood when observations indicate an event has occurred
     and has not occurred.
 
     Args:
-        forecast (xr.object): Raw forecasts with ``member`` dimension if `logical`
+        forecast: Raw forecasts with ``member`` dimension if `logical`
             provided in `metric_kwargs`. Probability forecasts in [0,1] if `logical` is
             not provided.
-        verif (xr.object): Verification data without ``member`` dim. Raw verification if
+        verif: Verification data without ``member`` dim. Raw verification if
             `logical` provided, else binary verification.
-        dim (list or str): Dimensions to aggregate. Requires `member` if `logical`
+        dim: Dimensions to aggregate. Requires `member` if `logical`
             provided in `metric_kwargs` to create probability forecasts. If `logical`
             not provided in `metric_kwargs`, should not include `member`. At least one
             dimension other than `member` is required.
@@ -3084,7 +3244,7 @@ def _discrimination(forecast, verif, dim=None, **metric_kwargs):
 
 
     Returns:
-        Discrimination (xr.object) with added dimension "event" containing the
+        Discrimination with added dimension "event" containing the
         histograms of forecast probabilities when the event was observed and not
         observed
 
@@ -3182,20 +3342,26 @@ def _discrimination(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _reliability(forecast, verif, dim=None, **metric_kwargs):
+def _reliability(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
     """
+    Reliability.
+
     Returns the data required to construct the reliability diagram for an event. The
     the relative frequencies of occurrence of an event for a range of forecast
     probability bins.
 
-
     Args:
-        forecast (xr.object): Raw forecasts with ``member`` dimension if `logical`
+        forecast: Raw forecasts with ``member`` dimension if `logical`
             provided in `metric_kwargs`. Probability forecasts in [0,1] if `logical` is
             not provided.
-        verif (xr.object): Verification data without ``member`` dim. Raw verification if
+        verif: Verification data without ``member`` dim. Raw verification if
             `logical` provided, else binary verification.
-        dim (list or str): Dimensions to aggregate. Requires `member` if `logical`
+        dim: Dimensions to aggregate. Requires `member` if `logical`
             provided in `metric_kwargs` to create probability forecasts. If `logical`
             not provided in `metric_kwargs`, should not include `member`.
         logical (callable, optional): Function with bool result to be applied to
@@ -3207,7 +3373,7 @@ def _reliability(forecast, verif, dim=None, **metric_kwargs):
             0 and 1+1e-8.
 
     Returns:
-        reliability (xr.object): The relative frequency of occurrence for each
+        reliability: The relative frequency of occurrence for each
             probability bin
 
 
@@ -3335,14 +3501,18 @@ def _reliability(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _rank_histogram(forecast, verif, dim=None, **metric_kwargs):
+def _rank_histogram(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
     """Rank histogram or Talagrand diagram.
 
-
     Args:
-        forecast (xr.object): Raw forecasts with ``member`` dimension.
-        verif (xr.object): Verification data without ``member`` dim.
-        dim (list or str): Dimensions to aggregate. Requires to contain `member` and at
+        forecast: Raw forecasts with ``member`` dimension.
+        verif: Verification data without ``member`` dim.
+        dim: Dimensions to aggregate. Requires to contain `member` and at
             least one additional dimension.
 
     Details:
@@ -3422,17 +3592,22 @@ def _rank_histogram(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _rps(forecast, verif, dim=None, **metric_kwargs):
-    """Ranked Probability Score.
+def _rps(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""Ranked Probability Score.
 
     .. math::
-        RPS(p, k) = \\sum_{m=1}^{M} [(\\sum_{k=1}^{m} p_k) - (\\sum_{k=1}^{m} \
+        RPS(p, k) = \sum_{m=1}^{M} [(\sum_{k=1}^{m} p_k) - (\sum_{k=1}^{m} \
             o_k)]^{2}
 
     Args:
-        forecast (xr.object): Forecasts.
-        verif (xr.object): Verification.
-        dim (list or str): Dimensions to aggregate.
+        forecast: Forecasts.
+        verif: Verification.
+        dim: Dimensions to aggregate.
         **metric_kwargs, see :py:func:`~xskillscore.rps`
 
     .. note::
@@ -3520,7 +3695,6 @@ def _rps(forecast, verif, dim=None, **metric_kwargs):
             reference:                     []
             category_edges:                <xarray.Dataset>\\nDimensions:        (cate...
 
-
         Provide ``category_edges`` as tuple for different category edges to categorize
         forecasts and observations.
 
@@ -3566,7 +3740,6 @@ def _rps(forecast, verif, dim=None, **metric_kwargs):
             reference:                     []
             category_edges:                (<xarray.Dataset>\\nDimensions:        (mon...
     """
-
     if "category_edges" in metric_kwargs:
         category_edges = metric_kwargs.pop("category_edges")
     else:
@@ -3624,9 +3797,9 @@ def _contingency(forecast, verif, score="table", dim=None, **metric_kwargs):
     """Contingency table.
 
     Args:
-        forecast (xr.object): Raw forecasts.
-        verif (xr.object): Verification data.
-        dim (list or str): Dimensions to aggregate.
+        forecast: Raw forecasts.
+        verif: Verification data.
+        dim: Dimensions to aggregate.
         score (str): Score derived from contingency table. Attribute from
             :py:class:`~xskillscore.Contingency`. Use ``score=table`` to return a contingency table
             or any other contingency score, e.g. ``score=hit_rate``.
@@ -3721,7 +3894,12 @@ def _contingency(forecast, verif, score="table", dim=None, **metric_kwargs):
 )
 
 
-def _roc(forecast, verif, dim=None, **metric_kwargs):
+def _roc(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
     """Receiver Operating Characteristic.
 
     Args:
@@ -3729,7 +3907,7 @@ def _roc(forecast, verif, dim=None, **metric_kwargs):
             If ``bin_edges=='continuous'``, observations are binary.
         forecasts (xarray.object): Labeled array(s) over which to apply the function.
             If ``bin_edges=='continuous'``, forecasts are probabilities.
-        dim (str, list of str): The dimension(s) over which to aggregate. Defaults to
+        dim: The dimension(s) over which to aggregate. Defaults to
             None, meaning aggregation over all dims other than ``lead``.
         logical (callable, optional): Function with bool result to be applied to
             verification data and forecasts and then ``mean('member')`` to get
@@ -3755,7 +3933,7 @@ def _roc(forecast, verif, dim=None, **metric_kwargs):
                   concatinated into new ``metric`` dimension
 
     Returns:
-        roc (xr.object): reduced by dimensions ``dim``, see ``return_results``
+        roc: reduced by dimensions ``dim``, see ``return_results``
             parameter. ``true positive rate`` and ``false positive rate`` contain
             ``probability_bin`` dimension with ascending ``bin_edges`` as coordinates.
 
@@ -3844,20 +4022,25 @@ def _roc(forecast, verif, dim=None, **metric_kwargs):
 )
 
 
-def _less(forecast, verif, dim=None, **metric_kwargs):
-    """
+def _less(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
+    r"""
     Logarithmic Ensemble Spread Score.
 
-    .. math:: LESS = ln(\\frac{variance}{MSE})= ln(\\frac{\\sigma^2_f}{\\sigma^2_o})
+    .. math:: LESS = ln(\frac{variance}{MSE})= ln(\frac{\sigma^2_f}{\sigma^2_o})
 
     Args:
-        forecast (xr.object): Forecasts.
-        verif (xr.object): Verification.
-        dim (str, list of str): The dimension(s) over which to aggregate. Defaults to
+        forecast: Forecasts.
+        verif: Verification.
+        dim: The dimension(s) over which to aggregate. Defaults to
             None, meaning aggregation over all dims other than ``lead``.
 
     Returns:
-        less (xr.object): reduced by dimensions ``dim``
+        less: reduced by dimensions ``dim``
 
     Details:
         +-----------------+--------------------------------+

From d45b84d46fa3d4ec8643d0e11c690299bc58b45e Mon Sep 17 00:00:00 2001
From: Aaron Spring <aaronspring@users.noreply.github.com>
Date: Fri, 10 Dec 2021 01:53:26 +0100
Subject: [PATCH 14/56] Update climpred/tutorial.py

---
 climpred/tutorial.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/climpred/tutorial.py b/climpred/tutorial.py
index af3f11311..5a83dc48f 100644
--- a/climpred/tutorial.py
+++ b/climpred/tutorial.py
@@ -126,7 +126,7 @@ def load_dataset(
     github_url: str = "https://github.com/pangeo-data/climpred-data",
     branch: str = "master",
     extension: Optional[str] = None,
-    proxy_dict: Optional[dict[str, str]] = None,
+    proxy_dict: Optional[Dict[str, str]] = None,
     **kws,
 ) -> xr.Dataset:
     """Load example data or a mask from an online repository.

From 0aa78eea9bd2720522d7d070dee8b9f27fdc9255 Mon Sep 17 00:00:00 2001
From: Aaron Spring <aaronspring@users.noreply.github.com>
Date: Fri, 10 Dec 2021 01:54:06 +0100
Subject: [PATCH 15/56] Update climpred/tutorial.py

---
 climpred/tutorial.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/climpred/tutorial.py b/climpred/tutorial.py
index 5a83dc48f..42694af27 100644
--- a/climpred/tutorial.py
+++ b/climpred/tutorial.py
@@ -3,7 +3,7 @@
 import hashlib
 import os as _os
 import urllib
-from typing import Optional, Union
+from typing import Optional, Union, Dict
 from urllib.request import urlretrieve as _urlretrieve
 
 import xarray as xr

From 845bdd8d3689c6fb809e2e8083e38c091a3b7ef5 Mon Sep 17 00:00:00 2001
From: Aaron Spring <aaronspring@users.noreply.github.com>
Date: Fri, 10 Dec 2021 02:28:28 +0100
Subject: [PATCH 16/56] Update docs/source/conf.py

---
 docs/source/conf.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index ded83fe60..9f2670be0 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -16,6 +16,8 @@
 import climpred
 
 xarray.DataArray.__module__ = "xarray"
+xarray.Dataset.__module__ = "xarray"
+
 
 
 sys.path.insert(0, os.path.abspath("../.."))

From 69c913e06a9cd620f085ba79c0677aa8b724faaa Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 13:03:42 +0100
Subject: [PATCH 17/56] fix \

---
 CHANGELOG.rst       | 153 ++++++++++++++++++++++++--------------------
 climpred/metrics.py |  21 ++++--
 climpred/stats.py   |   2 +-
 3 files changed, 98 insertions(+), 78 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a7dd1dd7c..6dcd2169b 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,8 +8,9 @@ What's New
     import climpred
     from climpred import HindcastEnsemble
     import matplotlib as mpl
+
     mpl.rcdefaults()
-    mpl.use('Agg')
+    mpl.use("Agg")
     # cut border when saving (for maps)
     mpl.rcParams["savefig.bbox"] = "tight"
 
@@ -31,20 +32,20 @@ New Features
   2-dimensional coordinate ``valid_time`` for ``initialized`` from ``init`` and
   ``lead``, which is matched with ``time`` from ``verification`` during alignment.
 
-  .. code-block:: python
-
-      >>> hind = climpred.tutorial.load_dataset("CESM-DP-SST")
-      >>> hind.lead.attrs["units"] = "years"
-      >>> climpred.HindcastEnsemble(hind).get_initialized()
-      <xarray.Dataset>
-      Dimensions:     (lead: 10, member: 10, init: 64)
-      Coordinates:
-        * lead        (lead) int32 1 2 3 4 5 6 7 8 9 10
-        * member      (member) int32 1 2 3 4 5 6 7 8 9 10
-        * init        (init) object 1954-01-01 00:00:00 ... 2017-01-01 00:00:00
-          valid_time  (lead, init) object 1955-01-01 00:00:00 ... 2027-01-01 00:00:00
-      Data variables:
-          SST         (init, lead, member) float64 ...
+.. :: python
+
+>>> hind = climpred.tutorial.load_dataset("CESM-DP-SST")
+>>> hind.lead.attrs["units"] = "years"
+>>> climpred.HindcastEnsemble(hind).get_initialized()
+<xarray.Dataset>
+Dimensions:     (lead: 10, member: 10, init: 64)
+Coordinates:
+  * lead        (lead) int32 1 2 3 4 5 6 7 8 9 10
+  * member      (member) int32 1 2 3 4 5 6 7 8 9 10
+  * init        (init) object 1954-01-01 00:00:00 ... 2017-01-01 00:00:00
+    valid_time  (lead, init) object 1955-01-01 00:00:00 ... 2027-01-01 00:00:00
+Data variables:
+    SST         (init, lead, member) float64 ...
 
   (:issue:`575`, :pr:`675`, :pr:`678`) `Aaron Spring`_.
 - Allow ``lead`` as ``float`` also if ``calendar="360_day"`` or ``lead.attrs["units"]``
@@ -63,26 +64,32 @@ New Features
   :py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap` to group skill by
   initializations seasonality.
 
-  .. code-block:: python
-
-      >>> import climpred
-      >>> hind = climpred.tutorial.load_dataset("NMME_hindcast_Nino34_sst")
-      >>> obs = climpred.tutorial.load_dataset("NMME_OIv2_Nino34_sst")
-      >>> hindcast = climpred.HindcastEnsemble(hind).add_observations(obs)
-      >>> # skill for each init month separated
-      >>> skill = hindcast.verify(metric="rmse", dim="init", comparison="e2o",
-      ...                         skipna=True, alignment="maximize", groupby="month")
-      >>> skill
-      <xarray.Dataset>
-      Dimensions:  (month: 12, lead: 12, model: 12)
-      Coordinates:
-        * lead     (lead) float64 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0
-        * model    (model) object 'NCEP-CFSv2' 'NCEP-CFSv1' ... 'GEM-NEMO'
-          skill    <U11 'initialized'
-        * month    (month) int64 1 2 3 4 5 6 7 8 9 10 11 12
-      Data variables:
-          sst      (month, lead, model) float64 0.4127 0.3837 0.3915 ... 1.255 3.98
-      >>> skill.sst.plot(hue="model", col="month", col_wrap=3)
+.. :: python
+
+>>> import climpred
+>>> hind = climpred.tutorial.load_dataset("NMME_hindcast_Nino34_sst")
+>>> obs = climpred.tutorial.load_dataset("NMME_OIv2_Nino34_sst")
+>>> hindcast = climpred.HindcastEnsemble(hind).add_observations(obs)
+>>> # skill for each init month separated
+>>> skill = hindcast.verify(
+...     metric="rmse",
+...     dim="init",
+...     comparison="e2o",
+...     skipna=True,
+...     alignment="maximize",
+...     groupby="month",
+... )
+>>> skill
+<xarray.Dataset>
+Dimensions:  (month: 12, lead: 12, model: 12)
+Coordinates:
+  * lead     (lead) float64 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0
+  * model    (model) object 'NCEP-CFSv2' 'NCEP-CFSv1' ... 'GEM-NEMO'
+    skill    <U11 'initialized'
+  * month    (month) int64 1 2 3 4 5 6 7 8 9 10 11 12
+Data variables:
+    sst      (month, lead, model) float64 0.4127 0.3837 0.3915 ... 1.255 3.98
+>>> skill.sst.plot(hue="model", col="month", col_wrap=3)
 
   (:issue:`635`, :pr:`690`) `Aaron Spring`_.
 - :py:meth:`~climpred.classes.HindcastEnsemble.plot_alignment` shows how forecast and
@@ -94,7 +101,10 @@ New Features
       :okwarning:
 
       from climpred.tutorial import load_dataset
-      hindcast = climpred.HindcastEnsemble(load_dataset("CESM-DP-SST")).add_observations(load_dataset("ERSST"))
+
+      hindcast = climpred.HindcastEnsemble(
+          load_dataset("CESM-DP-SST")
+      ).add_observations(load_dataset("ERSST"))
       @savefig plotting_MEOW.png width=100%
       hindcast.plot_alignment(edgecolor="w")
 
@@ -147,7 +157,7 @@ Bug Fixes
 - :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias` for ``how`` in
   ``["modified_quantile", "basic_quantile", "gamma_mapping", "normal_mapping"]``
   from `bias_correction <https://github.com/pankajkarman/bias_correction>`_
-  takes all ``member``s to create model distribution. (:pr:`667`) `Aaron Spring`_.
+  takes all ``member`` to create model distribution. (:pr:`667`) `Aaron Spring`_.
 
 New Features
 ------------
@@ -677,7 +687,8 @@ Internals/Minor Fixes
   :py:func:`~climpred.prediction.compute_perfect_model`. (:pr:`330`) `Aaron Spring`_.
 - Changed lead0 coordinate modifications to be compliant with ``xarray=0.15.1`` in
   :py:func:`~climpred.reference.compute_persistence`. (:pr:`348`) `Aaron Spring`_.
-- Exchanged ``my_quantile`` with ``xr.quantile(skipna=False)``. (:pr:`348`) `Aaron Spring`_.
+- Exchanged ``my_quantile`` with ``xr.quantile(skipna=False)``.
+  (:pr:`348`) `Aaron Spring`_.
 - Remove ``sig`` from
   :py:func:`~climpred.graphics.plot_bootstrapped_skill_over_leadyear`.
   (:pr:`351`) `Aaron Spring`_.
@@ -712,10 +723,10 @@ New Features
   what resolution the predictions are at. (:pr:`294`) `Kathy Pegion`_ and
   `Riley X. Brady`_.
 
-.. code-block:: python
+.. code-block:: python
 
-        >>> hind = climpred.tutorial.load_dataset('CESM-DP-SST')
-        >>> hind.lead.attrs['units'] = 'years'
+    >>> hind = climpred.tutorial.load_dataset("CESM-DP-SST")
+    >>> hind.lead.attrs["units"] = "years"
 
 - ``HindcastEnsemble`` now has ``.add_observations()`` and ``.get_observations()``
   methods. These are the same as ``.add_reference()`` and ``.get_reference()``, which
@@ -866,36 +877,36 @@ New Features
   :py:class:`~climpred.classes.PerfectModelEnsemble` to retrieve ``xarray`` datasets
   from the objects. (:pr:`243`) `Riley X. Brady`_.
 
-    .. code-block:: python
-
-        >>> hind = climpred.tutorial.load_dataset('CESM-DP-SST')
-        >>> ref = climpred.tutorial.load_dataset('ERSST')
-        >>> hindcast = climpred.HindcastEnsemble(hind)
-        >>> hindcast = hindcast.add_reference(ref, 'ERSST')
-        >>> print(hindcast)
-        <climpred.HindcastEnsemble>
-        Initialized Ensemble:
-            SST      (init, lead, member) float64 ...
-        ERSST:
-            SST      (time) float32 ...
-        Uninitialized:
-            None
-        >>> print(hindcast.get_initialized())
-        <xarray.Dataset>
-        Dimensions:  (init: 64, lead: 10, member: 10)
-        Coordinates:
-        * lead     (lead) int32 1 2 3 4 5 6 7 8 9 10
-        * member   (member) int32 1 2 3 4 5 6 7 8 9 10
-        * init     (init) float32 1954.0 1955.0 1956.0 1957.0 ... 2015.0 2016.0 2017.0
-        Data variables:
-            SST      (init, lead, member) float64 ...
-        >>> print(hindcast.get_reference('ERSST'))
-        <xarray.Dataset>
-        Dimensions:  (time: 61)
-        Coordinates:
-        * time     (time) int64 1955 1956 1957 1958 1959 ... 2011 2012 2013 2014 2015
-        Data variables:
-            SST      (time) float32 ...
+.. :: python
+
+>>> hind = climpred.tutorial.load_dataset("CESM-DP-SST")
+>>> ref = climpred.tutorial.load_dataset("ERSST")
+>>> hindcast = climpred.HindcastEnsemble(hind)
+>>> hindcast = hindcast.add_reference(ref, "ERSST")
+>>> print(hindcast)
+<climpred.HindcastEnsemble>
+Initialized Ensemble:
+    SST      (init, lead, member) float64 ...
+ERSST:
+    SST      (time) float32 ...
+Uninitialized:
+    None
+>>> print(hindcast.get_initialized())
+<xarray.Dataset>
+Dimensions:  (init: 64, lead: 10, member: 10)
+Coordinates:
+* lead     (lead) int32 1 2 3 4 5 6 7 8 9 10
+* member   (member) int32 1 2 3 4 5 6 7 8 9 10
+* init     (init) float32 1954.0 1955.0 1956.0 1957.0 ... 2015.0 2016.0 2017.0
+Data variables:
+    SST      (init, lead, member) float64 ...
+>>> print(hindcast.get_reference("ERSST"))
+<xarray.Dataset>
+Dimensions:  (time: 61)
+Coordinates:
+* time     (time) int64 1955 1956 1957 1958 1959 ... 2011 2012 2013 2014 2015
+Data variables:
+    SST      (time) float32 ...
 
 - ``metric_kwargs`` can be passed to :py:class:`~climpred.metrics.Metric`.
   (:pr:`264`) `Aaron Spring`_.
diff --git a/climpred/metrics.py b/climpred/metrics.py
index 6226a13c7..bec08bc58 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -1,3 +1,5 @@
+"""Metrics for ``PredictionEnsemble.verify()`` and ``PredictionEnsemble.bootstrap()``."""
+
 import warnings
 from typing import Any, Callable, List, Optional, Union
 
@@ -102,7 +104,7 @@ def _preprocess_dims(dim):
 
 
 def _rename_dim(dim, forecast, verif):
-    """rename `dim` to `time` or `init` if forecast and verif dims require."""
+    """Rename `dim` to `time` or `init` if forecast and verif dims require."""
     if "init" in dim and "time" in forecast.dims and "time" in verif.dims:
         dim = dim.copy()
         dim.remove("init")
@@ -420,7 +422,7 @@ def _pearson_r_p_value(
     # model grid. We can avoid this annoying output by specifically suppressing
     # warning here.
     with warnings.catch_warnings():
-        warnings.simplefilter("ignore", category=(RuntimeWarning, DeprecationWarning))
+        warnings.simplefilter("ignore", category=Warning)
         return pearson_r_p_value(forecast, verif, dim=dim, **metric_kwargs)
 
 
@@ -620,7 +622,7 @@ def _pearson_r_eff_p_value(
     # model grid. We can avoid this annoying output by specifically suppressing
     # warning here.
     with warnings.catch_warnings():
-        warnings.simplefilter("ignore", category=(RuntimeWarning, DeprecationWarning))
+        warnings.simplefilter("ignore", category=Warning)
         return pearson_r_eff_p_value(forecast, verif, dim=dim, **metric_kwargs)
 
 
@@ -903,7 +905,7 @@ def _spearman_r_eff_p_value(
     # model grid. We can avoid this annoying output by specifically suppressing
     # warning here.
     with warnings.catch_warnings():
-        warnings.simplefilter("ignore", category=(RuntimeWarning, DeprecationWarning))
+        warnings.simplefilter("ignore", category=Warning)
         return spearman_r_eff_p_value(forecast, verif, dim=dim, **metric_kwargs)
 
 
@@ -2926,7 +2928,12 @@ def _crps(
 )
 
 
-def _crps_quadrature(verification: xr.Dataset, cdf_or_dist:Callable, dim:dimType=None, **metric_kwargs:metric_kwargsType)-> xr.Dataset:
+def _crps_quadrature(
+    verification: xr.Dataset,
+    cdf_or_dist: Callable,
+    dim: dimType = None,
+    **metric_kwargs: metric_kwargsType,
+) -> xr.Dataset:
     """Compute the continuously ranked probability score (CPRS).
 
     For a given
@@ -3056,7 +3063,9 @@ def _crpss(
         * :py:func:`~xskillscore.crps_ensemble`
     """
     if dim is None:
-        dim = verif.dims
+        dim = list(verif.dims)
+    if isinstance(dim, str):
+        dim = list(dim)
     # available climpred dimensions to take mean and std over
     rdim = [tdim for tdim in verif.dims if tdim in CLIMPRED_DIMS]
     mu = verif.mean(rdim)
diff --git a/climpred/stats.py b/climpred/stats.py
index 7cc6dcda4..b127601f0 100644
--- a/climpred/stats.py
+++ b/climpred/stats.py
@@ -91,7 +91,7 @@ def dpp(
     .. math::
 
         DPP_{\mathrm{unbiased}}(m) = \frac{\sigma^{2}_{m} -
-        \frac{1}{m}\cdot\\sigma^{2}}{\sigma^{2}}
+        \frac{1}{m}\cdot\sigma^{2}}{\sigma^{2}}
 
     Note:
         Resplandy et al. 2015 and Seferian et al. 2018 calculate unbiased DPP

From 240d452cefe1c81a0db63bb65e815ed0f40a14ba Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 13:11:22 +0100
Subject: [PATCH 18/56] link to docs more

---
 climpred/classes.py | 65 +++++++++++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index 3aa0ca51a..c00db0bfe 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -105,7 +105,7 @@
 
 def _display_metadata(self) -> str:
     """
-    Print the contents of the ``PredictionEnsemble`` as text.
+    Print the contents of the :py:class:`~climpred.classes.PredictionEnsemble` as text.
 
     Example:
         >>> init = climpred.tutorial.load_dataset("CESM-DP-SST")
@@ -157,7 +157,7 @@ def _display_metadata(self) -> str:
 
 
 def _display_metadata_html(self) -> str:
-    """Print the contents of the ``PredictionEnsemble`` as html."""
+    """Print the contents of the :py:class:`~climpred.classes.PredictionEnsemble` as html."""
     header = f"<h4>climpred.{type(self).__name__}</h4>"
     display_html(header, raw=True)
     init_repr_str = dataset_repr(self._datasets["initialized"])
@@ -189,15 +189,22 @@ def _display_metadata_html(self) -> str:
 
 class PredictionEnsemble:
     """
-    The main object ``PredictionEnsemble``.
+    The main object :py:class:`~climpred.classes.PredictionEnsemble`.
+
+    This is the super of both :py:class:`~climpred.classes.PerfectModelEnsemble` and
+    :py:class:`~climpred.classes.HindcastEnsemble`. This cannot be called directly by
+    a user, but should house functions that both ensemble types can use.
+
+    Associated xarray.Dataset are stored in:
+    * ``PredictionEnsemble._datasets["initialized"]``
+    * ``PredictionEnsemble._datasets["uninitialized"]``
+    * ``PredictionEnsemble._datasets["control"]`` in :py:class:`~climpred.classes.PerfectModelEnsemble`
+    * ``PredictionEnsemble._datasets["observations"]`` in :py:class:`~climpred.classes.HindcastEnsemble`
 
-    This is the super of both ```PerfectModelEnsemble`` and
-    ```HindcastEnsemble``. This cannot be called directly by a user, but
-    should house functions that both ensemble types can use.
     """
 
     def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]):
-        """Create a ``PredictionEnsemble`` object."""
+        """Create a :py:class:`~climpred.classes.PredictionEnsemble` object."""
         if isinstance(initialized, xr.DataArray):
             # makes applying prediction functions easier, etc.
             initialized = initialized.to_dataset()
@@ -251,7 +258,7 @@ def _groupby(self, call: str, groupby: Union[str, xr.DataArray], **kwargs: Any):
 
     @property
     def coords(self) -> DatasetCoordinates:
-        """Return coordinates of ``PredictionEnsemble``.
+        """Return coordinates of :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Dictionary of xarray.DataArray objects corresponding to coordinate
         variables available in all PredictionEnsemble._datasets.
@@ -283,7 +290,7 @@ def nbytes(self) -> int:
     @property
     def sizes(self) -> Mapping[Hashable, int]:
         """
-        Return sizes of ``PredictionEnsemble``.
+        Return sizes of :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Mapping from dimension names to lengths for all PredictionEnsemble._datasets.
 
@@ -299,7 +306,7 @@ def sizes(self) -> Mapping[Hashable, int]:
     @property
     def dims(self) -> Mapping[Hashable, int]:
         """
-        Return dimension of ``PredictionEnsemble``.
+        Return dimension of :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Mapping from dimension names to lengths all PredictionEnsemble._datasets.
 
@@ -311,7 +318,7 @@ def dims(self) -> Mapping[Hashable, int]:
     @property
     def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
         """
-        Return chunks of ``PredictionEnsemble``.
+        Return chunks of :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Mapping from chunks all PredictionEnsemble._datasets.
 
@@ -328,7 +335,7 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
 
     @property
     def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
-        """Return chunksizes of ``PredictionEnsemble``.
+        """Return chunksizes of :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Mapping from dimension names to block lengths for this dataset's data, or
         None if the underlying data is not a dask array.
@@ -343,7 +350,7 @@ def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
     @property
     def data_vars(self) -> DataVariables:
         """
-        Return data variables of ``PredictionEnsemble``.
+        Return data variables of :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Dictionary of DataArray objects corresponding to data variables available in
         all PredictionEnsemble._datasets.
@@ -369,7 +376,7 @@ def __repr__(self) -> str:
             return _display_metadata(self)
 
     def __len__(self) -> int:
-        """Return number of all variables in ``PredictionEnsemble``."""
+        """Return number of all variables in :py:class:`~climpred.classes.PredictionEnsemble`."""
         return len(self.data_vars)
 
     def __iter__(self) -> Iterator[Hashable]:
@@ -377,7 +384,7 @@ def __iter__(self) -> Iterator[Hashable]:
         return iter(self._datasets.values())
 
     def __delitem__(self, key: Hashable) -> None:
-        """Remove a variable from ``PredictionEnsemble``."""
+        """Remove a variable from :py:class:`~climpred.classes.PredictionEnsemble`."""
         del self._datasets["initialized"][key]
         for ds in self._datasets.values():
             if isinstance(ds, xr.Dataset):
@@ -385,7 +392,7 @@ def __delitem__(self, key: Hashable) -> None:
                     del ds[key]
 
     def __contains__(self, key: Hashable) -> bool:
-        """Check variable in ``PredictionEnsemble``.
+        """Check variable in :py:class:`~climpred.classes.PredictionEnsemble`.
 
         The ``"in"`` operator will return true or false depending on whether
         ``"key"`` is in any PredictionEnsemble._datasets.
@@ -398,9 +405,9 @@ def __contains__(self, key: Hashable) -> bool:
         return contained
 
     def equals(self, other: Union["PredictionEnsemble", Any]) -> bool:
-        """Check if ``PredictionEnsemble`` is equal to other ``PredictionEnsemble``.
+        """Check if :py:class:`~climpred.classes.PredictionEnsemble` is equal to other :py:class:`~climpred.classes.PredictionEnsemble`.
 
-        Two ``PredictionEnsemble``s are equal if they have matching variables and
+        Two :py:class:`~climpred.classes.PredictionEnsemble` objects are equal if they have matching variables and
         coordinates, all of which are equal.
         ``PredictionEnsembles`` can still be equal (like pandas objects) if they have NaN
         values in the same locations.
@@ -426,7 +433,7 @@ def equals(self, other: Union["PredictionEnsemble", Any]) -> bool:
 
     def identical(self, other: Union["PredictionEnsemble", Any]) -> bool:
         """
-        Check if ``PredictionEnsemble`` is identical to other ``PredictionEnsemble``.
+        Check if :py:class:`~climpred.classes.PredictionEnsemble` is identical to other :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Like ``equals``, but also checks all dataset attributes and the
         attributes on all variables and coordinates.
@@ -455,7 +462,7 @@ def plot(
         cmap: Optional[str] = None,
         x: str = "time",
     ) -> "plt.Axes":
-        """Plot datasets from ``PredictionEnsemble``.
+        """Plot datasets from :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Args:
             variable: `variable` to show. Defaults to first in data_vars.
@@ -606,7 +613,7 @@ def __truediv__(self, other: mathType) -> "PredictionEnsemble":
         return self._math(other, operator="div")
 
     def __getitem__(self, varlist: Union[str, List[str]]) -> "PredictionEnsemble":
-        """Allow subsetting variable(s) from ``PredictionEnsemble`` as from xr.Dataset.
+        """Allow subsetting variable(s) from :py:class:`~climpred.classes.PredictionEnsemble` as from xr.Dataset.
 
         Args:
             * varlist: list of names or name of data variable(s) to subselect
@@ -760,7 +767,7 @@ def smooth(
         how: str = "mean",
         **xesmf_kwargs: str,
     ):
-        """Smooth in space and/or aggregate in time ``PredictionEnsemble``.
+        """Smooth in space and/or aggregate in time :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Args:
             smooth_kws: Dictionary to specify the dims to
@@ -983,7 +990,7 @@ def _warn_if_chunked_along_init_member_time(self) -> None:
 class PerfectModelEnsemble(PredictionEnsemble):
     """An object for "perfect model" prediction ensembles.
 
-    ``PerfectModelEnsemble`` is a sub-class of ``PredictionEnsemble``. It tracks
+    :py:class:`~climpred.classes.PerfectModelEnsemble` is a sub-class of :py:class:`~climpred.classes.PredictionEnsemble`. It tracks
     the control run used to initialize the ensemble for easy computations,
     bootstrapping, etc.
 
@@ -992,7 +999,7 @@ class PerfectModelEnsemble(PredictionEnsemble):
     """
 
     def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
-        """Create a ``PerfectModelEnsemble`` object.
+        """Create a :py:class:`~climpred.classes.PerfectModelEnsemble` object.
 
         Args:
           initialized: prediction ensemble output.
@@ -1088,7 +1095,7 @@ def generate_uninitialized(self) -> "PerfectModelEnsemble":
         """Generate an uninitialized ensemble by resampling from the control simulation.
 
         Returns:
-            ``PerfectModelEnsemble`` with resampled (uninitialized) ensemble from
+            :py:class:`~climpred.classes.PerfectModelEnsemble` with resampled (uninitialized) ensemble from
             control
         """
         has_dataset(
@@ -1614,7 +1621,7 @@ def bootstrap(
 class HindcastEnsemble(PredictionEnsemble):
     """An object for initialized prediction ensembles.
 
-    ``HindcastEnsemble`` is a sub-class of ``PredictionEnsemble``. It tracks a
+    :py:class:`~climpred.classes.HindcastEnsemble` is a sub-class of :py:class:`~climpred.classes.PredictionEnsemble`. It tracks a
     verification dataset (i.e., observations) associated with the hindcast ensemble
     for easy computation across multiple variables.
 
@@ -1623,7 +1630,7 @@ class HindcastEnsemble(PredictionEnsemble):
     """
 
     def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
-        """Create ``HindcastEnsemble`` from initialized prediction ensemble output.
+        """Create :py:class:`~climpred.classes.HindcastEnsemble` from initialized prediction ensemble output.
 
         Args:
           initialized: initialized prediction ensemble output.
@@ -1690,7 +1697,7 @@ def add_observations(
         Same as :py:meth:`~climpred.classes.HindcastEnsemble.add_verification`.
 
         Args:
-            obs: observations added to ``HindcastEnsemble``.
+            obs: observations added to :py:class:`~climpred.classes.HindcastEnsemble`.
         """
         if isinstance(obs, xr.DataArray):
             obs = obs.to_dataset()
@@ -1714,7 +1721,7 @@ def add_verification(
         Same as :py:meth:`~climpred.classes.HindcastEnsemble.add_observations`.
 
         Args:
-            verif: verification added to ``HindcastEnsemble``.
+            verif: verification added to :py:class:`~climpred.classes.HindcastEnsemble`.
         """
         return self.add_observations(verif)
 

From c99a67f8808206e001c7b0aa23c3aed157644632 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 13:14:44 +0100
Subject: [PATCH 19/56] fix doctests

---
 climpred/classes.py | 9 ++++++++-
 climpred/metrics.py | 4 ++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index c00db0bfe..656a70554 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -111,6 +111,13 @@ def _display_metadata(self) -> str:
         >>> init = climpred.tutorial.load_dataset("CESM-DP-SST")
         >>> hindcast = climpred.HindcastEnsemble(init)
         >>> print(hindcast)
+        <climpred.HindcastEnsemble>
+        Initialized Ensemble:
+            SST      (init, lead, member) float64 -0.2404 -0.2085 ... 0.7442 0.7384
+        Observations:
+            None
+        Uninitialized:
+            None
 
     """
     SPACE = "    "
@@ -1864,7 +1871,7 @@ def plot_alignment(
             Coordinates:
               * init       (init) object 1954-01-01 00:00:00 ... 2014-01-01 00:00:00
               * lead       (lead) int32 1 2 3 4 5 6 7 8 9 10
-              * alignment  (alignment) <U10 'same_init' 'same_verif' ""maximize
+              * alignment  (alignment) <U10 'same_init' 'same_verif' 'maximize'
             Attributes:
                 units:    days since 1960-01-01
 
diff --git a/climpred/metrics.py b/climpred/metrics.py
index bec08bc58..04fbcd9a0 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -3702,7 +3702,7 @@ def _rps(
             comparison:                    m2c
             dim:                           ['member', 'init']
             reference:                     []
-            category_edges:                <xarray.Dataset>\\nDimensions:        (cate...
+            category_edges:                <xarray.Dataset>\nDimensions:        (cate...
 
         Provide ``category_edges`` as tuple for different category edges to categorize
         forecasts and observations.
@@ -3747,7 +3747,7 @@ def _rps(
             comparison:                    m2o
             dim:                           ['member', 'init']
             reference:                     []
-            category_edges:                (<xarray.Dataset>\\nDimensions:        (mon...
+            category_edges:                (<xarray.Dataset>\nDimensions:        (mon...
     """
     if "category_edges" in metric_kwargs:
         category_edges = metric_kwargs.pop("category_edges")

From 04abd1f4cb92235673b26ea0b788d28e7a20c36c Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 18:19:18 +0100
Subject: [PATCH 20/56] comparisons.py

---
 climpred/comparisons.py                       | 233 ++++++-----
 climpred/metrics.py                           | 377 ++++++++++--------
 climpred/reference.py                         |   2 +-
 .../climpred.classes.PredictionEnsemble.rst   |  40 ++
 ...impred.comparisons.Comparison.__init__.rst |   6 +
 ...impred.comparisons.Comparison.__repr__.rst |   6 +
 .../api/climpred.metrics.Metric.__init__.rst  |   6 +
 .../api/climpred.metrics.Metric.__repr__.rst  |   6 +
 8 files changed, 396 insertions(+), 280 deletions(-)
 create mode 100644 docs/source/api/climpred.classes.PredictionEnsemble.rst
 create mode 100644 docs/source/api/climpred.comparisons.Comparison.__init__.rst
 create mode 100644 docs/source/api/climpred.comparisons.Comparison.__repr__.rst
 create mode 100644 docs/source/api/climpred.metrics.Metric.__init__.rst
 create mode 100644 docs/source/api/climpred.metrics.Metric.__repr__.rst

diff --git a/climpred/comparisons.py b/climpred/comparisons.py
index 955ea3089..1d99b5103 100644
--- a/climpred/comparisons.py
+++ b/climpred/comparisons.py
@@ -1,4 +1,6 @@
-from typing import Callable, List, Optional, Tuple
+"""Comparisons: How to compare forecast with verification."""
+
+from typing import Callable, List, Optional, Tuple, Union
 
 import dask
 import numpy as np
@@ -10,30 +12,14 @@
 
 
 def _transpose_and_rechunk_to(new_chunk_ds, ori_chunk_ds):
-    """Chunk xr.object `new_chunk_ds` as another xr.object `ori_chunk_ds`.
+    """
+    Chunk xr.Dataset `new_chunk_ds` as another xr.Dataset `ori_chunk_ds`.
+
     This is needed after some operations which reduce chunks to size 1.
-    First transpose a to ds.dims then apply ds chunking to a."""
+    Transpose `new_chunk_ds` to `ori_chunk_ds.dims`, then apply `ori_chunk_ds.chunks`.
+    """
     # supposed to be in .utils but circular imports therefore here
-    transpose_kwargs = (
-        {"transpose_coords": False} if isinstance(new_chunk_ds, xr.DataArray) else {}
-    )
-    return new_chunk_ds.transpose(*ori_chunk_ds.dims, **transpose_kwargs).chunk(
-        ori_chunk_ds.chunks
-    )
-
-
-def _display_comparison_metadata(self) -> str:
-    summary = "----- Comparison metadata -----\n"
-    summary += f"Name: {self.name}\n"
-    # probabilistic or only deterministic
-    if not self.probabilistic:
-        summary += "Kind: deterministic\n"
-    else:
-        summary += "Kind: deterministic and probabilistic\n"
-    summary += f"long_name: {self.long_name}\n"
-    # doc
-    summary += f"Function: {self.function.__doc__}\n"
-    return summary
+    return new_chunk_ds.transpose(*ori_chunk_ds.dims).chunk(ori_chunk_ds.chunks)
 
 
 class Comparison:
@@ -43,7 +29,7 @@ def __init__(
         self,
         name: str,
         function: Callable[
-            [xr.Dataset, Optional[Metric]], Tuple[xr.Dataset, xr.Dataset]
+            [xr.Dataset, xr.Dataset, Metric], Tuple[xr.Dataset, xr.Dataset]
         ],
         hindcast: bool,
         probabilistic: bool,
@@ -53,21 +39,18 @@ def __init__(
         """Comparison initialization.
 
         Args:
-            name (str): name of comparison.
-            function (Callable): comparison function.
-            hindcast (bool): Can comparison be used in ``HindcastEnsemble``?
-                ``False`` means ``PerfectModelEnsemble``
-            probabilistic (bool): Can this comparison be used for probabilistic
+            name: name of comparison.
+            function: comparison function.
+            hindcast: Can comparison be used in
+                :py:class:`~climpred.classes.HindcastEnsemble`?
+                ``False`` means only :py:class:`~climpred.classes.PerfectModelEnsemble`
+            probabilistic: Can this comparison be used for probabilistic
                 metrics also? Probabilistic metrics require multiple forecasts.
-                `False` means that comparison is only deterministic.
-                `True` means that comparison can be used both deterministic and
+                ``False`` means that comparison is only deterministic.
+                ``True`` means that comparison can be used both deterministic and
                 probabilistic.
-            long_name (str, optional): longname of comparison. Defaults to ``None``.
-            aliases (list of str, optional): Allowed aliases for this comparison.
-                Defaults to ``None``.
-
-        Returns:
-            comparison: comparison class Comparison.
+            long_name: longname of comparison.
+            aliases: Allowed aliases for this comparison.
 
         """
         self.name = name
@@ -79,7 +62,17 @@ def __init__(
 
     def __repr__(self) -> str:
         """Show metadata of comparison class."""
-        return _display_comparison_metadata(self)
+        summary = "----- Comparison metadata -----\n"
+        summary += f"Name: {self.name}\n"
+        # probabilistic or only deterministic
+        if not self.probabilistic:
+            summary += "Kind: deterministic\n"
+        else:
+            summary += "Kind: deterministic and probabilistic\n"
+        summary += f"long_name: {self.long_name}\n"
+        # doc
+        summary += f"Function: {self.function.__doc__}\n"
+        return summary
 
 
 # --------------------------------------------#
@@ -87,36 +80,43 @@ def __repr__(self) -> str:
 # --------------------------------------------#
 
 
-def _m2m(ds, metric=None):
-    """Compare all members to all others in turn while leaving out the verification
-    ``member``.
+def _m2m(
+    ds: xr.Dataset, metric: Metric, verif: Optional[xr.Dataset] = None
+) -> Tuple[xr.Dataset, xr.Dataset]:
+    """Compare all members to all others in turn while leaving out verification member.
+
+    :ref:`comparisons` for :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Args:
-        ds (xr.Dataset): initialized with ``member`` dimension.
-        metric (Metric):
-            If deterministic, forecast and reference have ``member`` dim.
+        ds: initialized with ``member`` dimension.
+        metric:
+            If deterministic, forecast and verif have ``member`` dim.
             If probabilistic, only forecast has ``member`` dim.
+        verif: not used in :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Returns:
-        (xr.Dataset, xr.Dataset): forecast, reference.
+        forecast, verification
     """
-    reference_list = []
+    if verif is not None:
+        raise ValueError("`verif` not expected.")
+
+    verif_list = []
     forecast_list = []
     for m in ds.member.values:
         forecast = ds.drop_sel(member=m)
         # set incrementing members to avoid nans from broadcasting
         forecast["member"] = np.arange(1, 1 + forecast.member.size)
-        reference = ds.sel(member=m, drop=True)
-        # Tiles the singular "reference" member to compare directly to all other members
+        verif = ds.sel(member=m, drop=True)
+        # Tiles the singular "verif" member to compare directly to all other members
         if not metric.probabilistic:
-            forecast, reference = xr.broadcast(forecast, reference)
-        reference_list.append(reference)
+            forecast, verif = xr.broadcast(forecast, verif)
+        verif_list.append(verif)
         forecast_list.append(forecast)
-    reference = xr.concat(reference_list, M2M_MEMBER_DIM)
+    verif = xr.concat(verif_list, M2M_MEMBER_DIM)
     forecast = xr.concat(forecast_list, M2M_MEMBER_DIM)
-    reference[M2M_MEMBER_DIM] = np.arange(reference[M2M_MEMBER_DIM].size)
+    verif[M2M_MEMBER_DIM] = np.arange(verif[M2M_MEMBER_DIM].size)
     forecast[M2M_MEMBER_DIM] = np.arange(forecast[M2M_MEMBER_DIM].size)
-    return forecast, reference
+    return forecast, verif
 
 
 __m2m = Comparison(
@@ -128,36 +128,41 @@ def _m2m(ds, metric=None):
 )
 
 
-def _m2e(ds, metric=None):
+def _m2e(
+    ds: xr.Dataset, metric: Optional[Metric] = None, verif: Optional[xr.Dataset] = None
+) -> Tuple[xr.Dataset, xr.Dataset]:
     """
-    Compare all members to ensemble mean while leaving out the reference in
-     ensemble mean.
+    Compare all members to ensemble mean while leaving out the verif in ensemble mean.
+
+    :ref:`comparisons` for :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Args:
-        ds (xr.Dataset): ``initialized`` with ``member`` dimension.
-        metric (Metric): needed for probabilistic metrics.
-                      therefore useless in ``m2e`` comparison,
-                      but expected by internal API.
+        ds: ``initialized`` with ``member`` dimension.
+        metric: needed for probabilistic metrics. Therefore useless in ``m2e``
+            comparison, but expected by internal API.
+        verif: not used in :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Returns:
-        (xr.Dataset, xr.Dataset): forecast, reference.
+        forecast, verification
     """
-    reference_list = []
+    if verif is not None:
+        raise ValueError("`verif` not expected.")
+    verif_list = []
     forecast_list = []
     M2E_COMPARISON_DIM = "member"
     for m in ds.member.values:
         forecast = ds.drop_sel(member=m).mean("member")
-        reference = ds.sel(member=m, drop=True)
+        verif = ds.sel(member=m, drop=True)
         forecast_list.append(forecast)
-        reference_list.append(reference)
-    reference = xr.concat(reference_list, M2E_COMPARISON_DIM)
+        verif_list.append(verif)
+    verif = xr.concat(verif_list, M2E_COMPARISON_DIM)
     forecast = xr.concat(forecast_list, M2E_COMPARISON_DIM)
     forecast[M2E_COMPARISON_DIM] = np.arange(forecast[M2E_COMPARISON_DIM].size)
-    reference[M2E_COMPARISON_DIM] = np.arange(reference[M2E_COMPARISON_DIM].size)
+    verif[M2E_COMPARISON_DIM] = np.arange(verif[M2E_COMPARISON_DIM].size)
     if dask.is_dask_collection(forecast):
         forecast = _transpose_and_rechunk_to(forecast, ds)
-        reference = _transpose_and_rechunk_to(reference, ds)
-    return forecast, reference
+        verif = _transpose_and_rechunk_to(verif, ds)
+    return forecast, verif
 
 
 __m2e = Comparison(
@@ -170,29 +175,37 @@ def _m2e(ds, metric=None):
 )
 
 
-def _m2c(ds, metric=None):
+def _m2c(
+    ds: xr.Dataset, metric: Metric, verif: Optional[xr.Dataset] = None
+) -> Tuple[xr.Dataset, xr.Dataset]:
     """
-    Compare all other member forecasts to a single member verification, which is the
-    first member.
+    Compare all other member forecasts to a single member verification.
+
+    Verification member is the first member.
     If the initialized dataset is concatinated in a way that the first member
     is taken from the control simulation, this compares all other member forecasts
     to the control simulation.
 
+    :ref:`comparisons` for :py:class:`~climpred.classes.PerfectModelEnsemble`
+
     Args:
-        ds (xr.Dataset): ``initialized`` with ``member`` dimension.
-        metric (Metric): if deterministic, forecast and reference both have member dim
-                      if probabilistic, only forecast has ``member`` dim
+        ds: ``initialized`` with ``member`` dimension.
+        metric: if deterministic, forecast and verif both have member dim
+            if probabilistic, only forecast has ``member`` dim
+        verif: not used in :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Returns:
-        (xr.Dataset, xr.Dataset): forecast, reference.
+        forecast, verification
     """
+    if verif is not None:
+        raise ValueError("`verif` not expected.")
     control_member = ds.member.values[0]
-    reference = ds.sel(member=control_member, drop=True)
-    # drop the member being reference
+    verif = ds.sel(member=control_member, drop=True)
+    # drop the member being verif
     forecast = ds.drop_sel(member=control_member)
     if not metric.probabilistic:
-        forecast, reference = xr.broadcast(forecast, reference)
-    return forecast, reference
+        forecast, verif = xr.broadcast(forecast, verif)
+    return forecast, verif
 
 
 __m2c = Comparison(
@@ -204,27 +217,34 @@ def _m2c(ds, metric=None):
 )
 
 
-def _e2c(ds, metric=None):
+def _e2c(
+    ds: xr.Dataset, metric: Optional[Metric] = None, verif: Optional[xr.Dataset] = None
+) -> Tuple[xr.Dataset, xr.Dataset]:
     """
     Compare ensemble mean forecast to single member verification.
+
     If the initialized dataset is concatinated in a way that the first member
     is taken from the control simulation, this compares the member mean of all
     other member forecasts to the control simulation.
 
+    :ref:`comparisons` for :py:class:`~climpred.classes.PerfectModelEnsemble`
+
     Args:
-        ds (xr.Dataset): ``initialized`` with ``member`` dimension.
-        metric (Metric): needed for probabilistic metrics.
-                      therefore useless in ``e2c`` comparison,
-                      but expected by internal API.
+        ds: ``initialized`` with ``member`` dimension.
+        metric: needed for probabilistic metrics. Therefore useless in ``e2c``
+            comparison, but expected by internal API.
+        verif: not used in :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Returns:
-        (xr.Dataset, xr.Dataset): forecast, reference.
+        forecast, verification
     """
+    if verif is not None:
+        raise ValueError("`verif` not expected.")
     control_member = ds.member.values[0]
-    reference = ds.sel(member=control_member, drop=True)
+    verif = ds.sel(member=control_member, drop=True)
     ds = ds.drop_sel(member=control_member)
     forecast = ds.mean("member")
-    return forecast, reference
+    return forecast, verif
 
 
 __e2c = Comparison(
@@ -239,20 +259,24 @@ def _e2c(ds, metric=None):
 # --------------------------------------------#
 # HINDCAST COMPARISONS
 # --------------------------------------------#
-def _e2o(hind, verif, metric=None):
-    """Compare the ensemble mean forecast to the verification data for a
-    ``HindcastEnsemble`` setup.
+def _e2o(
+    hind: xr.Dataset, verif: xr.Dataset, metric: Metric
+) -> Tuple[xr.Dataset, xr.Dataset]:
+    """Compare the ensemble mean forecast to the verification data.
+
+    :ref:`comparisons` for :py:class:`~climpred.classes.HindcastEnsemble`
 
     Args:
-        hind (xr.Dataset): Hindcast with optional ``member`` dimension.
-        verif (xr.Dataset): Verification data.
-        metric (Metric): needed for probabilistic metrics.
-                      therefore useless in ``e2o`` comparison,
-                      but expected by internal API.
+        hind: Hindcast with optional ``member`` dimension.
+        verif: Verification data.
+        metric: needed for probabilistic metrics. Therefore useless in ``e2o``
+            comparison, but expected by internal API.
 
     Returns:
-        xr.object: forecast, verif.
+        forecast, verification
     """
+    if verif is not None:
+        raise ValueError("`verif` not expected.")
     if "member" in hind.dims:
         forecast = hind.mean("member")
     else:
@@ -270,19 +294,22 @@ def _e2o(hind, verif, metric=None):
 )
 
 
-def _m2o(hind, verif, metric=None):
-    """Compares each ensemble member individually to the verification data for a
-    ``HindcastEnsemble`` setup.
+def _m2o(
+    hind: xr.Dataset, verif: xr.Dataset, metric: Metric
+) -> Tuple[xr.Dataset, xr.Dataset]:
+    """Compare each ensemble member individually to the verification data.
+
+    :ref:`comparisons` for :py:class:`~climpred.classes.HindcastEnsemble`
 
     Args:
-        hind (xr.Dataset): ``initialized`` with ``member`` dimension.
-        verif (xr.Dataset): Verification data.
-        metric (Metric):
+        hind: ``initialized`` with ``member`` dimension.
+        verif: Verification data.
+        metric:
             If deterministic, forecast and verif both have ``member`` dim;
             If probabilistic, only forecast has ``member`` dim.
 
     Returns:
-        (xr.Dataset, xr.Dataset): forecast, verif.
+        forecast, verification
     """
     # check that this contains more than one member
     has_dims(hind, "member", "decadal prediction ensemble")
diff --git a/climpred/metrics.py b/climpred/metrics.py
index 04fbcd9a0..3606eff58 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -1,7 +1,7 @@
 """Metrics for ``PredictionEnsemble.verify() and ``PredictionEnsemble.bootstrap()."""
 
 import warnings
-from typing import Any, Callable, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -40,7 +40,7 @@
 metric_kwargsType = Any
 
 
-def _get_norm_factor(comparison):
+def _get_norm_factor(comparison: Any) -> int:  # Comparison instead of Any
     """Get normalization factor for normalizing distance metrics.
 
     A distance metric is normalized by the standard deviation or variance
@@ -59,10 +59,10 @@ def _get_norm_factor(comparison):
          This is used for NMSE, NRMSE, MSSS, NMAE.
 
     Args:
-        comparison (class): comparison class.
+        comparison: comparison class.
 
     Returns:
-        fac (int): normalization factor.
+        fac: normalization factor.
 
     Raises:
         KeyError: if comparison is not matching.
@@ -75,7 +75,7 @@ def _get_norm_factor(comparison):
         ...
         [...
 
-    Reference:
+    References:
         * Séférian, Roland, Sarah Berthet, and Matthieu Chevallier. “Assessing
           the Decadal Predictability of Land and Ocean Carbon Uptake.”
           Geophysical Research Letters, March 15, 2018. https://doi.org/10/gdb424.
@@ -89,22 +89,27 @@ def _get_norm_factor(comparison):
     return fac
 
 
-def _preprocess_dims(dim):
+def _preprocess_dims(dim: dimType) -> List[str]:
     """Convert input argument ``dim`` into a list of dimensions.
 
     Args:
-        dim (str or list): The dimension(s) to apply the function along.
+        dim: The dimension(s) to apply the function along.
 
     Returns:
-        dim (list): List of dimensions to apply function over.
+        dim: List of dimensions to apply function over.
     """
+    if dim is None:
+        dim = ["time"]
     if isinstance(dim, str):
         dim = [dim]
+    else:
+        raise ValueError
     return dim
 
 
-def _rename_dim(dim, forecast, verif):
+def _rename_dim(dim: dimType, forecast: xr.Dataset, verif: xr.Dataset) -> List[str]:
     """Rename `dim` to `time` or `init` if forecast and verif dims require."""
+    dim = _preprocess_dims(dim)
     if "init" in dim and "time" in forecast.dims and "time" in verif.dims:
         dim = dim.copy()
         dim.remove("init")
@@ -116,9 +121,13 @@ def _rename_dim(dim, forecast, verif):
     return dim
 
 
-def _remove_member_from_dim_or_raise(dim):
-    """delete `member` from `dim` to not pass to `xskillscore` where expected as
-    default `member_dim`."""
+def _remove_member_from_dim_or_raise(dim: dimType) -> List[str]:
+    """
+    Delete ``member`` from ``dim``.
+
+    Avoid passing it to ``xskillscore``, where it is the default ``member_dim``.
+    """
+    dim = _preprocess_dims(dim)
     if "member" in dim:
         dim = dim.copy()
         dim.remove("member")
@@ -127,9 +136,18 @@ def _remove_member_from_dim_or_raise(dim):
     return dim
 
 
-def _extract_and_apply_logical(forecast, verif, metric_kwargs, dim):
-    """Extract callable `logical` from `metric_kwargs` and apply to `forecast` and
-    `verif`."""
+def _extract_and_apply_logical(
+    forecast: xr.Dataset,
+    verif: xr.Dataset,
+    metric_kwargs: Dict[Any, Any],
+    dim: dimType,
+) -> Tuple[xr.Dataset, xr.Dataset, Dict[Any, Any], List[str]]:
+    """Extract and apply callable.
+
+    Extract ``logical`` from ``metric_kwargs`` and apply to ``forecast`` and
+    ``verif``.
+    """
+    dim = _preprocess_dims(dim)
     if "comparison" in metric_kwargs:
         metric_kwargs = metric_kwargs.copy()
         comparison = metric_kwargs.pop("comparison")
@@ -160,9 +178,16 @@ def _extract_and_apply_logical(forecast, verif, metric_kwargs, dim):
         )
 
 
-def _maybe_member_mean_reduce_dim(forecast, dim):
-    """Take member mean if member in dim. To allow Option 2 in
-    discrimination and reliability, which both dont expect member dim."""
+def _maybe_member_mean_reduce_dim(
+    forecast: xr.Dataset, dim: dimType
+) -> Tuple[xr.Dataset, List[str]]:
+    """
+    Take ``member`` mean if ``member`` in ``dim``.
+
+    To allow Option 2 in discrimination and reliability, which both don't expect
+    ``member`` dim.
+    """
+    dim = _preprocess_dims(dim)
     if "member" in dim and "member" in forecast.dims:
         forecast = forecast.mean("member")
         dim = dim.copy()
@@ -170,7 +195,7 @@ def _maybe_member_mean_reduce_dim(forecast, dim):
     return forecast, dim
 
 
-def _display_metric_metadata(self) -> str:
+def _display_metric_metadata(self: "Metric") -> str:
     summary = "----- Metric metadata -----\n"
     summary += f"Name: {self.name}\n"
     summary += f"Alias: {self.aliases}\n"
@@ -218,29 +243,28 @@ def __init__(
         """Metric initialization.
 
         Args:
-            name (str): name of metric.
-            function (function): metric function.
-            positive (bool or None): Is metric positively oriented?
+            name: name of metric.
+            function: metric function.
+            positive: Is metric positively oriented?
                 If ``True``, higher skill value means better skill.
                 If ``False``, lower metric value means better skill.
                 ``None`` if different differentiation.
-            probabilistic (bool): Is metric probabilistic?
+            probabilistic: Is metric probabilistic?
                 ``False`` means deterministic.
-            unit_power (float, int): Power of the unit of skill based on unit
+            unit_power: Power of the unit of skill based on unit
                 of input, e.g. input unit [m]: skill unit [(m)**unit_power]
-            long_name (str, optional): long name of metric. Defaults to ``None``.
-            aliases (list of str, optional): Allowed aliases for this metric.
+            long_name: long name of metric. Defaults to ``None``.
+            aliases: Allowed aliases for this metric.
                 Defaults to ``None``.
-            min (float, optional): Minimum skill for metric. Defaults to ``None``.
-            max (float, optional): Maxmimum skill for metric. Defaults to ``None``.
-            perfect (float, optional): Perfect skill for metric. Defaults to ``None``.
-            normalize (bool, optional): Will the metric be normalized? Then metric
+            min: Minimum skill for metric. Defaults to ``None``.
+            max: Maximum skill for metric. Defaults to ``None``.
+            perfect: Perfect skill for metric. Defaults to ``None``.
+            normalize: Will the metric be normalized? Then metric
                 function will require to get Comparison passed. Defaults to ``False``.
-            allows_logical (bool, optional): Does the metric allow a logical to be
+            allows_logical: Does the metric allow a logical to be
                 passed in metric_kwargs? Some probabilistic metrics allow this.
                 Defaults to ``False``.
-            requires_member_dim (bool, optional):
-                Does xskillscore.metric expect a member dimension?
+            requires_member_dim: Does xskillscore.metric expect a member dimension?
 
         Returns:
             Metric: metric class Metric.
@@ -260,7 +284,7 @@ def __init__(
         self.allows_logical = allows_logical
         self.requires_member_dim = requires_member_dim
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         """Show metadata of metric class."""
         return _display_metric_metadata(self)
 
@@ -296,7 +320,7 @@ def _pearson_r(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.pearson_r`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | -1.0      |
         +-----------------+-----------+
@@ -336,7 +360,7 @@ def _pearson_r(
             metric:                        pearson_r
             comparison:                    e2o
             dim:                           ['init']
-            reference:                     []
+            reference:                     []
     """
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -376,7 +400,7 @@ def _pearson_r_p_value(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see xskillscore.pearson_r_p_value
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -416,7 +440,7 @@ def _pearson_r_p_value(
             metric:                        pearson_r_p_value
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     # p value returns a runtime error when working with NaNs, such as on a climate
     # model grid. We can avoid this annoying output by specifically suppressing
@@ -476,7 +500,7 @@ def _effective_sample_size(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.effective_sample_size`
 
-    Details:
+    Notes:
         +-----------------+-----------------+
         | **minimum**     | 0.0             |
         +-----------------+-----------------+
@@ -487,7 +511,7 @@ def _effective_sample_size(
         | **orientation** | positive        |
         +-----------------+-----------------+
 
-    Reference:
+    References:
         * Bretherton, Christopher S., et al. "The effective number of spatial degrees of
           freedom of a time-varying field." Journal of climate 12.7 (1999): 1990-2009.
 
@@ -514,7 +538,7 @@ def _effective_sample_size(
             metric:                        effective_sample_size
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -574,7 +598,7 @@ def _pearson_r_eff_p_value(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.pearson_r_eff_p_value`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -612,9 +636,9 @@ def _pearson_r_eff_p_value(
             metric:                        pearson_r_eff_p_value
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
 
-    Reference:
+    References:
         * Bretherton, Christopher S., et al. "The effective number of spatial degrees of
           freedom of a time-varying field." Journal of climate 12.7 (1999): 1990-2009.
     """
@@ -673,7 +697,7 @@ def _spearman_r(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.spearman_r`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | -1.0      |
         +-----------------+-----------+
@@ -713,7 +737,7 @@ def _spearman_r(
             metric:                        spearman_r
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -753,7 +777,7 @@ def _spearman_r_p_value(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.spearman_r_p_value`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -793,7 +817,7 @@ def _spearman_r_p_value(
             metric:                        spearman_r_p_value
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     # p value returns a runtime error when working with NaNs, such as on a climate
     # model grid. We can avoid this annoying output by specifically suppressing
@@ -857,7 +881,7 @@ def _spearman_r_eff_p_value(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.spearman_r_eff_p_value`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -872,7 +896,7 @@ def _spearman_r_eff_p_value(
         * :py:func:`~climpred.metrics._effective_sample_size`
         * :py:func:`~climpred.metrics._pearson_r_eff_p_value`
 
-    Reference:
+    References:
         * Bretherton, Christopher S., et al. "The effective number of spatial degrees of
           freedom of a time-varying field." Journal of climate 12.7 (1999): 1990-2009.
 
@@ -899,7 +923,7 @@ def _spearman_r_eff_p_value(
             metric:                        spearman_r_eff_p_value
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     # p value returns a runtime error when working with NaNs, such as on a climate
     # model grid. We can avoid this annoying output by specifically suppressing
@@ -953,7 +977,7 @@ def _mse(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.mse`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -967,11 +991,11 @@ def _mse(
     See also:
         * :py:func:`~xskillscore.mse`
 
-    Reference:
-        * Ian T. Jolliffe and David B. Stephenson. Forecast Verification: A
-          Practitioner’s Guide in Atmospheric Science. John Wiley & Sons, Ltd,
-          Chichester, UK, December 2011. ISBN 978-1-119-96000-3 978-0-470-66071-3.
-          URL: http://doi.wiley.com/10.1002/9781119960003.
+    References:
+        Ian T. Jolliffe and David B. Stephenson. Forecast Verification: A
+        Practitioner’s Guide in Atmospheric Science. John Wiley & Sons, Ltd,
+        Chichester, UK, December 2011. ISBN 978-1-119-96000-3 978-0-470-66071-3.
+        http://doi.wiley.com/10.1002/9781119960003.
 
     Example:
         >>> HindcastEnsemble.verify(
@@ -993,7 +1017,7 @@ def _mse(
             metric:                        mse
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     return mse(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1028,7 +1052,7 @@ def _spread(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xarray.std`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -1063,7 +1087,7 @@ def _spread(
             metric:                        spread
             comparison:                    m2o
             dim:                           ['member', 'init']
-            reference:                     []
+            reference:                     []
     """
     return forecast.std(dim=dim, **metric_kwargs)
 
@@ -1101,7 +1125,7 @@ def _rmse(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.rmse`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -1135,7 +1159,7 @@ def _rmse(
             metric:                        rmse
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     return rmse(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1174,7 +1198,7 @@ def _mae(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.mae`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -1188,11 +1212,11 @@ def _mae(
     See also:
         * :py:func:`~xskillscore.mae`
 
-    Reference:
-        * Ian T. Jolliffe and David B. Stephenson. Forecast Verification: A
-          Practitioner’s Guide in Atmospheric Science. John Wiley & Sons, Ltd,
-          Chichester, UK, December 2011. ISBN 978-1-119-96000-3 978-0-470-66071-3.
-          URL: http://doi.wiley.com/10.1002/9781119960003.
+    References:
+        Ian T. Jolliffe and David B. Stephenson. Forecast Verification: A
+        Practitioner’s Guide in Atmospheric Science. John Wiley & Sons, Ltd,
+        Chichester, UK, December 2011. ISBN 978-1-119-96000-3 978-0-470-66071-3.
+        http://doi.wiley.com/10.1002/9781119960003.
 
 
     Example:
@@ -1215,7 +1239,7 @@ def _mae(
             metric:                        mae
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     return mae(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1253,7 +1277,7 @@ def _median_absolute_error(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.median_absolute_error`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -1290,7 +1314,7 @@ def _median_absolute_error(
             metric:                        median_absolute_error
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     return median_absolute_error(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1317,7 +1341,7 @@ def _nmse(
     dim: dimType = None,
     **metric_kwargs: metric_kwargsType,
 ) -> xr.Dataset:
-    r"""Normalized MSE (NMSE), also known as Normalized Ensemble Variance (NEV).
+    r"""Compute Normalized MSE (NMSE), also known as Normalized Ensemble Variance (NEV).
 
     Mean Square Error (``mse``) normalized by the variance of the verification data.
 
@@ -1345,7 +1369,7 @@ def _nmse(
             (Handled internally by the compute functions)
         metric_kwargs: see :py:func:`~xskillscore.mse`
 
-    Details:
+    Notes:
         +----------------------------+-----------+
         | **minimum**                | 0.0       |
         +----------------------------+-----------+
@@ -1360,14 +1384,13 @@ def _nmse(
         | **worse than climatology** | > 1.0     |
         +----------------------------+-----------+
 
-    Reference:
-        * Griffies, S. M., and K. Bryan. “A Predictability Study of Simulated
-          North Atlantic Multidecadal Variability.” Climate Dynamics 13,
-          no. 7–8 (August 1, 1997): 459–87. https://doi.org/10/ch4kc4.
-        * Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
-          Their Relationships to the Correlation Coefficient.” Monthly Weather
-          Review 116, no. 12 (December 1, 1988): 2417–24.
-          https://doi.org/10/fc7mxd.
+    References:
+        Griffies, S. M., and K. Bryan. “A Predictability Study of Simulated
+        North Atlantic Multidecadal Variability.” Climate Dynamics 13,
+        no. 7–8 (August 1, 1997): 459–87. https://doi.org/10/ch4kc4.
+        Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
+        Their Relationships to the Correlation Coefficient.” Monthly Weather
+        Review 116, no. 12 (December 1, 1988): 2417–24. https://doi.org/10/fc7mxd.
 
     Example:
         >>> HindcastEnsemble.verify(
@@ -1389,7 +1412,7 @@ def _nmse(
             metric:                        nmse
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     if "comparison" in metric_kwargs:
         comparison = metric_kwargs.pop("comparison")
@@ -1425,7 +1448,7 @@ def _nmae(
     dim: dimType = None,
     **metric_kwargs: metric_kwargsType,
 ) -> xr.Dataset:
-    r"""Normalized Mean Absolute Error (NMAE).
+    r"""Compute Normalized Mean Absolute Error (NMAE).
 
     Mean Absolute Error (``mae``) normalized by the standard deviation of the
     verification data.
@@ -1454,7 +1477,7 @@ def _nmae(
             (Handled internally by the compute functions)
         metric_kwargs: see :py:func:`~xskillscore.mae`
 
-    Details:
+    Notes:
         +----------------------------+-----------+
         | **minimum**                | 0.0       |
         +----------------------------+-----------+
@@ -1469,14 +1492,15 @@ def _nmae(
         | **worse than climatology** | > 1.0     |
         +----------------------------+-----------+
 
-    Reference:
-        * Griffies, S. M., and K. Bryan. “A Predictability Study of Simulated
-          North Atlantic Multidecadal Variability.” Climate Dynamics 13, no.
-          7–8 (August 1, 1997): 459–87. https://doi.org/10/ch4kc4.
-        * Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
-          Their Relationships to the Correlation Coefficient.” Monthly Weather
-          Review 116, no. 12 (December 1, 1988): 2417–24.
-          https://doi.org/10/fc7mxd.
+    References:
+        Griffies, S. M., and K. Bryan. “A Predictability Study of Simulated
+        North Atlantic Multidecadal Variability.” Climate Dynamics 13, no.
+        7–8 (August 1, 1997): 459–87. https://doi.org/10/ch4kc4.
+
+        Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
+        Their Relationships to the Correlation Coefficient.” Monthly Weather
+        Review 116, no. 12 (December 1, 1988): 2417–24.
+        https://doi.org/10/fc7mxd.
 
     Example:
         >>> HindcastEnsemble.verify(
@@ -1498,7 +1522,7 @@ def _nmae(
             metric:                        nmae
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     if "comparison" in metric_kwargs:
         comparison = metric_kwargs.pop("comparison")
@@ -1533,7 +1557,7 @@ def _nrmse(
     dim: dimType = None,
     **metric_kwargs: metric_kwargsType,
 ) -> xr.Dataset:
-    r"""Normalized Root Mean Square Error (NRMSE).
+    r"""Compute Normalized Root Mean Square Error (NRMSE).
 
     Root Mean Square Error (``rmse``) normalized by the standard deviation of the
     verification data.
@@ -1564,7 +1588,7 @@ def _nrmse(
             (Handled internally by the compute functions)
         metric_kwargs: see :py:func:`~xskillscore.rmse`
 
-    Details:
+    Notes:
         +----------------------------+-----------+
         | **minimum**                | 0.0       |
         +----------------------------+-----------+
@@ -1579,17 +1603,19 @@ def _nrmse(
         | **worse than climatology** | > 1.0     |
         +----------------------------+-----------+
 
-    Reference:
-      * Bushuk, Mitchell, Rym Msadek, Michael Winton, Gabriel Vecchi, Xiaosong
+    References:
+        Bushuk, Mitchell, Rym Msadek, Michael Winton, Gabriel Vecchi, Xiaosong
         Yang, Anthony Rosati, and Rich Gudgel. “Regional Arctic Sea–Ice
         Prediction: Potential versus Operational Seasonal Forecast Skill.”
         Climate Dynamics, June 9, 2018. https://doi.org/10/gd7hfq.
-      * Hawkins, Ed, Steffen Tietsche, Jonathan J. Day, Nathanael Melia, Keith
+
+        Hawkins, Ed, Steffen Tietsche, Jonathan J. Day, Nathanael Melia, Keith
         Haines, and Sarah Keeley. “Aspects of Designing and Evaluating
         Seasonal-to-Interannual Arctic Sea-Ice Prediction Systems.” Quarterly
         Journal of the Royal Meteorological Society 142, no. 695
         (January 1, 2016): 672–83. https://doi.org/10/gfb3pn.
-      * Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
+
+        Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
         Their Relationships to the Correlation Coefficient.” Monthly Weather
         Review 116, no. 12 (December 1, 1988): 2417–24.
         https://doi.org/10/fc7mxd.
@@ -1614,7 +1640,7 @@ def _nrmse(
             metric:                        nrmse
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     if "comparison" in metric_kwargs:
         comparison = metric_kwargs.pop("comparison")
@@ -1678,7 +1704,7 @@ def _msess(
             (Handled internally by the compute functions)
         metric_kwargs: see :py:func:`~xskillscore.mse`
 
-    Details:
+    Notes:
         +----------------------------+-----------+
         | **minimum**                | -∞        |
         +----------------------------+-----------+
@@ -1695,19 +1721,19 @@ def _msess(
         | **worse than climatology** | < 0.0     |
         +----------------------------+-----------+
 
-    Reference:
+    References:
       * Griffies, S. M., and K. Bryan. “A Predictability Study of Simulated
         North Atlantic Multidecadal Variability.” Climate Dynamics 13, no. 7–8
         (August 1, 1997): 459–87. https://doi.org/10/ch4kc4.
-      * Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
+      * Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
         Their Relationships to the Correlation Coefficient.” Monthly Weather
         Review 116, no. 12 (December 1, 1988): 2417–24.
         https://doi.org/10/fc7mxd.
-      * Pohlmann, Holger, Michael Botzet, Mojib Latif, Andreas Roesch, Martin
+      * Pohlmann, Holger, Michael Botzet, Mojib Latif, Andreas Roesch, Martin
         Wild, and Peter Tschuck. “Estimating the Decadal Predictability of a
         Coupled AOGCM.” Journal of Climate 17, no. 22 (November 1, 2004):
         4463–72. https://doi.org/10/d2qf62.
-      * Bushuk, Mitchell, Rym Msadek, Michael Winton, Gabriel Vecchi, Xiaosong
+      * Bushuk, Mitchell, Rym Msadek, Michael Winton, Gabriel Vecchi, Xiaosong
         Yang, Anthony Rosati, and Rich Gudgel. “Regional Arctic Sea–Ice
         Prediction: Potential versus Operational Seasonal Forecast Skill.
         Climate Dynamics, June 9, 2018. https://doi.org/10/gd7hfq.
@@ -1733,7 +1759,7 @@ def _msess(
             metric:                        msess
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     if "comparison" in metric_kwargs:
         comparison = metric_kwargs.pop("comparison")
@@ -1783,7 +1809,7 @@ def _mape(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.mape`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -1817,7 +1843,7 @@ def _mape(
             metric:                        mape
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     return mape(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1855,7 +1881,7 @@ def _smape(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.smape`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -1889,7 +1915,7 @@ def _smape(
             metric:                        smape
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     return smape(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1944,7 +1970,7 @@ def _uacc(
             (Handled internally by the compute functions)
         metric_kwargs: see :py:func:`~xskillscore.mse`
 
-    Details:
+    Notes:
         +----------------------------+-----------+
         | **minimum**                | 0.0       |
         +----------------------------+-----------+
@@ -1959,13 +1985,13 @@ def _uacc(
         | **equal to climatology**   | 0.0       |
         +----------------------------+-----------+
 
-    Reference:
-        * Bushuk, Mitchell, Rym Msadek, Michael Winton, Gabriel
+    References:
+          Bushuk, Mitchell, Rym Msadek, Michael Winton, Gabriel
           Vecchi, Xiaosong Yang, Anthony Rosati, and Rich Gudgel. “Regional
           Arctic Sea–Ice Prediction: Potential versus Operational Seasonal
           Forecast Skill." Climate Dynamics, June 9, 2018.
           https://doi.org/10/gd7hfq.
-        * Allan H. Murphy. Skill Scores Based on the Mean Square Error and Their
+          Allan H. Murphy. Skill Scores Based on the Mean Square Error and Their
           Relationships to the Correlation Coefficient. Monthly Weather Review,
           116(12):2417–2424, December 1988. https://doi.org/10/fc7mxd.
 
@@ -1989,7 +2015,7 @@ def _uacc(
             metric:                        uacc
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     messs_res = _msess(forecast, verif, dim=dim, **metric_kwargs)
     # Negative values are automatically turned into nans from xarray.
@@ -2033,7 +2059,7 @@ def _std_ratio(
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see xarray.std
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -2044,7 +2070,7 @@ def _std_ratio(
         | **orientation** | N/A       |
         +-----------------+-----------+
 
-    Reference:
+    References:
         * https://www-miklip.dkrz.de/about/murcss/
 
     Example:
@@ -2070,7 +2096,7 @@ def _std_ratio(
             metric:                        std_ratio
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     return forecast.std(dim=dim, **metric_kwargs) / verif.std(dim=dim, **metric_kwargs)
 
@@ -2105,7 +2131,7 @@ def _unconditional_bias(
         dim: Dimension(s) to perform metric over
         metric_kwargs: see xarray.mean
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | -∞        |
         +-----------------+-----------+
@@ -2116,9 +2142,10 @@ def _unconditional_bias(
         | **orientation** | negative  |
         +-----------------+-----------+
 
-    Reference:
-        * https://www.cawcr.gov.au/projects/verification/
-        * https://www-miklip.dkrz.de/about/murcss/
+    References:
+        https://www.cawcr.gov.au/projects/verification/
+
+        https://www-miklip.dkrz.de/about/murcss/
 
     Example:
         >>> HindcastEnsemble.verify(
@@ -2143,7 +2170,7 @@ def _unconditional_bias(
             metric:                        unconditional_bias
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
 
         Conditional bias is removed by
         :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`.
@@ -2171,7 +2198,7 @@ def _unconditional_bias(
             metric:                        unconditional_bias
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     return (forecast - verif).mean(dim=dim, **metric_kwargs)
 
@@ -2196,7 +2223,8 @@ def _mul_bias(
     dim: dimType = None,
     **metric_kwargs: metric_kwargsType,
 ) -> xr.Dataset:
-    r"""Multiplicative bias.
+    r"""
+    Multiplicative bias.
 
     .. math::
         \text{multiplicative bias} = f / o
@@ -2207,7 +2235,7 @@ def _mul_bias(
         dim: Dimension(s) to perform metric over
         metric_kwargs: see xarray.mean
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | -∞        |
         +-----------------+-----------+
@@ -2218,9 +2246,7 @@ def _mul_bias(
         | **orientation** | None      |
         +-----------------+-----------+
 
-
     Example:
-
         >>> HindcastEnsemble.verify(
         ...     metric="multiplicative_bias",
         ...     comparison="e2o",
@@ -2243,7 +2269,7 @@ def _mul_bias(
             metric:                        mul_bias
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     return (forecast / verif).mean(dim=dim, **metric_kwargs)
 
@@ -2283,7 +2309,7 @@ def _conditional_bias(
         metric_kwargs: see :py:func:`~xskillscore.pearson_r`
         and :py:meth:`~xarray.Datasetstd`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | -∞        |
         +-----------------+-----------+
@@ -2294,8 +2320,8 @@ def _conditional_bias(
         | **orientation** | negative  |
         +-----------------+-----------+
 
-    Reference:
-        * https://www-miklip.dkrz.de/about/murcss/
+    References:
+        https://www-miklip.dkrz.de/about/murcss/
 
     Example:
         >>> HindcastEnsemble.verify(
@@ -2320,7 +2346,7 @@ def _conditional_bias(
             metric:                        conditional_bias
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            reference:                     []
     """
     acc = _pearson_r(forecast, verif, dim=dim, **metric_kwargs)
     return acc - _std_ratio(forecast, verif, dim=dim, **metric_kwargs)
@@ -2363,7 +2389,7 @@ def _bias_slope(
         metric_kwargs: see :py:func:`~xskillscore.pearson_r` and
         :py:meth:`~xarray.Dataset.std`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -2374,8 +2400,8 @@ def _bias_slope(
         | **orientation** | negative  |
         +-----------------+-----------+
 
-    Reference:
-        * https://www-miklip.dkrz.de/about/murcss/
+    References:
+        https://www-miklip.dkrz.de/about/murcss/
 
     Example:
         >>> HindcastEnsemble.verify(
@@ -2400,7 +2426,7 @@ def _bias_slope(
             metric:                        bias_slope
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            References:                     []
     """
     std_ratio = _std_ratio(forecast, verif, dim=dim, **metric_kwargs)
     acc = _pearson_r(forecast, verif, dim=dim, **metric_kwargs)
@@ -2443,9 +2469,9 @@ def _msess_murphy(
         verif: Verification data.
         dim: Dimension(s) to perform metric over.
         metric_kwargs: see :py:func:`~xskillscore.pearson_r`,
-        :py:meth:`~xarray.Dataset.mean` and :py:meth:`~xarray.Dataset.std`
+            :py:meth:`~xarray.Dataset.mean` and :py:meth:`~xarray.Dataset.std`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | -∞        |
         +-----------------+-----------+
@@ -2461,12 +2487,11 @@ def _msess_murphy(
         * :py:func:`~climpred.metrics._conditional_bias`
         * :py:func:`~climpred.metrics._unconditional_bias`
 
-    Reference:
+    References:
         * https://www-miklip.dkrz.de/about/murcss/
         * Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
           Their Relationships to the Correlation Coefficient.” Monthly Weather
-          Review 116, no. 12 (December 1, 1988): 2417–24.
-          https://doi.org/10/fc7mxd.
+          Review 116, no. 12 (December 1, 1988): 2417–24. https://doi.org/10/fc7mxd.
 
     Example:
         >>> HindcastEnsemble = HindcastEnsemble.remove_bias(alignment="same_verifs")
@@ -2492,7 +2517,7 @@ def _msess_murphy(
             metric:                        msess_murphy
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            References:                     []
     """
     acc = _pearson_r(forecast, verif, dim=dim, **metric_kwargs)
     conditional_bias = _conditional_bias(forecast, verif, dim=dim, **metric_kwargs)
@@ -2563,7 +2588,7 @@ def _brier_score(
                 verification data in interval [0,1].
             see :py:func:`~xskillscore.brier_score`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -2574,7 +2599,7 @@ def _brier_score(
         | **orientation** | negative  |
         +-----------------+-----------+
 
-    Reference:
+    References:
         * https://www.nws.noaa.gov/oh/rfcdev/docs/
           Glossary_Forecast_Verification_Metrics.pdf
         * https://en.wikipedia.org/wiki/Brier_score
@@ -2617,7 +2642,7 @@ def _brier_score(
             metric:                        brier_score
             comparison:                    m2o
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
             logical:                       Callable
 
         Option 2. Pre-process to generate a binary multi-member forecast and
@@ -2645,7 +2670,7 @@ def _brier_score(
             metric:                        brier_score
             comparison:                    m2o
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
 
         Option 3. Pre-process to generate a probability forecast and binary
         verification product. because ``member`` not present in ``hindcast`` anymore,
@@ -2672,7 +2697,7 @@ def _brier_score(
             metric:                        brier_score
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            References:                     []
     """
     forecast, verif, metric_kwargs, dim = _extract_and_apply_logical(
         forecast, verif, metric_kwargs, dim
@@ -2726,7 +2751,7 @@ def _threshold_brier_score(
         metric_kwargs: optional, see
             :py:func:`~xskillscore.threshold_brier_score`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -2737,7 +2762,7 @@ def _threshold_brier_score(
         | **orientation** | negative  |
         +-----------------+-----------+
 
-    Reference:
+    References:
         * Brier, Glenn W. Verification of forecasts expressed in terms of
           probability.” Monthly Weather Review 78, no. 1 (1950).
           https://doi.org/10.1175/1520-0493(1950)078<0001:VOFEIT>2.0.CO;2.
@@ -2774,7 +2799,7 @@ def _threshold_brier_score(
             metric:                        threshold_brier_score
             comparison:                    m2o
             dim:                           member
-            reference:                     []
+            References:                     []
             threshold:                     0.2
 
         >>> # multiple thresholds averaging over init dimension
@@ -2802,7 +2827,7 @@ def _threshold_brier_score(
             metric:                        threshold_brier_score
             comparison:                    m2o
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
             threshold:                     [0.2, 0.3]
 
     """
@@ -2864,7 +2889,7 @@ def _crps(
             `member`. Other dimensions are passed to `xskillscore` and averaged.
         metric_kwargs: optional, see :py:func:`~xskillscore.crps_ensemble`
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -2875,7 +2900,7 @@ def _crps(
         | **orientation** | negative  |
         +-----------------+-----------+
 
-    Reference:
+    References:
         * Matheson, James E., and Robert L. Winkler. “Scoring Rules for
           Continuous Probability Distributions.” Management Science 22, no. 10
           (June 1, 1976): 1087–96. https://doi.org/10/cwwt4g.
@@ -2906,7 +2931,7 @@ def _crps(
             metric:                        crps
             comparison:                    m2o
             dim:                           member
-            reference:                     []
+            References:                     []
 
     """
     dim = _remove_member_from_dim_or_raise(dim)
@@ -2991,7 +3016,7 @@ def _crpss(
             :py:func:`~xskillscore.crps_gaussian` and
             :py:func:`~xskillscore.crps_quadrature`
 
-    Details:
+    Notes:
         +----------------------------+-----------+
         | **minimum**                | -∞        |
         +----------------------------+-----------+
@@ -3006,7 +3031,7 @@ def _crpss(
         | **worse than climatology** | < 0.0     |
         +----------------------------+-----------+
 
-    Reference:
+    References:
         * Matheson, James E., and Robert L. Winkler. “Scoring Rules for
           Continuous Probability Distributions.” Management Science 22, no. 10
           (June 1, 1976): 1087–96. https://doi.org/10/cwwt4g.
@@ -3036,7 +3061,7 @@ def _crpss(
             metric:                        crpss
             comparison:                    m2o
             dim:                           member
-            reference:                     []
+            References:                     []
 
         >>> import scipy
         >>> PerfectModelEnsemble.isel(lead=[0, 1]).verify(
@@ -3133,7 +3158,7 @@ def _crpss_es(
         metric_kwargs: see :py:func:`~xskillscore.crps_ensemble`
         and :py:func:`~xskillscore.mse`
 
-    Details:
+    Notes:
         +----------------------------+-----------+
         | **minimum**                | -∞        |
         +----------------------------+-----------+
@@ -3148,7 +3173,7 @@ def _crpss_es(
         | **over-dispersive**        | < 0.0     |
         +----------------------------+-----------+
 
-    Reference:
+    References:
         * Kadow, Christopher, Sebastian Illing, Oliver Kunst, Henning W. Rust,
           Holger Pohlmann, Wolfgang A. Müller, and Ulrich Cubasch. “Evaluation
           of Forecasts by Accuracy and Spread in the MiKlip Decadal Climate
@@ -3179,7 +3204,7 @@ def _crpss_es(
             metric:                        crpss_es
             comparison:                    m2o
             dim:                           member
-            reference:                     []
+            References:                     []
 
     """
     if dim is None:
@@ -3257,7 +3282,7 @@ def _discrimination(
         histograms of forecast probabilities when the event was observed and not
         observed
 
-    Details:
+    Notes:
         +-----------------+------------------------+
         | **perfect**     | distinct distributions |
         +-----------------+------------------------+
@@ -3386,7 +3411,7 @@ def _reliability(
             probability bin
 
 
-    Details:
+    Notes:
         +-----------------+-------------------+
         | **perfect**     | flat distribution |
         +-----------------+-------------------+
@@ -3430,7 +3455,7 @@ def _reliability(
             metric:                        reliability
             comparison:                    m2o
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
             logical:                       Callable
 
         Option 2. Pre-process to generate a binary forecast and verification product:
@@ -3459,7 +3484,7 @@ def _reliability(
             metric:                        reliability
             comparison:                    m2o
             dim:                           ['init', 'member']
-            reference:                     []
+            References:                     []
 
         Option 3. Pre-process to generate a probability forecast and binary
         verification product. because ``member`` not present in ``hindcast``, use
@@ -3488,7 +3513,7 @@ def _reliability(
             metric:                        reliability
             comparison:                    e2o
             dim:                           init
-            reference:                     []
+            References:                     []
 
     """
     if "logical" in metric_kwargs:
@@ -3524,7 +3549,7 @@ def _rank_histogram(
         dim: Dimensions to aggregate. Requires to contain `member` and at
             least one additional dimension.
 
-    Details:
+    Notes:
         +-----------------+------------------------------+
         | **flat**        | perfect                      |
         +-----------------+------------------------------+
@@ -3563,7 +3588,7 @@ def _rank_histogram(
             metric:                        rank_histogram
             comparison:                    m2o
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
 
         >>> PerfectModelEnsemble.verify(
         ...     metric="rank_histogram", comparison="m2c", dim=["member", "init"]
@@ -3583,7 +3608,7 @@ def _rank_histogram(
             metric:                        rank_histogram
             comparison:                    m2c
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
 
     """
     dim = _remove_member_from_dim_or_raise(dim)
@@ -3625,7 +3650,7 @@ def _rps(
         ``dayfofyear`` onto the dimensions ``init`` for forecast and ``time`` for
         observations. see ``climpred.utils.broadcast_time_grouped_to_time``.
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -3666,7 +3691,7 @@ def _rps(
             metric:                        rps
             comparison:                    m2o
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
             category_edges:                [-0.5  0.   0.5  1. ]
 
 
@@ -3701,7 +3726,7 @@ def _rps(
             metric:                        rps
             comparison:                    m2c
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
             category_edges:                <xarray.Dataset>\nDimensions:        (cate...
 
         Provide ``category_edges`` as tuple for different category edges to categorize
@@ -3746,7 +3771,7 @@ def _rps(
             metric:                        rps
             comparison:                    m2o
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
             category_edges:                (<xarray.Dataset>\nDimensions:        (mon...
     """
     if "category_edges" in metric_kwargs:
@@ -3877,7 +3902,7 @@ def _contingency(forecast, verif, score="table", dim=None, **metric_kwargs):
             metric:                        contingency
             comparison:                    m2c
             dim:                           ['member', 'init']
-            reference:                     []
+            References:                     []
             score:                         hit_rate
             observation_category_edges:    [ 9.5 10.  10.5]
             forecast_category_edges:       [ 9.5 10.  10.5]
@@ -3946,7 +3971,7 @@ def _roc(
             parameter. ``true positive rate`` and ``false positive rate`` contain
             ``probability_bin`` dimension with ascending ``bin_edges`` as coordinates.
 
-    Details:
+    Notes:
         +-----------------+-----------+
         | **minimum**     | 0.0       |
         +-----------------+-----------+
@@ -4051,7 +4076,7 @@ def _less(
     Returns:
         less: reduced by dimensions ``dim``
 
-    Details:
+    Notes:
         +-----------------+--------------------------------+
         | **maximum**     | ∞                              |
         +-----------------+--------------------------------+
diff --git a/climpred/reference.py b/climpred/reference.py
index 62ba9459d..7324d186f 100644
--- a/climpred/reference.py
+++ b/climpred/reference.py
@@ -571,7 +571,7 @@ def compute_uninitialized(
         comparison = COMPARISON_ALIASES.get(comparison, comparison)
         comparison = get_comparison_class(comparison, HINDCAST_COMPARISONS)
 
-    forecast, verif = comparison.function(uninit, verif)
+    forecast, verif = comparison.function(uninit, verif, metric)
 
     initialized = initialized.rename({"init": "time"})
 
diff --git a/docs/source/api/climpred.classes.PredictionEnsemble.rst b/docs/source/api/climpred.classes.PredictionEnsemble.rst
new file mode 100644
index 000000000..cfa416fe4
--- /dev/null
+++ b/docs/source/api/climpred.classes.PredictionEnsemble.rst
@@ -0,0 +1,40 @@
+climpred.classes.PredictionEnsemble
+===================================
+
+.. currentmodule:: climpred.classes
+
+.. autoclass:: PredictionEnsemble
+
+
+   .. automethod:: __init__
+
+
+   .. rubric:: Methods
+
+   .. autosummary::
+
+      ~PredictionEnsemble.__init__
+      ~PredictionEnsemble.equals
+      ~PredictionEnsemble.get_initialized
+      ~PredictionEnsemble.get_uninitialized
+      ~PredictionEnsemble.identical
+      ~PredictionEnsemble.plot
+      ~PredictionEnsemble.remove_seasonality
+      ~PredictionEnsemble.smooth
+
+
+
+
+
+   .. rubric:: Attributes
+
+   .. autosummary::
+
+      ~PredictionEnsemble.chunks
+      ~PredictionEnsemble.chunksizes
+      ~PredictionEnsemble.coords
+      ~PredictionEnsemble.data_vars
+      ~PredictionEnsemble.dims
+      ~PredictionEnsemble.mathType
+      ~PredictionEnsemble.nbytes
+      ~PredictionEnsemble.sizes
diff --git a/docs/source/api/climpred.comparisons.Comparison.__init__.rst b/docs/source/api/climpred.comparisons.Comparison.__init__.rst
new file mode 100644
index 000000000..a208c7221
--- /dev/null
+++ b/docs/source/api/climpred.comparisons.Comparison.__init__.rst
@@ -0,0 +1,6 @@
+﻿climpred.comparisons.Comparison.\_\_init\_\_
+============================================
+
+.. currentmodule:: climpred.comparisons
+
+.. automethod:: Comparison.__init__
diff --git a/docs/source/api/climpred.comparisons.Comparison.__repr__.rst b/docs/source/api/climpred.comparisons.Comparison.__repr__.rst
new file mode 100644
index 000000000..123746e3f
--- /dev/null
+++ b/docs/source/api/climpred.comparisons.Comparison.__repr__.rst
@@ -0,0 +1,6 @@
+﻿climpred.comparisons.Comparison.\_\_repr\_\_
+============================================
+
+.. currentmodule:: climpred.comparisons
+
+.. automethod:: Comparison.__repr__
diff --git a/docs/source/api/climpred.metrics.Metric.__init__.rst b/docs/source/api/climpred.metrics.Metric.__init__.rst
new file mode 100644
index 000000000..938742d94
--- /dev/null
+++ b/docs/source/api/climpred.metrics.Metric.__init__.rst
@@ -0,0 +1,6 @@
+﻿climpred.metrics.Metric.\_\_init\_\_
+====================================
+
+.. currentmodule:: climpred.metrics
+
+.. automethod:: Metric.__init__
diff --git a/docs/source/api/climpred.metrics.Metric.__repr__.rst b/docs/source/api/climpred.metrics.Metric.__repr__.rst
new file mode 100644
index 000000000..b3265a0c1
--- /dev/null
+++ b/docs/source/api/climpred.metrics.Metric.__repr__.rst
@@ -0,0 +1,6 @@
+﻿climpred.metrics.Metric.\_\_repr\_\_
+====================================
+
+.. currentmodule:: climpred.metrics
+
+.. automethod:: Metric.__repr__

From a626959b6b095bd672b217887b62fb5801a95714 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 18:21:27 +0100
Subject: [PATCH 21/56] transpose_and_rechunk only ds

---
 climpred/utils.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/climpred/utils.py b/climpred/utils.py
index 84a24ec62..22a2a9afe 100644
--- a/climpred/utils.py
+++ b/climpred/utils.py
@@ -497,15 +497,12 @@ def shift_cftime_singular(cftime, n, freq):
 
 def _transpose_and_rechunk_to(new_chunk_ds, ori_chunk_ds):
     """
-    Assume same chunks and dimension order.
+    Ensure same chunks and dimension order.
 
     This is needed after some operations which reduce chunks to size 1.
     First transpose a to ds.dims then apply ds chunking to a.
     """
-    transpose_kwargs = (
-        {"transpose_coords": False} if isinstance(new_chunk_ds, xr.DataArray) else {}
-    )
-    return new_chunk_ds.transpose(*ori_chunk_ds.dims, **transpose_kwargs).chunk(
+    return new_chunk_ds.transpose(*ori_chunk_ds.dims).chunk(
         ori_chunk_ds.chunks
     )
 

From 1e858a7d7cc0f75d7494b7569cebd60cbae6ada5 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 18:34:25 +0100
Subject: [PATCH 22/56] fix

---
 .pre-commit-config.yaml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f23a56627..e2936649d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -63,6 +63,3 @@ repos:
     rev: v0.3.4
     hooks:
     -   id: blackdoc
-    args:
-    - --include=*.py
-    - --exclude=CHANGELOG.rst

From 801137bb3576ec4d391b8506b76c11554d6d78c7 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 18:40:40 +0100
Subject: [PATCH 23/56] fix

---
 climpred/comparisons.py | 61 ++++++++++++++++++-----------------------
 docs/source/conf.py     |  1 -
 docs/source/metrics.rst |  9 ++++--
 3 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/climpred/comparisons.py b/climpred/comparisons.py
index 1d99b5103..cd24d4c22 100644
--- a/climpred/comparisons.py
+++ b/climpred/comparisons.py
@@ -11,17 +11,6 @@
 from .metrics import Metric
 
 
-def _transpose_and_rechunk_to(new_chunk_ds, ori_chunk_ds):
-    """
-    Chunk xr.Dataset `new_chunk_ds` as another xr.Dataset `ori_chunk_ds`.
-
-    This is needed after some operations which reduce chunks to size 1.
-    First transpose a to ds.dims then apply ds chunking to a.
-    """
-    # supposed to be in .utils but circular imports therefore here
-    return new_chunk_ds.transpose(*ori_chunk_ds.dims).chunk(ori_chunk_ds.chunks)
-
-
 class Comparison:
     """Master class for all comparisons."""
 
@@ -81,14 +70,14 @@ def __repr__(self) -> str:
 
 
 def _m2m(
-    ds: xr.Dataset, metric: Metric, verif: Optional[xr.Dataset] = None
+    initialized: xr.Dataset, metric: Metric, verif: Optional[xr.Dataset] = None
 ) -> Tuple[xr.Dataset, xr.Dataset]:
     """Compare all members to all others in turn while leaving out verification member.
 
     :ref:`comparisons` for :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Args:
-        ds: initialized with ``member`` dimension.
+        initialized: initialized with ``member`` dimension.
         metric:
             If deterministic, forecast and verif have ``member`` dim.
             If probabilistic, only forecast has ``member`` dim.
@@ -102,11 +91,11 @@ def _m2m(
 
     verif_list = []
     forecast_list = []
-    for m in ds.member.values:
-        forecast = ds.drop_sel(member=m)
+    for m in initialized.member.values:
+        forecast = initialized.drop_sel(member=m)
         # set incrementing members to avoid nans from broadcasting
         forecast["member"] = np.arange(1, 1 + forecast.member.size)
-        verif = ds.sel(member=m, drop=True)
+        verif = initialized.sel(member=m, drop=True)
         # Tiles the singular "verif" member to compare directly to all other members
         if not metric.probabilistic:
             forecast, verif = xr.broadcast(forecast, verif)
@@ -129,7 +118,9 @@ def _m2m(
 
 
 def _m2e(
-    ds: xr.Dataset, metric: Optional[Metric] = None, verif: Optional[xr.Dataset] = None
+    initialized: xr.Dataset,
+    metric: Optional[Metric] = None,
+    verif: Optional[xr.Dataset] = None,
 ) -> Tuple[xr.Dataset, xr.Dataset]:
     """
     Compare all members to ensemble mean while leaving out the verif in ensemble mean.
@@ -137,7 +128,7 @@ def _m2e(
     :ref:`comparisons` for :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Args:
-        ds: ``initialized`` with ``member`` dimension.
+        initialized: ``initialized`` with ``member`` dimension.
         metric: needed for probabilistic metrics. Therefore useless in ``m2e``
             comparison, but expected by internal API.
         verif: not used in :py:class:`~climpred.classes.PerfectModelEnsemble`
@@ -150,9 +141,9 @@ def _m2e(
     verif_list = []
     forecast_list = []
     M2E_COMPARISON_DIM = "member"
-    for m in ds.member.values:
-        forecast = ds.drop_sel(member=m).mean("member")
-        verif = ds.sel(member=m, drop=True)
+    for m in initialized.member.values:
+        forecast = initialized.drop_sel(member=m).mean("member")
+        verif = initialized.sel(member=m, drop=True)
         forecast_list.append(forecast)
         verif_list.append(verif)
     verif = xr.concat(verif_list, M2E_COMPARISON_DIM)
@@ -160,8 +151,8 @@ def _m2e(
     forecast[M2E_COMPARISON_DIM] = np.arange(forecast[M2E_COMPARISON_DIM].size)
     verif[M2E_COMPARISON_DIM] = np.arange(verif[M2E_COMPARISON_DIM].size)
     if dask.is_dask_collection(forecast):
-        forecast = _transpose_and_rechunk_to(forecast, ds)
-        verif = _transpose_and_rechunk_to(verif, ds)
+        forecast = forecast.transpose(*initialized.dims).chunk(initialized.chunks)
+        verif = verif.transpose(*initialized.dims).chunk(initialized.chunks)
     return forecast, verif
 
 
@@ -176,7 +167,7 @@ def _m2e(
 
 
 def _m2c(
-    ds: xr.Dataset, metric: Metric, verif: Optional[xr.Dataset] = None
+    initialized: xr.Dataset, metric: Metric, verif: Optional[xr.Dataset] = None
 ) -> Tuple[xr.Dataset, xr.Dataset]:
     """
     Compare all other member forecasts to a single member verification.
@@ -189,7 +180,7 @@ def _m2c(
     :ref:`comparisons` for :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Args:
-        ds: ``initialized`` with ``member`` dimension.
+        initialized: ``initialized`` with ``member`` dimension.
         metric: if deterministic, forecast and verif both have member dim
             if probabilistic, only forecast has ``member`` dim
         verif: not used in :py:class:`~climpred.classes.PerfectModelEnsemble`
@@ -199,10 +190,10 @@ def _m2c(
     """
     if verif is not None:
         raise ValueError("`verif` not expected.")
-    control_member = ds.member.values[0]
-    verif = ds.sel(member=control_member, drop=True)
+    control_member = initialized.member.values[0]
+    verif = initialized.sel(member=control_member, drop=True)
     # drop the member being verif
-    forecast = ds.drop_sel(member=control_member)
+    forecast = initialized.drop_sel(member=control_member)
     if not metric.probabilistic:
         forecast, verif = xr.broadcast(forecast, verif)
     return forecast, verif
@@ -218,7 +209,9 @@ def _m2c(
 
 
 def _e2c(
-    ds: xr.Dataset, metric: Optional[Metric] = None, verif: Optional[xr.Dataset] = None
+    initialized: xr.Dataset,
+    metric: Optional[Metric] = None,
+    verif: Optional[xr.Dataset] = None,
 ) -> Tuple[xr.Dataset, xr.Dataset]:
     """
     Compare ensemble mean forecast to single member verification.
@@ -230,7 +223,7 @@ def _e2c(
     :ref:`comparisons` for :py:class:`~climpred.classes.PerfectModelEnsemble`
 
     Args:
-        ds: ``initialized`` with ``member`` dimension.
+        initialized: ``initialized`` with ``member`` dimension.
         metric: needed for probabilistic metrics. Therefore useless in ``e2c``
             comparison, but expected by internal API.
         verif: not used in :py:class:`~climpred.classes.PerfectModelEnsemble`
@@ -240,10 +233,10 @@ def _e2c(
     """
     if verif is not None:
         raise ValueError("`verif` not expected.")
-    control_member = ds.member.values[0]
-    verif = ds.sel(member=control_member, drop=True)
-    ds = ds.drop_sel(member=control_member)
-    forecast = ds.mean("member")
+    control_member = initialized.member.values[0]
+    verif = initialized.sel(member=control_member, drop=True)
+    initialized = initialized.drop_sel(member=control_member)
+    forecast = initialized.mean("member")
     return forecast, verif
 
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 9f2670be0..f274b3ee8 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -19,7 +19,6 @@
 xarray.Dataset.__module__ = "xarray"
 
 
-
 sys.path.insert(0, os.path.abspath("../.."))
 
 
diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst
index 3833d208f..7748fc486 100644
--- a/docs/source/metrics.rst
+++ b/docs/source/metrics.rst
@@ -1,9 +1,10 @@
 .. currentmodule:: climpred.metrics
 
 .. ipython:: python
-   :suppress:
+    :suppress:
 
     from climpred.metrics import __ALL_METRICS__ as all_metrics
+
     metric_aliases = {}
     for m in all_metrics:
         if m.aliases is not None:
@@ -16,8 +17,10 @@
 Metrics
 #######
 
-All high-level functions like :py:meth:`~climpred.classes.HindcastEnsemble.verify` and
-:py:meth:`~climpred.classes.HindcastEnsemble.bootstrap` (for both ``HindcastEnsemble`` and ``PerfectModelEnsemble`` objects) have a ``metric`` argument
+All high-level functions like :py:meth:`~climpred.classes.HindcastEnsemble.verify`,
+:py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`,
+:py:meth:`~climpred.classes.PerfectModelEnsemble.verify` and
+:py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap` have a ``metric`` argument
 that has to be called to determine which metric is used in computing predictability.
 
 .. note::

From 2a088c53dd24aa79ddc15b697cd6ab7e8c959430 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Fri, 10 Dec 2021 19:21:38 +0100
Subject: [PATCH 24/56] fix .pre-commit pydocstyles ignoring tests

---
 .pre-commit-config.yaml | 10 ++++++----
 climpred/prediction.py  | 22 +++++++++++-----------
 setup.cfg               |  1 +
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e2936649d..d861ef7df 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -49,15 +49,17 @@ repos:
       rev: 6.1.1
       hooks:
       -   id: pydocstyle
-      args:
-      - --ignore=D301
-      - --max-line-length=93
-      - --convention google # https://google.github.io/styleguide/pyguide.html#Comments
+          args: ['--match="(?!test_).*\.py"']
+          # , "--convention=google"] # https://google.github.io/styleguide/pyguide.html#Comments
 
   -   repo: https://github.com/pycqa/doc8
       rev: 0.10.1
       hooks:
       -   id: doc8
+  #        args: ["--ignore-path climpred/tests", "--max-line-length 93"]
+      #- --max-line-length 93
+      #- --ignore-path climpred/tests
+
 
   - repo: https://github.com/keewis/blackdoc
     rev: v0.3.4
diff --git a/climpred/prediction.py b/climpred/prediction.py
index c4023a17b..9cf904a59 100644
--- a/climpred/prediction.py
+++ b/climpred/prediction.py
@@ -48,11 +48,11 @@ def _apply_metric_at_given_lead(
     """Apply a metric between two time series at a given lead.
 
     Args:
-        verif (xr object): Verification data.
+        verif (xr.Dataset): Verification data.
         verif_dates (dict): Lead-dependent verification dates for alignment.
         lead (int): Given lead to score.
-        hind (xr object): Initialized hindcast. Not required in a persistence forecast.
-        hist (xr object): Uninitialized/historical simulation. Required when
+        hind (xr.Dataset): Initialized hindcast. Not required in a persistence forecast.
+        hist (xr.Dataset): Uninitialized/historical simulation. Required when
             ``reference='uninitialized'``.
         inits (dict): Lead-dependent initialization dates for alignment.
         reference (str): If not ``None``, return score for this reference forecast.
@@ -63,7 +63,7 @@ def _apply_metric_at_given_lead(
         dim (str): Dimension to apply metric over.
 
     Returns:
-        result (xr object): Metric results for the given lead for the initialized
+        result (xr.Dataset): Metric results for the given lead for the initialized
             forecast or reference forecast.
     """
     # naming:: lforecast: forecast at lead; lverif: verification at lead
@@ -217,8 +217,8 @@ def compute_perfect_model(
     Compute a predictability skill score in a perfect-model framework.
 
     Args:
-        init_pm (xarray object): ensemble with dims ``lead``, ``init``, ``member``.
-        control (xarray object): NOTE that this is a legacy argument from a former
+        init_pm (xr.Dataset): ensemble with dims ``lead``, ``init``, ``member``.
+        control (xr.Dataset): NOTE that this is a legacy argument from a former
             release. ``control`` is not used in ``compute_perfect_model`` anymore.
         metric (str): `metric` name, see
          :py:func:`climpred.utils.get_metric_class` and (see :ref:`Metrics`).
@@ -231,7 +231,7 @@ def compute_perfect_model(
             (see the arguments required for a given metric in metrics.py)
 
     Returns:
-        skill (xarray object): skill score with dimensions as input `ds`
+        skill (xr.Dataset): skill score with dimensions as input `ds`
                                without `dim`.
 
     """
@@ -267,12 +267,12 @@ def compute_hindcast(
     """Verify hindcast predictions against verification data.
 
     Args:
-        hind (xarray object): Hindcast ensemble.
+        hind (xr.Dataset): Hindcast ensemble.
             Expected to follow package conventions:
             * ``init`` : dim of initialization dates
             * ``lead`` : dim of lead time from those initializations
             Additional dims can be member, lat, lon, depth, ...
-        verif (xarray object): Verification data with some temporal overlap with the
+        verif (xr.Dataset): Verification data with some temporal overlap with the
             hindcast.
         metric (str): Metric used in comparing the decadal prediction ensemble with the
             verification data. (see :py:func:`~climpred.utils.get_metric_class` and
@@ -287,7 +287,7 @@ def compute_hindcast(
         alignment (str): which inits or verification times should be aligned?
             - maximize/None: maximize the degrees of freedom by slicing ``hind`` and
             ``verif`` to a common time frame at each lead.
-            - same_inits: slice to a common init frame prior to computing
+            - same_inits: slice to a common ``init`` frame prior to computing
             metric. This philosophy follows the thought that each lead should be based
             on the same set of initializations.
             - same_verif: slice to a common/consistent verification time frame prior to
@@ -297,7 +297,7 @@ def compute_hindcast(
             (see the arguments required for a given metric in :ref:`Metrics`).
 
     Returns:
-        result (xarray object):
+        result (xr.Dataset):
             Verification metric over ``lead`` reduced by dimension(s) ``dim``.
     """
     metric, comparison, dim = _get_metric_comparison_dim(
diff --git a/setup.cfg b/setup.cfg
index ccb9bf454..3d428cd11 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -61,6 +61,7 @@ test = pytest
 
 [doc8]
 max-line-length=93
+ignore-path=climpred/tests
 
 [mypy]
 exclude = asv_bench|doc

From 83fc9901f0ff5277c5bef5286e1cd9fe38ec3316 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 11:18:23 +0100
Subject: [PATCH 25/56] fix tests

---
 climpred/comparisons.py |  2 --
 climpred/metrics.py     |  4 ++-
 climpred/options.py     | 61 +++++++++++++++++------------------------
 climpred/tutorial.py    |  2 +-
 climpred/utils.py       |  4 +--
 setup.cfg               |  1 +
 6 files changed, 31 insertions(+), 43 deletions(-)

diff --git a/climpred/comparisons.py b/climpred/comparisons.py
index cd24d4c22..720d1bb1d 100644
--- a/climpred/comparisons.py
+++ b/climpred/comparisons.py
@@ -268,8 +268,6 @@ def _e2o(
     Returns:
         forecast, verification
     """
-    if verif is not None:
-        raise ValueError("`verif` not expected.")
     if "member" in hind.dims:
         forecast = hind.mean("member")
     else:
diff --git a/climpred/metrics.py b/climpred/metrics.py
index 3606eff58..01e9994e1 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -100,8 +100,10 @@ def _preprocess_dims(dim: dimType) -> List[str]:
     """
     if dim is None:
         dim = ["time"]
-    if isinstance(dim, str):
+    elif isinstance(dim, str):
         dim = [dim]
+    elif isinstance(dim, List):
+        pass
     else:
         raise ValueError
     return dim
diff --git a/climpred/options.py b/climpred/options.py
index 94df35c0c..dac81bf40 100644
--- a/climpred/options.py
+++ b/climpred/options.py
@@ -26,44 +26,33 @@
 
 
 class set_options:
-    """Set options for climpred in a controlled context. Analogous to
+    """Set options for ``climpred`` in a controlled context. Analogous to
     :py:class:`~xarray.options.set_options`.
 
-    Currently supported options:
-
-    - ``seasonality``
-        - Attribute to group dimension ``groupby(f"{dim}.{seasonality}"")``.
-            Used in ``reference=climatology`` and
-            :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`.
-        - Allowed: [``"dayofyear"``, ``"weekofyear"``, ``"month"``, ``"season"``]
-        - Default: ``dayofyear``.
-    - ``PerfectModel_persistence_from_initialized_lead_0``
-        - Which persistence function to use in
-            ``PerfectModelEnsemble.verify/bootstrap(reference="persistence")``.
-            If ``False`` use :py:func:`~climpred.reference.compute_persistence`.
-            If ``True`` use
-            :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
-        - Allowed: [``True``, ``False``]
-        - Default: ``False``
-    - ``warn_for_failed_PredictionEnsemble_xr_call``
-        - Raise ``UserWarning`` when ``PredictionEnsemble.xr_call``,
-            e.g. ``.sel(lead=[1])`` fails on one of the datasets.
-        - Allowed: [``True``, ``False``]
-        - Default: ``True``
-    - ``warn_for_rename_to_climpred_dims``
-        - Raise ``UserWarning`` when dimensions are renamed to ``CLIMPRED_DIMS`` when
-            :py:class:`~climpred.classes.PredictionEnsemble` is instantiated.
-        - Allowed: [``True``, ``False``]
-        - Default: ``True``
-    - ``warn_for_init_coords_int_to_annual``
-        - Raise ``UserWarning`` when ``init`` coordinate is of type integer and gets
-            converted to annual cftime_range when :py:class:`~climpred.classes.PredictionEnsemble` is instantiated.
-        - Allowed: [``True``, ``False``]
-        - Default: ``True``
-    - ``climpred_warnings``
-        - Overwrites all options containing ``"*warn*"``.
-        - Allowed: [``True``, ``False``]
-        - Default: ``True``
+    Parameters
+    ----------
+    ``seasonality`` : {``"dayofyear"``, ``"weekofyear"``, ``"month"``, ``"season"``}, default: ``"month"``
+        Attribute to group dimension ``groupby(f"{dim}.{seasonality}")``.
+        Used in ``reference=climatology`` and
+        :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`.
+    ``PerfectModel_persistence_from_initialized_lead_0`` : {``True``, ``False``}, default ``False``
+        Which persistence function to use in
+        ``PerfectModelEnsemble.verify/bootstrap(reference="persistence")``.
+        If ``False`` use :py:func:`~climpred.reference.compute_persistence`.
+        If ``True`` use
+        :py:func:`~climpred.reference.compute_persistence_from_first_lead`.
+    ``warn_for_failed_PredictionEnsemble_xr_call`` : {``True``, ``False``}, default ``True``
+        Raise ``UserWarning`` when ``PredictionEnsemble.xr_call``, e.g.
+        ``.sel(lead=[1])`` fails on one of the datasets.
+    ``warn_for_rename_to_climpred_dims`` : {``True``, ``False``}, default ``True``
+        Raise ``UserWarning`` when dimensions are renamed to ``CLIMPRED_DIMS`` when
+        :py:class:`~climpred.classes.PredictionEnsemble` is instantiated.
+    ``warn_for_init_coords_int_to_annual`` : {``True``, ``False``}, default ``True``
+        Raise ``UserWarning`` when ``init`` coordinate is of type integer and gets
+        converted to annual cftime_range when
+        :py:class:`~climpred.classes.PredictionEnsemble` is instantiated.
+    ``climpred_warnings`` : {``True``, ``False``}, default ``True``
+        Overwrites all options containing ``"*warn*"``.
 
     Examples:
         You can use ``set_options`` either as a context manager:
diff --git a/climpred/tutorial.py b/climpred/tutorial.py
index 42694af27..59636711f 100644
--- a/climpred/tutorial.py
+++ b/climpred/tutorial.py
@@ -3,7 +3,7 @@
 import hashlib
 import os as _os
 import urllib
-from typing import Optional, Union, Dict
+from typing import Dict, Optional, Union
 from urllib.request import urlretrieve as _urlretrieve
 
 import xarray as xr
diff --git a/climpred/utils.py b/climpred/utils.py
index 22a2a9afe..6a40cbe5c 100644
--- a/climpred/utils.py
+++ b/climpred/utils.py
@@ -502,9 +502,7 @@ def _transpose_and_rechunk_to(new_chunk_ds, ori_chunk_ds):
     This is needed after some operations which reduce chunks to size 1.
     First transpose a to ds.dims then apply ds chunking to a.
     """
-    return new_chunk_ds.transpose(*ori_chunk_ds.dims).chunk(
-        ori_chunk_ds.chunks
-    )
+    return new_chunk_ds.transpose(*ori_chunk_ds.dims).chunk(ori_chunk_ds.chunks)
 
 
 def convert_Timedelta_to_lead_units(ds):
diff --git a/setup.cfg b/setup.cfg
index 3d428cd11..c1ace186a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -55,6 +55,7 @@ markers =
 
 [aliases]
 test = pytest
+# doctest = pytest --doctest-modules climpred --ignore climpred/tests
 
 [pydocstyle]
 

From dab8865d347a381eb50225fc376d1a3c7bb09db2 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 14:40:08 +0100
Subject: [PATCH 26/56] new doctests with crps

---
 .pre-commit-config.yaml            |   1 +
 climpred/classes.py                | 118 ++++++++++++-------------
 climpred/comparisons.py            |   6 +-
 climpred/metrics.py                |  87 ++++++++++---------
 docs/source/api.rst                |   4 +
 docs/source/comparisons.rst        | 135 +++++++++++++++++------------
 docs/source/contributors.rst       |   2 +-
 docs/source/metrics.rst            |  83 +++++++++---------
 docs/source/reference_forecast.rst |  47 +++++-----
 docs/source/scope.rst              |  33 +++----
 docs/source/terminology.rst        |  45 +++++++---
 docs/source/why-climpred.rst       |  16 ++--
 12 files changed, 320 insertions(+), 257 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d861ef7df..fe44a6422 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -56,6 +56,7 @@ repos:
       rev: 0.10.1
       hooks:
       -   id: doc8
+
   #        args: ["--ignore-path climpred/tests", "--max-line-length 93"]
       #- --max-line-length 93
       #- --ignore-path climpred/tests
diff --git a/climpred/classes.py b/climpred/classes.py
index 656a70554..2e01649dc 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -664,9 +664,9 @@ def _apply_xr_func(v, name, *args, **kwargs):
                 dim that's being called. E.g., ``.isel(lead=0)`` should only
                 be applied to the initialized dataset.
 
-                Reference:
-                  * https://stackoverflow.com/questions/1528237/
-                    how-to-handle-exceptions-in-a-list-comprehensions
+                References:
+                    * https://stackoverflow.com/questions/1528237/
+                      how-to-handle-exceptions-in-a-list-comprehensions
                 """
                 try:
                     return getattr(v, name)(*args, **kwargs)
@@ -1102,8 +1102,8 @@ def generate_uninitialized(self) -> "PerfectModelEnsemble":
         """Generate an uninitialized ensemble by resampling from the control simulation.
 
         Returns:
-            :py:class:`~climpred.classes.PerfectModelEnsemble` with resampled (uninitialized) ensemble from
-            control
+            ``uninitialized`` resampled from ``control`` added
+            to :py:class:`~climpred.classes.PerfectModelEnsemble`
         """
         has_dataset(
             self._datasets["control"], "control", "generate an uninitialized ensemble."
@@ -1159,8 +1159,8 @@ def verify(
             **metric_kwargs: Arguments passed to ``metric``.
 
         Returns:
-            Dataset with dimension skill reduced by dim containing initialized and
-            reference skill(s) if specified.
+            ``initialized`` and ``reference`` forecast skill reduced by dimensions
+            ``dim``
 
         Example:
             Root mean square error (``rmse``) comparing every member with the
@@ -1187,13 +1187,13 @@ def verify(
                 reference:                     []
 
 
-            Pearson's Anomaly Correlation (``"acc"``) comparing every member to every
+            Continuous Ranked Probability Score (``"crps"``) comparing every member to every
             other member (``"m2m"``) reducing dimensions ``member`` and ``init`` while
             also calculating reference skill for the ``persistence``, ``climatology``
             and ``uninitialized`` forecast.
 
             >>> PerfectModelEnsemble.verify(
-            ...     metric="acc",
+            ...     metric="crps",
             ...     comparison="m2m",
             ...     dim=["init", "member"],
             ...     reference=["persistence", "climatology", "uninitialized"],
@@ -1204,16 +1204,16 @@ def verify(
               * lead     (lead) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
               * skill    (skill) <U13 'initialized' 'persistence' ... 'uninitialized'
             Data variables:
-                tos      (skill, lead) float64 0.7941 0.7489 0.5623 ... 0.1327 0.4547 0.3253
+                tos      (skill, lead) float64 0.0621 0.07352 0.08678 ... 0.1188 0.09737
             Attributes:
-                prediction_skill_software:                         climpred https://clim...
-                skill_calculated_by_function:                      PerfectModelEnsemble....
+                prediction_skill_software:                         climpred https://climp...
+                skill_calculated_by_function:                      PerfectModelEnsemble.v...
                 number_of_initializations:                         12
                 number_of_members:                                 10
-                metric:                                            pearson_r
+                metric:                                            crps
                 comparison:                                        m2m
                 dim:                                               ['init', 'member']
-                reference:                                         ['persistence', 'clim...
+                reference:                                         ['persistence', 'clima...
                 PerfectModel_persistence_from_initialized_lead_0:  False
         """
         if groupby is not None:
@@ -1298,7 +1298,8 @@ def _compute_uninitialized(
             **metric_kwargs: Arguments passed to ``metric``.
 
         Returns:
-            Dataset with dimension skill containing initialized and reference skill(s).
+            ``initialized`` and ``reference`` forecast skill reduced by dimensions
+            ``dim``
         """
         has_dataset(
             self._datasets["uninitialized"],
@@ -1353,9 +1354,9 @@ def _compute_persistence(
             **metric_kwargs: Arguments passed to ``metric``.
 
         Returns:
-            Dataset of persistence forecast results.
+            persistence forecast skill.
 
-        Reference:
+        References:
             * Chapter 8 (Short-Term Climate Prediction) in
               Van den Dool, Huug. Empirical methods in short-term climate
               prediction. Oxford University Press, 2007.
@@ -1425,9 +1426,9 @@ def _compute_climatology(
             **metric_kwargs: Arguments passed to ``metric``.
 
         Returns:
-            Dataset of persistence forecast results.
+            climatology forecast skill
 
-        Reference:
+        References:
             * Chapter 8 (Short-Term Climate Prediction) in
               Van den Dool, Huug. Empirical methods in short-term climate
               prediction. Oxford University Press, 2007.
@@ -1505,7 +1506,7 @@ def bootstrap(
             **metric_kwargs: arguments passed to ``metric``.
 
         Returns:
-            xr.Datasets: with dimensions ``results`` (holding ``verify skill``, ``p``,
+            xr.Dataset with dimensions ``results`` (holding ``verify skill``, ``p``,
             ``low_ci`` and ``high_ci``) and ``skill`` (holding ``initialized``,
             ``persistence`` and/or ``uninitialized``):
                 * results='verify skill', skill='initialized':
@@ -1530,7 +1531,7 @@ def bootstrap(
             https://doi.org/10/f4jjvf.
 
         Example:
-            Calculate the Pearson's Anomaly Correlation (``"acc"``) comparing every
+            Continuous Ranked Probability Score (``"crps"``) comparing every
             member to every other member (``"m2m"``) reducing dimensions ``member`` and
             ``init`` 50 times after resampling ``member`` dimension with replacement.
             Also calculate reference skill for the ``"persistence"``, ``"climatology"``
@@ -1540,7 +1541,7 @@ def bootstrap(
             upper bound of the resample.
 
             >>> PerfectModelEnsemble.bootstrap(
-            ...     metric="acc",
+            ...     metric="crps",
             ...     comparison="m2m",
             ...     dim=["init", "member"],
             ...     iterations=50,
@@ -1554,16 +1555,16 @@ def bootstrap(
               * results  (results) <U12 'verify skill' 'p' 'low_ci' 'high_ci'
               * skill    (skill) <U13 'initialized' 'persistence' ... 'uninitialized'
             Data variables:
-                tos      (skill, results, lead) float64 0.7941 0.7489 ... 0.1494 0.1466
+                tos      (skill, results, lead) float64 0.0621 0.07352 ... 0.1607 0.1439
             Attributes: (12/13)
-                prediction_skill_software:                         climpred https://clim...
-                skill_calculated_by_function:                      PerfectModelEnsemble...
+                prediction_skill_software:                         climpred https://climp...
+                skill_calculated_by_function:                      PerfectModelEnsemble.b...
                 number_of_initializations:                         12
                 number_of_members:                                 10
-                metric:                                            pearson_r
+                metric:                                            crps
                 comparison:                                        m2m
-                ...
-                reference:                                         ['persistence', 'clim...
+                ...                                                ...
+                reference:                                         ['persistence', 'clima...
                 PerfectModel_persistence_from_initialized_lead_0:  False
                 resample_dim:                                      member
                 sig:                                               95
@@ -1760,7 +1761,7 @@ def get_observations(self) -> xr.Dataset:
         """Return xarray.Dataset of the observations/verification data.
 
         Returns:
-            observations.
+            observations
         """
         return self._datasets["observations"]
 
@@ -1773,7 +1774,8 @@ def generate_uninitialized(
             resample_dim: dimension to resample from. Must contain ``"init"``.
 
         Returns:
-            resampled uninitialized ensemble added to HindcastEnsemble
+            resampled ``uninitialized`` ensemble added to
+            :py:class:`~climpred.classes.HindcastEnsemble`
 
         Example:
             >>> HindcastEnsemble  # uninitialized from historical simulations
@@ -1964,11 +1966,11 @@ def verify(
             **metric_kwargs: arguments passed to ``metric``.
 
         Returns:
-            Dataset with dimension skill reduced by dim containing initialized and
-            reference skill(s) if specified.
+            ``initialized`` and ``reference`` forecast skill reduced by dimensions
+            ``dim``
 
         Example:
-            Root mean square error (``rmse``) comparing every member with the
+            Continuous Ranked Probability Score (``crps``) comparing every member with the
             verification (``m2o``) over the same verification time (``same_verifs``)
             for all leads reducing dimensions ``init`` and ``member``:
 
@@ -1996,14 +1998,14 @@ def verify(
                 dim:                           ['init', 'member']
                 reference:                     []
 
-            Pearson's Anomaly Correlation (``"acc"``) comparing the ensemble mean with
+            Root mean square error (``"rmse"``) comparing the ensemble mean with
             the verification (``"e2o"``) over the same initializations
             (``"same_inits"``) for all leads reducing dimension ``init`` while also
             calculating reference skill for the ``"persistence"``, ``"climatology"``
             and ``'uninitialized'`` forecast.
 
             >>> HindcastEnsemble.verify(
-            ...     metric="acc",
+            ...     metric="rmse",
             ...     comparison="e2o",
             ...     alignment="same_inits",
             ...     dim="init",
@@ -2015,14 +2017,14 @@ def verify(
               * lead     (lead) int32 1 2 3 4 5 6 7 8 9 10
               * skill    (skill) <U13 'initialized' 'persistence' ... 'uninitialized'
             Data variables:
-                SST      (skill, lead) float64 0.9023 0.8807 0.8955 ... 0.9078 0.9128 0.9159
+                SST      (skill, lead) float64 0.08135 0.08254 0.086 ... 0.07377 0.07409
             Attributes:
                 prediction_skill_software:     climpred https://climpred.readthedocs.io/
                 skill_calculated_by_function:  HindcastEnsemble.verify()
                 number_of_initializations:     64
                 number_of_members:             10
                 alignment:                     same_inits
-                metric:                        pearson_r
+                metric:                        rmse
                 comparison:                    e2o
                 dim:                           init
                 reference:                     ['persistence', 'climatology', 'uninitiali...
@@ -2241,7 +2243,7 @@ def bootstrap(
             **metric_kwargs: arguments passed to ``metric``.
 
         Returns:
-            xr.Datasets: with dimensions ``results`` (holding ``skill``, ``p``,
+            xr.Dataset with dimensions ``results`` (holding ``skill``, ``p``,
             ``low_ci`` and ``high_ci``) and ``skill`` (holding ``initialized``,
             ``persistence`` and/or ``uninitialized``):
                 * results='verify skill', skill='initialized':
@@ -2260,10 +2262,9 @@ def bootstrap(
                     bootstrapping with replacement.
 
         Example:
-            Calculate the Pearson's Anomaly Correlation (``"acc"``) comparing the
-            ensemble mean forecast to the verification (``"e2o"``) over the same
-            verification times (``"same_verifs"``) for all leads reducing dimensions
-            ``init`` 50 times after resampling ``member`` dimension with replacement.
+            Continuous Ranked Probability Score (``"crps"``) comparing every member forecast to the verification (``"m2o"``) over the same
+            initializations (``"same_inits"``) for all leads reducing dimensions
+            ``member`` 50 times after resampling ``member`` dimension with replacement. Note that dimension ``init`` remains.
             Also calculate reference skill for the ``"persistence"``, ``"climatology"``
             and ``"uninitialized"`` forecast and compare whether initialized skill is
             better than reference skill: Returns verify skill, probability that
@@ -2271,31 +2272,32 @@ def bootstrap(
             upper bound of the resample.
 
             >>> HindcastEnsemble.bootstrap(
-            ...     metric="acc",
-            ...     comparison="e2o",
-            ...     dim="init",
+            ...     metric="crps",
+            ...     comparison="m2o",
+            ...     dim="member",
             ...     iterations=50,
             ...     resample_dim="member",
-            ...     alignment="same_verifs",
+            ...     alignment="same_inits",
             ...     reference=["persistence", "climatology", "uninitialized"],
             ... )
             <xarray.Dataset>
-            Dimensions:  (skill: 4, results: 4, lead: 10)
+            Dimensions:     (skill: 4, results: 4, lead: 10, init: 51)
             Coordinates:
-              * lead     (lead) int32 1 2 3 4 5 6 7 8 9 10
-              * results  (results) <U12 'verify skill' 'p' 'low_ci' 'high_ci'
-              * skill    (skill) <U13 'initialized' 'persistence' ... 'uninitialized'
+              * init        (init) object 1955-01-01 00:00:00 ... 2005-01-01 00:00:00
+              * lead        (lead) int32 1 2 3 4 5 6 7 8 9 10
+                valid_time  (lead, init) object 1956-01-01 00:00:00 ... 2015-01-01 00:00:00
+              * results     (results) <U12 'verify skill' 'p' 'low_ci' 'high_ci'
+              * skill       (skill) <U13 'initialized' 'persistence' ... 'uninitialized'
             Data variables:
-                SST      (skill, results, lead) float64 0.9313 0.9119 ... 0.8078 0.8078
-            Attributes: (12/13)
+                SST         (skill, results, lead, init) float64 0.1202 0.01764 ... 0.1033
+            Attributes:
                 prediction_skill_software:     climpred https://climpred.readthedocs.io/
                 skill_calculated_by_function:  HindcastEnsemble.bootstrap()
-                number_of_initializations:     64
                 number_of_members:             10
-                alignment:                     same_verifs
-                metric:                        pearson_r
-                ...                            ...
-                dim:                           init
+                alignment:                     same_inits
+                metric:                        crps
+                comparison:                    m2o
+                dim:                           member
                 reference:                     ['persistence', 'climatology', 'uninitiali...
                 resample_dim:                  member
                 sig:                           95
@@ -2440,7 +2442,7 @@ def remove_bias(
                 or ``XBias_Correction``
 
         Returns:
-            HindcastEnsemble: bias removed HindcastEnsemble.
+            bias removed :py:class:`~climpred.classes.HindcastEnsemble`.
 
         Example:
 
diff --git a/climpred/comparisons.py b/climpred/comparisons.py
index 720d1bb1d..76c8a79c3 100644
--- a/climpred/comparisons.py
+++ b/climpred/comparisons.py
@@ -1,6 +1,6 @@
 """Comparisons: How to compare forecast with verification."""
 
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Any, Callable, List, Optional, Tuple, Union
 
 import dask
 import numpy as np
@@ -17,9 +17,7 @@ class Comparison:
     def __init__(
         self,
         name: str,
-        function: Callable[
-            [xr.Dataset, xr.Dataset, Metric], Tuple[xr.Dataset, xr.Dataset]
-        ],
+        function: Callable[[Any, Any, Any], Tuple[xr.Dataset, xr.Dataset]],
         hindcast: bool,
         probabilistic: bool,
         long_name: Optional[str] = None,
diff --git a/climpred/metrics.py b/climpred/metrics.py
index 01e9994e1..277038c3c 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -362,7 +362,7 @@ def _pearson_r(
             metric:                        pearson_r
             comparison:                    e2o
             dim:                           ['init']
-            References:                     []
+            reference:                     []
     """
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -442,7 +442,7 @@ def _pearson_r_p_value(
             metric:                        pearson_r_p_value
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     # p value returns a runtime error when working with NaNs, such as on a climate
     # model grid. We can avoid this annoying output by specifically suppressing
@@ -540,7 +540,7 @@ def _effective_sample_size(
             metric:                        effective_sample_size
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -638,7 +638,7 @@ def _pearson_r_eff_p_value(
             metric:                        pearson_r_eff_p_value
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
 
     References:
         * Bretherton, Christopher S., et al. "The effective number of spatial degrees of
@@ -739,7 +739,7 @@ def _spearman_r(
             metric:                        spearman_r
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -819,7 +819,7 @@ def _spearman_r_p_value(
             metric:                        spearman_r_p_value
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     # p value returns a runtime error when working with NaNs, such as on a climate
     # model grid. We can avoid this annoying output by specifically suppressing
@@ -925,7 +925,7 @@ def _spearman_r_eff_p_value(
             metric:                        spearman_r_eff_p_value
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     # p value returns a runtime error when working with NaNs, such as on a climate
     # model grid. We can avoid this annoying output by specifically suppressing
@@ -1019,7 +1019,7 @@ def _mse(
             metric:                        mse
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     return mse(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1089,7 +1089,7 @@ def _spread(
             metric:                        spread
             comparison:                    m2o
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
     """
     return forecast.std(dim=dim, **metric_kwargs)
 
@@ -1161,7 +1161,7 @@ def _rmse(
             metric:                        rmse
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     return rmse(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1241,7 +1241,7 @@ def _mae(
             metric:                        mae
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     return mae(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1316,7 +1316,7 @@ def _median_absolute_error(
             metric:                        median_absolute_error
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     return median_absolute_error(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1414,7 +1414,7 @@ def _nmse(
             metric:                        nmse
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     if "comparison" in metric_kwargs:
         comparison = metric_kwargs.pop("comparison")
@@ -1524,7 +1524,7 @@ def _nmae(
             metric:                        nmae
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     if "comparison" in metric_kwargs:
         comparison = metric_kwargs.pop("comparison")
@@ -1642,7 +1642,7 @@ def _nrmse(
             metric:                        nrmse
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     if "comparison" in metric_kwargs:
         comparison = metric_kwargs.pop("comparison")
@@ -1761,7 +1761,7 @@ def _msess(
             metric:                        msess
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     if "comparison" in metric_kwargs:
         comparison = metric_kwargs.pop("comparison")
@@ -1845,7 +1845,7 @@ def _mape(
             metric:                        mape
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     return mape(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -1917,7 +1917,7 @@ def _smape(
             metric:                        smape
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     return smape(forecast, verif, dim=dim, **metric_kwargs)
 
@@ -2017,7 +2017,7 @@ def _uacc(
             metric:                        uacc
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     messs_res = _msess(forecast, verif, dim=dim, **metric_kwargs)
     # Negative values are automatically turned into nans from xarray.
@@ -2098,7 +2098,7 @@ def _std_ratio(
             metric:                        std_ratio
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     return forecast.std(dim=dim, **metric_kwargs) / verif.std(dim=dim, **metric_kwargs)
 
@@ -2172,7 +2172,7 @@ def _unconditional_bias(
             metric:                        unconditional_bias
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
 
         Conditional bias is removed by
         :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`.
@@ -2200,7 +2200,7 @@ def _unconditional_bias(
             metric:                        unconditional_bias
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     return (forecast - verif).mean(dim=dim, **metric_kwargs)
 
@@ -2271,7 +2271,7 @@ def _mul_bias(
             metric:                        mul_bias
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     return (forecast / verif).mean(dim=dim, **metric_kwargs)
 
@@ -2348,7 +2348,7 @@ def _conditional_bias(
             metric:                        conditional_bias
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     acc = _pearson_r(forecast, verif, dim=dim, **metric_kwargs)
     return acc - _std_ratio(forecast, verif, dim=dim, **metric_kwargs)
@@ -2428,7 +2428,7 @@ def _bias_slope(
             metric:                        bias_slope
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     std_ratio = _std_ratio(forecast, verif, dim=dim, **metric_kwargs)
     acc = _pearson_r(forecast, verif, dim=dim, **metric_kwargs)
@@ -2519,7 +2519,7 @@ def _msess_murphy(
             metric:                        msess_murphy
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     acc = _pearson_r(forecast, verif, dim=dim, **metric_kwargs)
     conditional_bias = _conditional_bias(forecast, verif, dim=dim, **metric_kwargs)
@@ -2644,7 +2644,7 @@ def _brier_score(
             metric:                        brier_score
             comparison:                    m2o
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
             logical:                       Callable
 
         Option 2. Pre-process to generate a binary multi-member forecast and
@@ -2672,7 +2672,7 @@ def _brier_score(
             metric:                        brier_score
             comparison:                    m2o
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
 
         Option 3. Pre-process to generate a probability forecast and binary
         verification product. because ``member`` not present in ``hindcast`` anymore,
@@ -2699,7 +2699,7 @@ def _brier_score(
             metric:                        brier_score
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
     """
     forecast, verif, metric_kwargs, dim = _extract_and_apply_logical(
         forecast, verif, metric_kwargs, dim
@@ -2801,7 +2801,7 @@ def _threshold_brier_score(
             metric:                        threshold_brier_score
             comparison:                    m2o
             dim:                           member
-            References:                     []
+            reference:                     []
             threshold:                     0.2
 
         >>> # multiple thresholds averaging over init dimension
@@ -2829,7 +2829,7 @@ def _threshold_brier_score(
             metric:                        threshold_brier_score
             comparison:                    m2o
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
             threshold:                     [0.2, 0.3]
 
     """
@@ -2933,7 +2933,7 @@ def _crps(
             metric:                        crps
             comparison:                    m2o
             dim:                           member
-            References:                     []
+            reference:                     []
 
     """
     dim = _remove_member_from_dim_or_raise(dim)
@@ -3063,7 +3063,7 @@ def _crpss(
             metric:                        crpss
             comparison:                    m2o
             dim:                           member
-            References:                     []
+            reference:                     []
 
         >>> import scipy
         >>> PerfectModelEnsemble.isel(lead=[0, 1]).verify(
@@ -3206,7 +3206,7 @@ def _crpss_es(
             metric:                        crpss_es
             comparison:                    m2o
             dim:                           member
-            References:                     []
+            reference:                     []
 
     """
     if dim is None:
@@ -3457,7 +3457,7 @@ def _reliability(
             metric:                        reliability
             comparison:                    m2o
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
             logical:                       Callable
 
         Option 2. Pre-process to generate a binary forecast and verification product:
@@ -3486,7 +3486,7 @@ def _reliability(
             metric:                        reliability
             comparison:                    m2o
             dim:                           ['init', 'member']
-            References:                     []
+            reference:                     []
 
         Option 3. Pre-process to generate a probability forecast and binary
         verification product. because ``member`` not present in ``hindcast``, use
@@ -3515,7 +3515,7 @@ def _reliability(
             metric:                        reliability
             comparison:                    e2o
             dim:                           init
-            References:                     []
+            reference:                     []
 
     """
     if "logical" in metric_kwargs:
@@ -3590,7 +3590,7 @@ def _rank_histogram(
             metric:                        rank_histogram
             comparison:                    m2o
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
 
         >>> PerfectModelEnsemble.verify(
         ...     metric="rank_histogram", comparison="m2c", dim=["member", "init"]
@@ -3610,7 +3610,7 @@ def _rank_histogram(
             metric:                        rank_histogram
             comparison:                    m2c
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
 
     """
     dim = _remove_member_from_dim_or_raise(dim)
@@ -3693,7 +3693,7 @@ def _rps(
             metric:                        rps
             comparison:                    m2o
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
             category_edges:                [-0.5  0.   0.5  1. ]
 
 
@@ -3728,7 +3728,7 @@ def _rps(
             metric:                        rps
             comparison:                    m2c
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
             category_edges:                <xarray.Dataset>\nDimensions:        (cate...
 
         Provide ``category_edges`` as tuple for different category edges to categorize
@@ -3773,7 +3773,7 @@ def _rps(
             metric:                        rps
             comparison:                    m2o
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
             category_edges:                (<xarray.Dataset>\nDimensions:        (mon...
     """
     if "category_edges" in metric_kwargs:
@@ -3904,7 +3904,7 @@ def _contingency(forecast, verif, score="table", dim=None, **metric_kwargs):
             metric:                        contingency
             comparison:                    m2c
             dim:                           ['member', 'init']
-            References:                     []
+            reference:                     []
             score:                         hit_rate
             observation_category_edges:    [ 9.5 10.  10.5]
             forecast_category_edges:       [ 9.5 10.  10.5]
@@ -4055,6 +4055,7 @@ def _roc(
     positive=True,
     probabilistic=False,
     unit_power=0,
+    aliases=["Receiver Operating Characteristic", "receiver_operating_characteristic"],
 )
 
 
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 7c2a861d3..6eab5a9f0 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -323,6 +323,8 @@ For a thorough look at our metrics library, please see the
     :toctree: api/
 
     Metric
+    Metric.__init__
+    Metric.__repr__
     _get_norm_factor
     _pearson_r
     _pearson_r_p_value
@@ -374,6 +376,8 @@ For a thorough look at our metrics library, please see the
     :toctree: api/
 
     Comparison
+    Comparison.__init__
+    Comparison.__repr__
     _e2o
     _m2o
     _m2m
diff --git a/docs/source/comparisons.rst b/docs/source/comparisons.rst
index a2831988b..9a4c57bac 100644
--- a/docs/source/comparisons.rst
+++ b/docs/source/comparisons.rst
@@ -3,30 +3,39 @@ Comparisons
 ***********
 
 Forecasts have to be verified against some product to evaluate their performance.
-However, when verifying against a product, there are many different ways one can compare
-the ensemble of forecasts. Here we cover the comparison options for both hindcast and
-perfect model ensembles. See `terminology <terminology.html>`__ for clarification on
-the differences between these two experimental setups.
-
-All high-level functions like :py:meth:`~climpred.classes.HindcastEnsemble.verify` and :py:meth:`~climpred.classes.HindcastEnsemble.bootstrap` (for both ``HindcastEnsemble`` and ``PerfectModelEnsemble`` objects) take a
-``comparison=''`` keyword to select the comparison style. See below for a detailed
+However, when verifying against a product, there are many different ways one can
+compare the ensemble of forecasts. Here, we cover the comparison options for both
+:py:class:`~climpred.classes.HindcastEnsemble` and
+:py:class:`~climpred.classes.PerfectModelEnsemble`.
+See `terminology <terminology.html>`__ for clarification on the differences between
+these two experimental setups.
+
+All high-level functions like :py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.verify`,
+:py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`,
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.verify` and
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap` take a
+``comparison`` keyword to select the comparison style. See below for a detailed
 description on the differences between these comparisons.
 
 Hindcast Ensembles
 ##################
 
-In hindcast ensembles, the ensemble mean forecast (``comparison='e2o'``) is expected to
-perform better than individual ensemble members (``comparison='m2o'``) as the chaotic
-component of forecasts is expected to be suppressed by this averaging, while the memory
-of the system sustains. [Boer2016]_
+In :py:class:`~climpred.classes.HindcastEnsemble`, the ensemble mean forecast
+(``comparison="e2o"``) is expected to perform better than individual ensemble members
+(``comparison="m2o"``) as the chaotic component of forecasts is expected to be
+suppressed by this averaging, while the memory of the system sustains. [Boer2016]_
 
 .. currentmodule:: climpred.comparisons
 
-``keyword: 'e2o', 'e2r'``
+``keyword: "e2o", "e2r"``
 
 .. autosummary:: _e2o
 
-``keyword: 'm2o', 'm2r'``
+``keyword: "m2o", "m2r"``
 
 .. autosummary:: _m2o
 
@@ -34,26 +43,26 @@ of the system sustains. [Boer2016]_
 Perfect Model Ensembles
 #######################
 
-In perfect-model frameworks, there are many more ways of verifying forecasts.
-[Seferian2018]_ uses a comparison of all ensemble members against the
-control run (``comparison='m2c'``) and all ensemble members against all other ensemble
-members (``comparison='m2m'``). Furthermore, the ensemble mean forecast can be verified
-against one control member (``comparison='e2c'``) or all members (``comparison='m2e'``)
-as done in [Griffies1997]_.
+In :py:class:`~climpred.classes.PerfectModelEnsemble`, there are many more ways of
+verifying forecasts. [Seferian2018]_ uses a comparison of all ensemble members against
+the control run (``comparison="m2c"``) and all ensemble members against all other
+ensemble members (``comparison="m2m"``). Furthermore, the ensemble mean forecast can
+be verified against one control member (``comparison="e2c"``) or all members
+(``comparison="m2e"``) as done in Griffies1997_.
 
-``keyword: 'm2e'``
+``keyword: "m2e"``
 
 .. autosummary:: _m2e
 
-``keyword: 'm2c'``
+``keyword: "m2c"``
 
 .. autosummary:: _m2c
 
-``keyword: 'm2m'``
+``keyword: "m2m"``
 
 .. autosummary:: _m2m
 
-``keyword: 'e2c'``
+``keyword: "e2c"``
 
 .. autosummary:: _e2c
 
@@ -62,49 +71,55 @@ Normalization
 #############
 
 The goal of a normalized distance metric is to get a constant or comparable value of
-typically 1 (or 0 for metrics defined as 1 - metric) when the metric saturates and the
-predictability horizon is reached (see `metrics <metrics.html>`__).
+typically ``1`` (or ``0`` for metrics defined as ``1 - metric``) when the metric
+saturates and the predictability horizon is reached (see `metrics <metrics.html>`__).
 
-A factor is added in the normalized metric formula (see [Seferian2018]_) to accomodate
-different comparison styles. For example, ``nrmse`` gets smalled in comparison ``m2e``
-than ``m2m`` by design, since the ensembe mean is always closer to individual members
+A factor is added in the normalized metric formula [Seferian2018]_ to accommodate
+different comparison styles. For example, ``metric="nrmse"`` gets smaller in
+comparison ``"m2e"`` than ``"m2m"`` by design, since the ensemble mean is always
+closer to individual members
 than the ensemble members to each other. In turn, the normalization factor is ``2`` for
-comparisons ``m2c``, ``m2m``, and ``m2o``. It is 1 for ``m2e``, ``e2c``, and ``e2o``.
+comparisons ``"m2c"``, ``"m2m"``, and ``"m2o"``. It is ``1`` for ``"m2e"``,
+``"e2c"``, and ``"e2o"``.
 
 Interpretation of Results
 #########################
 
-When ``HindcastEnsemble`` skill is computed over all initializations ``dim='init'`` of the
-hindcast, the resulting skill is a mean forecast skill over all initializations.
+When :py:class:`~climpred.classes.HindcastEnsemble` skill is computed over all
+initializations ``dim="init"`` of the hindcast, the resulting skill is a mean forecast
+skill over all initializations.
 
-``PerfectModelEnsemble`` skill is computed over a supervector comprised of all
+:py:class:`~climpred.classes.PerfectModelEnsemble` skill is computed over a
+supervector comprised of all
 initializations and members, which allows the computation of the ACC-based skill
 [Bushuk2018]_, but also returns a mean forecast skill over all initializations.
 
-The supervector approach shown in [Bushuk2018]_ and just calculating a distance-based
-metric like ``rmse`` over the member dimension as in [Griffies1997]_ yield very similar
+The supervector approach shown in Bushuk2018_ and just calculating a distance-based
+metric like ``rmse`` over the member dimension as in Griffies1997_ yield very similar
 results.
 
 Compute over dimension
 ######################
 
 The argument ``dim`` defines over which dimension a metric is computed. We can
-apply a metric over all dimensions from the initialized dataset expect ``lead``.
+apply a metric over all dimensions from the ``initialized`` dataset except ``lead``.
 The resulting skill is then
-reduced by this ``dim``. Therefore, applying a metric over ``dim='member'`` creates a
-skill for all initializations individually. This can show the initial conditions
-dependence of skill. Likewise when computing skill over ``'init'``, we get skill for
-each member. This ``dim`` argument is different from the ``comparison`` argument which
+reduced by this ``dim``. Therefore, applying a metric over ``dim="member"`` or
+``dim=[]`` creates a skill for all initializations individually.
+This can show the initial conditions dependence of skill.
+Likewise when computing skill over ``"init"``, we get skill for each member.
+This ``dim`` argument is different from the ``comparison`` argument which
 just specifies how ``forecast`` and ``observations`` are defined.
 
 However, this above logic applies to deterministic metrics. Probabilistic metrics need
-to be applied to the ``member`` dimension and ``comparison`` from [``'m2c'``, ``'m2m'``]
-in ``PerfectModelEnsemble`` :py:meth:`~climpred.classes.PerfectModelEnsemble.verify` and ``'m2o'`` comparison in ``HindcastEnsemble``
+to be applied to the ``member`` dimension and ``comparison`` from
+``["m2c", "m2m"]`` in :py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.verify` and ``"m2o"`` comparison in
+:py:class:`~climpred.classes.HindcastEnsemble`
 :py:meth:`~climpred.classes.HindcastEnsemble.verify`.
 
-``dim`` should not contain
-``member`` when the comparison already computes ensemble means as in
-[``'e2o'``, ``'e2c'``].
+``dim`` should not contain ``member`` when the comparison already computes ensemble
+means as in ``["e2o", "e2c"]``.
 
 
 User-defined comparisons
@@ -127,9 +142,9 @@ comparison should also be used for probabilistic metrics, make sure that
       """Identical to m2e but median."""
       observations_list = []
       forecast_list = []
-      supervector_dim = 'member'
+      supervector_dim = "member"
       for m in ds.member.values:
-          forecast = _drop_members(ds, rmd_member=[m]).median('member')
+          forecast = _drop_members(ds, rmd_member=[m]).median("member")
           observations = ds.sel(member=m).squeeze()
           forecast_list.append(forecast)
           observations_list.append(observations)
@@ -143,16 +158,18 @@ Then initialize this comparison function with
 :py:class:`~climpred.comparisons.Comparison`::
 
   __my_m2median_comparison = Comparison(
-      name='m2me',
+      name="m2me",
       function=_my_m2median_comparison,
       probabilistic=False,
       hindcast=False)
 
 Finally, compute skill based on your own comparison::
 
-  skill = compute_perfect_model(ds, control,
-                                metric='rmse',
-                                comparison=__my_m2median_comparison)
+  PerfectModelEnsemble.verify(
+    metric="rmse",
+    comparison=__my_m2median_comparison,
+    dim=[],
+  )
 
 Once you come up with an useful comparison for your problem, consider contributing this
 comparison to ``climpred``, so all users can benefit from your comparison, see
@@ -162,10 +179,20 @@ comparison to ``climpred``, so all users can benefit from your comparison, see
 References
 ##########
 
-.. [Boer2016] Boer, G. J., D. M. Smith, C. Cassou, F. Doblas-Reyes, G. Danabasoglu, B. Kirtman, Y. Kushnir, et al. “The Decadal Climate Prediction Project (DCPP) Contribution to CMIP6.” Geosci. Model Dev. 9, no. 10 (October 25, 2016): 3751–77. https://doi.org/10/f89qdf.
+.. [Boer2016] Boer, G. J., D. M. Smith, C. Cassou, F. Doblas-Reyes, G. Danabasoglu,
+    B. Kirtman, Y. Kushnir, et al. “The Decadal Climate Prediction Project (DCPP)
+    Contribution to CMIP6.” Geosci. Model Dev. 9, no. 10 (October 25, 2016): 3751–77.
+    https://doi.org/10/f89qdf.
 
-.. [Bushuk2018] Mitchell Bushuk, Rym Msadek, Michael Winton, Gabriel Vecchi, Xiaosong Yang, Anthony Rosati, and Rich Gudgel. Regional Arctic sea–ice prediction: potential versus operational seasonal forecast skill. Climate Dynamics, June 2018. https://doi.org/10/gd7hfq.
+.. [Bushuk2018] Mitchell Bushuk, Rym Msadek, Michael Winton, Gabriel Vecchi,
+    Xiaosong Yang, Anthony Rosati, and Rich Gudgel. Regional Arctic sea–ice
+    prediction: potential versus operational seasonal forecast skill.
+    Climate Dynamics, June 2018. https://doi.org/10/gd7hfq.
 
-.. [Griffies1997] S. M. Griffies and K. Bryan. A predictability study of simulated North Atlantic multidecadal variability. Climate Dynamics, 13(7-8):459–487, August 1997. https://doi.org/10/ch4kc4.
+.. [Griffies1997] S. M. Griffies and K. Bryan. A predictability study of simulated
+    North Atlantic multidecadal variability. Climate Dynamics, 13(7-8):459–487,
+    August 1997. https://doi.org/10/ch4kc4.
 
-.. [Seferian2018] Roland Séférian, Sarah Berthet, and Matthieu Chevallier. Assessing the Decadal Predictability of Land and Ocean Carbon Uptake. Geophysical Research Letters, March 2018. https://doi.org/10/gdb424.
+.. [Seferian2018] Roland Séférian, Sarah Berthet, and Matthieu Chevallier. Assessing
+    the Decadal Predictability of Land and Ocean Carbon Uptake.
+    Geophysical Research Letters, March 2018. https://doi.org/10/gdb424.
diff --git a/docs/source/contributors.rst b/docs/source/contributors.rst
index c53e49b12..3d07fa0b2 100644
--- a/docs/source/contributors.rst
+++ b/docs/source/contributors.rst
@@ -18,8 +18,8 @@ Contributors
 
 Core Developers
 ===============
-* Riley X. Brady (`github <https://github.com/bradyrx/>`__)
 * Aaron Spring (`github <https://github.com/aaronspring/>`__)
+* Riley X. Brady (`github <https://github.com/bradyrx/>`__)
 
 Contributors
 ============
diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst
index 7748fc486..082efcc0a 100644
--- a/docs/source/metrics.rst
+++ b/docs/source/metrics.rst
@@ -1,7 +1,6 @@
 .. currentmodule:: climpred.metrics
 
 .. ipython:: python
-    :suppress:
 
     from climpred.metrics import __ALL_METRICS__ as all_metrics
 
@@ -17,15 +16,19 @@
 Metrics
 #######
 
-All high-level functions like :py:meth:`~climpred.classes.HindcastEnsemble.verify`,
+All high-level functions like :py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.verify`,
+:py:class:`~climpred.classes.HindcastEnsemble`
 :py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`,
+:py:class:`~climpred.classes.PerfectModelEnsemble`
 :py:meth:`~climpred.classes.PerfectModelEnsemble.verify` and
+:py:class:`~climpred.classes.PerfectModelEnsemble`
 :py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap` have a ``metric`` argument
 that has to be called to determine which metric is used in computing predictability.
 
 .. note::
 
-    We use the phrase 'observations' ``o`` here to refer to the 'truth' data to which
+    We use the term 'observations' ``o`` here to refer to the 'truth' data to which
     we compare the forecast ``f``. These metrics can also be applied relative
     to a control simulation, reconstruction, observations, etc. This would just
     change the resulting score from quantifying skill to quantifying potential
@@ -68,7 +71,7 @@ Pearson Product-Moment Correlation Coefficient
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['pearson_r']}")
+    print(f"Keywords: {metric_aliases['pearson_r']}")
 
 .. autofunction:: _pearson_r
 
@@ -78,7 +81,7 @@ Pearson Correlation p value
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['pearson_r_p_value']}")
+    print(f"Keywords: {metric_aliases['pearson_r_p_value']}")
 
 .. autofunction:: _pearson_r_p_value
 
@@ -88,7 +91,7 @@ Effective Sample Size
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['effective_sample_size']}")
+    print(f"Keywords: {metric_aliases['effective_sample_size']}")
 
 .. autofunction:: _effective_sample_size
 
@@ -98,7 +101,7 @@ Pearson Correlation Effective p value
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['pearson_r_eff_p_value']}")
+    print(f"Keywords: {metric_aliases['pearson_r_eff_p_value']}")
 
 .. autofunction:: _pearson_r_eff_p_value
 
@@ -109,7 +112,7 @@ Spearman's Rank Correlation Coefficient
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['spearman_r']}")
+    print(f"Keywords: {metric_aliases['spearman_r']}")
 
 .. autofunction:: _spearman_r
 
@@ -120,7 +123,7 @@ Spearman's Rank Correlation Coefficient p value
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['spearman_r_p_value']}")
+    print(f"Keywords: {metric_aliases['spearman_r_p_value']}")
 
 .. autofunction:: _spearman_r_p_value
 
@@ -130,7 +133,7 @@ Spearman's Rank Correlation Effective p value
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['spearman_r_eff_p_value']}")
+    print(f"Keywords: {metric_aliases['spearman_r_eff_p_value']}")
 
 .. autofunction:: _spearman_r_eff_p_value
 
@@ -146,7 +149,7 @@ Mean Squared Error (MSE)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['mse']}")
+    print(f"Keywords: {metric_aliases['mse']}")
 
 .. autofunction:: _mse
 
@@ -157,7 +160,7 @@ Root Mean Square Error (RMSE)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['rmse']}")
+    print(f"Keywords: {metric_aliases['rmse']}")
 
 .. autofunction:: _rmse
 
@@ -168,7 +171,7 @@ Mean Absolute Error (MAE)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['mae']}")
+    print(f"Keywords: {metric_aliases['mae']}")
 
 .. autofunction:: _mae
 
@@ -179,7 +182,7 @@ Median Absolute Error
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['median_absolute_error']}")
+    print(f"Keywords: {metric_aliases['median_absolute_error']}")
 
 .. autofunction:: _median_absolute_error
 
@@ -190,7 +193,7 @@ Spread
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['spread']}")
+    print(f"Keywords: {metric_aliases['spread']}")
 
 .. autofunction:: _spread
 
@@ -201,7 +204,7 @@ Multiplicative bias
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['mul_bias']}")
+    print(f"Keywords: {metric_aliases['mul_bias']}")
 
 .. autofunction:: _mul_bias
 
@@ -221,7 +224,7 @@ Normalized Mean Square Error (NMSE)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['nmse']}")
+    print(f"Keywords: {metric_aliases['nmse']}")
 
 .. autofunction:: _nmse
 
@@ -232,7 +235,7 @@ Normalized Mean Absolute Error (NMAE)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['nmae']}")
+    print(f"Keywords: {metric_aliases['nmae']}")
 
 .. autofunction:: _nmae
 
@@ -243,7 +246,7 @@ Normalized Root Mean Square Error (NRMSE)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['nrmse']}")
+    print(f"Keywords: {metric_aliases['nrmse']}")
 
 .. autofunction:: _nrmse
 
@@ -254,7 +257,7 @@ Mean Square Error Skill Score (MSESS)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['msess']}")
+    print(f"Keywords: {metric_aliases['msess']}")
 
 .. autofunction:: _msess
 
@@ -265,7 +268,7 @@ Mean Absolute Percentage Error (MAPE)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['mape']}")
+    print(f"Keywords: {metric_aliases['mape']}")
 
 .. autofunction:: _mape
 
@@ -275,7 +278,7 @@ Symmetric Mean Absolute Percentage Error (sMAPE)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['smape']}")
+    print(f"Keywords: {metric_aliases['smape']}")
 
 .. autofunction:: _smape
 
@@ -286,7 +289,7 @@ Unbiased Anomaly Correlation Coefficient (uACC)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['uacc']}")
+    print(f"Keywords: {metric_aliases['uacc']}")
 
 .. autofunction:: _uacc
 
@@ -294,7 +297,7 @@ Unbiased Anomaly Correlation Coefficient (uACC)
 Murphy Decomposition Metrics
 ============================
 
-Metrics derived in [Murphy1988]_ which decompose the ``MSESS`` into a correlation term,
+Metrics derived in Murphy1988_ which decompose the ``MSESS`` into a correlation term,
 a conditional bias term, and an unconditional bias term. See
 https://www-miklip.dkrz.de/about/murcss/ for a walk through of the decomposition.
 
@@ -304,7 +307,7 @@ Standard Ratio
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['std_ratio']}")
+    print(f"Keywords: {metric_aliases['std_ratio']}")
 
 .. autofunction:: _std_ratio
 
@@ -314,7 +317,7 @@ Conditional Bias
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['conditional_bias']}")
+    print(f"Keywords: {metric_aliases['conditional_bias']}")
 
 .. autofunction:: _conditional_bias
 
@@ -324,7 +327,7 @@ Unconditional Bias
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['unconditional_bias']}")
+    print(f"Keywords: {metric_aliases['unconditional_bias']}")
 
 Simple bias of the forecast minus the observations.
 
@@ -336,7 +339,7 @@ Bias Slope
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['bias_slope']}")
+    print(f"Keywords: {metric_aliases['bias_slope']}")
 
 .. autofunction:: _bias_slope
 
@@ -346,7 +349,7 @@ Murphy's Mean Square Error Skill Score
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['msess_murphy']}")
+    print(f"Keywords: {metric_aliases['msess_murphy']}")
 
 .. autofunction:: _msess_murphy
 
@@ -364,7 +367,7 @@ Continuous Ranked Probability Score (CRPS)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['crps']}")
+    print(f"Keywords: {metric_aliases['crps']}")
 
 .. autofunction:: _crps
 
@@ -374,7 +377,7 @@ Continuous Ranked Probability Skill Score (CRPSS)
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['crpss']}")
+    print(f"Keywords: {metric_aliases['crpss']}")
 
 .. autofunction:: _crpss
 
@@ -384,7 +387,7 @@ Continuous Ranked Probability Skill Score Ensemble Spread
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['crpss_es']}")
+    print(f"Keywords: {metric_aliases['crpss_es']}")
 
 .. autofunction:: _crpss_es
 
@@ -394,7 +397,7 @@ Brier Score
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['brier_score']}")
+    print(f"Keywords: {metric_aliases['brier_score']}")
 
 .. autofunction:: _brier_score
 
@@ -404,7 +407,7 @@ Threshold Brier Score
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['threshold_brier_score']}")
+    print(f"Keywords: {metric_aliases['threshold_brier_score']}")
 
 .. autofunction:: _threshold_brier_score
 
@@ -414,7 +417,7 @@ Ranked Probability Score
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['rps']}")
+    print(f"Keywords: {metric_aliases['rps']}")
 
 .. autofunction:: _rps
 
@@ -424,7 +427,7 @@ Reliability
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['reliability']}")
+    print(f"Keywords: {metric_aliases['reliability']}")
 
 .. autofunction:: _reliability
 
@@ -434,7 +437,7 @@ Discrimination
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['discrimination']}")
+    print(f"Keywords: {metric_aliases['discrimination']}")
 
 .. autofunction:: _discrimination
 
@@ -444,7 +447,7 @@ Rank Histogram
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['rank_histogram']}")
+    print(f"Keywords: {metric_aliases['rank_histogram']}")
 
 .. autofunction:: _rank_histogram
 
@@ -454,7 +457,7 @@ Logarithmic Ensemble Spread Score
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['less']}")
+    print(f"Keywords: {metric_aliases['less']}")
 
 .. autofunction:: _less
 
@@ -475,7 +478,7 @@ Receiver Operating Characteristic
 .. ipython:: python
 
     # Enter any of the below keywords in ``metric=...`` for the compute functions.
-    print(f"\n\nKeywords: {metric_aliases['roc']}")
+    print(f"Keywords: {metric_aliases['roc']}")
 
 .. autofunction:: _roc
 
diff --git a/docs/source/reference_forecast.rst b/docs/source/reference_forecast.rst
index 8f656ccae..01fc34bae 100644
--- a/docs/source/reference_forecast.rst
+++ b/docs/source/reference_forecast.rst
@@ -2,28 +2,28 @@
 Reference Forecasts
 *******************
 
-To quantify the quality of an initialized forecast, it is useful to judge it against some simple
-reference forecast. ``climpred`` currently supports a persistence forecast, but future releases
-will allow computation of other reference forecasts. Consider opening a
-`Pull Request <contributing.html>`_ to get it implemented more quickly.
+To quantify the quality of an initialized forecast, it is useful to judge it against
+some simple reference forecast. ``climpred`` currently supports several reference
+forecasts, and we are open to adding other reference forecasts. Consider opening a
+`Pull Request <contributing.html>`_.
 
 **Persistence Forecast**: Whatever is observed at the time of initialization is forecasted to
 persist into the forecast period [Jolliffe2012]_. You can compute this by passing
-``reference='persistence'`` into the ``.verify()`` method for
+``reference='persistence'`` into the ``.verify()`` and ``.bootstrap()`` methods for
 :py:class:`~climpred.classes.HindcastEnsemble` and
 :py:class:`~climpred.classes.PerfectModelEnsemble` objects.
 
-**Damped Persistence Forecast**: (*Not Implemented*) The amplitudes of the anomalies reduce in time
-exponentially at a time scale of the local autocorrelation [Yuan2016]_.
+**Damped Persistence Forecast**: (*Not Implemented*) The amplitudes of the anomalies
+reduce in time exponentially at a time scale of the local autocorrelation [Yuan2016]_.
 
 .. math::
 
     v_{dp}(t) = v(0)e^{-\alpha t}
 
-**Climatology**: (*Not Implemented*) The average values at the temporal forecast resolution
-(e.g., annual, monthly) over some long period, which is usually 30 years [Jolliffe2012]_. You can compute this by passing
-``reference='climatology'`` into the ``.verify()`` method for
-:py:class:`~climpred.classes.HindcastEnsemble` and
+**Climatology**: The average values at the temporal forecast resolution (e.g., annual,
+monthly, daily) over some long period, which is usually 30 years [Jolliffe2012]_.
+You can compute this by passing ``reference='climatology'`` into the ``.verify()`` and
+``.bootstrap()`` methods for :py:class:`~climpred.classes.HindcastEnsemble` and
 :py:class:`~climpred.classes.PerfectModelEnsemble` objects.
 
 **Uninitialized**: *Uninitialized** ensembles are generated by perturbing initial
@@ -39,16 +39,17 @@ initialized prediction system (the CESM Decadal Prediction Large Ensemble). If t
 isn't available, one can approximate the unintiailized response by resampling a
 control simulation.
 
-**Random Mechanism**: (*Not Implemented*) A probability distribution is assigned to the possible
-range of the variable being forecasted, and a sequence of forecasts is produced by taking a sequence
-of independent values from that distribution [Jolliffe2012]_. This would be similar to computing an
-uninitialized forecast, using ``reference='uninitialized'`` in
-:py:class:`~climpred.classes.HindcastEnsemble` and
-:py:class:`~climpred.classes.PerfectModelEnsemble` objects. For ``HindcastEnsemble`` objects, an
-uninitialized ensemble has to be added through ``.add_uninitialized(...)``. This could be, for
-example, output from a Large Ensemble. For ``PerfectModelEnsemble`` objects, one can run
-``.generate_uninitialized()`` which uses a bootstrapping approach to create an uninitialized
-equivalent.
+**Random Mechanism**: (*Not Implemented*) A probability distribution is assigned to the
+possible range of the variable being forecasted, and a sequence of forecasts is
+produced by taking a sequence of independent values from that distribution
+[Jolliffe2012]_. This would be similar to computing an uninitialized forecast, using
+``reference='uninitialized'`` in :py:class:`~climpred.classes.HindcastEnsemble` and
+:py:class:`~climpred.classes.PerfectModelEnsemble` objects. For ``HindcastEnsemble``
+objects, an uninitialized ensemble has to be added through ``.add_uninitialized(...)``.
+This could be, for example, output from an uninitialized Large Ensemble.
+You may also run ``.generate_uninitialized()``, which resamples the ``initialized``
+from ``HindcastEnsemble`` or ``control`` from ``PerfectModelEnsemble`` to an
+uninitialized forecast.
 
 References
 ##########
@@ -56,5 +57,5 @@ References
 .. [Jolliffe2012] Jolliffe, Ian T., and David B. Stephenson, eds. Forecast verification:
    a practitioner's guide in atmospheric science. John Wiley & Sons, 2012.
 
-.. [Yuan2016] Yuan, Xiaojun, et al. "Arctic sea ice seasonal prediction by a linear Markov model."
-   Journal of Climate 29.22 (2016): 8151-8173.
+.. [Yuan2016] Yuan, Xiaojun, et al. "Arctic sea ice seasonal prediction by a linear
+   Markov model." Journal of Climate 29.22 (2016): 8151-8173.
diff --git a/docs/source/scope.rst b/docs/source/scope.rst
index 012677e59..5eeb1fb4c 100644
--- a/docs/source/scope.rst
+++ b/docs/source/scope.rst
@@ -1,20 +1,23 @@
 Scope of ``climpred``
 =====================
 
-``climpred`` aims to be the primary package used to analyze output from initialized dynamical
-forecast models, ranging from short-term weather forecasts to decadal climate forecasts. The code
-base will be driven entirely by the geoscientific prediction community through open source
-development. It leverages ``xarray`` to keep track of core prediction ensemble dimensions
-(e.g., ensemble member, initialization date, and lead time) and ``dask`` to perform out-of-memory
-computations on large datasets.
+``climpred`` aims to be the primary package used to analyze output from initialized
+dynamical forecast models, ranging from short-term weather forecasts to decadal climate
+forecasts. The code base is driven by the geoscientific prediction community through
+open source development. It leverages `xarray <http://xarray.pydata.org/en/stable/>`_
+to keep track of core prediction ensemble dimensions (e.g., ensemble member,
+initialization date, and lead time) and `dask <https://dask.org/>`_ to perform
+out-of-memory computations on large datasets.
 
-The primary goal of ``climpred`` is to offer a comprehensive set of analysis tools for assessing
-the forecasts relative to a validation product (e.g., observations, reanalysis products, control
-runs, baseline forecasts). This will range from simple deterministic and probabilistic verification
-metrics—such as mean absolute error and various skill scores—to more advanced analysis methods,
-such as relative entropy and mutual information. ``climpred`` expects users to handle their
-domain-specific post-processing of model output, so that the package can focus on the actual
-analysis of forecasts.
+The primary goal of ``climpred`` is to offer a comprehensive set of analysis tools for
+assessing the forecasts relative to a validation product (e.g., observations,
+reanalysis products, control simulations, baseline forecasts). This ranges from simple
+deterministic and probabilistic verification metrics — such as mean absolute
+error or rank histogram — to more advanced analysis methods,
+such as contingency table-derived metrics. ``climpred`` expects users to handle their
+domain-specific post-processing of model output, so that the package can focus on the
+actual analysis of forecasts.
 
-Finally, the ``climpred`` documentation will serve as a repository of unified analysis methods
-through jupyter notebook examples, and will also collect relevant references and literature.
+Finally, the ``climpred`` documentation will serve as a repository of unified analysis
+methods through `jupyter <https://jupyter.org/>`_ notebook examples, and will also
+collect relevant references and literature.
diff --git a/docs/source/terminology.rst b/docs/source/terminology.rst
index ac27a340d..bcf44202c 100644
--- a/docs/source/terminology.rst
+++ b/docs/source/terminology.rst
@@ -104,16 +104,35 @@ volcanic eruptions [Meehl2013]_.
 References
 ##########
 
-.. [Griffies1997] Griffies, S. M., and K. Bryan. “A Predictability Study of Simulated North Atlantic Multidecadal Variability.” Climate Dynamics 13, no. 7–8 (1997): 459–87. https://doi.org/10/ch4kc4
-
-.. [Boer2016] Boer, G. J., Smith, D. M., Cassou, C., Doblas-Reyes, F., Danabasoglu, G., Kirtman, B., Kushnir, Y., Kimoto, M., Meehl, G. A., Msadek, R., Mueller, W. A., Taylor, K. E., Zwiers, F., Rixen, M., Ruprich-Robert, Y., and Eade, R.: The Decadal Climate Prediction Project (DCPP) contribution to CMIP6, Geosci. Model Dev., 9, 3751-3777, https://doi.org/10.5194/gmd-9-3751-2016, 2016.
-
-.. [Jolliffe2011] Ian T. Jolliffe and David B. Stephenson. Forecast Verification: A Practitioner’s Guide in Atmospheric Science. John Wiley & Sons, Ltd, Chichester, UK, 2011. ISBN 978-1-119-96000-3 978-0-470-66071-3. URL: http://doi.wiley.com/10.1002/9781119960003.
-
-.. [Meehl2013] Meehl, G. A., Goddard, L., Boer, G., Burgman, R., Branstator, G., Cassou, C., ... & Karspeck, A. (2014). Decadal climate prediction: an update from the trenches. Bulletin of the American Meteorological Society, 95(2), 243-267. https://doi.org/10.1175/BAMS-D-12-00241.1.
-
-.. [Murphy1985] Murphy, Allan H., and Daan, H. "Forecast evaluation." Probability, Statistics, and Decision Making in the Atmospheric Sciences, A. H. Murphy and R. W. Katz, Eds., Westview Press, 379-437.
-
-.. [Murphy1988] Murphy, Allan H. “Skill Scores Based on the Mean Square Error and Their Relationships to the Correlation Coefficient.” Monthly Weather Review 116, no. 12 (December 1, 1988): 2417–24. https://doi.org/10/fc7mxd.
-
-.. [Pegion2019] Pegion, K., T. Delsole, E. Becker, and T. Cicerone (2019). "Assessing the Fidelity of Predictability Estimates", Climate Dynamics, 53, 7251–7265 https://doi.org/10.1007/s00382-017-3903-7.
+.. [Griffies1997] Griffies, S. M., and K. Bryan. “A Predictability Study of Simulated
+    North Atlantic Multidecadal Variability.”
+    Climate Dynamics 13, no. 7–8 (1997): 459–87. https://doi.org/10/ch4kc4
+
+.. [Boer2016] Boer, G. J., Smith, D. M., Cassou, C., Doblas-Reyes, F.,
+    Danabasoglu, G., Kirtman, B., Kushnir, Y., Kimoto, M., Meehl, G. A., Msadek, R.,
+    Mueller, W. A., Taylor, K. E., Zwiers, F., Rixen, M., Ruprich-Robert, Y., and
+    Eade, R.: The Decadal Climate Prediction Project (DCPP) contribution to CMIP6,
+    Geosci. Model Dev., 9, 3751-3777, https://doi.org/10.5194/gmd-9-3751-2016, 2016.
+
+.. [Jolliffe2011] Ian T. Jolliffe and David B. Stephenson. "Forecast Verification:
+    A Practitioner’s Guide in Atmospheric Science."
+    John Wiley & Sons, Ltd, Chichester, UK, 2011. ISBN 978-1-119-96000-3
+    978-0-470-66071-3. http://doi.wiley.com/10.1002/9781119960003.
+
+.. [Meehl2013] Meehl, G. A., Goddard, L., Boer, G., Burgman, R., Branstator, G.,
+    Cassou, C., ... & Karspeck, A. (2014).
+    Decadal climate prediction: an update from the trenches.
+    Bulletin of the American Meteorological Society, 95(2), 243-267.
+    https://doi.org/10.1175/BAMS-D-12-00241.1.
+
+.. [Murphy1985] Murphy, Allan H., and Daan, H. "Forecast evaluation." Probability,
+    Statistics, and Decision Making in the Atmospheric Sciences,
+    A. H. Murphy and R. W. Katz, Eds., Westview Press, 379-437.
+
+.. [Murphy1988] Murphy, Allan H. “Skill Scores Based on the Mean Square Error and
+    Their Relationships to the Correlation Coefficient.” Monthly Weather Review 116,
+    no. 12 (December 1, 1988): 2417–24. https://doi.org/10/fc7mxd.
+
+.. [Pegion2019] Pegion, K., T. Delsole, E. Becker, and T. Cicerone (2019).
+    "Assessing the Fidelity of Predictability Estimates",
+    Climate Dynamics, 53, 7251–7265 https://doi.org/10.1007/s00382-017-3903-7.
diff --git a/docs/source/why-climpred.rst b/docs/source/why-climpred.rst
index b0f990a9b..e0bf10901 100644
--- a/docs/source/why-climpred.rst
+++ b/docs/source/why-climpred.rst
@@ -15,12 +15,16 @@ headache of bookkeeping for you. We offer
 objects that carry products to verify against (e.g., control runs,
 reconstructions, uninitialized ensembles) along with your decadal prediction output.
 
-
 When computing lead-dependent skill scores, ``climpred`` handles all of the
 lag-correlating for you, properly aligning the multiple time dimensions between
 the hindcast and  verification datasets. We offer a suite of vectorized
-`deterministic <metrics.html#deterministic>`__
-and `probabilistic <metrics.html#probabilistic>`__ metrics that can be applied to time
-series and grids. It's as easy as adding your decadal prediction output to an object and
-running a :py:meth:`~climpred.classes.HindcastEnsemble.verify` command:
-``HindcastEnsemble.verify(metric='rmse', comparison='e2o', dim='init', alignment='maximize')``.
+`deterministic <metrics.html#deterministic>`_
+and `probabilistic <metrics.html#probabilistic>`_ metrics that can be applied to time
+series and grids. It's as easy as concatenating your initialized prediction output into
+one ``xr.Dataset`` and calling :py:meth:`~climpred.classes.HindcastEnsemble.verify`:
+
+.. code-block:: python
+
+    >>> HindcastEnsemble.verify(
+    ...     metric="rmse", comparison="e2o", dim="init", alignment="maximize"
+    ... )

From ffd8f167c633e2c013969623e844f13e253e2395 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 15:58:44 +0100
Subject: [PATCH 27/56] contrib

---
 docs/source/contributing.rst | 75 ++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 33 deletions(-)

diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
index 45109a1b4..d895e12b8 100644
--- a/docs/source/contributing.rst
+++ b/docs/source/contributing.rst
@@ -3,8 +3,8 @@ Contribution Guide
 =====================
 
 Contributions are highly welcomed and appreciated.  Every little help counts,
-so do not hesitate! You can make a high impact on ``climpred`` just by using it and
-reporting `issues <https://github.com/pangeo-data/climpred/issues>`__.
+so do not hesitate! You can make a high impact on ``climpred`` just by using
+it and reporting `issues <https://github.com/pangeo-data/climpred/issues>`__.
 
 The following sections cover some general guidelines
 regarding development in ``climpred`` for maintainers and contributors.
@@ -26,12 +26,14 @@ Feel free to suggest improvements or changes in the workflow.
 Feature requests and feedback
 -----------------------------
 
-We are eager to hear about your requests for new features and any suggestions about the
-API, infrastructure, and so on. Feel free to submit these as
-`issues <https://github.com/pangeo-data/climpred/issues/new>`__ with the label "feature request."
+We are eager to hear about your requests for new features and any suggestions
+about the API, infrastructure, and so on. Feel free to submit these as
+`issues <https://github.com/pangeo-data/climpred/issues/new>`__ with the label
+"feature request."
 
-Please make sure to explain in detail how the feature should work and keep the scope as
-narrow as possible. This will make it easier to implement in small PRs.
+Please make sure to explain in detail how the feature should work and keep the
+scope as narrow as possible. This will make it easier to implement in small
+PRs.
 
 
 .. _reportbugs:
@@ -39,27 +41,29 @@ narrow as possible. This will make it easier to implement in small PRs.
 Report bugs
 -----------
 
-Report bugs for ``climpred`` in the `issue tracker <https://github.com/pangeo-data/climpred/issues>`_
-with the label "bug".
+Report bugs for ``climpred`` in the
+`issue tracker <https://github.com/pangeo-data/climpred/issues>`_ with the
+label "bug".
 
 If you are reporting a bug, please include:
 
-* Your operating system name and version.
 * Any details about your local setup that might be helpful in troubleshooting,
-  specifically the Python interpreter version, installed libraries, and ``climpred``
-  version.
+  specifically the Python interpreter version, installed libraries, and
+  ``climpred`` version.
 * Detailed steps to reproduce the bug.
 
 If you can write a demonstration test that currently fails but should pass,
-that is a very useful commit to make as well, even if you cannot fix the bug itself.
+that is a very useful commit to make as well, even if you cannot fix the bug
+itself.
 
 
 .. _fixbugs:
 
-Fix bugs
---------
+Bug Fix
+-------
 
-Look through the `GitHub issues for bugs <https://github.com/pangeo-data/climpred/labels/bug>`_.
+Look through the
+`GitHub issues for bugs <https://github.com/pangeo-data/climpred/labels/bug>`_.
 
 Talk to developers to find out how you can fix specific bugs.
 
@@ -70,17 +74,17 @@ Write documentation
 ``climpred`` could always use more documentation.  What exactly is needed?
 
 * More complementary documentation.  Have you perhaps found something unclear?
-* Docstrings. There can never be too many of them.
-* Example notebooks with different Earth System Models, lead times, etc. -- they're all very
-  appreciated.
+* Example notebooks with different Earth System Models, lead times, etc. --
+  they're all very appreciated.
 
 You can also edit documentation files directly in the GitHub web interface,
 without using a local copy.  This can be convenient for small fixes.
 
-Our documentation is written in reStructuredText. You can follow our conventions in already written
-documents. Some helpful guides are located
-`here <http://docutils.sourceforge.net/docs/user/rst/quickref.html>`__ and
-`here <https://github.com/ralsina/rst-cheatsheet/blob/master/rst-cheatsheet.rst>`__.
+Our documentation is written in reStructuredText. You can follow our
+conventions in already written documents. Some helpful guides are
+`rst-quickref <http://docutils.sourceforge.net/docs/user/rst/quickref.html>`__
+and
+`rst-cheatsheet <https://github.com/ralsina/rst-cheatsheet/blob/master/rst-cheatsheet.rst>`__.
 
 .. note::
     Build the documentation locally with the following command:
@@ -93,8 +97,9 @@ documents. Some helpful guides are located
 
     The built documentation should be available in the ``docs/build/``.
 
-If you need to add new functions to the API, run ``sphinx-autogen -o api api.rst`` from the
-``docs/source`` directory and add the functions to ``api.rst``.
+If you need to add new functions to the API, run
+``sphinx-autogen -o api api.rst`` from the ``docs/source`` directory after
+adding functions to ``api.rst``.
 
  .. _`pull requests`:
  .. _pull-requests:
@@ -103,10 +108,11 @@ Preparing Pull Requests
 -----------------------
 
 #. Fork the `climpred GitHub repository <https://github.com/pangeo-data/climpred>`__.
-   It's fine to use ``climpred`` as your fork repository name because it will live
-   under your user.
+   It's fine to use ``climpred`` as your fork repository name because it will
+   live under your user.
 
-#. Clone your fork locally using `git <https://git-scm.com/>`_, connect your repository to the upstream (main project), and create a branch::
+#. Clone your fork locally using `git <https://git-scm.com/>`_, connect your
+   repository to the upstream (main project), and create a branch::
 
     $ git clone git@github.com:YOUR_GITHUB_USERNAME/climpred.git
     $ cd climpred
@@ -119,22 +125,25 @@ Preparing Pull Requests
    If you need some help with Git, follow this quick start
    `guide <https://git.wiki.kernel.org/index.php/QuickStart>`_.
 
-#. Install dependencies into a new `conda <https://conda.io/projects/conda/en/latest/user-guide/getting-started.html>`_ environment::
+#. Install dependencies into a new
+   `conda <https://conda.io/projects/conda/en/latest/user-guide/getting-started.html>`_
+   environment::
 
-    $ conda env update -f ci/requirements/climpred-dev.yml
+    $ conda env create -f ci/requirements/climpred-dev.yml
     $ conda activate climpred-dev
 
 #. Make an editable install of ``climpred`` by running::
 
     $ pip install -e .
 
-#. Install `pre-commit <https://pre-commit.com>`_ and its hook on the ``climpred`` repo::
+#. Install `pre-commit <https://pre-commit.com>`_ and its hook on the
+   ``climpred`` repo::
 
      $ pip install --user pre-commit
      $ pre-commit install
 
-   pre-commit automatically beautifies the code, makes it more maintainable and catches syntax errors.
-   Afterwards ``pre-commit`` will run whenever you commit.
+   ``pre-commit`` automatically beautifies the code, makes it more
+   maintainable and catches syntax errors. Afterwards ``pre-commit`` will run whenever you commit.
 
    https://pre-commit.com/ is a framework for managing and maintaining multi-language pre-commit
    hooks to ensure code-style and code formatting is consistent.

From af102840e85d21d33f18176cbff3fd6be8c145b3 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 16:11:06 +0100
Subject: [PATCH 28/56] contrib

---
 docs/source/contributing.rst | 87 +++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 35 deletions(-)

diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
index d895e12b8..d09b2d638 100644
--- a/docs/source/contributing.rst
+++ b/docs/source/contributing.rst
@@ -143,22 +143,32 @@ Preparing Pull Requests
      $ pre-commit install
 
    ``pre-commit`` automatically beautifies the code, makes it more
-   maintainable and catches syntax errors. Afterwards ``pre-commit`` will run whenever you commit.
-
-   https://pre-commit.com/ is a framework for managing and maintaining multi-language pre-commit
-   hooks to ensure code-style and code formatting is consistent.
+   maintainable and catches syntax errors. Afterwards ``pre-commit`` will run
+   whenever you commit.
 
    Now you have an environment called ``climpred-dev`` that you can work in.
    You’ll need to make sure to activate that environment next time you want
    to use it after closing the terminal or your system.
 
-   You can now edit your local working copy and run/add tests as necessary. Please try
-   to follow PEP-8 for naming. When committing, ``pre-commit`` will modify the files as
-   needed, or will generally be quite clear about what you need to do to pass the
-   commit test.
+   You can now edit your local working copy and run/add tests as necessary.
+   Please try to follow
+   `PEP-8 <https://www.python.org/dev/peps/pep-0008/#naming-conventions>`_ for
+   naming. When committing, ``pre-commit`` will modify the files as
+   needed, or will generally be quite clear about what you need to do to pass
+   the commit test.
+
+   ``pre-commit`` also runs:
+    * `mypy <http://mypy-lang.org/>`_ for static type checking on
+      `type hints <https://docs.python.org/3/library/typing.html>`_.
+    * `doc8 <https://github.com/PyCQA/doc8>`_ for ``.rst`` files
+    * `isort <https://pycqa.github.io/isort/>`_ sorting imports
+    * `black <https://black.readthedocs.io/en/stable/>`_ code formatting
+    * `flake8 <https://flake8.pycqa.org/en/latest/>`_
+    * `pydocstyle <https://github.com/pycqa/pydocstyle>`_ docstring style
+      checker
+    * `blackdoc <https://blackdoc.readthedocs.io/en/latest/>`_ docstring code
+      formatter
 
-   ``pre-commit`` also runs `mypy <http://mypy-lang.org/>`_ for static type checking on
-   `type hints <https://docs.python.org/3/library/typing.html>`_.
 
 #. Break your edits up into reasonably sized commits::
 
@@ -173,27 +183,33 @@ Preparing Pull Requests
 
     $ pytest climpred
 
-   Check that `doctests <https://docs.pytest.org/en/stable/doctest.html>`_ are passing::
+   Check that `doctests <https://docs.pytest.org/en/stable/doctest.html>`_ are
+   passing::
 
     $ pytest --doctest-modules climpred --ignore climpred/tests
 
-   Check that your contribution is covered by tests and therefore increases the overall test coverage::
+   Check that your contribution is covered by tests and therefore increases
+   the overall test coverage::
 
     $ coverage run --source climpred -m py.test
     $ coverage report
     $ coveralls
 
-   Please stick to `xarray <http://xarray.pydata.org/en/stable/contributing.html>`_'s testing recommendations.
+   Please stick to
+   `xarray <http://xarray.pydata.org/en/stable/contributing.html>`_'s testing
+   recommendations.
 
 #. Running the performance test suite
 
-   If you considerably changed to core of code of ``climpred``, it is worth considering
-   whether your code has introduced performance regressions. ``climpred`` has a suite of
-   benchmarking tests using `asv <https://asv.readthedocs.io/en/stable/>`_
-   to enable easy monitoring of the performance of critical ``climpred`` operations.
-   These benchmarks are all found in the ``asv_bench`` directory.
+   If you considerably changed the core code of ``climpred``, it is worth
+   considering whether your code has introduced performance regressions.
+   ``climpred`` has a suite of benchmarking tests using
+   `asv <https://asv.readthedocs.io/en/stable/>`_
+   to enable easy monitoring of the performance of critical ``climpred``
+   operations. These benchmarks are all found in the ``asv_bench`` directory.
 
-   If you need to run a benchmark, change your directory to ``asv_bench/`` and run::
+   If you need to run a benchmark, change your directory to ``asv_bench/`` and
+   run::
 
       $ asv continuous -f 1.1 upstream/main HEAD
 
@@ -202,11 +218,12 @@ Preparing Pull Requests
    The command uses ``conda`` by default for creating the benchmark
    environments.
 
-   Running the full benchmark suite can take up to half an hour and use up a few GBs of
-   RAM. Usually it is sufficient to paste only a subset of the results into the pull
-   request to show that the committed changes do not cause unexpected performance
-   regressions.  You can run specific benchmarks using the ``-b`` flag, which
-   takes a regular expression.  For example, this will only run tests from a
+   Running the full benchmark suite can take up to half an hour and use up a
+   few GBs of RAM. Usually it is sufficient to paste only a subset of the
+   results into the pull request to show that the committed changes do not
+   cause unexpected performance regressions.  You can run specific benchmarks
+   using the ``-b`` flag, which takes a regular expression.  For example, this
+   will only run tests from a
    ``asv_bench/benchmarks/benchmarks_perfect_model.py`` file::
 
       $ asv continuous -f 1.1 upstream/main HEAD -b ^benchmarks_perfect_model
@@ -216,21 +233,21 @@ Preparing Pull Requests
 
       $ asv continuous -f 1.1 upstream/main HEAD -b benchmarks_perfect_model.Compute.time_bootstrap_perfect_model
 
-   will only run the ``time_bootstrap_perfect_model`` benchmark of class ``Compute``
-   defined in ``benchmarks_perfect_model.py``.
+   will only run the ``time_bootstrap_perfect_model`` benchmark of class
+   ``Compute`` defined in ``benchmarks_perfect_model.py``.
 
 #. Create a new changelog entry in ``CHANGELOG.rst``:
 
-   - The entry should be entered as:
+   The entry should be entered as:
 
    ``<description>`` (``:pr:`#<pull request number>```) ```<author's names>`_``
 
-   where ``<description>`` is the description of the PR related to the change and
-   ``<pull request number>`` is the pull request number and ``<author's names>`` are your first
-   and last names.
+   where ``<description>`` is the description of the PR related to the change
+   and ``<pull request number>`` is the pull request number and
+   ``<author's names>`` are your first and last names.
 
-   - Add yourself to list of authors at the end of ``CHANGELOG.rst`` file if not there yet, in
-   alphabetical order.
+   Add yourself to the list of authors at the end of the ``CHANGELOG.rst`` file if
+   not there yet, in alphabetical order.
 
 #. Add yourself to the `contributors <https://climpred.readthedocs.io/en/latest/contributors.html>`_ list via ``docs/source/contributors.rst``.
 
@@ -242,6 +259,6 @@ Preparing Pull Requests
     base-fork: pangeo-data/climpred
     base: main
 
-Note that you can create the `Pull Request <https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests>`_ while you're working on this. The PR will update
-as you add more commits. ``climpred`` developers and contributors can then review your code
-and offer suggestions.
+Note that you can create the ``Pull Request`` while you're working on this.
+The PR will update as you add more commits. ``climpred`` developers and
+contributors can then review your code and offer suggestions.

From 59d51133d44cd39dbcf38906e96708879dcb1fb8 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 17:19:11 +0100
Subject: [PATCH 29/56] settingupdata

---
 docs/source/setting-up-data.rst | 70 ++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 27 deletions(-)

diff --git a/docs/source/setting-up-data.rst b/docs/source/setting-up-data.rst
index f3227bdd3..e6acbefa0 100644
--- a/docs/source/setting-up-data.rst
+++ b/docs/source/setting-up-data.rst
@@ -2,43 +2,59 @@
 Setting Up Your Dataset
 ***********************
 
-``climpred`` relies on a consistent naming system for ``xarray`` dimensions.
+``climpred`` relies on a consistent naming system for
+`xarray <https://xarray.pydata.org/en/stable/>`_ dimensions.
 This allows things to run more easily under-the-hood.
 
 **Prediction ensembles** are expected at the minimum to contain dimensions
-``init`` and ``lead``. ``init`` is the initialization dimension, that relays the time
-steps at which the ensemble was initialized. ``init`` must be of type
-``pd.DatetimeIndex``, or ``xr.cftimeIndex``. If ``init`` is of type ``int``, it is assumed to
-be annual data. A user warning is issues when this assumption is made.
+``init`` and ``lead``.
 
-``lead`` is the lead time of the forecasts from initialization. The units for the ``lead``
-dimension must be specified in as an attribute. Valid options are
-``years, seasons, months, weeks, pentads, days, hours, minutes, seconds``.
-If ``lead`` is provided as ``pd.Timedelta`` up to weeks, ``lead`` is converted to
-``int`` and a corresponding ``lead.attrs['units']``. For larger ``lead`` as
-``pd.Timedelta`` (months or years), there is no conversion possible.
+``init`` is the initialization dimension, that relays the time
+steps at which the ensemble was initialized.
+``init`` is known as ``forecast_reference_time`` in the `CF convention <http://cfconventions.org/Data/cf-standard-names/77/build/cf-standard-name-table.html>`_.
+``init`` must be of type :py:class:`~pandas.DatetimeIndex`, or
+:py:class:`~xarray.CFTimeIndex`.
+If ``init`` is of type ``int``, it is assumed to be annual data starting Jan 1st.
+A UserWarning is issued when this assumption is made.
 
-``valid_time=init+lead`` will be calculated in ``PredictionEnsemble`` upon instantiation.
+``lead`` is the lead time of the forecasts from initialization.
+``lead`` is known as ``forecast_period`` in the `CF convention <http://cfconventions.org/Data/cf-standard-names/77/build/cf-standard-name-table.html>`_.
+``lead`` must be ``int`` or ``float``.
+The units for the ``lead`` dimension must be specified as an attribute.
+Valid options are ``["years", "seasons", "months"]`` and
+``["weeks", "pentads", "days", "hours", "minutes", "seconds"]``.
+If ``lead`` is provided as :py:class:`~pandas.Timedelta` up to ``"weeks"``, ``lead``
+is converted to ``int`` and a corresponding ``lead.attrs["units"]``.
+For larger ``lead`` as :py:class:`~pandas.Timedelta` ("months", "seasons" or "years"),
+no conversion is possible.
+
+``valid_time=init+lead`` will be calculated in
+:py:class:`~climpred.classes.PredictionEnsemble` upon instantiation.
+
+Another crucial dimension is ``member``, which holds the various ensemble members
+and is only required for probabilistic metrics. ``member`` is known as
+``realization`` in the `CF convention <http://cfconventions.org/Data/cf-standard-names/77/build/cf-standard-name-table.html>`_.
 
-Another crucial dimension is ``member``, which holds the various ensemble members.
 Any additional dimensions will
-be passed through ``climpred`` without issue: these could be things like ``lat``,
-``lon``, ``depth``, etc.
+be broadcasted: these could be dimensions like ``lat``, ``lon``, ``depth``, etc.
 
-If the expected dimensions are not found, but the matching ``standard_name`` in a
-coordinate attribute, the dimension is renamed to the corresponding ``climpred``
-ensemble dimension.
+If the expected dimensions are not found, but the matching `CF convention <http://cfconventions.org/Data/cf-standard-names/77/build/cf-standard-name-table.html>`_
+``standard_name`` is found in a coordinate attribute, the dimension is renamed to the
+corresponding ``climpred`` ensemble dimension.
 
 Check out the demo to setup a ``climpred``-ready prediction ensemble
-`from your own data <examples/misc/setup_your_own_data.html>`_ or via `intake-esm <https://intake-esm.readthedocs.io/>`_ from `CMIP DCPP <examples/misc/setup_your_own_data.html#intake-esm-for-cmorized-output>`_.
+`from your own data <examples/misc/setup_your_own_data.html>`_ or via
+`intake-esm <https://intake-esm.readthedocs.io/>`_ from `CMIP DCPP <examples/misc/setup_your_own_data.html#intake-esm-for-cmorized-output>`_.
 
 **Verification products** are expected to contain the ``time`` dimension at the minimum.
 For best use of ``climpred``, their ``time`` dimension should cover the full length of
-``init`` and be the same calendar type as the accompanying prediction ensemble, if possible. The ``time`` dimension
-must be of type ``pd.DatetimeIndex`` or ``xr.cftimeIndex``. ``time`` dimension
-of type ``int`` is assumed to be annual data.  A user warning is issued when this assumption
-is made. These products can also include additional dimensions, such as ``lat``,
-``lon``, ``depth``, etc.
+``init`` and be the same calendar type as the accompanying prediction ensemble.
+The ``time`` dimension must be :py:class:`~pandas.DatetimeIndex`, or
+:py:class:`~xarray.CFTimeIndex`.
+``time`` dimension of type ``int`` is assumed to be annual data starting Jan 1st.
+A UserWarning is issued when this assumption is made.
+These products can also include additional dimensions, such as ``lat``, ``lon``,
+``depth``, etc.
 
 See the below table for a summary of dimensions used in ``climpred``, and data types
 that ``climpred`` supports for them.
@@ -53,12 +69,12 @@ that ``climpred`` supports for them.
      - `CF convention <http://cfconventions.org/Data/cf-standard-names/77/build/cf-standard-name-table.html>`_
      - Attribute(s)
    * - ``lead``
-     - ``int``, ``float`` or ``pd.Timedelta`` up to weeks
+     - ``int``, ``float`` or :py:class:`~pandas.Timedelta` up to "weeks"
      - lead timestep after initialization ``init``
      - ``forecast_period``
-     - units (str) [years, seasons, months, weeks, pentads, days, hours, minutes, seconds] if not ``pd.Timedelta``
+     - units (str) [years, seasons, months, weeks, pentads, days, hours, minutes, seconds] if not :py:class:`~pandas.Timedelta`
    * - ``init``
-     - ``pd.DatetimeIndex``, ``xr.CFTimeIndex``
+     -  :py:class:`~pandas.DatetimeIndex` or :py:class:`~xarray.CFTimeIndex`.
      - initialization as start date of experiment
      - ``forecast_reference_time``
      - None

From 1c668212ccaa318db8dfe5e7e31d1aff9c3e8cc3 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 17:39:01 +0100
Subject: [PATCH 30/56] init datasets

---
 .pre-commit-config.yaml              |  2 +-
 docs/source/initialized-datasets.rst | 58 ++++++++++++++++++++--------
 2 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fe44a6422..5bcbf7a62 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -56,7 +56,7 @@ repos:
       rev: 0.10.1
       hooks:
       -   id: doc8
-
+          #args: ["--ignore D000"]
   #        args: ["--ignore-path climpred/tests", "--max-line-length 93"]
       #- --max-line-length 93
       #- --ignore-path climpred/tests
diff --git a/docs/source/initialized-datasets.rst b/docs/source/initialized-datasets.rst
index 5b775a6bb..a7cecbaae 100644
--- a/docs/source/initialized-datasets.rst
+++ b/docs/source/initialized-datasets.rst
@@ -2,9 +2,13 @@
 Initialized Datasets
 ********************
 
-Probably the hardest part in working with ``climpred`` is getting the initialized datasets complying to the expectations and data model of ``climpred``. For names, data types and conventions of ``xr.Dataset`` dimensions and coordinates, please refer to `Setting up your Dataset <setting-up-data.html>`_.
+Probably the hardest part in working with ``climpred`` is getting the ``initialized``
+dataset to comply with the expectations and data model of ``climpred``.
+For names, data types and conventions of :py:class:`~xarray.Dataset` dimensions and
+coordinates, please refer to `Setting up your Dataset <setting-up-data.html>`_.
 
-Here, we list publicly available initialized datasets and corresponding climpred examples:
+Here, we list publicly available initialized datasets and corresponding ``climpred``
+examples:
 
 .. list-table:: List of initialized Datasets
    :widths: 25 15 40 40 25 25
@@ -18,37 +22,37 @@ Here, we list publicly available initialized datasets and corresponding climpred
      - Example
    * - DCPP
      - decadal
-     - Decadal Climate Prediction Project (DCPP) contribution to CMIP6 `Website <https://www.wcrp-climate.org/dcp-overview>`_
+     - `Decadal Climate Prediction Project (DCPP) contribution to CMIP6 <https://www.wcrp-climate.org/dcp-overview>`_
      - `ESGF <https://esgf-data.dkrz.de/search/cmip6-dkrz/>`_, `pangeo <https://pangeo-data.github.io/pangeo-cmip6-cloud/accessing_data.html#loading-an-esm-collection>`_
      - [Boer2016]_
      - `with intake-esm <examples/misc/setup_your_own_data.html#intake-esm-for-cmorized-output>`_, `Anderson <https://github.com/andersy005>`_ at NOAA's 45th CDP Workshop: `slides <https://talks.andersonbanihirwe.dev/climpred-cdpw-2020.html>`_, `Notebook <https://nbviewer.jupyter.org/github/andersy005/talks/blob/gh-pages/notebooks/climpred-demo.ipynb>`_
    * - CESM-DPLE
      - decadal
-     - Decadal Prediction Large Ensemble Project `Website <http://www.cesm.ucar.edu/projects/community-projects/DPLE/>`_
+     - `Decadal Prediction Large Ensemble Project <http://www.cesm.ucar.edu/projects/community-projects/DPLE/>`_
      - `Data <https://www.earthsystemgrid.org/dataset/ucar.cgd.ccsm4.CESM1-CAM5-DP.html>`_
      - [Yeager2018]_
      - many standard climpred `examples <quick-start.html>`_
    * - NMME
      - seasonal
-     - The North American Multimodel Ensemble: Phase-1 Seasonal-to-Interannual Prediction `Website <https://www.cpc.ncep.noaa.gov/products/NMME/>`_
+     - `The North American Multimodel Ensemble: Phase-1 Seasonal-to-Interannual Prediction <https://www.cpc.ncep.noaa.gov/products/NMME/>`_
      - `IRIDL <http://iridl.ldeo.columbia.edu/SOURCES/.Models/.NMME/>`_
      - [Kirtman2014]_
      - `seasonal SubX <examples.html#monthly-and-seasonal>`_
    * - SubX
      - subseasonal
-     - A Multimodel Subseasonal Prediction Experiment `Website <http://cola.gmu.edu/subx/>`_
-     - `IRIDL <http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/>`_
+     - `A Multimodel Subseasonal Prediction Experiment <http://cola.gmu.edu/subx/>`_
+     - `IRIDL SubX OpenDap <http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/>`_
      - [Pegion2019]_
      - `subseasonal SubX <examples.html#subseasonal>`_
    * - S2S
      - subseasonal
-     - The Subseasonal to Seasonal (S2S) Prediction Project Database `Website <http://wwww.s2sprediction.net/>`_
-     - `IRIDL <https://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/>`_, `climetlab <https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge>`_
+     - `The Subseasonal to Seasonal (S2S) Prediction Project Database <http://www.s2sprediction.net/>`_
+     - `IRIDL S2S OpenDap <https://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/>`_, `climetlab <https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge>`_
      - [Vitart2017]_
-     - `IRIDL <examples/subseasonal/daily-S2S-IRIDL.html>`_, `EWC Cloud/climetlab <examples/subseasonal/daily-S2S-ECMWF.html>`_
+     - `IRIDL example <examples/subseasonal/daily-S2S-IRIDL.html>`_, `EWC Cloud/climetlab <examples/subseasonal/daily-S2S-ECMWF.html>`_
    * - GEFS
      - weather
-     - Global Ensemble Forecast System (GEFS), `Website <https://www.ncdc.noaa.gov/data-access/model-data/model-datasets/global-ensemble-forecast-system-gefs>`_
+     - `Global Ensemble Forecast System (GEFS) <https://www.ncdc.noaa.gov/data-access/model-data/model-datasets/global-ensemble-forecast-system-gefs>`_
      - `NOAA THREDDS <https://www.ncei.noaa.gov/thredds/catalog/model-gefs-003/catalog.html>`_
      - add publication
      - `GEFS NWP <examples/NWP/NWP_GEFS_6h_forecasts.html>`_
@@ -64,12 +68,34 @@ If you find or use another publicly available initialized datasets, please consi
 References
 ##########
 
-.. [Kirtman2014] Kirtman, Ben P., et al.: The North American Multimodel Ensemble: Phase-1 seasonal-to-interannual prediction; Phase-2 toward developing intraseasonal prediction. Bull. Amer. Meteor. Soc., 2014, 95, 585–601. doi: http://dx.doi.org/10.1175/BAMS-D-12-00050.1
+.. [Kirtman2014] Kirtman, Ben P., et al.: The North American Multimodel Ensemble:
+    Phase-1 seasonal-to-interannual prediction; Phase-2 toward developing intraseasonal
+    prediction. Bull. Amer. Meteor. Soc., 2014, 95, 585–601.
+    http://dx.doi.org/10.1175/BAMS-D-12-00050.1
 
-.. [Boer2016] Boer, G. J., Smith, D. M., Cassou, C., Doblas-Reyes, F., Danabasoglu, G., Kirtman, B., Kushnir, Y., Kimoto, M., Meehl, G. A., Msadek, R., Mueller, W. A., Taylor, K. E., Zwiers, F., Rixen, M., Ruprich-Robert, Y., and Eade, R.: The Decadal Climate Prediction Project (DCPP) contribution to CMIP6, Geosci. Model Dev., 2016, 9, 3751-3777, https://doi.org/10.5194/gmd-9-3751-2016
+.. [Boer2016] Boer, G. J., Smith, D. M., Cassou, C., Doblas-Reyes, F., Danabasoglu, G.,
+    Kirtman, B., Kushnir, Y., Kimoto, M., Meehl, G. A., Msadek, R., Mueller, W. A.,
+    Taylor, K. E., Zwiers, F., Rixen, M., Ruprich-Robert, Y., and Eade, R.:
+    The Decadal Climate Prediction Project (DCPP) contribution to CMIP6,
+    Geosci. Model Dev., 2016, 9, 3751-3777, https://doi.org/10.5194/gmd-9-3751-2016
 
-.. [Vitart2017] Vitart, F., Ardilouze, C., Bonet, A., Brookshaw, A., Chen, M., Codorean, C., Déqué, M., Ferranti, L., Fucile, E., Fuentes, M., Hendon, H., Hodgson, J., Kang, H.-S., Kumar, A., Lin, H., Liu, G., Liu, X., Malguzzi, P., Mallas, I., … Zhang, L.: The Subseasonal to Seasonal (S2S) Prediction Project Database. Bulletin of the American Meteorological Society, 2017, 98(1), 163–173. doi: https://doi.org/10.1175/BAMS-D-16-0017.1
+.. [Vitart2017] Vitart, F., Ardilouze, C., Bonet, A., Brookshaw, A., Chen, M.,
+    Codorean, C., Déqué, M., Ferranti, L., Fucile, E., Fuentes, M., Hendon, H.,
+    Hodgson, J., Kang, H.-S., Kumar, A., Lin, H., Liu, G., Liu, X., Malguzzi, P.,
+    Mallas, I., … Zhang, L.: The Subseasonal to Seasonal (S2S) Prediction Project
+    Database. Bulletin of the American Meteorological Society, 2017, 98(1), 163–173.
+    https://doi.org/10.1175/BAMS-D-16-0017.1
 
-.. [Yeager2018] Yeager, S. G., Danabasoglu, G., Rosenbloom, N., Strand, W., Bates, S., Meehl, G., Karspeck, A., Lindsay, K., Long, M. C., Teng, H., & Lovenduski, N. S.: Predicting near-term changes in the Earth System: A large ensemble of initialized decadal prediction simulations using the Community Earth System Model. Bulletin of the American Meteorological Society, 2018. doi: https://doi.org/10.1175/BAMS-D-17-0098.1
+.. [Yeager2018] Yeager, S. G., Danabasoglu, G., Rosenbloom, N., Strand, W., Bates, S.,
+    Meehl, G., Karspeck, A., Lindsay, K., Long, M. C., Teng, H., & Lovenduski, N. S.:
+    Predicting near-term changes in the Earth System: A large ensemble of initialized
+    decadal prediction simulations using the Community Earth System Model.
+    Bulletin of the American Meteorological Society, 2018.
+    https://doi.org/10.1175/BAMS-D-17-0098.1
 
-.. [Pegion2019] Pegion, K., Kirtman, B. P., Becker, E., Collins, D. C., LaJoie, E., Burgman, R., Bell, R., DelSole, T., Min, D., Zhu, Y., Li, W., Sinsky, E., Guan, H., Gottschalck, J., Metzger, E. J., Barton, N. P., Achuthavarier, D., Marshak, J., Koster, R. D., … Kim, H.: The Subseasonal Experiment (SubX): A Multimodel Subseasonal Prediction Experiment. Bulletin of the American Meteorological Society, 2019, 100(10), 2043–2060. doi: https://doi.org/10.1175/BAMS-D-18-0270.1
+.. [Pegion2019] Pegion, K., Kirtman, B. P., Becker, E., Collins, D. C., LaJoie, E.,
+    Burgman, R., Bell, R., DelSole, T., Min, D., Zhu, Y., Li, W., Sinsky, E., Guan, H.,
+    Gottschalck, J., Metzger, E. J., Barton, N. P., Achuthavarier, D., Marshak, J.,
+    Koster, R. D., … Kim, H.: The Subseasonal Experiment (SubX): A Multimodel
+    Subseasonal Prediction Experiment. Bulletin of the American Meteorological Society,
+    2019, 100(10), 2043–2060. https://doi.org/10.1175/BAMS-D-18-0270.1

From 494d763f0b7d26417bd7eb340ea3833df64da0ee Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 18:10:20 +0100
Subject: [PATCH 31/56] docs

---
 CHANGELOG.rst                        | 79 ++++++++++++++++------------
 climpred/classes.py                  |  4 +-
 docs/source/api.rst                  |  2 +-
 docs/source/conf.py                  |  1 +
 docs/source/contributors.rst         |  1 -
 docs/source/helpful-links.rst        | 11 ++--
 docs/source/initialized-datasets.rst | 18 +++----
 docs/source/publications.rst         | 38 +++++++------
 docs/source/reference_forecast.rst   | 66 ++++++++++++++++-------
 docs/source/related-packages.rst     |  2 +
 docs/source/significance.rst         | 15 +++---
 11 files changed, 142 insertions(+), 95 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 6dcd2169b..f90f883eb 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -31,6 +31,7 @@ New Features
 - Upon instantiation, :py:class:`~climpred.classes.PredictionEnsemble` generates new
   2-dimensional coordinate ``valid_time`` for ``initialized`` from ``init`` and
   ``lead``, which is matched with ``time`` from ``verification`` during alignment.
+  (:issue:`575`, :pr:`675`, :pr:`678`) `Aaron Spring`_.
 
 .. :: python
 
@@ -47,7 +48,6 @@ Coordinates:
 Data variables:
     SST         (init, lead, member) float64 ...
 
-  (:issue:`575`, :pr:`675`, :pr:`678`) `Aaron Spring`_.
 - Allow ``lead`` as ``float`` also if ``calendar="360_day"`` or ``lead.attrs["units"]``
   not in ``["years","seasons","months"]``. (:issue:`564`, :pr:`675`) `Aaron Spring`_.
 - Implement :py:meth:`~climpred.classes.HindcastEnsemble.generate_uninitialized` in
@@ -62,7 +62,8 @@ Data variables:
   :py:meth:`~climpred.classes.PerfectModelEnsemble.verify`,
   :py:meth:`~climpred.classes.HindcastEnsemble.bootstrap` and
   :py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap` to group skill by
-  initializations seasonality.
+  initializations seasonality. (:issue:`635`, :pr:`690`) `Aaron Spring`_.
+
 
 .. :: python
 
@@ -91,7 +92,6 @@ Data variables:
     sst      (month, lead, model) float64 0.4127 0.3837 0.3915 ... 1.255 3.98
 >>> skill.sst.plot(hue="model", col="month", col_wrap=3)
 
-  (:issue:`635`, :pr:`690`) `Aaron Spring`_.
 - :py:meth:`~climpred.classes.HindcastEnsemble.plot_alignment` shows how forecast and
   observations are aligned based on the `alignment <alignment.html>`_ keyword.
   This may help understanding which dates are matched for the different ``alignment``
@@ -145,7 +145,7 @@ climpred v2.1.6 (2021-08-31)
 ============================
 
 Adding on to ``v2.1.5``, more bias reduction methods wrapped from
-`xclim <https://xclim.readthedocs.io/en/latest/sdba.html>`_
+`xclim <https://xclim.readthedocs.io/en/latest/sdba.html>`__
 are implemented.
 
 Bug Fixes
@@ -156,13 +156,13 @@ Bug Fixes
   (:issue:`668`, :pr:`670`) `Aaron Spring`_.
 - :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias` for ``how`` in
   ``["modified_quantile", "basic_quantile", "gamma_mapping", "normal_mapping"]``
-  from `bias_correction <https://github.com/pankajkarman/bias_correction>`_
+  from `bias_correction <https://github.com/pankajkarman/bias_correction>`__
   takes all ``member`` to create model distribution. (:pr:`667`) `Aaron Spring`_.
 
 New Features
 ------------
 - allow more `bias reduction <bias_removal.html>`_ methods wrapped from
-  `xclim <https://xclim.readthedocs.io/en/stable/sdba_api.html>`_ in
+  `xclim <https://xclim.readthedocs.io/en/stable/sdba_api.html>`__ in
   :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`:
 
     * ``how="EmpiricalQuantileMapping"``:
@@ -217,12 +217,11 @@ New Features
     * ``how="multiplicative_mean"``: correcting the mean forecast multiplicatively
     * ``how="multiplicative_std"``: correcting the standard deviation multiplicatively
 
-  Wrapped from `bias_correction <https://github.com/pankajkarman/bias_correction/blob/master/bias_correction.py>`_:
+  Wrapped from `bias_correction <https://github.com/pankajkarman/bias_correction/blob/master/bias_correction.py>`__:
 
-    * ``how="modified_quantile"``: `Reference <https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub>`_
-    * ``how="basic_quantile"``: `Reference <https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168>`_
-    * ``how="gamma_mapping"``: `Reference <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
-    * ``how="normal_mapping"``: `Reference <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
+    * ``how="modified_quantile"``: `Bai et al. 2016 <https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub>`_
+    * ``how="basic_quantile"``: `Themeßl et al. 2011 <https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168>`_
+    * ``how="gamma_mapping"`` and ``how="normal_mapping"``: `Switanek et al. 2017 <https://www.hydrol-earth-syst-sci.net/21/2649/2017/>`_
 
 - :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias` now does
   `leave-one-out cross validation <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.LeaveOneOut.html>`_
@@ -314,7 +313,7 @@ Documentation
   (:issue:`510`, :issue:`561`, :pr:`600`) `Aaron Spring`_.
 - Add `GEFS example <examples/NWP/NWP_GEFS_6h_forecasts.html>`_ for numerical weather
   prediction. (:issue:`602`, :pr:`603`) `Aaron Spring`_.
-- Add subseasonal `daily ECMWF example <examples/subseasonal/daily-S2S-ECMWF.html>`_
+- Add subseasonal `daily ECMWF example <examples/subseasonal/daily-S2S-ECMWF.html>`__
   using `climetlab <https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge>`_ to access
   hindcasts from ECMWF cloud.  (:issue:`587`, :pr:`603`) `Aaron Spring`_.
 - Add subseasonal `daily S2S example <examples/subseasonal/daily-S2S-IRIDL.html>`_
@@ -327,7 +326,7 @@ Documentation
   <https://github.com/xarray-contrib/cupy-xarray>`_ finishes 10x faster.
   (:issue:`592`, :pr:`607`) `Aaron Spring`_.
 - How to work with biweekly aggregates in ``climpred``, see
-  `daily ECMWF example <examples/subseasonal/daily-S2S-ECMWF.html#biweekly-aggregates>`_.
+  `daily ECMWF example <examples/subseasonal/daily-S2S-ECMWF.html#biweekly-aggregates>`__.
   (:issue:`625`, :pr:`630`) `Aaron Spring`_.
 
 
@@ -482,11 +481,12 @@ have been implemented based on Contingency tables. We now include an early versi
 of bias removal for :py:class:`~climpred.classes.HindcastEnsemble`.
 
 - Use math operations like ``+-*/`` with :py:class:`~climpred.classes.HindcastEnsemble`
-  and :py:class:`~climpred.classes.PerfectModelEnsemble`. See a demo of this
-  `here <prediction-ensemble-object.html#Arithmetic-Operations-with-PredictionEnsemble-Objects>`__
-  (:pr:`377`) `Aaron Spring`_.
-- Subselect data variables from ``PredictionEnsemble`` as from ``xr.Dataset``:
-  ``PredictionEnsemble[['var1', 'var3']]`` (:pr:`409`) `Aaron Spring`_.
+  and :py:class:`~climpred.classes.PerfectModelEnsemble`. See the
+  `demo <prediction-ensemble-object.html>`_ on arithmetic operations with
+  ``PredictionEnsemble`` objects. (:pr:`377`) `Aaron Spring`_.
+- Subselect data variables from :py:class:`~climpred.classes.PredictionEnsemble` as
+  from :py:class:`~xarray.Dataset`:
+  ``PredictionEnsemble[["var1", "var3"]]`` (:pr:`409`) `Aaron Spring`_.
 - Plot all datasets in :py:class:`~climpred.classes.HindcastEnsemble` or
   :py:class:`~climpred.classes.PerfectModelEnsemble` by
   :py:meth:`~climpred.classes.PredictionEnsemble.plot` if no other spatial dimensions
@@ -501,13 +501,13 @@ of bias removal for :py:class:`~climpred.classes.HindcastEnsemble`.
   dimensions and output has NaNs (in the case of land, for instance).
   (:issue:`282`, :pr:`407`) `Aaron Spring`_.
 - Allow binary forecasts at when calling
- :py:meth:`~climpred.classes.HindcastEnsemble.verify`,
+  :py:meth:`~climpred.classes.HindcastEnsemble.verify`,
   rather than needing to supply binary results beforehand. In other words,
   ``hindcast.verify(metric='bs', comparison='m2o', dim='member', logical=logical)``
   is now the same as
   ``hindcast.map(logical).verify(metric='brier_score', comparison='m2o', dim='member'``.
   (:pr:`431`) `Aaron Spring`_.
-- Check calendar types when using
+- Check ``calendar`` types when using
   :py:meth:`~climpred.classes.HindcastEnsemble.add_observations`,
   :py:meth:`~climpred.classes.HindcastEnsemble.add_uninitialized`,
   :py:meth:`~climpred.classes.PerfectModelEnsemble.add_control` to ensure that the
@@ -676,7 +676,8 @@ Internals/Minor Fixes
 - Gather ``pytest.fixture in ``conftest.py``. (:pr:`313`) `Aaron Spring`_.
 - Move ``x_METRICS`` and ``COMPARISONS`` to ``metrics.py`` and ``comparisons.py`` in
   order to avoid circular import dependencies. (:pr:`315`) `Aaron Spring`_.
-- ``asv`` benchmarks added for ``HindcastEnsemble`` (:pr:`285`) `Aaron Spring`_.
+- ``asv`` benchmarks added for :py:class:`~climpred.classes.HindcastEnsemble`
+  (:pr:`285`) `Aaron Spring`_.
 - Ignore irrelevant warnings in ``pytest`` and mark slow tests
   (:pr:`333`) `Aaron Spring`_.
 - Default ``CONCAT_KWARGS`` now in all ``xr.concat`` to speed up bootstrapping.
@@ -706,8 +707,9 @@ Documentation
 -------------
 - Added demo to setup your own raw model output compliant to ``climpred``
   (:pr:`296`) `Aaron Spring`_. See (`here <examples/misc/setup_your_own_data.html>`__).
-- Added demo using ``intake-esm`` with ``climpred`` (:pr:`296`) `Aaron Spring`_.
-  See (`here <examples/misc/setup_your_own_data.html#intake-esm-for-cmorized-output>`__).
+- Added demo using ``intake-esm`` with ``climpred``.
+  See `demo <examples/misc/setup_your_own_data.html#intake-esm-for-cmorized-output>`__.
+  (:pr:`296`) `Aaron Spring`_.
 - Added `Verification Alignment <alignment.html>`_ page explaining how initializations
   are selected and aligned with verification data. (:pr:`328`) `Riley X. Brady`_.
   See (`here <alignment.html>`__).
@@ -728,15 +730,18 @@ New Features
     >>> hind = climpred.tutorial.load_dataset("CESM-DP-SST")
     >>> hind.lead.attrs["units"] = "years"
 
-- ``HindcastEnsemble`` now has ``.add_observations()`` and ``.get_observations()``
+- :py:class:`~climpred.classes.HindcastEnsemble` now has
+  :py:meth:`~climpred.classes.HindcastEnsemble.add_observations` and
+  :py:meth:`~climpred.classes.HindcastEnsemble.get_observations`
   methods. These are the same as ``.add_reference()`` and ``.get_reference()``, which
   will be deprecated eventually. The name change clears up confusion, since "reference"
-  is the appropriate name for a reference forecast, e.g. persistence. (:pr:`310`)
+  is the appropriate name for a reference forecast, e.g. ``"persistence"``. (:pr:`310`)
   `Riley X. Brady`_.
 
-- ``HindcastEnsemble`` now has ``.verify()`` function, which duplicates the
-  ``.compute_metric()`` function. We feel that ``.verify()`` is more clear and easy
-  to write, and follows the terminology of the field. (:pr:`310`) `Riley X. Brady`_.
+- :py:class:`~climpred.classes.HindcastEnsemble` now has ``.verify()`` function, which
+  duplicates the ``.compute_metric()`` function. We feel that ``.verify()`` is more
+  clear and easy to write, and follows the terminology of the field.
+  (:pr:`310`) `Riley X. Brady`_.
 
 - ``e2o`` and ``m2o`` are now the preferred keywords for comparing hindcast ensemble
   means and ensemble members to verification data, respectively. (:pr:`310`)
@@ -1002,23 +1007,26 @@ climpred v1.0.1 (2019-07-04)
 Bug Fixes
 ---------
 - Accomodate for lead-zero within the ``lead`` dimension (:pr:`196`) `Riley X. Brady`_.
-- Fix issue with adding uninitialized ensemble to ``HindcastEnsemble`` object
+- Fix issue with adding uninitialized ensemble to
+  :py:class:`~climpred.classes.HindcastEnsemble` object
   (:pr:`199`) `Riley X. Brady`_.
 - Allow ``max_dof`` keyword to be passed to ``compute_metric`` and
-  ``compute_persistence`` for ``HindcastEnsemble`` (:pr:`199`) `Riley X. Brady`_.
+  ``compute_persistence`` for :py:class:`~climpred.classes.HindcastEnsemble`.
+  (:pr:`199`) `Riley X. Brady`_.
 
 Internals/Minor Fixes
 ---------------------
 - Force ``xskillscore`` version 0.0.4 or higher to avoid ``ImportError``
   (:pr:`204`) `Riley X. Brady`_.
 - Change ``max_dfs`` keyword to ``max_dof`` (:pr:`199`) `Riley X. Brady`_.
-- Add testing for ``HindcastEnsemble`` and ``PerfectModelEnsemble`` (:pr:`199`)
-  `Riley X. Brady`_
+- Add tests for :py:class:`~climpred.classes.HindcastEnsemble` and
+  ``PerfectModelEnsemble``. (:pr:`199`) `Riley X. Brady`_
 
 climpred v1.0.0 (2019-07-03)
 ============================
 ``climpred`` v1.0.0 represents the first stable release of the package. It includes
-``HindcastEnsemble`` and ``PerfectModelEnsemble`` objects to perform analysis with.
+:py:class:`~climpred.classes.HindcastEnsemble` and ``PerfectModelEnsemble`` objects to
+perform analysis with.
 It offers a suite of deterministic and probabilistic metrics that are optimized to be
 run on single time series or grids of data (e.g., lat, lon, and depth). Currently,
 ``climpred`` only supports annual forecasts.
@@ -1074,7 +1082,8 @@ Features
     -   ``member``:  ensemble member dimension.
 - Updates ``open_dataset`` to display available dataset names when no argument is
   passed. (:pr:`123`) `Riley X. Brady`_
-- Change ``ReferenceEnsemble`` to ``HindcastEnsemble``. (:pr:`124`) `Riley X. Brady`_
+- Change ``ReferenceEnsemble`` to :py:class:`~climpred.classes.HindcastEnsemble`.
+  (:pr:`124`) `Riley X. Brady`_
 - Add probabilistic metrics to ``climpred``. (:pr:`128`) `Aaron Spring`_
 - Consolidate separate perfect-model and hindcast functions into singular functions
   (:pr:`128`) `Aaron Spring`_
@@ -1125,6 +1134,8 @@ climpred v0.1 (2018-12-20)
 
 Collaboration between Riley Brady and Aaron Spring begins.
 
+.. _`Anderson Banihirwe`: https://github.com/andersy005
+.. _`Ray Bell`: https://github.com/raybellwaves
 .. _`Riley X. Brady`: https://github.com/bradyrx
 .. _`Andrew Huang`: https://github.com/ahuang11
 .. _`Kathy Pegion`: https://github.com/kpegion
diff --git a/climpred/classes.py b/climpred/classes.py
index 2e01649dc..dca69dd86 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -649,7 +649,7 @@ def __getattr__(
         """
 
         def wrapper(*args, **kwargs):
-            """Apply arbitrary function to all datasets in ``PredictionEnsemble``.
+            """Apply arbitrary function to all datasets in :py:class:`~climpred.classes.PredictionEnsemble`.
 
             Got this from: https://stackoverflow.com/questions/41919499/
             how-to-call-undefined-methods-sequentially-in-python-class
@@ -730,7 +730,7 @@ def _construct_direct(cls, datasets, kind):
     def _apply_func(
         self, func: Callable[..., xr.Dataset], *args: Any, **kwargs: Any
     ) -> "PredictionEnsemble":
-        """Apply a function to all datasets in a ``PredictionEnsemble``."""
+        """Apply a function to all datasets in a :py:class:`~climpred.classes.PredictionEnsemble`."""
         # Create temporary copy to modify to avoid inplace operation.
         # isnt that essentially the same as .map(func)?
         datasets = self._datasets.copy()
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 6eab5a9f0..215d76e9d 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -74,7 +74,7 @@ Properties
 HindcastEnsemble
 ~~~~~~~~~~~~~~~~
 
-A ``HindcastEnsemble`` is a prediction ensemble that is initialized off of some form of
+A :py:class:`~climpred.classes.HindcastEnsemble` is a prediction ensemble that is initialized off of some form of
 observations (an assimilation, renanalysis, etc.). Thus, it is anticipated that
 forecasts are verified against observation-like products. Read more about the
 terminology `here <terminology.html>`_.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index f274b3ee8..5d1eeb083 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -94,6 +94,7 @@
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
     "xarray": ("https://xarray.pydata.org/en/stable/", None),
+    "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None),
     "numpy": ("https://docs.scipy.org/doc/numpy/", None),
     "xskillscore": ("https://xskillscore.readthedocs.io/en/stable", None),
     "xclim": ("https://xclim.readthedocs.io/en/latest/", None),
diff --git a/docs/source/contributors.rst b/docs/source/contributors.rst
index 3d07fa0b2..4ba5fcfb3 100644
--- a/docs/source/contributors.rst
+++ b/docs/source/contributors.rst
@@ -15,7 +15,6 @@ Contributors
   `good first issue <https://github.com/pangeo-data/climpred/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22>`_
   tag in the issues. Please reach out to us via `gitter <https://gitter.im/climpred>`_.
 
-
 Core Developers
 ===============
 * Aaron Spring (`github <https://github.com/aaronspring/>`__)
diff --git a/docs/source/helpful-links.rst b/docs/source/helpful-links.rst
index ea897424d..756c5c8de 100644
--- a/docs/source/helpful-links.rst
+++ b/docs/source/helpful-links.rst
@@ -2,13 +2,14 @@
 Helpful Links
 *************
 
-We hope to curate in the ``climpred`` documentation a comprehensive report of terminology, best
-practices, analysis methods, etc. in the prediction community. Here we suggest other resources for
-initialized prediction of the Earth system to round out the information provided in our
-documentation.
+We hope to curate in the ``climpred`` documentation a comprehensive report of
+terminology, best practices, analysis methods, etc. in the prediction community.
+Here we suggest other resources for initialized prediction of the Earth system to round
+out the information provided in our documentation.
 
 Forecast Verification
 #####################
 
 * `CAWCR Forecast Verification Overview <https://www.cawcr.gov.au/projects/verification/>`_:
-  A nice overview of forecast verification, including a suite of metrics and their derivation.
+  A nice overview of forecast verification, including a suite of metrics and their
+  derivation.
diff --git a/docs/source/initialized-datasets.rst b/docs/source/initialized-datasets.rst
index a7cecbaae..4245f7b5d 100644
--- a/docs/source/initialized-datasets.rst
+++ b/docs/source/initialized-datasets.rst
@@ -24,32 +24,32 @@ examples:
      - decadal
      - `Decadal Climate Prediction Project (DCPP) contribution to CMIP6 <https://www.wcrp-climate.org/dcp-overview>`_
      - `ESGF <https://esgf-data.dkrz.de/search/cmip6-dkrz/>`_, `pangeo <https://pangeo-data.github.io/pangeo-cmip6-cloud/accessing_data.html#loading-an-esm-collection>`_
-     - [Boer2016]_
+     - Boer2016_
      - `with intake-esm <examples/misc/setup_your_own_data.html#intake-esm-for-cmorized-output>`_, `Anderson <https://github.com/andersy005>`_ at NOAA's 45th CDP Workshop: `slides <https://talks.andersonbanihirwe.dev/climpred-cdpw-2020.html>`_, `Notebook <https://nbviewer.jupyter.org/github/andersy005/talks/blob/gh-pages/notebooks/climpred-demo.ipynb>`_
    * - CESM-DPLE
      - decadal
      - `Decadal Prediction Large Ensemble Project <http://www.cesm.ucar.edu/projects/community-projects/DPLE/>`_
      - `Data <https://www.earthsystemgrid.org/dataset/ucar.cgd.ccsm4.CESM1-CAM5-DP.html>`_
-     - [Yeager2018]_
+     - Yeager2018_
      - many standard climpred `examples <quick-start.html>`_
    * - NMME
      - seasonal
      - `The North American Multimodel Ensemble: Phase-1 Seasonal-to-Interannual Prediction <https://www.cpc.ncep.noaa.gov/products/NMME/>`_
-     - `IRIDL <http://iridl.ldeo.columbia.edu/SOURCES/.Models/.NMME/>`_
-     - [Kirtman2014]_
+     - `IRIDL <http://iridl.ldeo.columbia.edu/SOURCES/.Models/.NMME/>`__
+     - Kirtman2014_
      - `seasonal SubX <examples.html#monthly-and-seasonal>`_
    * - SubX
      - subseasonal
      - `A Multimodel Subseasonal Prediction Experiment <http://cola.gmu.edu/subx/>`_
-     - `IRIDL SubX OpenDap <http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/>`_
-     - [Pegion2019]_
+     - `IRIDL <http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/>`__
+     - Pegion2019_
      - `subseasonal SubX <examples.html#subseasonal>`_
    * - S2S
      - subseasonal
      - `The Subseasonal to Seasonal (S2S) Prediction Project Database <http://wwww.s2sprediction.net/>`_
-     - `IRIDL S2S OpenDap <https://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/>`_, `climetlab <https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge>`_
-     - [Vitart2017]_
-     - `IRIDL example <examples/subseasonal/daily-S2S-IRIDL.html>`_, `EWC Cloud/climetlab <examples/subseasonal/daily-S2S-ECMWF.html>`_
+     - `IRIDL <https://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/>`__, `climetlab <https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge>`_
+     - Vitart2017_
+     - `IRIDL <examples/subseasonal/daily-S2S-IRIDL.html>`_, `EWC Cloud/climetlab <examples/subseasonal/daily-S2S-ECMWF.html>`_
    * - GEFS
      - weather
      - `Global Ensemble Forecast System (GEFS) <https://www.ncdc.noaa.gov/data-access/model-data/model-datasets/global-ensemble-forecast-system-gefs>`_
diff --git a/docs/source/publications.rst b/docs/source/publications.rst
index 238c04681..80baeda23 100644
--- a/docs/source/publications.rst
+++ b/docs/source/publications.rst
@@ -2,30 +2,34 @@
 Publications Using ``climpred``
 *******************************
 
-Below is a list of publications that have made use of ``climpred`` in their analysis. You can nod
-to ``climpred``, e.g., in your acknowledgements section to help build the community. The main
-developers of the package intend to release a manuscript documenting ``climpred`` in 2020 with a
-citable DOI, so this can be referenced in the future.
+Below is a list of publications that have made use of ``climpred`` in their analysis.
+We appreciate a reference to ``climpred``, e.g., in your acknowledgements section to
+help build the community. Please also cite:
 
-Feel free to open a `Pull Request <contributing.html>`_ to add your publication to the list!
+* Brady, R. X., & Spring, A. (2021). "climpred: Verification of weather and climate
+  forecasts". *Journal of Open Source Software*, 6(59), 2781. https://doi.org/10/gh9646
+
+
+Feel free to open a `Pull Request <contributing.html>`_ to add your publication to the
+list!
 
 2021
 ####
 
-* Spring, A., Dunkl, I., Li, H., Brovkin, V., & Ilyina, T. (2021). Trivial improvements
-  of predictive skill due to direct reconstruction of global carbon cycle.
-  Earth System Dynamics Discussions, 1–36. https://doi.org/10/gh3tn3
-
+* Spring, A., Dunkl, I., Li, H., Brovkin, V., & Ilyina, T. (2021).
+  "Trivial improvements in predictive skill due to direct reconstruction of the global
+  carbon cycle." Earth System Dynamics, 12(4), 1139–1167. https://doi.org/10/gnjh74
 
 2020
 ####
 
-* Brady, R.X., Lovenduski, N.S., Yeager, S.G., Long, M.C., Lindsay, K (2020). Skillful multiyear
-  predictions of ocean acidification in the California Current System. *Nature Communications*,
-  11, 2166. https://doi.org/10.1038/s41467-020-15722-x
-* Spring, A., Ilyina, T. (2020). Predictability horizons in theglobal carbon cycle inferred
-  from a perfect-model framework. *Geophysical Research Letters*, 47, e2019GL085311.
-  https://doi.org/10.1029/2019GL085311
-* Krumhardt, K. M., Lovenduski, N. S., Long, M. C., Luo, J. Y., Lindsay, K., Yeager, S., &
-  Harrison, C. (2020). Potential Predictability of Net Primary Production in the Ocean.
+* Brady, R.X., Lovenduski, N.S., Yeager, S.G., Long, M.C., Lindsay, K (2020). Skillful
+  multiyear predictions of ocean acidification in the California Current System.
+  *Nature Communications*, 11, 2166. https://doi.org/10.1038/s41467-020-15722-x
+* Spring, A., Ilyina, T. (2020). Predictability horizons in the global carbon cycle
+  inferred from a perfect-model framework. *Geophysical Research Letters*, 47,
+  e2019GL085311. https://doi.org/10.1029/2019GL085311
+* Krumhardt, K. M., Lovenduski, N. S., Long, M. C., Luo, J. Y., Lindsay, K.,
+  Yeager, S., & Harrison, C. (2020).
+  "Potential Predictability of Net Primary Production in the Ocean."
   *Global Biogeochemical Cycles*, 34(6), e2020GB006531. https://doi.org/10/gg9ss8
diff --git a/docs/source/reference_forecast.rst b/docs/source/reference_forecast.rst
index 01fc34bae..658f57135 100644
--- a/docs/source/reference_forecast.rst
+++ b/docs/source/reference_forecast.rst
@@ -7,11 +7,17 @@ some simple reference forecast. ``climpred`` currently supports a several refere
 forecasts, and we are open to adding other reference forecasts. Consider opening a
 `Pull Request <contributing.html>`_.
 
-**Persistence Forecast**: Whatever is observed at the time of initialization is forecasted to
-persist into the forecast period [Jolliffe2012]_. You can compute this by passing
-``reference='persistence'`` into the ``.verify()`` and ``.bootstrap()`` method for
-:py:class:`~climpred.classes.HindcastEnsemble` and
-:py:class:`~climpred.classes.PerfectModelEnsemble` objects.
+**Persistence Forecast**: Whatever is observed at the time of initialization is
+forecasted to persist into the forecast period [Jolliffe2012]_.
+You can compute this by passing ``reference="persistence"`` into
+:py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.verify`,
+:py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`,
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.verify` and
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap`.
 
 **Damped Persistence Forecast**: (*Not Implemented*) The amplitudes of the anomalies
 reduce in time exponentially at a time scale of the local autocorrelation [Yuan2016]_.
@@ -22,9 +28,15 @@ reduce in time exponentially at a time scale of the local autocorrelation [Yuan2
 
 **Climatology**: The average values at the temporal forecast resolution (e.g., annual,
 monthly, daily) over some long period, which is usually 30 years [Jolliffe2012]_.
-You can compute this by passing ``reference='climatology'`` into the ``.verify()`` and
-``.bootstrap()`` method for :py:class:`~climpred.classes.HindcastEnsemble` and
-:py:class:`~climpred.classes.PerfectModelEnsemble` objects.
+You can compute this by passing ``reference="climatology"`` into
+:py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.verify`,
+:py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`,
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.verify` and
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap`.
 
 **Uninitialized**: *Uninitialized** ensembles are generated by perturbing initial
 conditions only at one point in the historical run.
@@ -33,23 +45,39 @@ completely different restart files) methods. Uninitialized ensembles are used to
 approximate the magnitude of internal climate variability and to confidently extract
 the forced response (ensemble mean) in the climate system. In ``climpred``, we use
 uninitialized ensembles as a baseline for how important (reoccurring) initializations
-are for lending predictability to the system. Some modeling centers (such as NCAR)
+are for lending predictability to the system.
+You can compute this by passing ``reference="uninitialized"`` into
+:py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.verify`,
+:py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.bootstrap`,
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.verify` and
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap`.
+Some modeling centers (such as NCAR)
 provide a dynamical uninitialized ensemble (the CESM Large Ensemble) along with their
-initialized prediction system (the CESM Decadal Prediction Large Ensemble). If this
-isn't available, one can approximate the unintiailized response by resampling a
+initialized prediction system (the CESM Decadal Prediction Large Ensemble).
+Use :py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.add_uninitialized` or
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.add_uninitialized`.
+If this
+isn't available, one can approximate the uninitialized response by resampling a
 control simulation.
+This could be, for example, output from an uninitialized Large Ensemble.
+You may also run :py:class:`~climpred.classes.HindcastEnsemble`
+:py:meth:`~climpred.classes.HindcastEnsemble.generate_uninitialized` or
+:py:class:`~climpred.classes.PerfectModelEnsemble`
+:py:meth:`~climpred.classes.PerfectModelEnsemble.generate_uninitialized`, which
+resamples the ``initialized`` from :py:class:`~climpred.classes.HindcastEnsemble` or
+``control`` from :py:class:`~climpred.classes.PerfectModelEnsemble` to an
+uninitialized forecast.
 
 **Random Mechanism**: (*Not Implemented*) A probability distribution is assigned to the
 possible range of the variable being forecasted, and a sequence of forecasts is
 produced by taking a sequence of independent values from that distribution
-[Jolliffe2012]_. This would be similar to computing an uninitialized forecast, using
-``reference='uninitialized'`` in :py:class:`~climpred.classes.HindcastEnsemble` and
-:py:class:`~climpred.classes.PerfectModelEnsemble` objects. For ``HindcastEnsemble``
-objects, an uninitialized ensemble has to be added through ``.add_uninitialized(...)``.
-This could be, for example, output from an uninitialized Large Ensemble.
-You may also run ``.generate_uninitialized()``, which resamples the ``initialized``
-from ``HindcastEnsemble`` or ``control`` from ``PerfectModelEnsemble`` to an
-uninitialized forecast.
+[Jolliffe2012]_. This would be similar to computing an uninitialized forecast.
 
 References
 ##########
diff --git a/docs/source/related-packages.rst b/docs/source/related-packages.rst
index 9a1a64159..6b20830b5 100644
--- a/docs/source/related-packages.rst
+++ b/docs/source/related-packages.rst
@@ -26,3 +26,5 @@ domain that are not on the list.
   Analysis and prediction of nowcasts for precipitation and weather phenomena.
 * `xskillscore <https://xskillscore.readthedocs.io>`_:
   Metrics for verifying forecasts (a key dependency to ``climpred``).
+* `doppyo <https://github.com/csiro-dcfp/doppyo>`_ with many metrics transferred to
+  `xskillscore <https://xskillscore.readthedocs.io>`_.
diff --git a/docs/source/significance.rst b/docs/source/significance.rst
index e66c8909f..6583ed7f9 100644
--- a/docs/source/significance.rst
+++ b/docs/source/significance.rst
@@ -14,9 +14,10 @@ system is skillful. Some questions that significance testing can answer are:
     - Are correlation coefficients statistically significant despite temporal and
       spatial autocorrelation?
 
-All of these questions deal with statistical significance. See below on how to use ``climpred``
-to address these questions. Please also have a look at the `significance testing
-example <examples/decadal/significance.html>`__.
+All of these questions deal with statistical significance. See below on how to use
+``climpred`` to address these questions.
+Please also have a look at the
+`significance testing example <examples/decadal/significance.html>`__.
 
 p value for temporal correlations
 #################################
@@ -26,17 +27,17 @@ For the correlation `metrics <metrics.html>`__, like
 ``climpred`` also hosts the associated p-value, like
 :py:func:`~climpred.metrics._pearson_r_p_value`,
 that this correlation is significantly different from zero.
-:py:func:`~climpred.metrics._pearson_r_eff_p_value` also incorporates the reduced degrees
-of freedom due to temporal autocorrelation. See
+:py:func:`~climpred.metrics._pearson_r_eff_p_value` also incorporates the reduced
+degrees of freedom due to temporal autocorrelation. See
 `example <examples/decadal/significance.html#p-value-for-temporal-correlations>`__.
 
 Bootstrapping with replacement
 ##############################
 
 Testing statistical significance through bootstrapping is commonly used in the field of
-climate prediction [could add some example citations here]. Bootstrapping relies on
+climate prediction. Bootstrapping relies on
 resampling the underlying data with replacement for a large number of ``iterations``, as
-proposed by the decadal prediction framework of Goddard et al. 2013 [Goddard2013]_.
+proposed by the decadal prediction framework of Goddard2013_.
 This means that the ``initialized`` ensemble is resampled with replacement along a
 dimension (``init`` or ``member``) and then that resampled ensemble is verified against
 the observations. This leads to a distribution of ``initialized`` skill. Further, a

From 36f834707defd8544cccdd98668f0dd98fa4bdf9 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 18:24:17 +0100
Subject: [PATCH 32/56] try fix

---
 .pre-commit-config.yaml                              | 1 +
 docs/source/api/climpred.comparisons.Comparison.rst  | 4 ++--
 docs/source/api/climpred.comparisons._e2c.rst        | 4 ++--
 docs/source/api/climpred.comparisons._e2o.rst        | 4 ++--
 docs/source/api/climpred.comparisons._m2c.rst        | 4 ++--
 docs/source/api/climpred.comparisons._m2e.rst        | 4 ++--
 docs/source/api/climpred.comparisons._m2m.rst        | 4 ++--
 docs/source/api/climpred.comparisons._m2o.rst        | 4 ++--
 docs/source/api/climpred.metrics.Metric.__init__.rst | 2 +-
 docs/source/api/climpred.metrics.Metric.__repr__.rst | 2 +-
 docs/source/api/climpred.metrics.Metric.rst          | 4 ++--
 11 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5bcbf7a62..ef08c7bdc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -56,6 +56,7 @@ repos:
       rev: 0.10.1
       hooks:
       -   id: doc8
+          #args: ["--ignore-path docs/source/api/climpred*.rst ."]
           #args: ["--ignore D000"]
   #        args: ["--ignore-path climpred/tests", "--max-line-length 93"]
       #- --max-line-length 93
diff --git a/docs/source/api/climpred.comparisons.Comparison.rst b/docs/source/api/climpred.comparisons.Comparison.rst
index ee964dac3..34048782f 100644
--- a/docs/source/api/climpred.comparisons.Comparison.rst
+++ b/docs/source/api/climpred.comparisons.Comparison.rst
@@ -1,5 +1,5 @@
-climpred.comparisons.Comparison
-===============================
+﻿climpred.comparisons.Comparison
+================================
 
 .. currentmodule:: climpred.comparisons
 
diff --git a/docs/source/api/climpred.comparisons._e2c.rst b/docs/source/api/climpred.comparisons._e2c.rst
index 7b301ad18..2e0d359d1 100644
--- a/docs/source/api/climpred.comparisons._e2c.rst
+++ b/docs/source/api/climpred.comparisons._e2c.rst
@@ -1,5 +1,5 @@
-climpred.comparisons.\_e2c
-==========================
+﻿climpred.comparisons.\_e2c
+===========================
 
 .. currentmodule:: climpred.comparisons
 
diff --git a/docs/source/api/climpred.comparisons._e2o.rst b/docs/source/api/climpred.comparisons._e2o.rst
index 80e16c684..d163afaa5 100644
--- a/docs/source/api/climpred.comparisons._e2o.rst
+++ b/docs/source/api/climpred.comparisons._e2o.rst
@@ -1,5 +1,5 @@
-climpred.comparisons.\_e2o
-==========================
+﻿climpred.comparisons.\_e2o
+===========================
 
 .. currentmodule:: climpred.comparisons
 
diff --git a/docs/source/api/climpred.comparisons._m2c.rst b/docs/source/api/climpred.comparisons._m2c.rst
index 9a6babf9d..fbbacc52a 100644
--- a/docs/source/api/climpred.comparisons._m2c.rst
+++ b/docs/source/api/climpred.comparisons._m2c.rst
@@ -1,5 +1,5 @@
-climpred.comparisons.\_m2c
-==========================
+﻿climpred.comparisons.\_m2c
+===========================
 
 .. currentmodule:: climpred.comparisons
 
diff --git a/docs/source/api/climpred.comparisons._m2e.rst b/docs/source/api/climpred.comparisons._m2e.rst
index 198dae63e..503218f49 100644
--- a/docs/source/api/climpred.comparisons._m2e.rst
+++ b/docs/source/api/climpred.comparisons._m2e.rst
@@ -1,5 +1,5 @@
-climpred.comparisons.\_m2e
-==========================
+﻿climpred.comparisons.\_m2e
+===========================
 
 .. currentmodule:: climpred.comparisons
 
diff --git a/docs/source/api/climpred.comparisons._m2m.rst b/docs/source/api/climpred.comparisons._m2m.rst
index dd25972a1..1dfd1d119 100644
--- a/docs/source/api/climpred.comparisons._m2m.rst
+++ b/docs/source/api/climpred.comparisons._m2m.rst
@@ -1,5 +1,5 @@
-climpred.comparisons.\_m2m
-==========================
+﻿climpred.comparisons.\_m2m
+===========================
 
 .. currentmodule:: climpred.comparisons
 
diff --git a/docs/source/api/climpred.comparisons._m2o.rst b/docs/source/api/climpred.comparisons._m2o.rst
index 9782d344c..ff8b81078 100644
--- a/docs/source/api/climpred.comparisons._m2o.rst
+++ b/docs/source/api/climpred.comparisons._m2o.rst
@@ -1,5 +1,5 @@
-climpred.comparisons.\_m2o
-==========================
+﻿climpred.comparisons.\_m2o
+===========================
 
 .. currentmodule:: climpred.comparisons
 
diff --git a/docs/source/api/climpred.metrics.Metric.__init__.rst b/docs/source/api/climpred.metrics.Metric.__init__.rst
index 938742d94..32a11e50d 100644
--- a/docs/source/api/climpred.metrics.Metric.__init__.rst
+++ b/docs/source/api/climpred.metrics.Metric.__init__.rst
@@ -1,5 +1,5 @@
 ﻿climpred.metrics.Metric.\_\_init\_\_
-====================================
+=====================================
 
 .. currentmodule:: climpred.metrics
 
diff --git a/docs/source/api/climpred.metrics.Metric.__repr__.rst b/docs/source/api/climpred.metrics.Metric.__repr__.rst
index b3265a0c1..9aec7d89a 100644
--- a/docs/source/api/climpred.metrics.Metric.__repr__.rst
+++ b/docs/source/api/climpred.metrics.Metric.__repr__.rst
@@ -1,5 +1,5 @@
 ﻿climpred.metrics.Metric.\_\_repr\_\_
-====================================
+=====================================
 
 .. currentmodule:: climpred.metrics
 
diff --git a/docs/source/api/climpred.metrics.Metric.rst b/docs/source/api/climpred.metrics.Metric.rst
index 44f78536f..1d2ddaaa1 100644
--- a/docs/source/api/climpred.metrics.Metric.rst
+++ b/docs/source/api/climpred.metrics.Metric.rst
@@ -1,5 +1,5 @@
-climpred.metrics.Metric
-=======================
+﻿climpred.metrics.Metric
+========================
 
 .. currentmodule:: climpred.metrics
 

From 3bf568012fd0c83dfdf791c6807ede935c062d0a Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 18:28:16 +0100
Subject: [PATCH 33/56] fix __repr__ doc8 error

---
 .pre-commit-config.yaml                                      | 5 -----
 docs/source/api/climpred.comparisons.Comparison.__init__.rst | 2 +-
 docs/source/api/climpred.comparisons.Comparison.__repr__.rst | 2 +-
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ef08c7bdc..110aba966 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -57,11 +57,6 @@ repos:
       hooks:
       -   id: doc8
           #args: ["--ignore-path docs/source/api/climpred*.rst ."]
-          #args: ["--ignore D000"]
-  #        args: ["--ignore-path climpred/tests", "--max-line-length 93"]
-      #- --max-line-length 93
-      #- --ignore-path climpred/tests
-
 
   - repo: https://github.com/keewis/blackdoc
     rev: v0.3.4
diff --git a/docs/source/api/climpred.comparisons.Comparison.__init__.rst b/docs/source/api/climpred.comparisons.Comparison.__init__.rst
index a208c7221..09c3bc921 100644
--- a/docs/source/api/climpred.comparisons.Comparison.__init__.rst
+++ b/docs/source/api/climpred.comparisons.Comparison.__init__.rst
@@ -1,5 +1,5 @@
 ﻿climpred.comparisons.Comparison.\_\_init\_\_
-============================================
+=============================================
 
 .. currentmodule:: climpred.comparisons
 
diff --git a/docs/source/api/climpred.comparisons.Comparison.__repr__.rst b/docs/source/api/climpred.comparisons.Comparison.__repr__.rst
index 123746e3f..45c454fad 100644
--- a/docs/source/api/climpred.comparisons.Comparison.__repr__.rst
+++ b/docs/source/api/climpred.comparisons.Comparison.__repr__.rst
@@ -1,5 +1,5 @@
 ﻿climpred.comparisons.Comparison.\_\_repr\_\_
-============================================
+=============================================
 
 .. currentmodule:: climpred.comparisons
 

From 0d1842e7114b13ab6e4095fb3dd586f775b9c48a Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 18:35:42 +0100
Subject: [PATCH 34/56] small fix

---
 climpred/classes.py | 25 ++++++++++++-----------
 climpred/metrics.py | 50 +++++++++++++++++++++------------------------
 2 files changed, 36 insertions(+), 39 deletions(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index dca69dd86..7d103cd44 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -202,11 +202,12 @@ class PredictionEnsemble:
     :py:class:`~climpred.classes.HindcastEnsemble`. This cannot be called directly by
     a user, but should house functions that both ensemble types can use.
 
-    Associated xarray.Dataset are stored in:
-    * ``PredictionEnsemble._datasets["initialized"]``
-    * ``PredictionEnsemble._datasets["uninitialized"]``
-    * ``PredictionEnsemble._datasets["control"]`` in `:py:class:`~climpred.classes.PerfectModelEnsemble`
-    * ``PredictionEnsemble._datasets[observations"]`` in :py:class:`~climpred.classes.HindcastEnsemble`
+    Associated :py:class:`~xarray.Dataset` are stored in:
+
+        * ``PredictionEnsemble._datasets["initialized"]``
+        * ``PredictionEnsemble._datasets["uninitialized"]``
+        * ``PredictionEnsemble._datasets["control"]`` in :py:class:`~climpred.classes.PerfectModelEnsemble`
+        * ``PredictionEnsemble._datasets["observations"]`` in :py:class:`~climpred.classes.HindcastEnsemble`
 
     """
 
@@ -761,11 +762,11 @@ def _apply_func(
         return self._construct_direct(datasets, kind=self.kind)
 
     def get_initialized(self) -> xr.Dataset:
-        """Return the xarray.Dataset for the initialized ensemble."""
+        """Return the :py:class:`~xarray.Dataset` for the initialized ensemble."""
         return self._datasets["initialized"]
 
     def get_uninitialized(self) -> xr.Dataset:
-        """Return the xarray.Dataset for the uninitialized ensemble."""
+        """Return the :py:class:`~xarray.Dataset` for the uninitialized ensemble."""
         return self._datasets["uninitialized"]
 
     def smooth(
@@ -1002,7 +1003,7 @@ class PerfectModelEnsemble(PredictionEnsemble):
     bootstrapping, etc.
 
     This object is built on ``xarray`` and thus requires the input object to
-    be an xarray.Dataset or xr.DataArray.
+    be a :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`.
     """
 
     def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
@@ -1118,7 +1119,7 @@ def generate_uninitialized(self) -> "PerfectModelEnsemble":
         return self._construct_direct(datasets, kind="perfect")
 
     def get_control(self) -> xr.Dataset:
-        """Return the control as an xarray.Dataset."""
+        """Return the control as an :py:class:`~xarray.Dataset`."""
         return self._datasets["control"]
 
     def verify(
@@ -1633,8 +1634,8 @@ class HindcastEnsemble(PredictionEnsemble):
     verification dataset (i.e., observations) associated with the hindcast ensemble
     for easy computation across multiple variables.
 
-    This object is built on xarray.Dataset and thus requires the input object to
-    be an xarray.Dataset or xr.DataArray.
+    This object is built on :py:class:`~xarray.Dataset` and thus requires the input object to
+    be a :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`.
     """
 
     def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
@@ -1758,7 +1759,7 @@ def add_uninitialized(
         return self._construct_direct(datasets, kind="hindcast")
 
     def get_observations(self) -> xr.Dataset:
-        """Return xarray.Dataset of the observations/verification data.
+        """Return :py:class:`~xarray.Dataset` of the observations/verification data.
 
         Returns:
             observations
diff --git a/climpred/metrics.py b/climpred/metrics.py
index 277038c3c..4a6bfbcd1 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -197,32 +197,6 @@ def _maybe_member_mean_reduce_dim(
     return forecast, dim
 
 
-def _display_metric_metadata(self: "Metric") -> str:
-    summary = "----- Metric metadata -----\n"
-    summary += f"Name: {self.name}\n"
-    summary += f"Alias: {self.aliases}\n"
-    # positively oriented
-    if self.positive:
-        summary += "Orientation: positive\n"
-    else:
-        summary += "Orientation: negative\n"
-    # probabilistic or deterministic
-    if self.probabilistic:
-        summary += "Kind: probabilistic\n"
-    else:
-        summary += "Kind: deterministic\n"
-    summary += f"Power to units: {self.unit_power}\n"
-    summary += f"long_name: {self.long_name}\n"
-    summary += f"Minimum skill: {self.minimum}\n"
-    summary += f"Maximum skill: {self.maximum}\n"
-    summary += f"Perfect skill: {self.perfect}\n"
-    summary += f"Normalize: {self.normalize}\n"
-    summary += f"Allows logical: {self.allows_logical}\n"
-    # doc
-    summary += f"Function: {self.function.__doc__}\n"
-    return summary
-
-
 class Metric:
     """Master class for all metrics."""
 
@@ -288,7 +262,29 @@ def __init__(
 
     def __repr__(self) -> str:
         """Show metadata of metric class."""
-        return _display_metric_metadata(self)
+        summary = "----- Metric metadata -----\n"
+        summary += f"Name: {self.name}\n"
+        summary += f"Alias: {self.aliases}\n"
+        # positively oriented
+        if self.positive:
+            summary += "Orientation: positive\n"
+        else:
+            summary += "Orientation: negative\n"
+        # probabilistic or deterministic
+        if self.probabilistic:
+            summary += "Kind: probabilistic\n"
+        else:
+            summary += "Kind: deterministic\n"
+        summary += f"Power to units: {self.unit_power}\n"
+        summary += f"long_name: {self.long_name}\n"
+        summary += f"Minimum skill: {self.minimum}\n"
+        summary += f"Maximum skill: {self.maximum}\n"
+        summary += f"Perfect skill: {self.perfect}\n"
+        summary += f"Normalize: {self.normalize}\n"
+        summary += f"Allows logical: {self.allows_logical}\n"
+        # doc
+        summary += f"Function: {self.function.__doc__}\n"
+        return summary
 
 
 #####################

From 8fa69e146155a5fd6117eb528673b43f21351dd0 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 18:40:28 +0100
Subject: [PATCH 35/56] reference

---
 climpred/smoothing.py | 50 +++++++++++++++++++++----------------------
 climpred/stats.py     |  4 ++--
 climpred/tutorial.py  |  2 +-
 3 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/climpred/smoothing.py b/climpred/smoothing.py
index efe35db57..7bcccc6c5 100644
--- a/climpred/smoothing.py
+++ b/climpred/smoothing.py
@@ -29,7 +29,7 @@ def spatial_smoothing_xesmf(
         ds: Contain input and output grid coordinates.
             Look for coordinates ``lon``, ``lat``, and optionally ``lon_b``,
             ``lat_b`` for conservative method. Also any coordinate which is C/F
-            compliant, .i.e. standard_name in ['longitude', 'latitude'] is allowed.
+            compliant, i.e. standard_name in ["longitude", "latitude"] is allowed.
             Shape can be 1D (Nlon,) and (Nlat,) for rectilinear grids,
             or 2D (Ny, Nx) for general curvilinear grids.
             Shape of bounds should be (N+1,) or (Ny+1, Nx+1).
@@ -37,11 +37,11 @@ def spatial_smoothing_xesmf(
             lon will equal 5 and lat will equal lon
         method: Regridding method. Options are:
 
-            - 'bilinear'
-            - 'conservative', **requires grid corner information**
-            - 'patch'
-            - 'nearest_s2d'
-            - 'nearest_d2s'
+            - "bilinear"
+            - "conservative", **requires grid corner information**
+            - "patch"
+            - "nearest_s2d"
+            - "nearest_d2s"
 
         periodic: Periodic in longitude? Defaults to ``False``.
             Only useful for global grids with non-conservative regridding.
@@ -146,20 +146,20 @@ def temporal_smoothing(
     Args:
         ds: input to be smoothed.
         tsmooth_kws: length of smoothing of timesteps.
-            Defaults to ``{'time': 4}`` (see Goddard et al. 2013).
+            Defaults to ``{"time": 4}`` (see Goddard et al. 2013).
         how: aggregation type for smoothing. Allowed: ``["mean", "sum"]``.
             Default: ``"mean"``.
         d_lon_lat_kws: leads nowhere but consistent with ``spatial_smoothing_xesmf``.
 
     Returns:
         input with ``smooth`` timesteps less and
-        labeling ``'1-(smooth-1)', '...', ...`` .
+        labeling ``"1-(smooth-1)", "...", ...`` .
 
-    Reference:
-        Goddard, L., A. Kumar, A. Solomon et al.
-        “A Verification Framework for Interannual to Decadal Predictions Experiments.”
-        Climate Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
-        https://doi.org/10/f4jjvf.
+    References:
+        * Goddard, L., A. Kumar, A. Solomon et al.
+          “A Verification Framework for Interannual to Decadal Predictions Experiments.”
+          Climate Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
+          https://doi.org/10/f4jjvf.
 
     """
     # unpack dict
@@ -205,8 +205,8 @@ def _reset_temporal_axis(
     Args:
         ds_smoothed: Smoothed dataset.
         tsmooth_kws: Keywords smoothing is performed over.
-        dim: Dimension smoothing is performed over. Defaults to ``'lead'``.
-        set_center: Whether to set new coord `{dim}_center`. Defaults to `True`.
+        dim: Dimension smoothing is performed over. Defaults to ``"lead"``.
+        set_center: Whether to set new coord `{dim}_center`. Defaults to ``True``.
 
     Returns
         Smoothed Dataset with updated labels for smoothed temporal dimension.
@@ -251,21 +251,21 @@ def smooth_goddard_2013(
         ds: input to be smoothed.
         tsmooth_kws: length of smoothing of timesteps (applies to ``lead``
             in forecast and ``time`` in verification data).
-            Default: ``{'time': 4}`` (see Goddard et al. 2013).
+            Default: ``{"time": 4}`` (see Goddard et al. 2013).
         d_lon_lat_kws: target grid for regridding.
-            Default: ``{'lon':5 , 'lat': 5}``.
+            Default: ``{"lon": 5, "lat": 5}``.
         how: aggregation type for smoothing. Allowed: ``["mean", "sum"]``.
-            Default: ``'mean'``.
-        **xesmf_kwargs: kwargs passed to `spatial_smoothing_xesmf`.
+            Default: ``"mean"``.
+        **xesmf_kwargs: kwargs passed to ``spatial_smoothing_xesmf``.
 
     Returns:
-        input with `smooth` timesteps less and labeling '1-(smooth-1)', '...' .
+        input with `smooth` timesteps less and labeling "1-(smooth-1)", "..." .
 
-    Reference:
-        Goddard, L., A. Kumar, A. Solomon et al.
-        “A Verification Framework for Interannual to Decadal Predictions Experiments.”
-        Climate Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
-        https://doi.org/10/f4jjvf.
+    References:
+        * Goddard, L., A. Kumar, A. Solomon et al.
+          “A Verification Framework for Interannual to Decadal Predictions Experiments.”
+          Climate Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
+          https://doi.org/10/f4jjvf.
     """
     # first temporal smoothing
     ds_smoothed = temporal_smoothing(ds, tsmooth_kws=tsmooth_kws)
diff --git a/climpred/stats.py b/climpred/stats.py
index b127601f0..be4ceb734 100644
--- a/climpred/stats.py
+++ b/climpred/stats.py
@@ -106,7 +106,7 @@ def dpp(
             If False, then uses Resplandy 2015 / Seferian 2018 method.
 
     Returns:
-        dpp: ds without time dimension.
+        ds without time dimension.
 
     References:
         * Boer, G. J. “Long Time-Scale Potential Predictability in an Ensemble of
@@ -141,7 +141,7 @@ def _chunking(
             number_chunks: number of chunks in the return data.
 
         Returns:
-            c: chunked ds, but with additional dimension c.
+            chunked ds but with additional dimension c.
 
         """
         if number_chunks and not chunk_length:
diff --git a/climpred/tutorial.py b/climpred/tutorial.py
index 59636711f..ecca17824 100644
--- a/climpred/tutorial.py
+++ b/climpred/tutorial.py
@@ -146,7 +146,7 @@ def load_dataset(
         kws: Keywords passed to :py:meth:`~xarray.open_dataset`.
 
     Returns:
-        The desired xarray.Dataset.
+        The desired :py:class:`~xarray.Dataset`.
 
     Examples:
         >>> from climpred.tutorial import load_dataset

From c23b309649d90a983b231f2ba2cd142313f59b13 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sat, 11 Dec 2021 23:57:34 +0100
Subject: [PATCH 36/56] fix

---
 climpred/classes.py         | 123 ++++++++++++++++++------------------
 climpred/comparisons.py     |   4 +-
 climpred/metrics.py         |   7 +-
 climpred/options.py         |   3 +-
 docs/source/api.rst         |   8 +--
 docs/source/metrics.rst     |   1 +
 docs/source/terminology.rst |  18 +++---
 7 files changed, 83 insertions(+), 81 deletions(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index 7d103cd44..1062bbec9 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -206,8 +206,10 @@ class PredictionEnsemble:
 
         * ``PredictionEnsemble._datasets["initialized"]``
         * ``PredictionEnsemble._datasets["uninitialized"]``
-        * ``PredictionEnsemble._datasets["control"]`` in :py:class:`~climpred.classes.PerfectModelEnsemble`
-        * ``PredictionEnsemble._datasets[observations"]`` in :py:class:`~climpred.classes.HindcastEnsemble`
+        * ``PredictionEnsemble._datasets["control"]`` in
+          :py:class:`~climpred.classes.PerfectModelEnsemble`
+        * ``PredictionEnsemble._datasets["observations"]`` in
+          :py:class:`~climpred.classes.HindcastEnsemble`
 
     """
 
@@ -268,7 +270,7 @@ def _groupby(self, call: str, groupby: Union[str, xr.DataArray], **kwargs: Any):
     def coords(self) -> DatasetCoordinates:
         """Return coordinates of :py:class:`~climpred.classes.PredictionEnsemble`.
 
-        Dictionary of xarray.DataArray objects corresponding to coordinate
+        Dictionary of :py:class:`~xarray.DataArray` objects corresponding to coordinate
         variables available in all PredictionEnsemble._datasets.
 
         See also:
@@ -347,8 +349,8 @@ def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
 
         Mapping from dimension names to block lengths for this dataset's data, or
         None if the underlying data is not a dask array.
-        Cannot be modified directly, but can be modified by calling .chunk().
-        Same as Dataset.chunks.
+        Cannot be modified directly, but can be modified by calling ``.chunk()``.
+        Same as :py:meth:`~xarray.Dataset.chunks`.
 
         See also:
             :py:meth:`~xarray.Dataset.chunksizes`
@@ -480,16 +482,16 @@ def plot(
             cmap: Name of matplotlib-recognized colorbar. Defaults to `viridis`
                 for :py:class:`~climpred.classes.HindcastEnsemble`
                 and ``tab10`` for :py:class:`~climpred.classes.PerfectModelEnsemble`.
-            x: Name of x-axis. Use ``'time'`` to show observations and
-                hindcasts in real time. Use ``'init'`` to see hindcasts as
-                initializations. For ``x='init'`` only initialized is shown and only
+            x: Name of x-axis. Use ``"time"`` to show observations and
+                hindcasts in real time. Use ``"init"`` to see hindcasts as
+                initializations. For ``x="init"`` only initialized is shown and only
                 works for :py:class:`~climpred.classes.HindcastEnsemble`.
 
         .. note::
             Alternatively inspect initialized datasets by
-            ``PredictionEnsemble.get_initialized()[v].plot.line(x='time')``
+            ``PredictionEnsemble.get_initialized()[v].plot.line(x="time")``
             to see ``validtime`` on x-axis or
-            ``PredictionEnsemble.get_initialized()[v].plot.line(x='init')``
+            ``PredictionEnsemble.get_initialized()[v].plot.line(x="init")``
             to see ``init`` on x-axis.
 
         Returns:
@@ -624,7 +626,7 @@ def __getitem__(self, varlist: Union[str, List[str]]) -> "PredictionEnsemble":
         """Allow subsetting variable(s) from :py:class:`~climpred.classes.PredictionEnsemble` as from xr.Dataset.
 
         Args:
-            * varlist: list of names or name of data variable(s) to subselect
+            varlist: list of names or name of data variable(s) to subselect
         """
         if isinstance(varlist, str):
             varlist = [varlist]
@@ -783,7 +785,7 @@ def smooth(
                 :py:func:`~climpred.smoothing.spatial_smoothing_xesmf` or
                 :py:func:`~climpred.smoothing.temporal_smoothing`.
                 Shortcut for Goddard et al. 2013 recommendations:
-                'goddard2013'. Defaults to None.
+                ``"goddard2013"``. Defaults to ``None``.
             how: how to smooth temporally. From Choose from ``["mean", "sum"]``.
                 Defaults to ``"mean"``.
             **xesmf_kwargs: kwargs passed to
@@ -998,12 +1000,13 @@ def _warn_if_chunked_along_init_member_time(self) -> None:
 class PerfectModelEnsemble(PredictionEnsemble):
     """An object for "perfect model" prediction ensembles.
 
-    :py:class:`~climpred.classes.PerfectModelEnsemble` is a sub-class of :py:class:`~climpred.classes.PredictionEnsemble`. It tracks
+    :py:class:`~climpred.classes.PerfectModelEnsemble` is a sub-class of
+    :py:class:`~climpred.classes.PredictionEnsemble`. It tracks
     the control run used to initialize the ensemble for easy computations,
     bootstrapping, etc.
 
     This object is built on ``xarray`` and thus requires the input object to
-    be an xarray.Dataset or :py:class:`~xarray.DataArray`.
+    be an :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`.
     """
 
     def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
@@ -1050,18 +1053,18 @@ def _vars_to_drop(self, init: bool = True) -> Tuple[List[str], List[str]]:
         """Return list of variables to drop when comparing datasets.
 
         This is useful if the two products being compared do not share the same
-        variables. I.e., if the control has ['SST'] and the initialized has
-        ['SST', 'SALT'], this will return a list with ['SALT'] to be dropped
+        variables. I.e., if the control has ["SST"] and the initialized has
+        ["SST", "SALT"], this will return a list with ["SALT"] to be dropped
         from the initialized.
 
         Args:
-          init (bool, default True):
-            If `True`, check variables on the initialized.
-            If `False`, check variables on the uninitialized.
+            init:
+                If ``True``, check variables on the initialized.
+                If ``False``, check variables on the uninitialized.
 
         Returns:
-          Lists of variables to drop from the initialized/uninitialized
-          and control Datasets.
+            Lists of variables to drop from the initialized/uninitialized
+            and control Datasets.
         """
         init_str = "initialized" if init else "uninitialized"
         init_vars = list(self._datasets[init_str])
@@ -1495,31 +1498,30 @@ def bootstrap(
                 replacement. Recommended >= 500.
             resample_dim: dimension to resample from. Defaults to `"member"``.
 
-                - 'member': select a different set of members from hind
-                - 'init': select a different set of initializations from hind
+                - "member": select a different set of members from forecast
+                - "init": select a different set of initializations from forecast
 
             sig: Significance level in percent for deciding whether
                 uninitialized and persistence beat initialized skill.
             pers_sig: If not ``None``, the separate significance level for
                 persistence. Defaults to ``None``, or the same significance as ``sig``.
-            groupby: group ``init`` before passing ``initialized``
-                to ``bootstrap``.
+            groupby: group ``init`` before passing ``initialized`` to ``bootstrap``.
             **metric_kwargs: arguments passed to ``metric``.
 
         Returns:
-            xr.Dataset with dimensions ``results`` (holding ``verify skill``, ``p``,
-            ``low_ci`` and ``high_ci``) and ``skill`` (holding ``initialized``,
-            ``persistence`` and/or ``uninitialized``):
-                * results='verify skill', skill='initialized':
+            :py:class:`~xarray.Dataset` with dimensions ``results`` (holding
+            ``verify skill``, ``p``, ``low_ci`` and ``high_ci``) and ``skill``
+            (holding ``initialized``, ``persistence`` and/or ``uninitialized``):
+                * results="verify skill", skill="initialized":
                     mean initialized skill
-                * results='high_ci', skill='initialized':
+                * results="high_ci", skill="initialized":
                     high confidence interval boundary for initialized skill
-                * results='p', skill='uninitialized':
+                * results="p", skill="uninitialized":
                     p value of the hypothesis that the
                     difference of skill between the initialized and
                     uninitialized simulations is smaller or equal to zero
                     based on bootstrapping with replacement.
-                * results='p', skill='persistence':
+                * results="p", skill="persistence":
                     p value of the hypothesis that the
                     difference of skill between the initialized and persistenceistence
                     simulations is smaller or equal to zero based on
@@ -1630,16 +1632,17 @@ def bootstrap(
 class HindcastEnsemble(PredictionEnsemble):
     """An object for initialized prediction ensembles.
 
-    :py:class:`~climpred.classes.HindcastEnsemble` is a sub-class of :py:class:`~climpred.classes.PredictionEnsemble`. It tracks a
+    :py:class:`~climpred.classes.HindcastEnsemble` is a sub-class of
+    :py:class:`~climpred.classes.PredictionEnsemble`. It tracks a
     verification dataset (i.e., observations) associated with the hindcast ensemble
     for easy computation across multiple variables.
 
-    This object is built on :py:class:`~xarray.Dataset` and thus requires the input object to
-    be an xarray.Dataset or :py:class:`~xarray.DataArray`.
+    This object is built on :py:class:`~xarray.Dataset` and thus requires the input
+    object to be an :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`.
     """
 
     def __init__(self, initialized: Union[xr.DataArray, xr.Dataset]) -> None:
-        """Create :py:class:`~climpred.classes.HindcastEnsemble` from initialized prediction ensemble output.
+        """Create ``HindcastEnsemble`` from initialized prediction ensemble output.
 
         Args:
           initialized: initialized prediction ensemble output.
@@ -1674,8 +1677,8 @@ def _vars_to_drop(self, init: bool = True) -> Tuple[List[str], List[str]]:
         When comparing initialized/uninitialized to observations.
 
         This is useful if the two products being compared do not share the same
-        variables. I.e., if the observations have ['SST'] and the initialized has
-        ['SST', 'SALT'], this will return a list with ['SALT'] to be dropped
+        variables. I.e., if the observations have ["SST"] and the initialized has
+        ["SST", "SALT"], this will return a list with ["SALT"] to be dropped
         from the initialized.
 
         Args:
@@ -1837,13 +1840,13 @@ def plot_alignment(
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
                 Defaults to ``None`` meaning no reference.
             date2num_units: passed to ``cftime.date2num`` as units
-            return_xr: if ``True`` return xarray.DataArray else plot
+            return_xr: if ``True`` return :py:class:`~xarray.DataArray` else plot
             cmap: color palette
             edgecolors: color of the edges in the plot
             **plot_kwargs: arguments passed to ``plot``.
 
-        Return:
-            xarray.DataArray if return_xr else plot
+        Returns:
+            :py:class:`~xarray.DataArray` if ``return_xr`` else plot
 
         Example:
             >>> HindcastEnsemble.plot_alignment(alignment=None, return_xr=True)
@@ -2003,7 +2006,7 @@ def verify(
             the verification (``"e2o"``) over the same initializations
             (``"same_inits"``) for all leads reducing dimension ``init`` while also
             calculating reference skill for the ``"persistence"``, ``"climatology"``
-            and ``'uninitialized'`` forecast.
+            and ``"uninitialized"`` forecast.
 
             >>> HindcastEnsemble.verify(
             ...     metric="rmse",
@@ -2213,7 +2216,7 @@ def bootstrap(
             dim: Dimension(s) to apply metric over. ``dim`` is passed
                 on to xskillscore.{metric} and includes xskillscore's ``member_dim``.
                 ``dim`` should contain ``member`` when ``comparison`` is probabilistic
-                but should not contain ``member`` when ``comparison='e2o'``. Defaults to
+                but should not contain ``member`` when ``comparison="e2o"``. Defaults to
                 ``None`` meaning that all dimensions other than ``lead`` are reduced.
             reference: Type of reference forecasts with which to verify against.
                 One or more of ``["uninitialized", "persistence", "climatology"]``.
@@ -2234,29 +2237,29 @@ def bootstrap(
                 replacement. Recommended >= 500.
             sig: Significance level in percent for deciding whether
                 uninitialized and persistence beat initialized skill.
-            resample_dim (str or list): dimension to resample from. default: 'member'.
+            resample_dim: dimension to resample from. Default: ``"member"``.
 
-                - 'member': select a different set of members from hind
-                - 'init': select a different set of initializations from hind
+                - ``"member"``: select a different set of members from hind
+                - ``"init"``: select a different set of initializations from hind
 
-            pers_sig: If not None, the separate significance level for persistence.
+            pers_sig: If not ``None``, the separate significance level for persistence.
             groupby: group ``init`` before passing ``initialized`` to ``bootstrap``.
             **metric_kwargs: arguments passed to ``metric``.
 
         Returns:
-            xr.Dataset with dimensions ``results`` (holding ``skill``, ``p``,
-            ``low_ci`` and ``high_ci``) and ``skill`` (holding ``initialized``,
+            :py:class:`~xarray.Dataset` with dimensions ``results`` (holding ``skill``,
+            ``p``, ``low_ci`` and ``high_ci``) and ``skill`` (holding ``initialized``,
             ``persistence`` and/or ``uninitialized``):
-                * results='verify skill', skill='initialized':
+                * results="verify skill", skill="initialized":
                     mean initialized skill
-                * results='high_ci', skill='initialized':
+                * results="high_ci", skill="initialized":
                     high confidence interval boundary for initialized skill
-                * results='p', skill='uninitialized':
+                * results="p", skill="uninitialized":
                     p value of the hypothesis that the
                     difference of skill between the initialized and
                     uninitialized simulations is smaller or equal to zero
                     based on bootstrapping with replacement.
-                * results='p', skill='persistence':
+                * results="p", skill="persistence":
                     p value of the hypothesis that the
                     difference of skill between the initialized and persistence
                     simulations is smaller or equal to zero based on
@@ -2397,7 +2400,7 @@ def remove_bias(
                   each lead should be based on the same set of verification dates.
 
             how: what kind of bias removal to perform.
-                Defaults to 'additive_mean'. Select from:
+                Defaults to ``"additive_mean"``. Select from:
 
                 - ``"additive_mean"``: correcting the mean forecast additively
                 - ``"multiplicative_mean"``: correcting the mean forecast
@@ -2425,16 +2428,16 @@ def remove_bias(
                     (climpred default).
                 - ``"unfair-cv"```: overlapping `train` and `test` except for current
                     `init`, which is `left out <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation>`_
-                    (set `cv='LOO'`).
+                    (set `cv="LOO"`).
 
             train_init: Define initializations for training
-                when ``alignment='same_inits/maximize'``.
+                when ``alignment="same_inits/maximize"``.
             train_time: Define time for training
-                when ``alignment='same_verif'``.
-            cv: Only relevant when `train_test_split='unfair-cv'`.
-                Defaults to False.
+                when ``alignment="same_verif"``.
+            cv: Only relevant when `train_test_split="unfair-cv"`.
+                Defaults to ``False``.
 
-                - True/'LOO': Calculate bias by `leaving given initialization out <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation>`_
+                - True/"LOO": Calculate bias by `leaving given initialization out <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation>`_
                 - False: include all initializations in the calculation of bias, which
                     is much faster and but yields similar skill with a large N of
                     initializations.
diff --git a/climpred/comparisons.py b/climpred/comparisons.py
index 76c8a79c3..50c1515e6 100644
--- a/climpred/comparisons.py
+++ b/climpred/comparisons.py
@@ -12,7 +12,7 @@
 
 
 class Comparison:
-    """Master class for all comparisons."""
+    """Master class for all comparisons. See :ref:`comparisons`."""
 
     def __init__(
         self,
@@ -23,7 +23,7 @@ def __init__(
         long_name: Optional[str] = None,
         aliases: Optional[List[str]] = None,
     ) -> None:
-        """Comparison initialization.
+        """Comparison initialization. See :ref:`comparisons`.
 
         Args:
             name: name of comparison.
diff --git a/climpred/metrics.py b/climpred/metrics.py
index 4a6bfbcd1..1ee56a74e 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -198,7 +198,7 @@ def _maybe_member_mean_reduce_dim(
 
 
 class Metric:
-    """Master class for all metrics."""
+    """Master class for all metrics. See :ref:`metrics`."""
 
     def __init__(
         self,
@@ -216,7 +216,7 @@ def __init__(
         allows_logical: bool = False,
         requires_member_dim: bool = False,
     ):
-        """Metric initialization.
+        """Metric initialization. See :ref:`metrics`.
 
         Args:
             name: name of metric.
@@ -242,9 +242,6 @@ def __init__(
                 Defaults to ``False``.
             requires_member_dim: Does xskillscore.metric expect a member dimension?
 
-        Returns:
-            Metric: metric class Metric.
-
         """
         self.name = name
         self.function = function
diff --git a/climpred/options.py b/climpred/options.py
index dac81bf40..6fd3f2ead 100644
--- a/climpred/options.py
+++ b/climpred/options.py
@@ -54,7 +54,8 @@ class set_options:
     ``climpred_warnings`` : {``True``, ``False``}, default ``True``
         Overwrites all options containing ``"*warn*"``.
 
-    Examples:
+    Examples
+    --------
         You can use ``set_options`` either as a context manager:
 
         >>> kw = dict(
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 215d76e9d..873ade31e 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -74,10 +74,10 @@ Properties
 HindcastEnsemble
 ~~~~~~~~~~~~~~~~
 
-A :py:class:`~climpred.classes.HindcastEnsemble` is a prediction ensemble that is initialized off of some form of
-observations (an assimilation, renanalysis, etc.). Thus, it is anticipated that
-forecasts are verified against observation-like products. Read more about the
-terminology `here <terminology.html>`_.
+A :py:class:`~climpred.classes.HindcastEnsemble` is a prediction ensemble that is
+initialized off of some form of observations (an assimilation, reanalysis, etc.). Thus,
+it is anticipated that forecasts are verified against observation-like products. Read
+more about the terminology `here <terminology.html>`_.
 
 .. autosummary::
     :toctree: api/
diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst
index 082efcc0a..de8da1ac5 100644
--- a/docs/source/metrics.rst
+++ b/docs/source/metrics.rst
@@ -1,6 +1,7 @@
 .. currentmodule:: climpred.metrics
 
 .. ipython:: python
+    :suppress:
 
     from climpred.metrics import __ALL_METRICS__ as all_metrics
 
diff --git a/docs/source/terminology.rst b/docs/source/terminology.rst
index bcf44202c..1a1e96b20 100644
--- a/docs/source/terminology.rst
+++ b/docs/source/terminology.rst
@@ -9,15 +9,15 @@ how we use them in ``climpred``.
 Simulation Design
 #################
 
-*Hindcast Ensemble*: Ensemble members are initialized from a simulation
-(generally a reconstruction from reanalysis) or an analysis
-(representing the current state of the atmosphere, land, and ocean by assimilation of
-obsevations) at initialization dates and integrated for some lead years
-[Boer2016]_ (:py:class:`~climpred.classes.HindcastEnsemble`).
-
-*Perfect Model Experiment*: Ensemble members are initialized from a control
-simulation at randomly chosen initialization dates and integrated for some
-lead years [Griffies1997]_ (:py:class:`~climpred.classes.PerfectModelEnsemble`).
+*Hindcast Ensemble* (:py:class:`~climpred.classes.HindcastEnsemble`):
+Ensemble members are initialized from a simulation (generally a reconstruction from
+reanalysis) or an analysis (representing the current state of the atmosphere, land, and
+ocean by assimilation of observations) at initialization dates and integrated for some
+lead years [Boer2016]_.
+
+*Perfect Model Experiment* (:py:class:`~climpred.classes.PerfectModelEnsemble`):
+Ensemble members are initialized from a control simulation at randomly chosen
+initialization dates and integrated for some lead years [Griffies1997]_.
 
 *Reconstruction/Assimilation*: A "reconstruction" is a model solution that uses
 observations in some capacity to approximate historical or current conditions of the

From 160ecb376341a13476d49dc339538ecdc7dae6f5 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 00:05:27 +0100
Subject: [PATCH 37/56] fix

---
 climpred/comparisons.py      | 20 ++++++++++----------
 docs/source/why-climpred.rst | 13 +++++++------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/climpred/comparisons.py b/climpred/comparisons.py
index 50c1515e6..c798bbb42 100644
--- a/climpred/comparisons.py
+++ b/climpred/comparisons.py
@@ -251,14 +251,14 @@ def _e2c(
 # HINDCAST COMPARISONS
 # --------------------------------------------#
 def _e2o(
-    hind: xr.Dataset, verif: xr.Dataset, metric: Metric
+    initialized: xr.Dataset, verif: xr.Dataset, metric: Optional[Metric]
 ) -> Tuple[xr.Dataset, xr.Dataset]:
     """Compare the ensemble mean forecast to the verification data.
 
     :ref:`comparisons` for :py:class:`~climpred.classes.HindcastEnsemble`
 
     Args:
-        hind: Hindcast with optional ``member`` dimension.
+        initialized: Hindcast with optional ``member`` dimension.
         verif: Verification data.
         metric: needed for probabilistic metrics. Therefore useless in ``e2o``
             comparison, but expected by internal API.
@@ -266,10 +266,10 @@ def _e2o(
     Returns:
         forecast, verification
     """
-    if "member" in hind.dims:
-        forecast = hind.mean("member")
+    if "member" in initialized.dims:
+        forecast = initialized.mean("member")
     else:
-        forecast = hind
+        forecast = initialized
     return forecast, verif
 
 
@@ -284,14 +284,14 @@ def _e2o(
 
 
 def _m2o(
-    hind: xr.Dataset, verif: xr.Dataset, metric: Metric
+    initialized: xr.Dataset, verif: xr.Dataset, metric: Metric
 ) -> Tuple[xr.Dataset, xr.Dataset]:
     """Compare each ensemble member individually to the verification data.
 
     :ref:`comparisons` for :py:class:`~climpred.classes.HindcastEnsemble`
 
     Args:
-        hind: ``initialized`` with ``member`` dimension.
+        initialized: ``initialized`` with ``member`` dimension.
         verif: Verification data.
         metric:
             If deterministic, forecast and verif both have ``member`` dim;
@@ -301,9 +301,9 @@ def _m2o(
         forecast, verification
     """
     # check that this contains more than one member
-    has_dims(hind, "member", "decadal prediction ensemble")
-    has_min_len(hind["member"], 1, "decadal prediction ensemble member")
-    forecast = hind
+    has_dims(initialized, "member", "decadal prediction ensemble")
+    has_min_len(initialized["member"], 1, "decadal prediction ensemble member")
+    forecast = initialized
     if not metric.probabilistic and "member" not in verif.dims:
         forecast, verif = xr.broadcast(
             forecast, verif, exclude=["time", "init", "lead"]
diff --git a/docs/source/why-climpred.rst b/docs/source/why-climpred.rst
index e0bf10901..a9a59af44 100644
--- a/docs/source/why-climpred.rst
+++ b/docs/source/why-climpred.rst
@@ -16,15 +16,16 @@ objects that carry products to verify against (e.g., control runs,
 reconstructions, uninitialized ensembles) along with your decadal prediction output.
 
 When computing lead-dependent skill scores, ``climpred`` handles all of the
-lag-correlating for you, properly aligning the multiple time dimensions between
-the hindcast and  verification datasets. We offer a suite of vectorized
+"init"-"lead"-"time"-matching for you, properly aligning the multiple time dimensions
+between the hindcast and verification datasets. We offer a suite of vectorized
 `deterministic <metrics.html#deterministic>`_
 and `probabilistic <metrics.html#probabilistic>`_ metrics that can be applied to time
 series and grids. It's as easy as concatenating your initialized prediction output into
-one xr.Dataset and running :py:meth:`~climpred.classes.HindcastEnsemble.verify` command:
+one :py:class:`~xarray.Dataset` and running
+:py:meth:`~climpred.classes.HindcastEnsemble.verify` command:
 
 .. :: python
 
-    >>> HindcastEnsemble.verify(
-    ...     metric="rmse", comparison="e2o", dim="init", alignment="maximize"
-    ... )
+>>> HindcastEnsemble.verify(
+...     metric="rmse", comparison="e2o", dim="init", alignment="maximize"
+... )

From ca7b4cbf868a9ed446b0c18d0aa52154d6a6b39e Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 00:10:54 +0100
Subject: [PATCH 38/56] fix

---
 docs/source/initialized-datasets.rst | 3 ++-
 docs/source/metrics.rst              | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/source/initialized-datasets.rst b/docs/source/initialized-datasets.rst
index 4245f7b5d..92a96d458 100644
--- a/docs/source/initialized-datasets.rst
+++ b/docs/source/initialized-datasets.rst
@@ -63,7 +63,8 @@ examples:
      - appreciated
      - `examples to add <https://github.com/pangeo-data/climpred/issues/602>`_
 
-If you find or use another publicly available initialized datasets, please consider adding a `Pull Request <contributing.html>`_.
+If you find or use other publicly available initialized datasets, please consider
+adding a `Pull Request <contributing.html>`_.
 
 References
 ##########
diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst
index de8da1ac5..983e6bb53 100644
--- a/docs/source/metrics.rst
+++ b/docs/source/metrics.rst
@@ -1,7 +1,7 @@
 .. currentmodule:: climpred.metrics
 
 .. ipython:: python
-    :supress:
+    :suppress:
 
     from climpred.metrics import __ALL_METRICS__ as all_metrics
 

From 79f723fb0248b403ba84541cc75d55d3bf84a96a Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 00:16:52 +0100
Subject: [PATCH 39/56] fix

---
 .pre-commit-config.yaml |  4 +++-
 climpred/classes.py     | 17 +++++++++--------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 110aba966..65f8e8fcd 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -49,13 +49,15 @@ repos:
       rev: 6.1.1
       hooks:
       -   id: pydocstyle
-          args: ['--match="(?!test_).*\.py"']
+          args: ['--match="(?!test_).*\.py"'] # ignoring tests
+
           # , "--convention=google"] # https://google.github.io/styleguide/pyguide.html#Comments
 
   -   repo: https://github.com/pycqa/doc8
       rev: 0.10.1
       hooks:
       -   id: doc8
+
           #args: ["--ignore-path docs/source/api/climpred*.rst ."]
 
   - repo: https://github.com/keewis/blackdoc
diff --git a/climpred/classes.py b/climpred/classes.py
index 1062bbec9..0bd8e9c2f 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -386,11 +386,11 @@ def __repr__(self) -> str:
             return _display_metadata(self)
 
     def __len__(self) -> int:
-        """Return number of all variables in :py:class:`~climpred.classes.PredictionEnsemble`."""
+        """Return number of all variables in ``PredictionEnsemble``."""
         return len(self.data_vars)
 
     def __iter__(self) -> Iterator[Hashable]:
-        """Iterate over underlying xarray.Datasets."""
+        """Iterate over underlying :py:class:`~xarray.Dataset` objects."""
         return iter(self._datasets.values())
 
     def __delitem__(self, key: Hashable) -> None:
@@ -415,12 +415,13 @@ def __contains__(self, key: Hashable) -> bool:
         return contained
 
     def equals(self, other: Union["PredictionEnsemble", Any]) -> bool:
-        """Check if :py:class:`~climpred.classes.PredictionEnsemble` is equal to other :py:class:`~climpred.classes.PredictionEnsemble`.
+        """Check if :py:class:`~climpred.classes.PredictionEnsemble` is equal to other
+        :py:class:`~climpred.classes.PredictionEnsemble`.
 
-        Two :py:class:`~climpred.classes.PredictionEnsemble`s are equal if they have matching variables and
-        coordinates, all of which are equal.
-        ``PredictionEnsembles`` can still be equal (like pandas objects) if they have NaN
-        values in the same locations.
+        Two :py:class:`~climpred.classes.PredictionEnsemble`s are equal if they have
+        matching variables and coordinates, all of which are equal.
+        ``PredictionEnsembles`` can still be equal (like pandas objects) if they have
+        NaN values in the same locations.
         This method is necessary because `v1 == v2` for ``PredictionEnsembles``
         does element-wise comparisons (like numpy.ndarrays).
 
@@ -443,7 +444,7 @@ def equals(self, other: Union["PredictionEnsemble", Any]) -> bool:
 
     def identical(self, other: Union["PredictionEnsemble", Any]) -> bool:
         """
-        Check if :py:class:`~climpred.classes.PredictionEnsemble` is identical to other :py:class:`~climpred.classes.PredictionEnsemble`.
+        Check if :py:class:`~climpred.classes.PredictionEnsemble` is identical to other.
 
         Like ``equals``, but also checks all dataset attributes and the
         attributes on all variables and coordinates.

From 0db71607735bc79bf4a1e5e09e2f5555b0247b2f Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 00:23:40 +0100
Subject: [PATCH 40/56] flake8 on docs

---
 CHANGELOG.rst       | 8 ++++----
 climpred/classes.py | 2 +-
 climpred/metrics.py | 5 +++--
 setup.cfg           | 6 +-----
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index f90f883eb..07e5033ca 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -300,10 +300,10 @@ New Features
   ``warn_for_rename_to_climpred_dims``, ``warn_for_init_coords_int_to_annual``,
   ``climpred_warnings`` via :py:class:`~climpred.options.set_options`.
   (:issue:`628`, :pr:`631`) `Aaron Spring`_.
-- :py:class:`~climpred.classes.PredictionEnsemble` acts like ``xr.Dataset`` and
-  understands ``data_vars``, ``dims``, ``sizes``, ``coords``, ``nbytes``,
-  ``equals``, ``identical``, ``__iter__``, ``__len__``, ``__contains__``,
-  ``__delitem__``. (:issue:`568`, :pr:`632`) `Aaron Spring`_.
+- :py:class:`~climpred.classes.PredictionEnsemble` acts like
+  :py:class:`~xarray.Dataset` and understands ``data_vars``, ``dims``, ``sizes``,
+  ``coords``, ``nbytes``, ``equals``, ``identical``, ``__iter__``, ``__len__``,
+  ``__contains__``, ``__delitem__``. (:issue:`568`, :pr:`632`) `Aaron Spring`_.
 
 
 Documentation
diff --git a/climpred/classes.py b/climpred/classes.py
index 0bd8e9c2f..de47c656c 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -1763,7 +1763,7 @@ def add_uninitialized(
         return self._construct_direct(datasets, kind="hindcast")
 
     def get_observations(self) -> xr.Dataset:
-        """Return :py:class:`~xarray.Dataset` of the observations/verification data.
+        """Return the :py:class:`~xarray.Dataset` of the observations/verification data.
 
         Returns:
             observations
diff --git a/climpred/metrics.py b/climpred/metrics.py
index 1ee56a74e..fb6aa9ca5 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -241,6 +241,7 @@ def __init__(
                 passed in metric_kwargs? Some probabilistic metrics allow this.
                 Defaults to ``False``.
             requires_member_dim: Does xskillscore.metric expect a member dimension?
+                Defaults to ``False``.
 
         """
         self.name = name
@@ -3690,8 +3691,8 @@ def _rps(
             category_edges:                [-0.5  0.   0.5  1. ]
 
 
-        Provide ``category_edges`` as ``xr.Dataset`` for category edges varying along
-        dimensions.
+        Provide ``category_edges`` as :py:class:`~xarray.Dataset` for category edges
+        varying along dimensions.
 
         >>> category_edges = (
         ...     xr.DataArray([9.5, 10.0, 10.5, 11.0], dims="category_edge")
diff --git a/setup.cfg b/setup.cfg
index c1ace186a..5ed02f633 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,7 +2,7 @@
 universal = 1
 
 [flake8]
-exclude = docs
+# exclude = docs
 ignore = E203,E266,E501,W503,F401,W605,E402,C901
 max-line-length = 88
 max-complexity = 18
@@ -55,10 +55,6 @@ markers =
 
 [aliases]
 test = pytest
-# doctest = pytest --doctest-modules climpred --ignore climpred/tests
-
-[pydocstyle]
-
 
 [doc8]
 max-line-length=93

From 40ebd2efd40315417c95e59adcf58649ee0f9da6 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 00:27:04 +0100
Subject: [PATCH 41/56] fix

---
 climpred/classes.py | 4 ++--
 setup.cfg           | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index de47c656c..c9fdc8d7d 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -907,11 +907,11 @@ def smooth(
     def remove_seasonality(
         self, seasonality: Union[None, str] = None
     ) -> "PredictionEnsemble":
-        """Remove seasonal cycle from all climpred datasets.
+        """Remove seasonal cycle from :py:class:`~climpred.classes.PredictionEnsemble`.
 
         Args:
             seasonality: Seasonality to be removed. Choose from:
-                ``["season", "month", "dayofyear"]``.
+                ``["season", "month", "weekofyear", "dayofyear"]``.
                 Defaults to ``OPTIONS["seasonality"]``.
 
         Examples:
diff --git a/setup.cfg b/setup.cfg
index 5ed02f633..2c40121c9 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,7 +2,6 @@
 universal = 1
 
 [flake8]
-# exclude = docs
 ignore = E203,E266,E501,W503,F401,W605,E402,C901
 max-line-length = 88
 max-complexity = 18
@@ -57,7 +56,7 @@ markers =
 test = pytest
 
 [doc8]
-max-line-length=93
+max-line-length=88
 ignore-path=climpred/tests
 
 [mypy]

From dc87401dc93cca0016339e1e5dc418a0a5a5aa39 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 00:38:39 +0100
Subject: [PATCH 42/56] refactor hind -> initialized

---
 climpred/classes.py    |  4 ++--
 climpred/prediction.py | 43 ++++++++++++++++++++++--------------------
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index c9fdc8d7d..6790eae85 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -2094,7 +2094,7 @@ def _verify(
                     verif,
                     verif_dates,
                     lead,
-                    hind=forecast,
+                    initialized=forecast,
                     hist=hist,
                     inits=inits,
                     # Ensure apply metric function returns skill and not reference
@@ -2119,7 +2119,7 @@ def _verify(
                             verif,
                             verif_dates,
                             lead,
-                            hind=forecast,
+                            initialized=forecast,
                             hist=hist,
                             inits=inits,
                             reference=r,
diff --git a/climpred/prediction.py b/climpred/prediction.py
index 9cf904a59..f101158c0 100644
--- a/climpred/prediction.py
+++ b/climpred/prediction.py
@@ -36,7 +36,7 @@ def _apply_metric_at_given_lead(
     verif,
     verif_dates,
     lead,
-    hind=None,
+    initialized=None,
     hist=None,
     inits=None,
     reference=None,
@@ -51,7 +51,8 @@ def _apply_metric_at_given_lead(
         verif (xr.Dataset): Verification data.
         verif_dates (dict): Lead-dependent verification dates for alignment.
         lead (int): Given lead to score.
-        hind (xr.Dataset): Initialized hindcast. Not required in a persistence forecast.
+        initialized (xr.Dataset): Initialized hindcast. Not required in a persistence
+            forecast.
         hist (xr.Dataset): Uninitialized/historical simulation. Required when
             ``reference='uninitialized'``.
         inits (dict): Lead-dependent initialization dates for alignment.
@@ -70,7 +71,9 @@ def _apply_metric_at_given_lead(
     if reference is None:
         # Use `.where()` instead of `.sel()` to account for resampled inits when
         # bootstrapping.
-        lforecast = hind.sel(lead=lead).where(hind["time"].isin(inits[lead]), drop=True)
+        lforecast = initialized.sel(lead=lead).where(
+            initialized["time"].isin(inits[lead]), drop=True
+        )
         lverif = verif.sel(time=verif_dates[lead])
     elif reference == "persistence":
         lforecast, lverif = persistence(verif, inits, verif_dates, lead)
@@ -88,7 +91,7 @@ def _apply_metric_at_given_lead(
     ]  # a bit dangerous: what if different? more clear once implemented
     # https://github.com/pangeo-data/climpred/issues/523#issuecomment-728951645
     dim = _rename_dim(
-        dim, hind, verif
+        dim, initialized, verif
     )  # dim should be much clearer once time in initialized.coords
     if metric.normalize or metric.allows_logical:
         metric_kwargs["comparison"] = comparison
@@ -97,7 +100,7 @@ def _apply_metric_at_given_lead(
     log_compute_hindcast_inits_and_verifs(dim, lead, inits, verif_dates, reference)
     # push time (later renamed to init) back by lead
     if "time" in result.dims:
-        n, freq = get_lead_cftime_shift_args(hind.lead.attrs["units"], lead)
+        n, freq = get_lead_cftime_shift_args(initialized.lead.attrs["units"], lead)
         result = result.assign_coords(time=shift_cftime_singular(result.time, -n, freq))
     return result
 
@@ -140,7 +143,7 @@ def _get_metric_comparison_dim(initialized, metric, comparison, dim, kind):
     """Return `metric`, `comparison` and `dim` for compute functions.
 
     Args:
-        initialized (xr.object): initialized dataset: init_pm or hind
+        initialized (xr.Dataset): initialized dataset
         metric (str): metric or alias string
         comparison (str): Description of parameter `comparison`.
         dim (list of str or str): dimension to apply metric to.
@@ -206,7 +209,7 @@ def _get_metric_comparison_dim(initialized, metric, comparison, dim, kind):
 
 
 def compute_perfect_model(
-    init_pm,
+    initialized,
     control=None,
     metric="pearson_r",
     comparison="m2e",
@@ -217,7 +220,7 @@ def compute_perfect_model(
     Compute a predictability skill score in a perfect-model framework.
 
     Args:
-        init_pm (xr.Dataset): ensemble with dims ``lead``, ``init``, ``member``.
+        initialized (xr.Dataset): ensemble with dims ``lead``, ``init``, ``member``.
         control (xr.Dataset): NOTE that this is a legacy argument from a former
             release. ``control`` is not used in ``compute_perfect_model`` anymore.
         metric (str): `metric` name, see
@@ -236,16 +239,16 @@ def compute_perfect_model(
 
     """
     # Check that init is int, cftime, or datetime; convert ints or datetime to cftime
-    init_pm = convert_time_index(
-        init_pm, "init", "init_pm[init]", calendar=PM_CALENDAR_STR
+    initialized = convert_time_index(
+        initialized, "init", "initialized[init]", calendar=PM_CALENDAR_STR
     )
 
     # check args compatible with each other
     metric, comparison, dim = _get_metric_comparison_dim(
-        init_pm, metric, comparison, dim, kind="PM"
+        initialized, metric, comparison, dim, kind="PM"
     )
 
-    forecast, verif = comparison.function(init_pm, metric=metric)
+    forecast, verif = comparison.function(initialized, metric=metric)
 
     if metric.normalize or metric.allows_logical:
         metric_kwargs["comparison"] = comparison
@@ -256,7 +259,7 @@ def compute_perfect_model(
 
 
 def compute_hindcast(
-    hind,
+    initialized,
     verif,
     metric="pearson_r",
     comparison="e2o",
@@ -267,7 +270,7 @@ def compute_hindcast(
     """Verify hindcast predictions against verification data.
 
     Args:
-        hind (xr.Dataset): Hindcast ensemble.
+        initialized (xr.Dataset): Initialized hindcast ensemble.
             Expected to follow package conventions:
             * ``init`` : dim of initialization dates
             * ``lead`` : dim of lead time from those initializations
@@ -285,7 +288,7 @@ def compute_hindcast(
                 (see :ref:`Comparisons`)
         dim (str or list): dimension to apply metric over. default: 'init'
         alignment (str): which inits or verification times should be aligned?
-            - maximize/None: maximize the degrees of freedom by slicing ``hind`` and
+            - maximize: maximize the degrees of freedom by slicing ``initialized`` and
             ``verif`` to a common time frame at each lead.
             - same_inits: slice to a common ``init`` frame prior to computing
             metric. This philosophy follows the thought that each lead should be based
@@ -301,13 +304,13 @@ def compute_hindcast(
             Verification metric over ``lead`` reduced by dimension(s) ``dim``.
     """
     metric, comparison, dim = _get_metric_comparison_dim(
-        hind, metric, comparison, dim, kind="hindcast"
+        initialized, metric, comparison, dim, kind="hindcast"
     )
-    hind = convert_time_index(hind, "init", "hind[init]")
+    initialized = convert_time_index(initialized, "init", "initialized[init]")
     verif = convert_time_index(verif, "time", "verif[time]")
-    has_valid_lead_units(hind)
+    has_valid_lead_units(initialized)
 
-    forecast, verif = comparison.function(hind, verif, metric=metric)
+    forecast, verif = comparison.function(initialized, verif, metric=metric)
 
     # think in real time dimension: real time = init + lag
     forecast = add_time_from_init_lead(forecast)  # add time afterwards
@@ -328,7 +331,7 @@ def compute_hindcast(
             verif,
             verif_dates,
             lead,
-            hind=forecast,
+            initialized=forecast,
             inits=inits,
             metric=metric,
             comparison=comparison,

From 4c21f826e346c00c14904866288d781a7d3e2dad Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 00:41:57 +0100
Subject: [PATCH 43/56] use resample_iterations from xskillscore

---
 climpred/bootstrap.py            | 147 +++----------------------------
 climpred/tests/test_bootstrap.py |   6 +-
 2 files changed, 14 insertions(+), 139 deletions(-)

diff --git a/climpred/bootstrap.py b/climpred/bootstrap.py
index 35020ea4e..5ff08178d 100644
--- a/climpred/bootstrap.py
+++ b/climpred/bootstrap.py
@@ -3,6 +3,10 @@
 import dask
 import numpy as np
 import xarray as xr
+from xskillscore.core.resampling import (
+    resample_iterations as _resample_iterations,
+    resample_iterations_idx as _resample_iterations_idx,
+)
 
 from climpred.constants import CLIMPRED_DIMS, CONCAT_KWARGS, PM_CALENDAR_STR
 
@@ -57,11 +61,11 @@ def _resample(hind, resample_dim):
     """Resample with replacement in dimension ``resample_dim``.
 
     Args:
-        hind (xr.object): input xr.object to be resampled.
+        hind (xr.Dataset): input xr.Dataset to be resampled.
         resample_dim (str): dimension to resample along.
 
     Returns:
-        xr.object: resampled along ``resample_dim``.
+        xr.Dataset: resampled along ``resample_dim``.
 
     """
     to_be_resampled = hind[resample_dim].values
@@ -73,137 +77,6 @@ def _resample(hind, resample_dim):
     return smp_hind
 
 
-def _resample_iterations(init, iterations, dim="member", dim_max=None, replace=True):
-    """Resample over ``dim`` by index ``iterations`` times.
-
-    .. note::
-        This gives the same result as `_resample_iterations_idx`. When using dask, the
-        number of tasks in `_resample_iterations` will scale with iterations but
-        constant chunksize, whereas the tasks in `_resample_iterations_idx` will stay
-        constant with increasing chunksize.
-
-    Args:
-        init (xr.DataArray, xr.Dataset): Initialized prediction ensemble.
-        iterations (int): Number of bootstrapping iterations.
-        dim (str): Dimension name to bootstrap over. Defaults to ``'member'``.
-        dim_max (int): Number of items to select in `dim`.
-        replace (bool): Bootstrapping with or without replacement. Defaults to ``True``.
-
-    Returns:
-        xr.DataArray, xr.Dataset: Bootstrapped data with additional dim ```iteration```
-
-    """
-    if dim_max is not None and dim_max <= init[dim].size:
-        # select only dim_max items
-        select_dim_items = dim_max
-        new_dim = init[dim].isel({dim: slice(None, dim_max)})
-    else:
-        select_dim_items = init[dim].size
-        new_dim = init[dim]
-
-    if replace:
-        idx = np.random.randint(0, init[dim].size, (iterations, select_dim_items))
-    elif not replace:
-        # create 2d np.arange()
-        idx = np.linspace(
-            (np.arange(select_dim_items)),
-            (np.arange(select_dim_items)),
-            iterations,
-            dtype="int",
-        )
-        # shuffle each line
-        for ndx in np.arange(iterations):
-            np.random.shuffle(idx[ndx])
-    idx_da = xr.DataArray(
-        idx,
-        dims=("iteration", dim),
-        coords=({"iteration": range(iterations), dim: new_dim}),
-    )
-    init_smp = []
-    for i in np.arange(iterations):
-        idx = idx_da.sel(iteration=i).data
-        init_smp2 = init.isel({dim: idx}).assign_coords({dim: new_dim})
-        init_smp.append(init_smp2)
-    init_smp = xr.concat(init_smp, dim="iteration", **CONCAT_KWARGS)
-    init_smp["iteration"] = np.arange(1, 1 + iterations)
-    return init_smp
-
-
-def _resample_iterations_idx(
-    init, iterations, dim="member", replace=True, chunk=True, dim_max=None
-):
-    """Resample over ``dim`` by index ``iterations`` times.
-
-    .. note::
-        This is a much faster way to bootstrap than resampling each iteration
-        individually and applying the function to it. However, this will create a
-        DataArray with dimension ``iteration`` of size ``iterations``. It is probably
-        best to do this out-of-memory with ``dask`` if you are doing a large number
-        of iterations or using spatial output (i.e., not time series data).
-
-    Args:
-        init (xr.DataArray, xr.Dataset): Initialized prediction ensemble.
-        iterations (int): Number of bootstrapping iterations.
-        dim (str): Dimension name to bootstrap over. Defaults to ``'member'``.
-        replace (bool): Bootstrapping with or without replacement. Defaults to ``True``.
-        chunk: (bool): Auto-chunk along chunking_dims to get optimal blocksize
-        dim_max (int): Number of indices from `dim` to return. Not implemented.
-
-    Returns:
-        xr.DataArray, xr.Dataset: Bootstrapped data with additional dim ```iteration```
-
-    """
-    if dask.is_dask_collection(init):
-        init = init.chunk({"lead": -1, "member": -1})
-        init = init.copy(deep=True)
-
-    def select_bootstrap_indices_ufunc(x, idx):
-        """Selects multi-level indices ``idx`` from xr.Dataset ``x`` for all
-        iterations."""
-        # `apply_ufunc` sometimes adds a singleton dimension on the end, so we squeeze
-        # it out here. This leverages multi-level indexing from numpy, so we can
-        # select a different set of, e.g., ensemble members for each iteration and
-        # construct one large DataArray with ``iterations`` as a dimension.
-        return np.moveaxis(x.squeeze()[idx.squeeze().transpose()], 0, -1)
-
-    if dask.is_dask_collection(init):
-        if chunk:
-            chunking_dims = [d for d in init.dims if d not in CLIMPRED_DIMS]
-            init = _chunk_before_resample_iterations_idx(
-                init, iterations, chunking_dims
-            )
-
-    # resample with or without replacement
-    if replace:
-        idx = np.random.randint(0, init[dim].size, (iterations, init[dim].size))
-    elif not replace:
-        # create 2d np.arange()
-        idx = np.linspace(
-            (np.arange(init[dim].size)),
-            (np.arange(init[dim].size)),
-            iterations,
-            dtype="int",
-        )
-        # shuffle each line
-        for ndx in np.arange(iterations):
-            np.random.shuffle(idx[ndx])
-    idx_da = xr.DataArray(
-        idx,
-        dims=("iteration", dim),
-        coords=({"iteration": range(iterations), dim: init[dim]}),
-    )
-    transpose_kwargs = (
-        {"transpose_coords": False} if isinstance(init, xr.DataArray) else {}
-    )
-    return xr.apply_ufunc(
-        select_bootstrap_indices_ufunc,
-        init.transpose(dim, ..., **transpose_kwargs),
-        idx_da,
-        dask="parallelized",
-        output_dtypes=[float],
-    )
-
-
 def _distribution_to_ci(ds, ci_low, ci_high, dim="iteration"):
     """Get confidence intervals from bootstrapped distribution.
 
@@ -601,11 +474,11 @@ def _maybe_auto_chunk(ds, dims):
     """Auto-chunk on dimension `dims`.
 
     Args:
-        ds (xr.object): input data.
+        ds (xr.Dataset): input data.
         dims (list of str or str): Dimensions to auto-chunk in.
 
     Returns:
-        xr.object: auto-chunked along `dims`
+        xr.Dataset: auto-chunked along `dims`
 
     """
     if dask.is_dask_collection(ds) and dims is not []:
@@ -631,7 +504,7 @@ def _chunk_before_resample_iterations_idx(
             Defaults to 100000000.
 
     Returns:
-        xr.object: chunked to have blocksize: optimal_blocksize/iterations.
+        xr.Dataset: chunked to have blocksize: optimal_blocksize/iterations.
 
     """
     if isinstance(chunking_dims, str):
@@ -1287,7 +1160,7 @@ def _bootstrap_func(
 
     Args:
         func (function): function to be bootstrapped.
-        ds (xr.object): first input argument of func. `chunk` ds on `dim` other
+        ds (xr.Dataset): first input argument of func. `chunk` ds on `dim` other
             than `resample_dim` for potential performance increase when multiple
             CPUs available.
         resample_dim (str): dimension to resample from.
diff --git a/climpred/tests/test_bootstrap.py b/climpred/tests/test_bootstrap.py
index 88bb4372d..d3a5bca15 100644
--- a/climpred/tests/test_bootstrap.py
+++ b/climpred/tests/test_bootstrap.py
@@ -2,13 +2,15 @@
 import numpy as np
 import pytest
 import xarray as xr
+from xskillscore.core.resampling import (
+    resample_iterations as _resample_iterations,
+    resample_iterations_idx as _resample_iterations_idx,
+)
 
 from climpred.bootstrap import (
     _bootstrap_by_stacking,
     _chunk_before_resample_iterations_idx,
     _resample,
-    _resample_iterations,
-    _resample_iterations_idx,
     bootstrap_hindcast,
     bootstrap_uninit_pm_ensemble_from_control_cftime,
 )

From 782c5877c1ab395302a67a50ad216c72a1561aed Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 00:53:58 +0100
Subject: [PATCH 44/56] enable pydocstyle

---
 .pre-commit-config.yaml |  2 +-
 climpred/bootstrap.py   | 47 ++++++++++++++++++++++-------------------
 climpred/classes.py     |  3 +--
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 65f8e8fcd..dc86e78d9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -49,7 +49,7 @@ repos:
       rev: 6.1.1
       hooks:
       -   id: pydocstyle
-          args: ['--match="(?!test_).*\.py"'] # ignoring tests
+          # args: ['--match="(?!test_).*\.py"'] # ignoring tests
 
           # , "--convention=google"] # https://google.github.io/styleguide/pyguide.html#Comments
 
diff --git a/climpred/bootstrap.py b/climpred/bootstrap.py
index 5ff08178d..2f1217857 100644
--- a/climpred/bootstrap.py
+++ b/climpred/bootstrap.py
@@ -1,3 +1,5 @@
+"""Bootstrap or resampling operators for functional compute_ functions."""
+
 import warnings
 
 import dask
@@ -103,8 +105,7 @@ def _distribution_to_ci(ds, ci_low, ci_high, dim="iteration"):
 
 
 def _pvalue_from_distributions(ref_skill, init_skill, metric=None):
-    """Get probability that skill of a reference forecast (e.g., persistence or
-    uninitialized skill) is larger than initialized skill.
+    """Get probability that reference forecast skill is larger than initialized skill.
 
     Needed for bootstrapping confidence intervals and p_values of a metric in
     the hindcast framework. Checks whether a simple forecast like persistence,
@@ -119,7 +120,7 @@ def _pvalue_from_distributions(ref_skill, init_skill, metric=None):
 
     Returns:
         pv (xr.Dataset): probability that simple forecast performs better
-                            than initialized forecast.
+            than initialized forecast.
     """
     pv = ((ref_skill - init_skill) > 0).mean("iteration")
     if not metric.positive:
@@ -222,8 +223,7 @@ def bootstrap_uninit_pm_ensemble_from_control_cftime(init_pm, control):
     )
 
     def sel_time(start_year_int, suitable_start_dates):
-        """Select time segments from control from ``suitable_start_dates`` based on
-        year ``start_year_int``."""
+        """Select time of control from suitable_start_dates based on start_year_int."""
         start_time = suitable_start_dates.time.sel(time=str(start_year_int))
         end_time = shift_cftime_singular(start_time, block_length - 1, freq)
         new = control.sel(time=slice(*start_time, *end_time))
@@ -261,6 +261,8 @@ def create_pseudo_members(init):
 
 def resample_uninitialized_from_initialized(init, resample_dim=["init", "member"]):
     """
+    Generate ``uninitialized`` by resampling from ``initialized``.
+
     Generate an uninitialized ensemble by resampling without replacement from the
     initialized prediction ensemble. Full years of the first lead present from the
     initialized are relabeled to a different year.
@@ -323,8 +325,12 @@ def resample_uninitialized_from_initialized(init, resample_dim=["init", "member"
 
 
 def _bootstrap_by_stacking(init_pm, control):
-    """Bootstrap member, lead, init from control by reshaping. Fast track of function
-    `bootstrap_uninit_pm_ensemble_from_control_cftime` when lead units is 'years'."""
+    """
+    Bootstrap member, lead, init from control by reshaping.
+
+    Fast track of function
+    `bootstrap_uninit_pm_ensemble_from_control_cftime` when lead units is 'years'.
+    """
     assert type(init_pm) == type(control)
     lead_unit = init_pm.lead.attrs["units"]
     if isinstance(init_pm, xr.Dataset):
@@ -451,7 +457,8 @@ def _bootstrap_hindcast_over_init_dim(
 
 
 def _get_resample_func(ds):
-    """Decide for resample function based on input `ds`.
+    """
+    Decide for resample function based on input `ds`.
 
     Returns:
       callable: `_resample_iterations`: if big and chunked `ds`
@@ -493,8 +500,7 @@ def _maybe_auto_chunk(ds, dims):
 def _chunk_before_resample_iterations_idx(
     ds, iterations, chunking_dims, optimal_blocksize=100000000
 ):
-    """Chunk ds so small that after _resample_iteration_idx chunks have optimal size
-    `optimal_blocksize`.
+    """Chunk ds so that after _resample_iteration_idx chunks have `optimal_blocksize`.
 
     Args:
         ds (xr.obejct): input data`.
@@ -936,8 +942,7 @@ def bootstrap_hindcast(
     pers_sig=None,
     **metric_kwargs,
 ):
-    """Bootstrap compute with replacement. Wrapper of
-     py:func:`bootstrap_compute` for hindcasts.
+    """Wrap :py:func:`bootstrap_compute` for hindcasts.
 
     Args:
         hind (xr.Dataset): prediction ensemble.
@@ -1056,8 +1061,7 @@ def bootstrap_perfect_model(
     pers_sig=None,
     **metric_kwargs,
 ):
-    """Bootstrap compute with replacement. Wrapper of
-     py:func:`bootstrap_compute` for perfect-model framework.
+    """Wrap :py:func:`bootstrap_compute` for perfect-model framework.
 
     Args:
         hind (xr.Dataset): prediction ensemble.
@@ -1150,13 +1154,13 @@ def _bootstrap_func(
     *func_args,
     **func_kwargs,
 ):
-    """Sig % threshold of function based on iterations resampling with replacement.
+    """Calc sig % threshold of function based on iterations resampling with replacement.
 
     Reference:
-    * Mason, S. J., and G. M. Mimmack. “The Use of Bootstrap Confidence
-     Intervals for the Correlation Coefficient in Climatology.” Theoretical and
-      Applied Climatology 45, no. 4 (December 1, 1992): 229–33.
-      https://doi.org/10/b6fnsv.
+        * Mason, S. J., and G. M. Mimmack. “The Use of Bootstrap Confidence
+          Intervals for the Correlation Coefficient in Climatology.” Theoretical and
+          Applied Climatology 45, no. 4 (December 1, 1992): 229–33.
+          https://doi.org/10/b6fnsv.
 
     Args:
         func (function): function to be bootstrapped.
@@ -1171,7 +1175,7 @@ def _bootstrap_func(
 
     Returns:
         sig_level: bootstrapped significance levels with
-                   dimensions of init_pm and len(sig) if sig is list
+            dimensions of init_pm and len(sig) if sig is list
     """
     if not callable(func):
         raise ValueError(f"Please provide func as a function, found {type(func)}")
@@ -1210,8 +1214,7 @@ def dpp_threshold(control, sig=95, iterations=500, dim="time", **dpp_kwargs):
 
 
 def varweighted_mean_period_threshold(control, sig=95, iterations=500, time_dim="time"):
-    """Calc the variance-weighted mean period significance levels from re-sampled
-    dataset.
+    """Calc variance-weighted mean period significance levels from resampled dataset.
 
     See also:
         * climpred.bootstrap._bootstrap_func
diff --git a/climpred/classes.py b/climpred/classes.py
index 6790eae85..eedf63a05 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -415,8 +415,7 @@ def __contains__(self, key: Hashable) -> bool:
         return contained
 
     def equals(self, other: Union["PredictionEnsemble", Any]) -> bool:
-        """Check if :py:class:`~climpred.classes.PredictionEnsemble` is equal to other
-        :py:class:`~climpred.classes.PredictionEnsemble`.
+        """Check if :py:class:`~climpred.classes.PredictionEnsemble` is equal to other.
 
         Two :py:class:`~climpred.classes.PredictionEnsemble`s are equal if they have
         matching variables and coordinates, all of which are equal.

From 291cf17080aed4d7b3417033bfce1eebecdf4286 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 01:04:50 +0100
Subject: [PATCH 45/56] ignore doc8

---
 .pre-commit-config.yaml      | 15 ---------------
 CHANGELOG.rst                |  4 +++-
 docs/source/contributing.rst |  7 ++-----
 3 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index dc86e78d9..9908a8872 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -45,21 +45,6 @@ repos:
             numpy,
           ]
 
-  -   repo: https://github.com/pycqa/pydocstyle
-      rev: 6.1.1
-      hooks:
-      -   id: pydocstyle
-          # args: ['--match="(?!test_).*\.py"'] # ignoring tests
-
-          # , "--convention=google"] # https://google.github.io/styleguide/pyguide.html#Comments
-
-  -   repo: https://github.com/pycqa/doc8
-      rev: 0.10.1
-      hooks:
-      -   id: doc8
-
-          #args: ["--ignore-path docs/source/api/climpred*.rst ."]
-
   - repo: https://github.com/keewis/blackdoc
     rev: v0.3.4
     hooks:
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 07e5033ca..75ffe856b 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,7 @@
 What's New
 ==========
 
+
 .. ipython:: python
     :suppress:
 
@@ -769,7 +770,8 @@ Documentation
 
 Bug Fixes
 ---------
-- Fixed `m2m` broken comparison issue and removed correction (:pr:`290`) `Aaron Spring`_.
+- Fixed `m2m` broken comparison issue and removed correction.
+  (:pr:`290`) `Aaron Spring`_.
 
 Internals/Minor Fixes
 ---------------------
diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
index d09b2d638..e49cd6b8a 100644
--- a/docs/source/contributing.rst
+++ b/docs/source/contributing.rst
@@ -160,13 +160,10 @@ Preparing Pull Requests
    ``pre-commit`` also runs:
     * `mypy <http://mypy-lang.org/>`_ for static type checking on
       `type hints <https://docs.python.org/3/library/typing.html>`_.
-    * `doc8 <https://github.com/PyCQA/doc8>`_ for ``.rst`` files
     * `isort <https://pycqa.github.io/isort/>`_ sorting imports
     * `black <https://black.readthedocs.io/en/stable/>`_ code formatting
-    * `flake8 <https://flake8.pycqa.org/en/latest/>`
-    * `pydocstyle <https://github.com/pycqa/pydocstyle>`_ docstring style
-      checker
-    * `blackdoc <https://blackdoc.readthedocs.io/en/latest/>` docstring code
+    * `flake8 <https://flake8.pycqa.org/en/latest/>`_
+    * `blackdoc <https://blackdoc.readthedocs.io/en/latest/>`_ docstring code
       formatter
 
 

From ace4528bc0813b6c8901f9393e8eb37950d45b7c Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 01:05:33 +0100
Subject: [PATCH 46/56] fix CL

---
 CHANGELOG.rst | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 75ffe856b..467f3998c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -132,14 +132,10 @@ Internals/Minor Fixes
   (:issue:`475`, :pr:`694`) `Aaron Spring`_.
 - docstrings formatting with `blackdocs <https://github.com/keewis/blackdoc>`_.
   (:pr:`708`) `Aaron Spring`_.
-- documentation linting with `doc8 <https://github.com/PyCQA/doc8>`_ and
-  `pydocstyle <http://www.pydocstyle.org/en/stable/usage.html>`_.
-  (:pr:`708`) `Aaron Spring`_.
 
 Documentation
 -------------
 - Refresh all docs. (:issue:`707`, :pr:`708`) `Aaron Spring`_.
-- (:issue:`707`, :pr:`708`) `Aaron Spring`_.
 
 
 climpred v2.1.6 (2021-08-31)

From d9d4e6a88e3c9062e093fe77488744ac8d40a86a Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 13:08:34 +0100
Subject: [PATCH 47/56] rm flake8 ignores

---
 climpred/bias_removal.py | 12 +++++++-----
 climpred/checks.py       |  2 --
 climpred/classes.py      | 27 +++++++++++++++------------
 climpred/comparisons.py  |  2 +-
 climpred/graphics.py     |  5 ++---
 climpred/metrics.py      |  1 -
 climpred/options.py      |  4 ++--
 climpred/prediction.py   |  3 +--
 climpred/reference.py    |  6 ++----
 climpred/smoothing.py    |  4 ++--
 climpred/stats.py        |  3 +--
 climpred/tutorial.py     |  2 +-
 climpred/utils.py        | 16 +++++++++-------
 setup.cfg                |  7 +++----
 14 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/climpred/bias_removal.py b/climpred/bias_removal.py
index cdeb6fb5c..e2baf06a7 100644
--- a/climpred/bias_removal.py
+++ b/climpred/bias_removal.py
@@ -4,9 +4,8 @@
 import numpy as np
 import pandas as pd
 import xarray as xr
-from xskillscore.core.utils import suppress_warnings
 
-from .constants import BIAS_CORRECTION_BIAS_CORRECTION_METHODS, GROUPBY_SEASONALITIES
+from .constants import GROUPBY_SEASONALITIES
 from .metrics import Metric
 from .options import OPTIONS
 from .utils import (
@@ -50,9 +49,11 @@ def leave_one_out(bias, dim):
 
 
 def leave_one_out_drop(bias, dim):
-    """Leave-one-out creating a new dimension 'sample'.
+    """
+    Leave-one-out creating a new dimension ``sample``.
 
-    See also: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.LeaveOneOut.html."""
+    See also: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.LeaveOneOut.html. # noqa: E501
+    """
     bias_nan = []
     for i in range(bias[dim].size):
         bias_nan.append(bias.drop_isel({dim: i}).rename({dim: "sample"}).drop("sample"))
@@ -452,7 +453,8 @@ def bc_func(
         datasets. Implemented methods include:
         - quantile_mapping:
             https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/joc.2168)
-        - modified quantile mapping: https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub
+        - modified quantile mapping:
+            https://www.sciencedirect.com/science/article/abs/pii/S0034425716302000?via%3Dihub # noqa: E501
         - scaled distribution mapping (Gamma and Normal Corrections):
             https://www.hydrol-earth-syst-sci.net/21/2649/2017/
         """
diff --git a/climpred/checks.py b/climpred/checks.py
index 40456c073..3193ebe1b 100644
--- a/climpred/checks.py
+++ b/climpred/checks.py
@@ -1,7 +1,6 @@
 """Common checks for climpred operations."""
 
 import warnings
-from functools import wraps
 from typing import List, Optional, Union
 
 import dask
@@ -10,7 +9,6 @@
 from .constants import (
     CF_LONG_NAMES,
     CF_STANDARD_NAMES,
-    CLIMPRED_ENSEMBLE_DIMS,
     VALID_ALIGNMENTS,
     VALID_LEAD_UNITS,
     VALID_REFERENCES,
diff --git a/climpred/classes.py b/climpred/classes.py
index eedf63a05..e31a0f06e 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -19,7 +19,6 @@
 import cf_xarray  # noqa
 import numpy as np
 import xarray as xr
-from dask import is_dask_collection
 from IPython.display import display_html
 from xarray.core.coordinates import DatasetCoordinates
 from xarray.core.dataset import DataVariables
@@ -56,11 +55,9 @@
     CONCAT_KWARGS,
     CROSS_VALIDATE_METHODS,
     INTERNAL_BIAS_CORRECTION_METHODS,
-    M2M_MEMBER_DIM,
     XCLIM_BIAS_CORRECTION_METHODS,
 )
 from .exceptions import CoordinateError, DimensionError, VariableError
-from .logging import log_compute_hindcast_header
 from .metrics import Metric
 from .options import OPTIONS
 from .prediction import (
@@ -164,7 +161,7 @@ def _display_metadata(self) -> str:
 
 
 def _display_metadata_html(self) -> str:
-    """Print the contents of the :py:class:`~climpred.classes.PredictionEnsemble` as html."""
+    """Print contents of :py:class:`~climpred.classes.PredictionEnsemble` as html."""
     header = f"<h4>climpred.{type(self).__name__}</h4>"
     display_html(header, raw=True)
     init_repr_str = dataset_repr(self._datasets["initialized"])
@@ -623,7 +620,11 @@ def __truediv__(self, other: mathType) -> "PredictionEnsemble":
         return self._math(other, operator="div")
 
     def __getitem__(self, varlist: Union[str, List[str]]) -> "PredictionEnsemble":
-        """Allow subsetting variable(s) from :py:class:`~climpred.classes.PredictionEnsemble` as from xr.Dataset.
+        """Allow subsetting variable(s) as from :py:class:`~xarray.Dataset`.
+
+        Allow subsetting variable(s) from
+        :py:class:`~climpred.classes.PredictionEnsemble` as from
+        :py:class:`~xarray.Dataset`.
 
         Args:
             varlist: list of names or name of data variable(s) to subselect
@@ -652,7 +653,7 @@ def __getattr__(
         """
 
         def wrapper(*args, **kwargs):
-            """Apply arbitrary function to all datasets in :py:class:`~climpred.classes.PerfectModelEnsemble`.
+            """Apply arbitrary function to all datasets in ``PerfectModelEnsemble``.
 
             Got this from: https://stackoverflow.com/questions/41919499/
             how-to-call-undefined-methods-sequentially-in-python-class
@@ -733,7 +734,7 @@ def _construct_direct(cls, datasets, kind):
     def _apply_func(
         self, func: Callable[..., xr.Dataset], *args: Any, **kwargs: Any
     ) -> "PredictionEnsemble":
-        """Apply a function to all datasets in a :py:class:`~climpred.classes.PerfectModelEnsemble`."""
+        """Apply a function to all datasets in a ``PerfectModelEnsemble``."""
         # Create temporary copy to modify to avoid inplace operation.
         # isnt that essentially the same as .map(func)?
         datasets = self._datasets.copy()
@@ -777,7 +778,7 @@ def smooth(
         how: str = "mean",
         **xesmf_kwargs: str,
     ):
-        """Smooth in space and/or aggregate in time :py:class:`~climpred.classes.PredictionEnsemble`.
+        """Smooth in space and/or aggregate in time in ``PredictionEnsemble``.
 
         Args:
             smooth_kws: Dictionary to specify the dims to
@@ -2266,9 +2267,11 @@ def bootstrap(
                     bootstrapping with replacement.
 
         Example:
-            Continuous Ranked Probability Score (``"crps"``) comparing every member forecast to the verification (``"m2o"``) over the same
-            initializations (``"same_inits"``) for all leads reducing dimensions
-            ``member`` 50 times after resampling ``member`` dimension with replacement. Note that dimension ``init`` remains.
+            Continuous Ranked Probability Score (``"crps"``) comparing every member
+            forecast to the verification (``"m2o"``) over the same initializations
+            (``"same_inits"``) for all leads reducing dimension ``member`` 50 times
+            after resampling ``member`` dimension with replacement. Note that dimension
+            ``init`` remains.
             Also calculate reference skill for the ``"persistence"``, ``"climatology"``
             and ``"uninitialized"`` forecast and compare whether initialized skill is
             better than reference skill: Returns verify skill, probability that
@@ -2639,7 +2642,7 @@ def remove_bias(
         else:
             raise NotImplementedError(
                 f"bias removal '{how}' is not implemented, please choose from "
-                f" {INTERNAL_BIAS_CORRECTION_METHODS+BIAS_CORRECTION_BIAS_CORRECTION_METHODS}."
+                f" {INTERNAL_BIAS_CORRECTION_METHODS+BIAS_CORRECTION_BIAS_CORRECTION_METHODS}."  # noqa: E501
             )
 
         if train_test_split in ["unfair-cv"]:
diff --git a/climpred/comparisons.py b/climpred/comparisons.py
index c798bbb42..ea1b1f3a5 100644
--- a/climpred/comparisons.py
+++ b/climpred/comparisons.py
@@ -1,6 +1,6 @@
 """Comparisons: How to compare forecast with verification."""
 
-from typing import Any, Callable, List, Optional, Tuple, Union
+from typing import Any, Callable, List, Optional, Tuple
 
 import dask
 import numpy as np
diff --git a/climpred/graphics.py b/climpred/graphics.py
index cd6bb783e..e2f382cca 100644
--- a/climpred/graphics.py
+++ b/climpred/graphics.py
@@ -5,14 +5,13 @@
 import cftime
 import numpy as np
 import xarray as xr
-from xarray.coding.times import infer_calendar_name
 
 from .alignment import return_inits_and_verif_dates
 from .checks import DimensionError
 from .classes import HindcastEnsemble, PerfectModelEnsemble
 from .constants import CLIMPRED_DIMS
-from .metrics import ALL_METRICS, PROBABILISTIC_METRICS
-from .utils import get_lead_cftime_shift_args, get_metric_class, shift_cftime_index
+from .metrics import ALL_METRICS
+from .utils import get_metric_class
 
 try:
     import matplotlib as mpl
diff --git a/climpred/metrics.py b/climpred/metrics.py
index fb6aa9ca5..10c14cbe2 100644
--- a/climpred/metrics.py
+++ b/climpred/metrics.py
@@ -4,7 +4,6 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy as np
-import pandas as pd
 import xarray as xr
 from scipy.stats import norm
 from xskillscore import (
diff --git a/climpred/options.py b/climpred/options.py
index 6fd3f2ead..30ee2aef8 100644
--- a/climpred/options.py
+++ b/climpred/options.py
@@ -31,11 +31,11 @@ class set_options:
 
     Parameters
     ----------
-    ``seasonality`` : {``"dayofyear"``, ``"weekofyear"``, ``"month"``, ``"season"``}, default: ``"month"``
+    ``seasonality`` : {``"dayofyear"``, ``"weekofyear"``, ``"month"``, ``"season"``}, default: ``"month"`` # noqa: E501
         Attribute to group dimension ``groupby(f"{dim}.{seasonality}"")``.
         Used in ``reference=climatology`` and
         :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias`.
-    ``PerfectModel_persistence_from_initialized_lead_0`` : {``True``, ``False``}, default ``False``
+    ``PerfectModel_persistence_from_initialized_lead_0`` : {``True``, ``False``}, default ``False`` # noqa: E501
         Which persistence function to use in
         ``PerfectModelEnsemble.verify/bootstrap(reference="persistence")``.
         If ``False`` use :py:func:`~climpred.reference.compute_persistence`.
diff --git a/climpred/prediction.py b/climpred/prediction.py
index f101158c0..9b232df80 100644
--- a/climpred/prediction.py
+++ b/climpred/prediction.py
@@ -11,7 +11,7 @@
     PROBABILISTIC_HINDCAST_COMPARISONS,
     PROBABILISTIC_PM_COMPARISONS,
 )
-from .constants import CLIMPRED_DIMS, CONCAT_KWARGS, M2M_MEMBER_DIM, PM_CALENDAR_STR
+from .constants import CONCAT_KWARGS, M2M_MEMBER_DIM, PM_CALENDAR_STR
 from .exceptions import DimensionError
 from .logging import log_compute_hindcast_header, log_compute_hindcast_inits_and_verifs
 from .metrics import HINDCAST_METRICS, METRIC_ALIASES, PM_METRICS
@@ -23,7 +23,6 @@
 )
 from .utils import (
     add_time_from_init_lead,
-    assign_attrs,
     convert_time_index,
     get_comparison_class,
     get_lead_cftime_shift_args,
diff --git a/climpred/reference.py b/climpred/reference.py
index 7324d186f..a4df7890e 100644
--- a/climpred/reference.py
+++ b/climpred/reference.py
@@ -1,5 +1,5 @@
 """Reference forecasts: climatology, persistence, uninitialized."""
-import warnings
+
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import pandas as pd
@@ -11,16 +11,14 @@
     ALL_COMPARISONS,
     COMPARISON_ALIASES,
     HINDCAST_COMPARISONS,
-    PM_COMPARISONS,
     Comparison,
     __e2c,
 )
-from .constants import CLIMPRED_DIMS, M2M_MEMBER_DIM
+from .constants import M2M_MEMBER_DIM
 from .metrics import (
     ALL_METRICS,
     DETERMINISTIC_HINDCAST_METRICS,
     METRIC_ALIASES,
-    PM_METRICS,
     Metric,
     _rename_dim,
 )
diff --git a/climpred/smoothing.py b/climpred/smoothing.py
index 7bcccc6c5..9819d8e4d 100644
--- a/climpred/smoothing.py
+++ b/climpred/smoothing.py
@@ -1,6 +1,6 @@
 """Spatial/temporal smoothing implemented in PredictionEnsemble.smooth()."""
 
-from typing import Any, Dict, Optional
+from typing import Dict, Optional
 
 import numpy as np
 import xarray as xr
@@ -23,7 +23,7 @@ def spatial_smoothing_xesmf(
 ) -> xr.Dataset:
     """Quick regridding function.
 
-    Adapted from https://github.com/JiaweiZhuang/xESMF/pull/27/files#diff-b537ef68c98c2ec11e64e4803fe4a113R105.
+    Adapted from https://github.com/JiaweiZhuang/xESMF/pull/27/files#diff-b537ef68c98c2ec11e64e4803fe4a113R105.  # noqa: E501
 
     Args:
         ds: Contain input and output grid coordinates.
diff --git a/climpred/stats.py b/climpred/stats.py
index be4ceb734..a14b5a4f2 100644
--- a/climpred/stats.py
+++ b/climpred/stats.py
@@ -1,7 +1,6 @@
 """Statistical functions to diagnose potential predictability due to variability."""
 
-import warnings
-from typing import Any, Dict, List, Union
+from typing import Any, List, Union
 
 import numpy as np
 import xarray as xr
diff --git a/climpred/tutorial.py b/climpred/tutorial.py
index ecca17824..0ca96df25 100644
--- a/climpred/tutorial.py
+++ b/climpred/tutorial.py
@@ -3,7 +3,7 @@
 import hashlib
 import os as _os
 import urllib
-from typing import Dict, Optional, Union
+from typing import Dict, Optional
 from urllib.request import urlretrieve as _urlretrieve
 
 import xarray as xr
diff --git a/climpred/utils.py b/climpred/utils.py
index 6a40cbe5c..b3f7f4efd 100644
--- a/climpred/utils.py
+++ b/climpred/utils.py
@@ -1,6 +1,5 @@
 """Utility functions used by other modules."""
 
-import datetime
 import logging
 import warnings
 from typing import List, Union
@@ -31,15 +30,15 @@ def add_attrs_to_climpred_coords(results):
                 "description": "new coordinate created by .bootstrap()",
                 "verify skill": "skill from verify",
                 "p": "probability that reference performs better than initialized",
-                "low_ci": "lower confidence interval threshold based on resampling with replacement",
-                "high_ci": "higher confidence interval threshold based on resampling with replacement",
+                "low_ci": "lower confidence interval threshold based on resampling with replacement",  # noqa: E501
+                "high_ci": "higher confidence interval threshold based on resampling with replacement",  # noqa: E501
             }
         )
     if "skill" in results.coords:
         results["skill"] = results["skill"].assign_attrs(
             {
-                "description": "new dimension prediction skill of initialized and reference forecasts created by .verify() or .bootstrap()",
-                "documentation": f"https://climpred.readthedocs.io/en/v{version}/reference_forecast.html",
+                "description": "new dimension prediction skill of initialized and reference forecasts created by .verify() or .bootstrap()",  # noqa: E501
+                "documentation": f"https://climpred.readthedocs.io/en/v{version}/reference_forecast.html",  # noqa: E501
             }
         )
     if "skill" in results.dims:
@@ -604,7 +603,9 @@ def my_shift(init, lead):
     if lead_unit in ["years", "seasons", "months"] and "360" not in init_calendar:
         if int(lead) != float(lead):
             raise CoordinateError(
-                f'Require integer leads if lead.attrs["units"]="{lead_unit}" in ["years", "seasons", "months"] and calendar="{init_calendar}" not "360_day".'
+                f'Require integer leads if lead.attrs["units"]="{lead_unit}" in '
+                f'["years", "seasons", "months"] and calendar="{init_calendar}" '
+                'not "360_day".'
             )
         lead = int(lead)
 
@@ -642,7 +643,8 @@ def my_shift(init, lead):
                         lead_freq = lead_freq + "-" + init_freq.split("-")[-1]
         else:
             raise ValueError(
-                f"could not shift init={init} in calendar={init_calendar} by lead={lead} {lead_unit}"
+                f"could not shift init={init} in calendar={init_calendar} by "
+                f" lead={lead} {lead_unit}"
             )
         return init.shift(lead, lead_freq)
     else:  # lower freq
diff --git a/setup.cfg b/setup.cfg
index 2c40121c9..ff63bd3f3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,10 +2,9 @@
 universal = 1
 
 [flake8]
-ignore = E203,E266,E501,W503,F401,W605,E402,C901
-max-line-length = 88
-max-complexity = 18
-select = B,C,E,F,W,T4,B9
+ignore = W503
+# allowing doctests
+max-line-length = 93
 
 [black]
 line-length = 88

From b617532da19b47d43552bece04669af06af1c138 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 16:09:36 +0100
Subject: [PATCH 48/56] flake8 errors fixed

---
 climpred/bias_removal.py                      | 29 ++++++++++---------
 climpred/tests/test_HindcastEnsemble_class.py | 20 +++++++------
 climpred/tests/test_PredictionEnsemble.py     |  2 +-
 climpred/tests/test_bias_removal.py           | 17 ++---------
 climpred/tests/test_bootstrap.py              |  5 ++--
 climpred/tests/test_compute_dims.py           |  1 -
 climpred/tests/test_graphics.py               |  2 +-
 climpred/tests/test_hindcast_prediction.py    |  6 ++--
 climpred/tests/test_lead_time_resolutions.py  |  4 +--
 climpred/tests/test_map.py                    |  2 +-
 climpred/tests/test_options.py                | 19 ++++++++----
 .../tests/test_perfect_model_prediction.py    |  8 ++---
 climpred/tests/test_probabilistic.py          |  7 +----
 climpred/tests/test_reference.py              |  2 +-
 climpred/tests/test_relative_entropy.py       |  7 +----
 climpred/tests/test_uninitialized.py          |  2 +-
 climpred/tests/test_utils.py                  |  5 ++--
 17 files changed, 62 insertions(+), 76 deletions(-)

diff --git a/climpred/bias_removal.py b/climpred/bias_removal.py
index e2baf06a7..25d8709ab 100644
--- a/climpred/bias_removal.py
+++ b/climpred/bias_removal.py
@@ -26,6 +26,8 @@
 
 
 def sub(a, b):
+    print(a, b)
+    # print(a.coords,a.dims,b.coords,b.dims)
     return a - b
 
 
@@ -67,16 +69,17 @@ def _mean_bias_removal_func(hind, bias, dim, how):
     """Quick removal of mean bias over all initializations without cross validation.
 
     Args:
-        hind (xr.object): hindcast.
-        bias (xr.object): bias.
+        hind (xr.Dataset): hindcast.
+        bias (xr.Dataset): bias.
         dim (str): Time dimension name in bias.
 
     Returns:
-        xr.object: bias removed hind
+        xr.Dataset: bias removed hind
 
     """
     how_operator = sub if how == "additive" else div
     seasonality = OPTIONS["seasonality"]
+    print(hind.dims, bias.dims, dim, seasonality)
 
     with xr.set_options(keep_attrs=True):
         if seasonality == "weekofyear":
@@ -100,13 +103,13 @@ def _multiplicative_std_correction(hind, spread, dim, obs=None):
     """Quick removal of std bias over all initializations without cross validation.
 
     Args:
-        hind (xr.object): hindcast.
-        spread (xr.object): model spread.
+        hind (xr.Dataset): hindcast.
+        spread (xr.Dataset): model spread.
         dim (str): Time dimension name in bias.
-        obs (xr.object): observations
+        obs (xr.Dataset): observations
 
     Returns:
-        xr.object: bias removed hind
+        xr.Dataset: bias removed hind
 
     """
     seasonality = OPTIONS["seasonality"]
@@ -152,13 +155,13 @@ def _std_multiplicative_bias_removal_func_cv(hind, spread, dim, obs, cv="LOO"):
         the following one.
 
     Args:
-        hind (xr.object): hindcast.
-        bias (xr.object): bias.
+        hind (xr.Dataset): hindcast.
+        bias (xr.Dataset): bias.
         dim (str): Time dimension name in bias.
         how (str): additive or multiplicative bias.
 
     Returns:
-        xr.object: bias removed hind
+        xr.Dataset: bias removed hind
 
     Reference:
         * Jolliffe, Ian T., and David B. Stephenson. Forecast Verification: A
@@ -219,13 +222,13 @@ def _mean_bias_removal_func_cv(hind, bias, dim, how, cv="LOO"):
         the following one.
 
     Args:
-        hind (xr.object): hindcast.
-        bias (xr.object): bias.
+        hind (xr.Dataset): hindcast.
+        bias (xr.Dataset): bias.
         dim (str): Time dimension name in bias.
         how (str): additive or multiplicative bias.
 
     Returns:
-        xr.object: bias removed hind
+        xr.Dataset: bias removed hind
 
     Reference:
         * Jolliffe, Ian T., and David B. Stephenson. Forecast Verification: A
diff --git a/climpred/tests/test_HindcastEnsemble_class.py b/climpred/tests/test_HindcastEnsemble_class.py
index d0638653e..ed5fd45b1 100644
--- a/climpred/tests/test_HindcastEnsemble_class.py
+++ b/climpred/tests/test_HindcastEnsemble_class.py
@@ -1,10 +1,8 @@
 import pytest
 import xarray as xr
 
-from climpred import HindcastEnsemble, set_options
+from climpred import HindcastEnsemble
 from climpred.exceptions import CoordinateError, DimensionError
-from climpred.options import OPTIONS
-from climpred.utils import convert_time_index
 
 
 def test_hindcastEnsemble_init(hind_ds_initialized_1d):
@@ -221,7 +219,11 @@ def test_verify_reference_same_dims(hindcast_hist_obs_1d, metric):
 
 
 def test_HindcastEnsemble_multidim_initialized_lessdim_verif(hindcast_hist_obs_1d):
-    """Test to see if HindcastEnsemble allow broadcast over dimensions in initialized but not in observations, e.g. model dim which is not available in observations."""
+    """
+    Test HindcastEnsemble allow broadcast over dimensions in initialized only.
+
+    But not in observations, e.g. model dim which is not available in observations.
+    """
     initialized = hindcast_hist_obs_1d.get_initialized()
     obs = hindcast_hist_obs_1d.get_observations()
     hind = HindcastEnsemble(
@@ -234,7 +236,7 @@ def test_HindcastEnsemble_multidim_initialized_lessdim_verif(hindcast_hist_obs_1
 
 
 def test_HindcastEnsemble_multidim_verif_lessdim_initialized(hindcast_hist_obs_1d):
-    """Test if HindcastEnsemble initialization fails if observations contains more dims than initialized."""
+    """Test HindcastEnsemble.__init__ fails if obs has more dims than initialized."""
     initialized = hindcast_hist_obs_1d.get_initialized()
     obs = hindcast_hist_obs_1d.get_observations()
     with pytest.raises(
@@ -256,7 +258,7 @@ def test_HindcastEnsemble_multidim_verif_lessdim_initialized(hindcast_hist_obs_1
 def test_HindcastEnsemble_instantiating_standard_name(
     da_lead, dim, new_dim, cf_standard_name
 ):
-    """Instantiating a HindcastEnsemble without init dim only works if matching CF standard name is set."""
+    """Test PredictionEnsemble without init only works with matching standard name."""
     init = (
         da_lead.to_dataset(name="var").expand_dims("member").assign_coords(member=[1])
     )
@@ -268,7 +270,7 @@ def test_HindcastEnsemble_instantiating_standard_name(
     if dim != "member":  # member not required
         with pytest.raises(
             DimensionError,
-            match="Your PredictionEnsemble object must contain the following dimensions",
+            match="PredictionEnsemble object must contain the following dimensions",
         ):
             HindcastEnsemble(init)
 
@@ -301,7 +303,7 @@ def test_fractional_leads_360_day(hind_ds_initialized_1d, lead_freq):
 def test_fractional_leads_lower_than_month_lead_units(
     hind_ds_initialized_1d, lead_freq, calendar
 ):
-    """Test that lead can also contain floats when lead units is lower or equal to weeks'."""
+    """Test that lead can contain floats when lead units is lower or equal to weeks."""
     hind_ds_initialized_1d["init"] = xr.cftime_range(
         start=str(hind_ds_initialized_1d.init[0].values),
         freq="YS",
@@ -314,7 +316,7 @@ def test_fractional_leads_lower_than_month_lead_units(
 
 
 def test_fractional_leads_fails(hind_ds_initialized_1d):
-    """Test that fractional leads fail for normal calendars and lead units in larger than days."""
+    """Test float leads fail for normal calendars and lead units in larger than days."""
     with xr.set_options(keep_attrs=True):
         hind_ds_initialized_1d["lead"] = hind_ds_initialized_1d["lead"] - 0.5
     with pytest.raises(CoordinateError, match="Require integer"):
diff --git a/climpred/tests/test_PredictionEnsemble.py b/climpred/tests/test_PredictionEnsemble.py
index f6be9f889..b05345637 100644
--- a/climpred/tests/test_PredictionEnsemble.py
+++ b/climpred/tests/test_PredictionEnsemble.py
@@ -175,7 +175,7 @@ def test_hindcastEnsemble_plus_broadcast(hind_ds_initialized_3d, operator):
 def test_hindcastEnsemble_operator_different_datasets(
     hind_ds_initialized_1d, observations_ds_1d
 ):
-    """Test that HindcastEnsemble math operator (+-*/) on different datasets attached to HindcastEnsemble."""
+    """Test HindcastEnsemble math operators (+-*/) with different datasets attached."""
     he = HindcastEnsemble(hind_ds_initialized_1d)
     he = he.add_observations(observations_ds_1d)
     he2 = HindcastEnsemble(hind_ds_initialized_1d)
diff --git a/climpred/tests/test_bias_removal.py b/climpred/tests/test_bias_removal.py
index 4d22a59fe..6e1aa9ee6 100644
--- a/climpred/tests/test_bias_removal.py
+++ b/climpred/tests/test_bias_removal.py
@@ -12,7 +12,6 @@
     XCLIM_BIAS_CORRECTION_METHODS,
 )
 from climpred.options import OPTIONS
-from climpred.testing import assert_PredictionEnsemble
 
 from . import requires_bias_correction, requires_xclim
 
@@ -39,7 +38,7 @@ def _adjust_metric_kwargs(metric_kwargs=None, how=None, he=None):
 @requires_bias_correction
 @pytest.mark.parametrize("how", BIAS_CORRECTION_METHODS)
 def test_remove_bias_difference_seasonality(hindcast_recon_1d_mm, how):
-    """Test HindcastEnsemble.remove_bias yields different results for different seasonality settings."""
+    """Test HindcastEnsemble.remove_bias yields different results for seasonality."""
     verify_kwargs = dict(
         metric="rmse", dim="init", comparison="e2o", alignment="same_inits", skipna=True
     )
@@ -86,20 +85,10 @@ def test_remove_bias(hindcast_recon_1d_mm, alignment, how, seasonality, cv):
     def check_hindcast_coords_maintained_except_init(hindcast, hindcast_bias_removed):
         # init only slighty cut due to alignment
         for c in hindcast.coords:
-            print(
-                "check coord",
-                c,
-                "hindcast_bias_removed.coords",
-                hindcast_bias_removed.coords,
-            )
             if c in ["init", "valid_time"]:
-                assert (
-                    hindcast.coords[c].size >= hindcast_bias_removed.coords[c].size
-                )  # , print(hindcast.coords[c].to_index(),'\n vs\n',hindcast_bias_removed.coords[c].to_index())
+                assert hindcast.coords[c].size >= hindcast_bias_removed.coords[c].size
             else:
-                assert (
-                    hindcast.coords[c].size == hindcast_bias_removed.coords[c].size
-                )  # , print(hindcast.coords[c].to_index(),'\n vs\n',hindcast_bias_removed.coords[c].to_index())
+                assert hindcast.coords[c].size == hindcast_bias_removed.coords[c].size
 
     with set_options(seasonality=seasonality):
         metric = "rmse"
diff --git a/climpred/tests/test_bootstrap.py b/climpred/tests/test_bootstrap.py
index d3a5bca15..fe48e2177 100644
--- a/climpred/tests/test_bootstrap.py
+++ b/climpred/tests/test_bootstrap.py
@@ -1,3 +1,5 @@
+"""Test bootstrap.py."""
+
 import dask
 import numpy as np
 import pytest
@@ -14,8 +16,7 @@
     bootstrap_hindcast,
     bootstrap_uninit_pm_ensemble_from_control_cftime,
 )
-from climpred.comparisons import HINDCAST_COMPARISONS
-from climpred.constants import CONCAT_KWARGS, VALID_ALIGNMENTS
+from climpred.constants import CONCAT_KWARGS
 from climpred.exceptions import KeywordError
 from climpred.utils import _transpose_and_rechunk_to
 
diff --git a/climpred/tests/test_compute_dims.py b/climpred/tests/test_compute_dims.py
index 269f4343e..2239f80c1 100644
--- a/climpred/tests/test_compute_dims.py
+++ b/climpred/tests/test_compute_dims.py
@@ -2,7 +2,6 @@
 import xarray as xr
 from xarray.testing import assert_allclose
 
-from climpred.bootstrap import bootstrap_hindcast
 from climpred.comparisons import (
     PM_COMPARISONS,
     PROBABILISTIC_HINDCAST_COMPARISONS,
diff --git a/climpred/tests/test_graphics.py b/climpred/tests/test_graphics.py
index 4bb8cc5e1..46bef074c 100644
--- a/climpred/tests/test_graphics.py
+++ b/climpred/tests/test_graphics.py
@@ -1,4 +1,4 @@
-import numpy as np
+"""Test graphics.py and PredictionEnsemble.plot()."""
 import pytest
 
 from climpred import HindcastEnsemble, PerfectModelEnsemble
diff --git a/climpred/tests/test_hindcast_prediction.py b/climpred/tests/test_hindcast_prediction.py
index 6854db4f6..7862f9325 100644
--- a/climpred/tests/test_hindcast_prediction.py
+++ b/climpred/tests/test_hindcast_prediction.py
@@ -1,10 +1,8 @@
+"""Test compute_hindcast."""
+
 import dask
-import numpy as np
 import pytest
 
-from climpred.bootstrap import bootstrap_hindcast
-from climpred.comparisons import HINDCAST_COMPARISONS
-from climpred.constants import CLIMPRED_DIMS
 from climpred.prediction import compute_hindcast
 from climpred.reference import compute_persistence
 
diff --git a/climpred/tests/test_lead_time_resolutions.py b/climpred/tests/test_lead_time_resolutions.py
index 72606d6cc..1b2cd7d8c 100644
--- a/climpred/tests/test_lead_time_resolutions.py
+++ b/climpred/tests/test_lead_time_resolutions.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 import pytest
 import xarray as xr
 
@@ -80,9 +81,6 @@ def test_PerfectModelEnsemble_time_resolution_verify(HindcastEnsemble_time_resol
     assert pm.verify(**PerfectModelEnsemble_verify_kw).notnull().any()
 
 
-import pandas as pd
-
-
 @pytest.mark.parametrize(
     "lead_res", ["seconds", "minutes", "hours", "days", "pentads", "weeks"]
 )
diff --git a/climpred/tests/test_map.py b/climpred/tests/test_map.py
index c0d0ccb10..41a67cd92 100644
--- a/climpred/tests/test_map.py
+++ b/climpred/tests/test_map.py
@@ -50,6 +50,6 @@ def test_PredictionEnsemble_map_dim_or(hindcast_hist_obs_1d):
 def test_PredictionEnsemble_map_dim_or_fails_if_both_dims_in_dataset(
     hindcast_hist_obs_1d,
 ):
-    """Tests that PredictionEnsemble with dim0_or_dim1 as kwargs fails if both dims in any dataset."""
+    """Test PredictionEnsemble dim0_or_dim1 kwarg fails if both dims in any dataset."""
     with pytest.raises(ValueError, match="cannot be both in"):
         hindcast_hist_obs_1d.map(rm_poly, dim="init_or_lead", deg=2)
diff --git a/climpred/tests/test_options.py b/climpred/tests/test_options.py
index 3c5c259e7..ae902920c 100644
--- a/climpred/tests/test_options.py
+++ b/climpred/tests/test_options.py
@@ -3,18 +3,27 @@
 import climpred
 
 
-@pytest.mark.xfail(
-    reason="not properly implemented see https://github.com/pangeo-data/climpred/issues/605"
-)
+# @pytest.mark.xfail(
+#    reason="not properly implemented see https://github.com/pangeo-data/climpred/issues/605"
+# )
 @pytest.mark.parametrize(
     "cross_validate", [False, pytest.param(True, marks=pytest.mark.xfail)]
 )
 def test_seasonality_remove_bias(hindcast_recon_1d_dm, cross_validate):
-    """Test the climpred.set_option(seasonality) changes bias reduction. Currently fails for cross_validate bias reduction."""
+    """
+    Test the climpred.set_option(seasonality) changes bias reduction.
+
+    Currently fails for cross_validate bias reduction.
+    """
     hindcast = hindcast_recon_1d_dm
     hindcast._datasets["initialized"] = (
         hindcast.get_initialized().resample(init="1MS").interpolate("linear")
     )
+    print(
+        hindcast.get_initialized().coords["init"][:4],
+        hindcast.coords["lead"],
+        hindcast.get_observations().coords["time"][:4],
+    )
 
     alignment = "maximize"
     kw = {
@@ -29,6 +38,7 @@ def test_seasonality_remove_bias(hindcast_recon_1d_dm, cross_validate):
         dayofyear_seasonality = hindcast.remove_bias(
             alignment=alignment, cross_validate=cross_validate
         )
+    print("\n" * 4)
     with climpred.set_options(seasonality="weekofyear"):
         weekofyear_seasonality = hindcast.remove_bias(
             alignment=alignment, cross_validate=cross_validate
@@ -74,7 +84,6 @@ def test_option_warn_for_failed_PredictionEnsemble_xr_call(
 def test_climpred_warnings(hindcast_recon_1d_dm, option_bool):
     with climpred.set_options(warn_for_failed_PredictionEnsemble_xr_call=True):
         with climpred.set_options(climpred_warnings=option_bool):
-            print(climpred.options.OPTIONS)
             with pytest.warns(UserWarning if option_bool else None) as record:
                 hindcast_recon_1d_dm.sel(lead=[1, 2])
             if not option_bool:
diff --git a/climpred/tests/test_perfect_model_prediction.py b/climpred/tests/test_perfect_model_prediction.py
index 105adbb92..bd31016d0 100644
--- a/climpred/tests/test_perfect_model_prediction.py
+++ b/climpred/tests/test_perfect_model_prediction.py
@@ -1,11 +1,9 @@
-import dask
-import numpy as np
+"""Test compute_perfect_model."""
+
+
 import pytest
 import xarray as xr
 
-from climpred.bootstrap import bootstrap_perfect_model
-from climpred.constants import CLIMPRED_DIMS
-from climpred.metrics import DETERMINISTIC_PM_METRICS
 from climpred.prediction import compute_perfect_model
 from climpred.reference import compute_persistence
 
diff --git a/climpred/tests/test_probabilistic.py b/climpred/tests/test_probabilistic.py
index b3a2a94ac..d42bfc335 100644
--- a/climpred/tests/test_probabilistic.py
+++ b/climpred/tests/test_probabilistic.py
@@ -1,10 +1,7 @@
 import numpy as np
 import pytest
 import xarray as xr
-from scipy.stats import norm
 
-from climpred import HindcastEnsemble
-from climpred.bootstrap import bootstrap_hindcast, bootstrap_perfect_model
 from climpred.comparisons import (
     NON_PROBABILISTIC_PM_COMPARISONS,
     PROBABILISTIC_HINDCAST_COMPARISONS,
@@ -118,9 +115,7 @@ def f(x):
 def test_PerfectModelEnsemble_verify_bootstrap_not_nan_probabilistic(
     perfectModelEnsemble_initialized_control, metric, comparison, reference
 ):
-    """
-    Checks that PerfectModelEnsemble.verify() and PerfectModelEnsemble.bootstrap() works without breaking for all probabilistic metrics.
-    """
+    """Test PerfectModelEnsemble.verify/bootstrap() work for probabilistic metrics."""
     pm = perfectModelEnsemble_initialized_control.isel(lead=range(3), init=range(5))
     kwargs = {
         "comparison": comparison,
diff --git a/climpred/tests/test_reference.py b/climpred/tests/test_reference.py
index 31f2e2b0a..49b36bed0 100644
--- a/climpred/tests/test_reference.py
+++ b/climpred/tests/test_reference.py
@@ -23,7 +23,7 @@ def test_HindcastEnsemble_verify_reference(
 def test_PerfectModelEnsemble_verify_persistence_from_first_lead(
     perfectModelEnsemble_initialized_control, comparison
 ):
-    """Test compute_persistence_from_first_lead started with PerfectModel_persistence_from_initialized_lead_0."""
+    """Test compute_persistence_from_first_lead vs compute_persistence."""
     kw = dict(
         metric="mse",
         comparison=comparison,
diff --git a/climpred/tests/test_relative_entropy.py b/climpred/tests/test_relative_entropy.py
index 10784b740..2d9e76298 100644
--- a/climpred/tests/test_relative_entropy.py
+++ b/climpred/tests/test_relative_entropy.py
@@ -1,4 +1,4 @@
-import pytest
+"""Test relative_entropy.py."""
 
 from climpred.graphics import plot_relative_entropy
 from climpred.relative_entropy import (
@@ -8,11 +8,6 @@
 
 from . import requires_eofs
 
-try:
-    from eofs.xarray import Eof
-except ImportError:
-    pass
-
 
 @requires_eofs
 def test_compute_relative_entropy(PM_da_initialized_3d, PM_da_control_3d):
diff --git a/climpred/tests/test_uninitialized.py b/climpred/tests/test_uninitialized.py
index cf9c9dfef..8cf42165d 100644
--- a/climpred/tests/test_uninitialized.py
+++ b/climpred/tests/test_uninitialized.py
@@ -1,4 +1,4 @@
-import numpy as np
+"""Test compute_uninitialized."""
 import pytest
 
 from climpred.constants import VALID_ALIGNMENTS
diff --git a/climpred/tests/test_utils.py b/climpred/tests/test_utils.py
index 00625617e..300b17163 100644
--- a/climpred/tests/test_utils.py
+++ b/climpred/tests/test_utils.py
@@ -1,3 +1,5 @@
+"""Test utils.py."""
+
 import cftime
 import numpy as np
 import pandas as pd
@@ -5,11 +7,8 @@
 import xarray as xr
 from xarray.testing import assert_allclose
 
-from climpred.bootstrap import bootstrap_perfect_model
 from climpred.comparisons import PM_COMPARISONS, __m2c
 from climpred.metrics import DETERMINISTIC_PM_METRICS, __pearson_r
-from climpred.prediction import compute_hindcast, compute_perfect_model
-from climpred.tutorial import load_dataset
 from climpred.utils import (
     convert_time_index,
     find_start_dates_for_given_init,

From 4910de94519a7190d52f97bf1cd4f2a81a591b59 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 16:14:29 +0100
Subject: [PATCH 49/56] fix

---
 .pre-commit-config.yaml       | 12 +++++-------
 climpred/tests/test_checks.py |  1 -
 setup.py                      |  2 +-
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9908a8872..b345b83c4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,18 +17,21 @@ repos:
       hooks:
       - id: black
 
-
   -   repo: https://gitlab.com/pycqa/flake8
       rev: 3.8.4
       hooks:
       - id: flake8
 
-
   -   repo: https://github.com/pre-commit/mirrors-isort
       rev: v5.6.4
       hooks:
       -   id: isort
 
+  - repo: https://github.com/keewis/blackdoc
+    rev: v0.3.4
+    hooks:
+    -   id: blackdoc
+
   -   repo: https://github.com/pre-commit/mirrors-mypy
       rev: v0.910-1
       hooks:
@@ -44,8 +47,3 @@ repos:
             # Dependencies that are typed
             numpy,
           ]
-
-  - repo: https://github.com/keewis/blackdoc
-    rev: v0.3.4
-    hooks:
-    -   id: blackdoc
diff --git a/climpred/tests/test_checks.py b/climpred/tests/test_checks.py
index 0d4b32822..7842d5127 100644
--- a/climpred/tests/test_checks.py
+++ b/climpred/tests/test_checks.py
@@ -1,6 +1,5 @@
 """Testing checks.py."""
 
-import numpy as np
 import pytest
 
 from climpred.checks import (
diff --git a/setup.py b/setup.py
index 8bd5b766c..cdccd7a7b 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
     "io": ["netcdf4"],
     "regridding": [
         "xesmf"
-    ],  # for installation see https://pangeo-xesmf.readthedocs.io/en/latest/installation.html
+    ],  # for installation see https://pangeo-xesmf.readthedocs.io/
     "relative_entropy": ["eofs"],
     "vwmp": ["xrft"],
 }

From 125999ce02cb7daceb55d4ac299a2fed07697e2e Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 16:37:54 +0100
Subject: [PATCH 50/56] fix bias_removal weeks

---
 climpred/bias_removal.py       | 11 +++++------
 climpred/classes.py            |  3 +++
 climpred/tests/test_options.py | 27 +++++++++------------------
 setup.py                       |  6 +++++-
 4 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/climpred/bias_removal.py b/climpred/bias_removal.py
index 25d8709ab..a333c5431 100644
--- a/climpred/bias_removal.py
+++ b/climpred/bias_removal.py
@@ -26,8 +26,6 @@
 
 
 def sub(a, b):
-    print(a, b)
-    # print(a.coords,a.dims,b.coords,b.dims)
     return a - b
 
 
@@ -79,7 +77,6 @@ def _mean_bias_removal_func(hind, bias, dim, how):
     """
     how_operator = sub if how == "additive" else div
     seasonality = OPTIONS["seasonality"]
-    print(hind.dims, bias.dims, dim, seasonality)
 
     with xr.set_options(keep_attrs=True):
         if seasonality == "weekofyear":
@@ -385,10 +382,12 @@ def gaussian_bias_removal(
             bias_removal_func = _std_multiplicative_bias_removal_func_cv
             bias_removal_func_kwargs = dict(obs=hindcast.get_observations(), cv=cv)
 
+    hind = hindcast_test.get_initialized()
+    hind, bias = xr.align(hind, bias)
+
     bias_removed_hind = bias_removal_func(
-        hindcast_test.get_initialized(), bias, "init", **bias_removal_func_kwargs
-    )
-    bias_removed_hind = bias_removed_hind.squeeze(drop=True)
+        hind, bias, "init", **bias_removal_func_kwargs
+    ).squeeze(drop=True)
 
     # remove groupby label from coords
     for c in GROUPBY_SEASONALITIES + ["skill"]:
diff --git a/climpred/classes.py b/climpred/classes.py
index e31a0f06e..f8c960576 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -953,6 +953,9 @@ def _warn_if_chunked_along_init_member_time(self) -> None:
         When more than one chunk to show how to circumvent ``xskillscore`` chunking
         ``ValueError``.
         """
+        print(type(self))
+        if self is None:
+            return
         suggest_one_chunk = []
         for d in self.chunks:
             if d in ["time", "init", "member"]:
diff --git a/climpred/tests/test_options.py b/climpred/tests/test_options.py
index ae902920c..fa2e2b10b 100644
--- a/climpred/tests/test_options.py
+++ b/climpred/tests/test_options.py
@@ -3,27 +3,16 @@
 import climpred
 
 
-# @pytest.mark.xfail(
-#    reason="not properly implemented see https://github.com/pangeo-data/climpred/issues/605"
-# )
 @pytest.mark.parametrize(
-    "cross_validate", [False, pytest.param(True, marks=pytest.mark.xfail)]
-)
+    "cross_validate", [False, True]
+)  # pytest.param(True, marks=pytest.mark.xfail)]
+# )
 def test_seasonality_remove_bias(hindcast_recon_1d_dm, cross_validate):
-    """
-    Test the climpred.set_option(seasonality) changes bias reduction.
-
-    Currently fails for cross_validate bias reduction.
-    """
+    """Test the climpred.set_option(seasonality) changes bias reduction."""
     hindcast = hindcast_recon_1d_dm
     hindcast._datasets["initialized"] = (
         hindcast.get_initialized().resample(init="1MS").interpolate("linear")
     )
-    print(
-        hindcast.get_initialized().coords["init"][:4],
-        hindcast.coords["lead"],
-        hindcast.get_observations().coords["time"][:4],
-    )
 
     alignment = "maximize"
     kw = {
@@ -38,15 +27,17 @@ def test_seasonality_remove_bias(hindcast_recon_1d_dm, cross_validate):
         dayofyear_seasonality = hindcast.remove_bias(
             alignment=alignment, cross_validate=cross_validate
         )
-    print("\n" * 4)
     with climpred.set_options(seasonality="weekofyear"):
         weekofyear_seasonality = hindcast.remove_bias(
             alignment=alignment, cross_validate=cross_validate
         )
-    assert not weekofyear_seasonality.get_initialized().identical(
+
+    assert not dayofyear_seasonality.get_initialized().to_array().isnull().all()
+    assert not weekofyear_seasonality.get_initialized().to_array().isnull().all()
+    assert not weekofyear_seasonality.get_initialized().equals(
         dayofyear_seasonality.get_initialized()
     )
-    assert not weekofyear_seasonality.verify(**kw).identical(
+    assert not weekofyear_seasonality.verify(**kw).equals(
         dayofyear_seasonality.verify(**kw)
     )
 
diff --git a/setup.py b/setup.py
index cdccd7a7b..30c5ebe43 100644
--- a/setup.py
+++ b/setup.py
@@ -12,14 +12,18 @@
     install_requires = f.read().strip().split("\n")
 
 CLASSIFIERS = [
-    "Development Status :: 3 - Alpha",
+    "Development Status :: 5 - Production/Stable",
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
     "Intended Audience :: Science/Research",
+    "Intended Audience :: Education",
+    "Natural Language :: English",
+    "Operating System :: OS Independent",
     "Programming Language :: Python :: 3 :: Only",
     "Programming Language :: Python :: 3.7",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
+    "Topic :: Scientific/Engineering :: Atmospheric Science",
 ]
 
 extras_require = {

From 87eb677801318e0a84e6dd51794821c01194ba5d Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 16:38:22 +0100
Subject: [PATCH 51/56] fix

---
 climpred/tests/test_options.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/climpred/tests/test_options.py b/climpred/tests/test_options.py
index fa2e2b10b..766616360 100644
--- a/climpred/tests/test_options.py
+++ b/climpred/tests/test_options.py
@@ -3,10 +3,7 @@
 import climpred
 
 
-@pytest.mark.parametrize(
-    "cross_validate", [False, True]
-)  # pytest.param(True, marks=pytest.mark.xfail)]
-# )
+@pytest.mark.parametrize("cross_validate", [False, True])
 def test_seasonality_remove_bias(hindcast_recon_1d_dm, cross_validate):
     """Test the climpred.set_option(seasonality) changes bias reduction."""
     hindcast = hindcast_recon_1d_dm

From 0e6c3be8b77a551345fa6abe0274c02cc0097b80 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 16:46:04 +0100
Subject: [PATCH 52/56] fix

---
 climpred/classes.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index f8c960576..e31a0f06e 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -953,9 +953,6 @@ def _warn_if_chunked_along_init_member_time(self) -> None:
         When more than one chunk to show how to circumvent ``xskillscore`` chunking
         ``ValueError``.
         """
-        print(type(self))
-        if self is None:
-            return
         suggest_one_chunk = []
         for d in self.chunks:
             if d in ["time", "init", "member"]:

From 7143406adbf96a23b6339cc01a1e29a953d21da3 Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 16:55:57 +0100
Subject: [PATCH 53/56] fix weekofyear

---
 climpred/bias_removal.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/climpred/bias_removal.py b/climpred/bias_removal.py
index a333c5431..96badb327 100644
--- a/climpred/bias_removal.py
+++ b/climpred/bias_removal.py
@@ -383,7 +383,8 @@ def gaussian_bias_removal(
             bias_removal_func_kwargs = dict(obs=hindcast.get_observations(), cv=cv)
 
     hind = hindcast_test.get_initialized()
-    hind, bias = xr.align(hind, bias)
+    if OPTIONS["seasonality"] == "weekofyear":
+        hind, bias = xr.align(hind, bias)
 
     bias_removed_hind = bias_removal_func(
         hind, bias, "init", **bias_removal_func_kwargs

From 80376f29d29955c90bc0059d34423007ff641834 Mon Sep 17 00:00:00 2001
From: Aaron Spring <aaronspring@users.noreply.github.com>
Date: Sun, 12 Dec 2021 17:11:32 +0100
Subject: [PATCH 54/56] Update reference_forecast.rst

---
 docs/source/reference_forecast.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/reference_forecast.rst b/docs/source/reference_forecast.rst
index 658f57135..e83677318 100644
--- a/docs/source/reference_forecast.rst
+++ b/docs/source/reference_forecast.rst
@@ -38,7 +38,7 @@ You can compute this by passing ``reference="climatology"`` into
 :py:class:`~climpred.classes.PerfectModelEnsemble`
 :py:meth:`~climpred.classes.PerfectModelEnsemble.bootstrap`.
 
-**Uninitialized**: *Uninitialized** ensembles are generated by perturbing initial
+**Uninitialized**: Uninitialized ensembles are generated by perturbing initial
 conditions only at one point in the historical run.
 These are generated via micro (round-off error perturbations) or macro (starting from
 completely different restart files) methods. Uninitialized ensembles are used to

From c23e1a7c2128bdc0c018a2fcb5271243a9703a4e Mon Sep 17 00:00:00 2001
From: Aaron Spring <aaronspring@users.noreply.github.com>
Date: Sun, 12 Dec 2021 17:16:38 +0100
Subject: [PATCH 55/56] Update classes.py

---
 climpred/classes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/climpred/classes.py b/climpred/classes.py
index e31a0f06e..0fa4586d6 100644
--- a/climpred/classes.py
+++ b/climpred/classes.py
@@ -1108,7 +1108,7 @@ def generate_uninitialized(self) -> "PerfectModelEnsemble":
 
         Returns:
             ``uninitialzed`` resampled from ``control`` added
-            to:py:class:`~climpred.classes.PerfectModelEnsemble`
+            to :py:class:`~climpred.classes.PerfectModelEnsemble`
         """
         has_dataset(
             self._datasets["control"], "control", "generate an uninitialized ensemble."

From 1b5ebecda28dd60e7210f680859dc0b8d9a6bb8b Mon Sep 17 00:00:00 2001
From: AS <aaron.spring@mpimet.mpg.de>
Date: Sun, 12 Dec 2021 19:31:30 +0100
Subject: [PATCH 56/56] fix

---
 README.rst                                             | 10 +++-------
 docs/source/api.rst                                    |  1 +
 ...pred.preprocessing.shared.set_integer_time_axis.rst |  6 ++++++
 docs/source/index.rst                                  | 10 +++-------
 docs/source/terminology.rst                            |  6 +++---
 5 files changed, 16 insertions(+), 17 deletions(-)
 create mode 100644 docs/source/api/climpred.preprocessing.shared.set_integer_time_axis.rst

diff --git a/README.rst b/README.rst
index ee6be3490..bb63cb70c 100644
--- a/README.rst
+++ b/README.rst
@@ -12,13 +12,13 @@ Verification of weather and climate forecasts.
     * - docs
       - |docs| |joss| |doi|
     * - tests
-      - |ci| |upstream| |requires| |codecov|
+      - |ci| |upstream| |codecov|
     * - package
-      - |conda| |pypi|
+      - |conda| |conda downloads| |pypi| |pypi downloads|
     * - license
       - |license|
     * - community
-      - |gitter| |contributors| |conda downloads| |pypi downloads|
+      - |gitter| |contributors|
     * - tutorials
       - |gallery| |workshop| |cloud|
 
@@ -42,10 +42,6 @@ Verification of weather and climate forecasts.
     :target: https://github.com/pangeo-data/climpred/actions/workflows/upstream-dev-ci.yml
     :alt: upstream
 
-.. |requires| image:: https://requires.io/github/pangeo-data/climpred/requirements.svg?branch=main
-     :target: https://requires.io/github/pangeo-data/climpred/requirements/?branch=main
-     :alt: Requirements Status
-
 .. |codecov| image:: https://codecov.io/gh/pangeo-data/climpred/branch/main/graph/badge.svg
       :target: https://codecov.io/gh/pangeo-data/climpred
       :alt: coverage
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 873ade31e..5f22adbe3 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -292,6 +292,7 @@ Preprocessing
     load_hindcast
     rename_to_climpred_dims
     rename_SLM_to_climpred_dims
+    set_integer_time_axis
 
 .. currentmodule:: climpred.preprocessing.mpi
 
diff --git a/docs/source/api/climpred.preprocessing.shared.set_integer_time_axis.rst b/docs/source/api/climpred.preprocessing.shared.set_integer_time_axis.rst
new file mode 100644
index 000000000..1c5eeb57d
--- /dev/null
+++ b/docs/source/api/climpred.preprocessing.shared.set_integer_time_axis.rst
@@ -0,0 +1,6 @@
+climpred.preprocessing.shared.set\_integer\_time\_axis
+======================================================
+
+.. currentmodule:: climpred.preprocessing.shared
+
+.. autofunction:: set_integer_time_axis
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 8b47cce64..1e44346e7 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -11,13 +11,13 @@ climpred: verification of weather and climate forecasts
     * - docs
       - |docs| |joss| |doi|
     * - tests
-      - |ci| |upstream| |requires| |codecov|
+      - |ci| |upstream| |codecov|
     * - package
-      - |conda| |pypi|
+      - |conda| |conda downloads| |pypi| |pypi downloads|
     * - license
       - |license|
     * - community
-      - |gitter| |contributors| |conda downloads| |pypi downloads|
+      - |gitter| |contributors|
     * - tutorials
       - |gallery| |workshop| |cloud|
 
@@ -41,10 +41,6 @@ climpred: verification of weather and climate forecasts
     :target: https://github.com/pangeo-data/climpred/actions/workflows/upstream-dev-ci.yml
     :alt: upstream
 
-.. |requires| image:: https://requires.io/github/pangeo-data/climpred/requirements.svg?branch=main
-    :target: https://requires.io/github/pangeo-data/climpred/requirements/?branch=main
-    :alt: Requirements Status
-
 .. |codecov| image:: https://codecov.io/gh/pangeo-data/climpred/branch/main/graph/badge.svg
     :target: https://codecov.io/gh/pangeo-data/climpred
     :alt: codecov
diff --git a/docs/source/terminology.rst b/docs/source/terminology.rst
index 1a1e96b20..5d5021be8 100644
--- a/docs/source/terminology.rst
+++ b/docs/source/terminology.rst
@@ -70,7 +70,7 @@ world [Meehl2013]_ [Pegion2019]_.
 [Murphy1988]_:
 
 .. math::
-    S = \\frac{A_{f} - A_{r}}{A_{p} - A_{r}},
+    S = \frac{A_{f} - A_{r}}{A_{p} - A_{r}},
 
 where :math:`A_{f}`, :math:`A_{p}`, and :math:`A_{r}` represent the accuracy of the
 forecast being assessed, the accuracy of a perfect forecast, and the accuracy of the
@@ -87,13 +87,13 @@ Forecasting
 integrated forward in time, also called re-forcasts.  Depending on the length of time
 of the integration, external forcings may or may not be included.  The longer the
 integration (e.g. decadal vs. daily), the more important it is to include external
-forcing.  [Boer2016]_.  Because they represent so-called forecasts over periods that
+forcing [Boer2016]_.  Because they represent so-called forecasts over periods that
 already occurred, their prediction skill can be evaluated.
 
 *Prediction*: Forecasts initialized from a reconstruction integrated into the future.
 Depending on the length of time of the integration, external forcings may or may not
 be included.  The longer the integration (e.g. decadal vs. daily), the more important
-it is to include external forcing. [Boer2016]_  Because predictions are made into the
+it is to include external forcing [Boer2016]_. Because predictions are made into the
 future, it is necessary to wait until the forecast occurs before one can quantify the
 skill of the forecast.